/*
Copyright 2015 The Kubernetes Authors.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package benchmark

import (
	"bytes"
	"encoding/json"
	"flag"
	"fmt"
	"math"
	"os"
	"path"
	"sort"
	"strings"
	"sync"
	"time"

	v1 "k8s.io/api/core/v1"
	"k8s.io/apimachinery/pkg/api/resource"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
	"k8s.io/apimachinery/pkg/labels"
	"k8s.io/apimachinery/pkg/util/sets"
	"k8s.io/client-go/informers"
	coreinformers "k8s.io/client-go/informers/core/v1"
	restclient "k8s.io/client-go/rest"
	"k8s.io/client-go/tools/cache"
	"k8s.io/component-base/featuregate"
	"k8s.io/component-base/metrics/legacyregistry"
	"k8s.io/component-base/metrics/testutil"
	"k8s.io/klog/v2"
	kubeschedulerconfigv1 "k8s.io/kube-scheduler/config/v1"
	apiservertesting "k8s.io/kubernetes/cmd/kube-apiserver/app/testing"
	"k8s.io/kubernetes/pkg/features"
	"k8s.io/kubernetes/pkg/scheduler/apis/config"
	kubeschedulerscheme "k8s.io/kubernetes/pkg/scheduler/apis/config/scheme"
	frameworkruntime "k8s.io/kubernetes/pkg/scheduler/framework/runtime"
	schedutil "k8s.io/kubernetes/pkg/scheduler/util"
	"k8s.io/kubernetes/test/integration/framework"
	"k8s.io/kubernetes/test/integration/util"
	testutils "k8s.io/kubernetes/test/utils"
	"k8s.io/kubernetes/test/utils/ktesting"
)

const (
	dateFormat               = "2006-01-02T15:04:05Z"
	testNamespace            = "sched-test"
	setupNamespace           = "sched-setup"
	throughputSampleInterval = time.Second
)

var dataItemsDir = flag.String("data-items-dir", "", "destination directory for storing generated data items for perf dashboard")

var runID = time.Now().Format(dateFormat)

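// newDefaultComponentConfig decodes an empty, versioned KubeSchedulerConfiguration,
// which yields a configuration with defaults applied.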
func newDefaultComponentConfig() (*config.KubeSchedulerConfiguration, error) {
	gvk := kubeschedulerconfigv1.SchemeGroupVersion.WithKind("KubeSchedulerConfiguration")
	cfg := config.KubeSchedulerConfiguration{}
	_, _, err := kubeschedulerscheme.Codecs.UniversalDecoder().Decode(nil, &gvk, &cfg)
	if err != nil {
		return nil, err
	}
	return &cfg, nil
}

// mustSetupCluster starts the following components:
// - k8s api server
// - scheduler
// - some of the kube-controller-manager controllers
//
// It returns the shared informer factory and a child test context; the started
// components are torn down through that context's cleanup hooks and cancellation.
// Notes on rate limiter:
//   - client rate limit is set to 5000.
func mustSetupCluster(tCtx ktesting.TContext, config *config.KubeSchedulerConfiguration, enabledFeatures map[featuregate.Feature]bool, outOfTreePluginRegistry frameworkruntime.Registry) (informers.SharedInformerFactory, ktesting.TContext) {
	// No alpha APIs (overrides api/all=true in https://github.com/kubernetes/kubernetes/blob/d647d19f6aef811bace300eec96a67644ff303d4/staging/src/k8s.io/apiextensions-apiserver/pkg/cmd/server/testing/testserver.go#L136),
	// except for the DRA API group when needed.
	runtimeConfig := []string{"api/alpha=false"}
	if enabledFeatures[features.DynamicResourceAllocation] {
		runtimeConfig = append(runtimeConfig, "resource.k8s.io/v1alpha3=true")
	}
	customFlags := []string{
		// Disable ServiceAccount admission plugin as we don't have a serviceaccount controller running.
		"--disable-admission-plugins=ServiceAccount,TaintNodesByCondition,Priority",
		"--runtime-config=" + strings.Join(runtimeConfig, ","),
	}
	serverOpts := apiservertesting.NewDefaultTestServerOptions()
	// Timeout sufficiently long to handle deleting pods of the largest test cases.
	serverOpts.RequestTimeout = 10 * time.Minute
	server, err := apiservertesting.StartTestServer(tCtx, serverOpts, customFlags, framework.SharedEtcd())
	if err != nil {
		tCtx.Fatalf("start apiserver: %v", err)
	}
	// Cleanup will be in reverse order: first the clients by canceling the
	// child context (happens automatically), then the server.
	tCtx.Cleanup(server.TearDownFn)
	tCtx = ktesting.WithCancel(tCtx)

	// TODO: client connection configuration, such as QPS or Burst, is configurable in theory.
	// It could be derived from `config`; support this once a test case depends on such configuration.
	cfg := restclient.CopyConfig(server.ClientConfig)
	cfg.QPS = 5000.0
	cfg.Burst = 5000

	// Use the default component config if config is nil.
	if config == nil {
		var err error
		config, err = newDefaultComponentConfig()
		if err != nil {
			tCtx.Fatalf("Error creating default component config: %v", err)
		}
	}

	tCtx = ktesting.WithRESTConfig(tCtx, cfg)

	// Not all config options are effective: only those mostly related to scheduler
	// performance are applied when starting the scheduler; most of them are defined
	// in `scheduler.schedulerOptions`.
	_, informerFactory := util.StartScheduler(tCtx, tCtx.Client(), cfg, config, outOfTreePluginRegistry)
	util.StartFakePVController(tCtx, tCtx.Client(), informerFactory)
	runGC := util.CreateGCController(tCtx, tCtx, *cfg, informerFactory)
	runNS := util.CreateNamespaceController(tCtx, tCtx, *cfg, informerFactory)

	runResourceClaimController := func() {}
	if enabledFeatures[features.DynamicResourceAllocation] {
		// Testing of DRA with inline resource claims depends on this
		// controller for creating and removing ResourceClaims.
		runResourceClaimController = util.CreateResourceClaimController(tCtx, tCtx, tCtx.Client(), informerFactory)
	}

	informerFactory.Start(tCtx.Done())
	informerFactory.WaitForCacheSync(tCtx.Done())
	go runGC()
	go runNS()
	go runResourceClaimController()

	return informerFactory, tCtx
}

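// isAttempted returns true if the scheduler has attempted to schedule the pod,
// i.e. the pod has a PodScheduled condition, regardless of that condition's status.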
func isAttempted(pod *v1.Pod) bool {
	for _, cond := range pod.Status.Conditions {
		if cond.Type == v1.PodScheduled {
			return true
		}
	}
	return false
}

// getScheduledPods returns the lists of scheduled, attempted but unschedulable,
// and unattempted pods in the specified namespaces.
// A label selector can be used to filter the pods further.
// Note that specifying no namespaces matches all namespaces.
func getScheduledPods(podInformer coreinformers.PodInformer, labelSelector map[string]string, namespaces ...string) ([]*v1.Pod, []*v1.Pod, []*v1.Pod, error) {
	pods, err := podInformer.Lister().List(labels.Everything())
	if err != nil {
		return nil, nil, nil, err
	}

	s := sets.New(namespaces...)
	scheduled := make([]*v1.Pod, 0, len(pods))
	attempted := make([]*v1.Pod, 0, len(pods))
	unattempted := make([]*v1.Pod, 0, len(pods))
	for i := range pods {
		pod := pods[i]
		if (len(s) == 0 || s.Has(pod.Namespace)) && labelsMatch(pod.Labels, labelSelector) {
			if len(pod.Spec.NodeName) > 0 {
				scheduled = append(scheduled, pod)
			} else if isAttempted(pod) {
				attempted = append(attempted, pod)
			} else {
				unattempted = append(unattempted, pod)
			}
		}
	}
	return scheduled, attempted, unattempted, nil
}

// DataItem is the data point.
type DataItem struct {
	// Data is a map from bucket to real data point (e.g. "Perc90" -> 23.5). Notice
	// that all data items with the same label combination should have the same buckets.
	Data map[string]float64 `json:"data"`
	// Unit is the data unit. Notice that all data items with the same label combination
	// should have the same unit.
	Unit string `json:"unit"`
	// Labels holds the labels of the data item.
	Labels map[string]string `json:"labels,omitempty"`

	// progress contains the number of scheduled pods over time.
	progress []podScheduling
	start    time.Time
}

// DataItems is the data point set. It is the struct that perf dashboard expects.
type DataItems struct {
	Version   string     `json:"version"`
	DataItems []DataItem `json:"dataItems"`
}

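// podScheduling is a single progress sample taken by the throughput collector:
// scheduling attempts and completions as reported by the scheduler metrics, plus
// the scheduled pod count and rate observed through the informer.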
type podScheduling struct {
	ts            time.Time
	attempts      int
	completed     int
	observedTotal int
	observedRate  float64
}

// makeBasePod creates a Pod object to be used as a template.
func makeBasePod() *v1.Pod {
	basePod := &v1.Pod{
		ObjectMeta: metav1.ObjectMeta{
			GenerateName: "pod-",
		},
		Spec: testutils.MakePodSpec(),
	}
	return basePod
}

// makeBaseNode creates a Node object with the given nodeNamePrefix to be used as a template.
func makeBaseNode(nodeNamePrefix string) *v1.Node {
	return &v1.Node{
		ObjectMeta: metav1.ObjectMeta{
			GenerateName: nodeNamePrefix,
		},
		Status: v1.NodeStatus{
			Capacity: v1.ResourceList{
				v1.ResourcePods:   *resource.NewQuantity(110, resource.DecimalSI),
				v1.ResourceCPU:    resource.MustParse("4"),
				v1.ResourceMemory: resource.MustParse("32Gi"),
			},
			Phase: v1.NodeRunning,
			Conditions: []v1.NodeCondition{
				{Type: v1.NodeReady, Status: v1.ConditionTrue},
			},
		},
	}
}

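// dataItems2JSONFile writes the data items as indented JSON into a file named
// "<namePrefix>_<timestamp>.json", placed inside -data-items-dir when that flag is set.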
func dataItems2JSONFile(dataItems DataItems, namePrefix string) error {
	// perfdash expects all data items to have the same set of labels.  It
	// then renders drop-down buttons for each label with all values found
	// for each label. If we were to store data items that don't have a
	// certain label, then perfdash will never show those data items
	// because it will only show data items that have the currently
	// selected label value. To avoid that, we collect all labels used
	// anywhere and then add missing labels with "not applicable" as value.
	labels := sets.New[string]()
	for _, item := range dataItems.DataItems {
		for label := range item.Labels {
			labels.Insert(label)
		}
	}
	for _, item := range dataItems.DataItems {
		for label := range labels {
			if _, ok := item.Labels[label]; !ok {
				item.Labels[label] = "not applicable"
			}
		}
	}

	b, err := json.Marshal(dataItems)
	if err != nil {
		return err
	}

	destFile := fmt.Sprintf("%v_%v.json", namePrefix, time.Now().Format(dateFormat))
	if *dataItemsDir != "" {
		// Ensure the "dataItemsDir" path is valid.
		if err := os.MkdirAll(*dataItemsDir, 0750); err != nil {
			return fmt.Errorf("dataItemsDir path %v does not exist and cannot be created: %v", *dataItemsDir, err)
		}
		destFile = path.Join(*dataItemsDir, destFile)
	}
	formatted := &bytes.Buffer{}
	if err := json.Indent(formatted, b, "", "  "); err != nil {
		return fmt.Errorf("indenting error: %v", err)
	}
	return os.WriteFile(destFile, formatted.Bytes(), 0644)
}

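// dataFilename places destFile inside -data-items-dir (creating that directory if
// necessary) and returns the resulting path; destFile is returned unchanged when no
// directory is configured.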
func dataFilename(destFile string) (string, error) {
	if *dataItemsDir != "" {
		// Ensure the "dataItemsDir" path is valid.
		if err := os.MkdirAll(*dataItemsDir, 0750); err != nil {
			return "", fmt.Errorf("dataItemsDir path %v does not exist and cannot be created: %w", *dataItemsDir, err)
		}
		destFile = path.Join(*dataItemsDir, destFile)
	}
	return destFile, nil
}

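// labelValues pairs a metric label name with the label values to collect for it.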
type labelValues struct {
	Label  string
	Values []string
}

// metricsCollectorConfig is the config to be marshalled to a YAML config file.
// NOTE: The mapping here means only one filter is supported per metric; any value
// in the list of `Values` may be collected.
type metricsCollectorConfig struct {
	Metrics map[string][]*labelValues
}

// metricsCollector collects metrics from the legacyregistry.DefaultGatherer.Gather() endpoint.
// Currently only Histogram metrics are supported.
type metricsCollector struct {
	*metricsCollectorConfig
	labels map[string]string
}

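// newMetricsCollector creates a metricsCollector that attaches the given labels to
// every DataItem it produces.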
func newMetricsCollector(config *metricsCollectorConfig, labels map[string]string) *metricsCollector {
	return &metricsCollector{
		metricsCollectorConfig: config,
		labels:                 labels,
	}
}

func (mc *metricsCollector) init() error {
	// Reset the metrics so that the measurements do not interfere with those collected during the previous steps.
	m, err := legacyregistry.DefaultGatherer.Gather()
	if err != nil {
		return fmt.Errorf("failed to gather metrics to reset: %w", err)
	}
	for _, mFamily := range m {
		// Reset only metrics defined in the collector.
		if _, ok := mc.Metrics[mFamily.GetName()]; ok {
			mFamily.Reset()
		}
	}
	return nil
}

func (*metricsCollector) run(tCtx ktesting.TContext) {
	// metricsCollector doesn't need to start before the tests, so there is nothing to do here.
}

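// collect produces one DataItem per configured metric, or one per label-value
// combination when filters are configured for that metric.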
func (mc *metricsCollector) collect() []DataItem {
	var dataItems []DataItem
	for metric, labelValsSlice := range mc.Metrics {
		// If no filter is specified, aggregate all the metrics within the same metric family.
		if labelValsSlice == nil {
			dataItem := collectHistogramVec(metric, mc.labels, nil)
			if dataItem != nil {
				dataItems = append(dataItems, *dataItem)
			}
		} else {
			for _, lvMap := range uniqueLVCombos(labelValsSlice) {
				dataItem := collectHistogramVec(metric, mc.labels, lvMap)
				if dataItem != nil {
					dataItems = append(dataItems, *dataItem)
				}
			}
		}
	}
	return dataItems
}

// uniqueLVCombos lists all possible label-value combinations.
// E.g., if there are 3 labelValues, each of which has 2 values, the result would be
// {A: a1, B: b1, C: c1}, {A: a2, B: b1, C: c1}, {A: a1, B: b2, C: c1}, ... (2^3 = 8 combinations).
func uniqueLVCombos(lvs []*labelValues) []map[string]string {
	if len(lvs) == 0 {
		return []map[string]string{{}}
	}

	remainingCombos := uniqueLVCombos(lvs[1:])

	results := make([]map[string]string, 0)

	current := lvs[0]
	for _, value := range current.Values {
		for _, combo := range remainingCombos {
			newCombo := make(map[string]string, len(combo)+1)
			for k, v := range combo {
				newCombo[k] = v
			}
			newCombo[current.Label] = value
			results = append(results, newCombo)
		}
	}
	return results
}

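// collectHistogramVec gathers the named histogram metric (optionally filtered by
// lvMap), computes its 50/90/95/99th percentiles and average, and converts the
// values from seconds to milliseconds. It returns nil if the metric cannot be
// gathered, fails validation, or has no samples.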
func collectHistogramVec(metric string, labels map[string]string, lvMap map[string]string) *DataItem {
	vec, err := testutil.GetHistogramVecFromGatherer(legacyregistry.DefaultGatherer, metric, lvMap)
	if err != nil {
		klog.Error(err)
		return nil
	}

	if err := vec.Validate(); err != nil {
		klog.ErrorS(err, "Validation of the HistogramVec failed, the data for this metric won't be stored in a benchmark result file", "metric", metric, "labels", labels)
		return nil
	}

	if vec.GetAggregatedSampleCount() == 0 {
		return nil
	}

	q50 := vec.Quantile(0.50)
	q90 := vec.Quantile(0.90)
	q95 := vec.Quantile(0.95)
	q99 := vec.Quantile(0.99)
	avg := vec.Average()

	msFactor := float64(time.Second) / float64(time.Millisecond)

	// Copy labels and add "Metric" label for this metric.
	labelMap := map[string]string{"Metric": metric}
	for k, v := range labels {
		labelMap[k] = v
	}
	for k, v := range lvMap {
		labelMap[k] = v
	}
	return &DataItem{
		Labels: labelMap,
		Data: map[string]float64{
			"Perc50":  q50 * msFactor,
			"Perc90":  q90 * msFactor,
			"Perc95":  q95 * msFactor,
			"Perc99":  q99 * msFactor,
			"Average": avg * msFactor,
		},
		Unit: "ms",
	}
}

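// throughputCollector measures scheduling throughput by watching pods through the
// shared informer and periodically sampling how many pods matching labelSelector
// in the given namespaces have been scheduled.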
type throughputCollector struct {
	podInformer           coreinformers.PodInformer
	schedulingThroughputs []float64
	labelSelector         map[string]string
	resultLabels          map[string]string
	namespaces            sets.Set[string]
	errorMargin           float64

	progress []podScheduling
	start    time.Time
}

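// newThroughputCollector creates a throughputCollector for pods matching
// labelSelector in the given namespaces. resultLabels get attached to the summary
// data item; errorMargin > 0 enables warnings about irregular sampling intervals.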
func newThroughputCollector(podInformer coreinformers.PodInformer, resultLabels map[string]string, labelSelector map[string]string, namespaces []string, errorMargin float64) *throughputCollector {
	return &throughputCollector{
		podInformer:   podInformer,
		labelSelector: labelSelector,
		resultLabels:  resultLabels,
		namespaces:    sets.New(namespaces...),
		errorMargin:   errorMargin,
	}
}

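// init is a no-op; sampling only starts in run.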
func (tc *throughputCollector) init() error {
	return nil
}

func (tc *throughputCollector) run(tCtx ktesting.TContext) {
	// The collector is based on informer cache events instead of periodically listing pods because:
	// - polling causes more overhead
	// - it does not work when pods get created, scheduled and deleted quickly
	//
	// Normally, informers cannot be used to observe state changes reliably.
	// They only guarantee that *some* updates get reported, but not *all* of them.
	// But in scheduler_perf, the scheduler and the test share the same informer,
	// therefore we are guaranteed to see a new pod without NodeName (because
	// that is what the scheduler needs to see to schedule it) and then the updated
	// pod with NodeName (because nothing makes further changes to it).
	var mutex sync.Mutex
	scheduledPods := 0
	getScheduledPods := func() int {
		mutex.Lock()
		defer mutex.Unlock()
		return scheduledPods
	}
	onPodChange := func(oldObj, newObj any) {
		oldPod, newPod, err := schedutil.As[*v1.Pod](oldObj, newObj)
		if err != nil {
			tCtx.Errorf("unexpected pod events: %v", err)
			return
		}

		if !tc.namespaces.Has(newPod.Namespace) || !labelsMatch(newPod.Labels, tc.labelSelector) {
			return
		}

		mutex.Lock()
		defer mutex.Unlock()
		if (oldPod == nil || oldPod.Spec.NodeName == "") && newPod.Spec.NodeName != "" {
			// Got scheduled.
			scheduledPods++
		}
	}
	handle, err := tc.podInformer.Informer().AddEventHandler(cache.ResourceEventHandlerFuncs{
		AddFunc: func(obj any) {
			onPodChange(nil, obj)
		},
		UpdateFunc: func(oldObj, newObj any) {
			onPodChange(oldObj, newObj)
		},
	})
	if err != nil {
		tCtx.Fatalf("register pod event handler: %v", err)
	}
	defer func() {
		tCtx.ExpectNoError(tc.podInformer.Informer().RemoveEventHandler(handle), "remove event handler")
	}()

	// Waiting for the initial sync didn't work, `handle.HasSynced` always returned
	// false - perhaps because the event handlers get added to a running informer.
	// That's okay(ish), throughput is typically measured within an empty namespace.
	//
	// syncTicker := time.NewTicker(time.Millisecond)
	// defer syncTicker.Stop()
	// for {
	// 	select {
	// 	case <-syncTicker.C:
	// 		if handle.HasSynced() {
	// 			break
	// 		}
	// 	case <-tCtx.Done():
	// 		return
	// 	}
	// }
	tCtx.Logf("Started pod throughput collector for namespace(s) %s, %d pods scheduled so far", sets.List(tc.namespaces), getScheduledPods())

	lastScheduledCount := getScheduledPods()
	ticker := time.NewTicker(throughputSampleInterval)
	defer ticker.Stop()
	lastSampleTime := time.Now()
	started := false
	skipped := 0

	for {
		select {
		case <-tCtx.Done():
			return
		case <-ticker.C:
			now := time.Now()

			scheduled := getScheduledPods()
			// Only do sampling if the number of scheduled pods is greater than zero.
			if scheduled == 0 {
				continue
			}
			if !started {
				started = true
				// Skip the initial sample. It's likely to be an outlier because
				// sampling and creating pods get started independently.
				lastScheduledCount = scheduled
				lastSampleTime = now
				tc.start = now
				continue
			}

			newScheduled := scheduled - lastScheduledCount
			if newScheduled == 0 {
				// Throughput would be zero for the interval.
				// Instead of recording 0 pods/s, keep waiting
				// until we see at least one additional pod
				// being scheduled.
				skipped++
				continue
			}

			// This should be roughly equal to
			// throughputSampleInterval * (skipped + 1), but we
			// don't count on that because the goroutine might not
			// be scheduled immediately when the timer
			// triggers. Instead we track the actual time stamps.
			duration := now.Sub(lastSampleTime)
			expectedDuration := throughputSampleInterval * time.Duration(skipped+1)
			errorMargin := (duration - expectedDuration).Seconds() / expectedDuration.Seconds() * 100
			if tc.errorMargin > 0 && math.Abs(errorMargin) > tc.errorMargin {
				// This might affect the result, report it.
				klog.Infof("WARNING: Expected throughput collector to sample at regular time intervals. The %d most recent intervals took %s instead of %s, a difference of %0.1f%%.", skipped+1, duration, expectedDuration, errorMargin)
			}

			// To keep percentiles accurate, we have to record multiple samples with the same
			// throughput value if we skipped some intervals.
			throughput := float64(newScheduled) / duration.Seconds()
			for i := 0; i <= skipped; i++ {
				tc.schedulingThroughputs = append(tc.schedulingThroughputs, throughput)
			}

			// Record the metric sample.
			counters, err := testutil.GetCounterValuesFromGatherer(legacyregistry.DefaultGatherer, "scheduler_schedule_attempts_total", map[string]string{"profile": "default-scheduler"}, "result")
			if err != nil {
				klog.Error(err)
			}
			tc.progress = append(tc.progress, podScheduling{
				ts:            now,
				attempts:      int(counters["unschedulable"] + counters["error"] + counters["scheduled"]),
				completed:     int(counters["scheduled"]),
				observedTotal: scheduled,
				observedRate:  throughput,
			})

			lastScheduledCount = scheduled
			klog.Infof("%d pods have been scheduled successfully", lastScheduledCount)
			skipped = 0
			lastSampleTime = now
		}
	}
}

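// collect returns a single DataItem summarizing the sampled throughputs with their
// average and percentiles, in pods/s.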
func (tc *throughputCollector) collect() []DataItem {
	throughputSummary := DataItem{
		Labels:   tc.resultLabels,
		progress: tc.progress,
		start:    tc.start,
	}
	if length := len(tc.schedulingThroughputs); length > 0 {
		sort.Float64s(tc.schedulingThroughputs)
		sum := 0.0
		for i := range tc.schedulingThroughputs {
			sum += tc.schedulingThroughputs[i]
		}

		throughputSummary.Labels["Metric"] = "SchedulingThroughput"
		throughputSummary.Data = map[string]float64{
			"Average": sum / float64(length),
			"Perc50":  tc.schedulingThroughputs[int(math.Ceil(float64(length*50)/100))-1],
			"Perc90":  tc.schedulingThroughputs[int(math.Ceil(float64(length*90)/100))-1],
			"Perc95":  tc.schedulingThroughputs[int(math.Ceil(float64(length*95)/100))-1],
			"Perc99":  tc.schedulingThroughputs[int(math.Ceil(float64(length*99)/100))-1],
		}
		throughputSummary.Unit = "pods/s"
	}

	return []DataItem{throughputSummary}
}