From 62ff0c0b39943b7fa82588ae2c37e72c4e55bb18 Mon Sep 17 00:00:00 2001 From: Timofei Larkin Date: Tue, 28 Oct 2025 18:28:03 +0300 Subject: [PATCH 1/2] [api,lineage] Ensure node-local traffic Since 0.37, many requests to the k8s API now go through a mutating webhook (lineage-controller-webhook). Since the lineage webhook makes multiple requests to the k8s API and, indirectly, to the Cozystack API server, each request for, e.g., creating a secret now causes a lot of chatter between the webhook, the k8s API, and the Cozystack API. When this happens cross-node or, worse yet, cross-zone, this can blow up the latency for simple requests. This patch changes the Cozystack API to a DaemonSet targeting controlplane nodes, configures its service for a `Local` internal traffic policy and adds environment variables indicating that the k8s API server is to be found at `<hostIP>:6443`, **not only for the Cozystack API, but also for the lineage-controller-webhook.** This is a valid configuration in most scenarios, including the default installation method on top of Talos Linux in Cozystack. However, if this is not valid in your environment, you must now set the values `.lineageControllerWebhook.localK8sAPIEndpoint.enabled` and `.cozystackAPI.localK8sAPIEndpoint.enabled` to `false` in the respective system Helm releases. ```release-note [api,lineage] Configure all chatter between the Lineage webhook, the Cozystack API server and the Kubernetes API server to be confined to a single controlplane node, improving k8s API latency. 
``` Signed-off-by: Timofei Larkin --- .../cozystack-api/templates/apiservice.yaml | 6 ++- .../cozystack-api/templates/certmanager.yaml | 45 +++++++++++++++++++ .../cozystack-api/templates/deployment.yaml | 37 ++++++++++++++- .../cozystack-api/templates/service.yaml | 5 ++- packages/system/cozystack-api/values.yaml | 3 ++ .../templates/daemonset.yaml | 10 +++++ .../lineage-controller-webhook/values.yaml | 2 + 7 files changed, 104 insertions(+), 4 deletions(-) create mode 100644 packages/system/cozystack-api/templates/certmanager.yaml diff --git a/packages/system/cozystack-api/templates/apiservice.yaml b/packages/system/cozystack-api/templates/apiservice.yaml index d0ab1185..3cd3665b 100644 --- a/packages/system/cozystack-api/templates/apiservice.yaml +++ b/packages/system/cozystack-api/templates/apiservice.yaml @@ -1,9 +1,10 @@ apiVersion: apiregistration.k8s.io/v1 kind: APIService metadata: + annotations: + cert-manager.io/inject-ca-from: "{{ .Release.Namespace }}/cozystack-api" name: v1alpha1.apps.cozystack.io spec: - insecureSkipTLSVerify: true group: apps.cozystack.io groupPriorityMinimum: 1000 versionPriority: 15 @@ -15,9 +16,10 @@ spec: apiVersion: apiregistration.k8s.io/v1 kind: APIService metadata: + annotations: + cert-manager.io/inject-ca-from: "{{ .Release.Namespace }}/cozystack-api" name: v1alpha1.core.cozystack.io spec: - insecureSkipTLSVerify: true group: core.cozystack.io groupPriorityMinimum: 1000 versionPriority: 15 diff --git a/packages/system/cozystack-api/templates/certmanager.yaml b/packages/system/cozystack-api/templates/certmanager.yaml new file mode 100644 index 00000000..def27bd1 --- /dev/null +++ b/packages/system/cozystack-api/templates/certmanager.yaml @@ -0,0 +1,45 @@ +apiVersion: cert-manager.io/v1 +kind: Issuer +metadata: + name: cozystack-api-selfsigned + namespace: {{ .Release.Namespace }} +spec: + selfSigned: {} +--- +apiVersion: cert-manager.io/v1 +kind: Certificate +metadata: + name: cozystack-api-ca + namespace: {{ 
.Release.Namespace }} +spec: + secretName: cozystack-api-ca + duration: 43800h # 5 years + commonName: cozystack-api-ca + issuerRef: + name: cozystack-api-selfsigned + isCA: true +--- +apiVersion: cert-manager.io/v1 +kind: Issuer +metadata: + name: cozystack-api-ca + namespace: {{ .Release.Namespace }} +spec: + ca: + secretName: cozystack-api-ca +--- +apiVersion: cert-manager.io/v1 +kind: Certificate +metadata: + name: cozystack-api + namespace: {{ .Release.Namespace }} +spec: + secretName: cozystack-api-cert + duration: 8760h + renewBefore: 720h + issuerRef: + name: cozystack-api-ca + commonName: cozystack-api + dnsNames: + - cozystack-api + - cozystack-api.{{ .Release.Namespace }}.svc diff --git a/packages/system/cozystack-api/templates/deployment.yaml b/packages/system/cozystack-api/templates/deployment.yaml index 46779d2b..1a63a0e0 100644 --- a/packages/system/cozystack-api/templates/deployment.yaml +++ b/packages/system/cozystack-api/templates/deployment.yaml @@ -1,12 +1,18 @@ apiVersion: apps/v1 +{{- if .Values.cozystackAPI.localK8sAPIEndpoint.enabled }} +kind: DaemonSet +{{- else }} kind: Deployment +{{- end }} metadata: name: cozystack-api namespace: cozy-system labels: app: cozystack-api spec: - replicas: 2 + {{- if not .Values.cozystackAPI.localK8sAPIEndpoint.enabled }} + replicas: {{ .Values.cozystackAPI.replicas }} + {{- end }} selector: matchLabels: app: cozystack-api @@ -16,6 +22,35 @@ spec: app: cozystack-api spec: serviceAccountName: cozystack-api + {{- if .Values.cozystackAPI.localK8sAPIEndpoint.enabled }} + nodeSelector: + node-role.kubernetes.io/control-plane: "" + {{- end }} containers: - name: cozystack-api + args: + - --tls-cert-file=/tmp/cozystack-api-certs/tls.crt + - --tls-private-key-file=/tmp/cozystack-api-certs/tls.key + {{- if .Values.cozystackAPI.localK8sAPIEndpoint.enabled }} + env: + - name: KUBERNETES_SERVICE_HOST + valueFrom: + fieldRef: + apiVersion: v1 + fieldPath: status.hostIP + - name: KUBERNETES_SERVICE_PORT + value: "6443" + 
{{- end }} image: "{{ .Values.cozystackAPI.image }}" + ports: + - containerPort: 443 + name: https + volumeMounts: + - name: cozystack-api-certs + mountPath: /tmp/cozystack-api-certs + readOnly: true + volumes: + - name: cozystack-api-certs + secret: + secretName: cozystack-api-cert + defaultMode: 0400 diff --git a/packages/system/cozystack-api/templates/service.yaml b/packages/system/cozystack-api/templates/service.yaml index 2dcd618b..abe67abc 100644 --- a/packages/system/cozystack-api/templates/service.yaml +++ b/packages/system/cozystack-api/templates/service.yaml @@ -4,9 +4,12 @@ metadata: name: cozystack-api namespace: cozy-system spec: + {{- if .Values.cozystackAPI.localK8sAPIEndpoint.enabled }} + internalTrafficPolicy: Local + {{- end }} ports: - port: 443 protocol: TCP - targetPort: 443 + targetPort: https selector: app: cozystack-api diff --git a/packages/system/cozystack-api/values.yaml b/packages/system/cozystack-api/values.yaml index 1b68eff4..77b4d7a8 100644 --- a/packages/system/cozystack-api/values.yaml +++ b/packages/system/cozystack-api/values.yaml @@ -1,2 +1,5 @@ cozystackAPI: image: ghcr.io/cozystack/cozystack/cozystack-api:v0.37.0@sha256:19d89e8afb90ce38ab7e42ecedfc28402f7c0b56f30957db957c5415132ff6ca + localK8sAPIEndpoint: + enabled: true + replicas: 2 diff --git a/packages/system/lineage-controller-webhook/templates/daemonset.yaml b/packages/system/lineage-controller-webhook/templates/daemonset.yaml index 177bcd8b..22074e1d 100644 --- a/packages/system/lineage-controller-webhook/templates/daemonset.yaml +++ b/packages/system/lineage-controller-webhook/templates/daemonset.yaml @@ -26,6 +26,16 @@ spec: containers: - name: lineage-controller-webhook image: "{{ .Values.lineageControllerWebhook.image }}" + {{- if .Values.lineageControllerWebhook.localK8sAPIEndpoint.enabled }} + env: + - name: KUBERNETES_SERVICE_HOST + valueFrom: + fieldRef: + apiVersion: v1 + fieldPath: status.hostIP + - name: KUBERNETES_SERVICE_PORT + value: "6443" + {{- end }} 
args: {{- if .Values.lineageControllerWebhook.debug }} - --zap-log-level=debug diff --git a/packages/system/lineage-controller-webhook/values.yaml b/packages/system/lineage-controller-webhook/values.yaml index 068de2d6..518ff29c 100644 --- a/packages/system/lineage-controller-webhook/values.yaml +++ b/packages/system/lineage-controller-webhook/values.yaml @@ -1,3 +1,5 @@ lineageControllerWebhook: image: ghcr.io/cozystack/cozystack/lineage-controller-webhook:v0.37.0@sha256:845b8e68cbc277c2303080bcd55597e4334610d396dad258ad56fd906530acc3 debug: false + localK8sAPIEndpoint: + enabled: true From 7cbe564ff1f7f4030d1606551f398a0509884e31 Mon Sep 17 00:00:00 2001 From: Timofei Larkin Date: Wed, 29 Oct 2025 08:56:10 +0300 Subject: [PATCH 2/2] [controller] Remove crdmem, handle DaemonSet This patch drops the custom caching of the Cozystack resource definitions in favor of the informer cache and adds a flag to the Cozystack controller to select whether it restarts the cozystack-api Deployment or the cozystack-api DaemonSet. In line with the new default of having the lineage webhook and the Cozystack API use a local endpoint for the k8s API, the Cozystack controller now also defaults to restarting a Cozystack API DaemonSet instead of a Deployment. To revert to the old behavior, disable the local k8s API endpoint on the webhook and the Cozystack API and set the `cozystackController.cozystackAPIKind` value in the Cozystack controller system Helm chart to "Deployment". ```release-note [controller] Use informer cache instead of the older bespoke implementation and add support for running the Cozystack API as a DaemonSet. 
``` Signed-off-by: Timofei Larkin --- cmd/cozystack-controller/main.go | 12 +- .../cozystackresource_controller.go | 201 +++++------------- .../templates/deployment.yaml | 3 + .../cozystack-controller/templates/role.yaml | 2 +- .../system/cozystack-controller/values.yaml | 1 + 5 files changed, 72 insertions(+), 147 deletions(-) diff --git a/cmd/cozystack-controller/main.go b/cmd/cozystack-controller/main.go index c2ceb451..737c7775 100644 --- a/cmd/cozystack-controller/main.go +++ b/cmd/cozystack-controller/main.go @@ -69,6 +69,7 @@ func main() { var telemetryEndpoint string var telemetryInterval string var cozystackVersion string + var reconcileDeployment bool var tlsOpts []func(*tls.Config) flag.StringVar(&metricsAddr, "metrics-bind-address", "0", "The address the metrics endpoint binds to. "+ "Use :8443 for HTTPS or :8080 for HTTP, or leave as 0 to disable the metrics service.") @@ -88,6 +89,8 @@ func main() { "Interval between telemetry data collection (e.g. 15m, 1h)") flag.StringVar(&cozystackVersion, "cozystack-version", "unknown", "Version of Cozystack") + flag.BoolVar(&reconcileDeployment, "reconcile-deployment", false, + "If set, the Cozystack API server is assumed to run as a Deployment, else as a DaemonSet.") opts := zap.Options{ Development: false, } @@ -213,9 +216,14 @@ func main() { os.Exit(1) } + cozyAPIKind := "DaemonSet" + if reconcileDeployment { + cozyAPIKind = "Deployment" + } if err = (&controller.CozystackResourceDefinitionReconciler{ - Client: mgr.GetClient(), - Scheme: mgr.GetScheme(), + Client: mgr.GetClient(), + Scheme: mgr.GetScheme(), + CozystackAPIKind: cozyAPIKind, }).SetupWithManager(mgr); err != nil { setupLog.Error(err, "unable to create controller", "controller", "CozystackResourceDefinitionReconciler") os.Exit(1) diff --git a/internal/controller/cozystackresource_controller.go b/internal/controller/cozystackresource_controller.go index 0f02b610..46884418 100644 --- a/internal/controller/cozystackresource_controller.go +++ 
b/internal/controller/cozystackresource_controller.go @@ -5,28 +5,21 @@ import ( "crypto/sha256" "encoding/hex" "encoding/json" - "sort" + "slices" "sync" "time" - "github.com/cozystack/cozystack/internal/controller/dashboard" - "github.com/cozystack/cozystack/internal/shared/crdmem" - cozyv1alpha1 "github.com/cozystack/cozystack/api/v1alpha1" - "github.com/go-logr/logr" appsv1 "k8s.io/api/apps/v1" - apierrors "k8s.io/apimachinery/pkg/api/errors" + corev1 "k8s.io/api/core/v1" "k8s.io/apimachinery/pkg/runtime" "k8s.io/apimachinery/pkg/types" ctrl "sigs.k8s.io/controller-runtime" - "sigs.k8s.io/controller-runtime/pkg/builder" "sigs.k8s.io/controller-runtime/pkg/client" - "sigs.k8s.io/controller-runtime/pkg/controller" "sigs.k8s.io/controller-runtime/pkg/handler" "sigs.k8s.io/controller-runtime/pkg/log" - "sigs.k8s.io/controller-runtime/pkg/manager" "sigs.k8s.io/controller-runtime/pkg/reconcile" ) @@ -40,128 +33,20 @@ type CozystackResourceDefinitionReconciler struct { lastEvent time.Time lastHandled time.Time - mem *crdmem.Memory - - // Track static resources initialization - staticResourcesInitialized bool - staticResourcesMutex sync.Mutex + CozystackAPIKind string } func (r *CozystackResourceDefinitionReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ctrl.Result, error) { - logger := log.FromContext(ctx) - - crd := &cozyv1alpha1.CozystackResourceDefinition{} - err := r.Get(ctx, types.NamespacedName{Name: req.Name}, crd) - if err == nil { - if r.mem != nil { - r.mem.Upsert(crd) - } - - mgr := dashboard.NewManager( - r.Client, - r.Scheme, - dashboard.WithCRDListFunc(func(c context.Context) ([]cozyv1alpha1.CozystackResourceDefinition, error) { - if r.mem != nil { - return r.mem.ListFromCacheOrAPI(c, r.Client) - } - var list cozyv1alpha1.CozystackResourceDefinitionList - if err := r.Client.List(c, &list); err != nil { - return nil, err - } - return list.Items, nil - }), - ) - - if res, derr := mgr.EnsureForCRD(ctx, crd); derr != nil || res.Requeue || 
res.RequeueAfter > 0 { - return res, derr - } - - // After processing CRD, perform cleanup of orphaned resources - // This should be done after cache warming to ensure all current resources are known - if cleanupErr := mgr.CleanupOrphanedResources(ctx); cleanupErr != nil { - logger.Error(cleanupErr, "Failed to cleanup orphaned dashboard resources") - // Don't fail the reconciliation, just log the error - } - - r.mu.Lock() - r.lastEvent = time.Now() - r.mu.Unlock() - return ctrl.Result{}, nil - } - - // Handle error cases (err is guaranteed to be non-nil here) - if !apierrors.IsNotFound(err) { - return ctrl.Result{}, err - } - // If resource is not found, clean up from memory - if r.mem != nil { - r.mem.Delete(req.Name) - } - if req.Namespace == "cozy-system" && req.Name == "cozystack-api" { - return r.debouncedRestart(ctx, logger) - } - return ctrl.Result{}, nil -} - -// initializeStaticResourcesOnce ensures static resources are created only once -func (r *CozystackResourceDefinitionReconciler) initializeStaticResourcesOnce(ctx context.Context) error { - r.staticResourcesMutex.Lock() - defer r.staticResourcesMutex.Unlock() - - if r.staticResourcesInitialized { - return nil // Already initialized - } - - // Create dashboard manager and initialize static resources - mgr := dashboard.NewManager( - r.Client, - r.Scheme, - dashboard.WithCRDListFunc(func(c context.Context) ([]cozyv1alpha1.CozystackResourceDefinition, error) { - if r.mem != nil { - return r.mem.ListFromCacheOrAPI(c, r.Client) - } - var list cozyv1alpha1.CozystackResourceDefinitionList - if err := r.Client.List(c, &list); err != nil { - return nil, err - } - return list.Items, nil - }), - ) - - if err := mgr.InitializeStaticResources(ctx); err != nil { - return err - } - - r.staticResourcesInitialized = true - log.FromContext(ctx).Info("Static dashboard resources initialized successfully") - return nil + return r.debouncedRestart(ctx) } func (r *CozystackResourceDefinitionReconciler) SetupWithManager(mgr 
ctrl.Manager) error { if r.Debounce == 0 { r.Debounce = 5 * time.Second } - if r.mem == nil { - r.mem = crdmem.Global() - } - if err := r.mem.EnsurePrimingWithManager(mgr); err != nil { - return err - } - - // Initialize static resources once during controller startup using manager.Runnable - if err := mgr.Add(manager.RunnableFunc(func(ctx context.Context) error { - if err := r.initializeStaticResourcesOnce(ctx); err != nil { - log.FromContext(ctx).Error(err, "Failed to initialize static resources") - return err - } - return nil - })); err != nil { - return err - } return ctrl.NewControllerManagedBy(mgr). Named("cozystackresource-controller"). - For(&cozyv1alpha1.CozystackResourceDefinition{}, builder.WithPredicates()). Watches( &cozyv1alpha1.CozystackResourceDefinition{}, handler.EnqueueRequestsFromMapFunc(func(ctx context.Context, obj client.Object) []reconcile.Request { @@ -176,9 +61,6 @@ func (r *CozystackResourceDefinitionReconciler) SetupWithManager(mgr ctrl.Manage }} }), ). - WithOptions(controller.Options{ - MaxConcurrentReconciles: 5, // Allow more concurrent reconciles with proper rate limiting - }). 
Complete(r) } @@ -188,22 +70,18 @@ type crdHashView struct { } func (r *CozystackResourceDefinitionReconciler) computeConfigHash(ctx context.Context) (string, error) { - var items []cozyv1alpha1.CozystackResourceDefinition - if r.mem != nil { - list, err := r.mem.ListFromCacheOrAPI(ctx, r.Client) - if err != nil { - return "", err - } - items = list + list := &cozyv1alpha1.CozystackResourceDefinitionList{} + if err := r.List(ctx, list); err != nil { + return "", err } - sort.Slice(items, func(i, j int) bool { return items[i].Name < items[j].Name }) + slices.SortFunc(list.Items, sortCozyRDs) - views := make([]crdHashView, 0, len(items)) - for i := range items { + views := make([]crdHashView, 0, len(list.Items)) + for i := range list.Items { views = append(views, crdHashView{ - Name: items[i].Name, - Spec: items[i].Spec, + Name: list.Items[i].Name, + Spec: list.Items[i].Spec, }) } b, err := json.Marshal(views) @@ -214,7 +92,9 @@ func (r *CozystackResourceDefinitionReconciler) computeConfigHash(ctx context.Co return hex.EncodeToString(sum[:]), nil } -func (r *CozystackResourceDefinitionReconciler) debouncedRestart(ctx context.Context, logger logr.Logger) (ctrl.Result, error) { +func (r *CozystackResourceDefinitionReconciler) debouncedRestart(ctx context.Context) (ctrl.Result, error) { + logger := log.FromContext(ctx) + r.mu.Lock() le := r.lastEvent lh := r.lastHandled @@ -239,15 +119,12 @@ func (r *CozystackResourceDefinitionReconciler) debouncedRestart(ctx context.Con return ctrl.Result{}, err } - deploy := &appsv1.Deployment{} - if err := r.Get(ctx, types.NamespacedName{Namespace: "cozy-system", Name: "cozystack-api"}, deploy); err != nil { + tpl, obj, patch, err := r.getWorkload(ctx, types.NamespacedName{Namespace: "cozy-system", Name: "cozystack-api"}) + if err != nil { return ctrl.Result{}, client.IgnoreNotFound(err) } - if deploy.Spec.Template.Annotations == nil { - deploy.Spec.Template.Annotations = map[string]string{} - } - oldHash := 
deploy.Spec.Template.Annotations["cozystack.io/config-hash"] + oldHash := tpl.Annotations["cozystack.io/config-hash"] if oldHash == newHash && oldHash != "" { r.mu.Lock() @@ -257,10 +134,9 @@ func (r *CozystackResourceDefinitionReconciler) debouncedRestart(ctx context.Con return ctrl.Result{}, nil } - patch := client.MergeFrom(deploy.DeepCopy()) - deploy.Spec.Template.Annotations["cozystack.io/config-hash"] = newHash + tpl.Annotations["cozystack.io/config-hash"] = newHash - if err := r.Patch(ctx, deploy, patch); err != nil { + if err := r.Patch(ctx, obj, patch); err != nil { return ctrl.Result{}, err } @@ -272,3 +148,40 @@ func (r *CozystackResourceDefinitionReconciler) debouncedRestart(ctx context.Con "old", oldHash, "new", newHash) return ctrl.Result{}, nil } + +func (r *CozystackResourceDefinitionReconciler) getWorkload( + ctx context.Context, + key types.NamespacedName, +) (tpl *corev1.PodTemplateSpec, obj client.Object, patch client.Patch, err error) { + if r.CozystackAPIKind == "Deployment" { + dep := &appsv1.Deployment{} + if err := r.Get(ctx, key, dep); err != nil { + return nil, nil, nil, err + } + obj = dep + tpl = &dep.Spec.Template + patch = client.MergeFrom(dep.DeepCopy()) + } else { + ds := &appsv1.DaemonSet{} + if err := r.Get(ctx, key, ds); err != nil { + return nil, nil, nil, err + } + obj = ds + tpl = &ds.Spec.Template + patch = client.MergeFrom(ds.DeepCopy()) + } + if tpl.Annotations == nil { + tpl.Annotations = make(map[string]string) + } + return tpl, obj, patch, nil +} + +func sortCozyRDs(a, b cozyv1alpha1.CozystackResourceDefinition) int { + if a.Name == b.Name { + return 0 + } + if a.Name < b.Name { + return -1 + } + return 1 +} diff --git a/packages/system/cozystack-controller/templates/deployment.yaml b/packages/system/cozystack-controller/templates/deployment.yaml index bac865ef..6dc21b1c 100644 --- a/packages/system/cozystack-controller/templates/deployment.yaml +++ b/packages/system/cozystack-controller/templates/deployment.yaml @@ 
-28,3 +28,6 @@ spec: {{- if .Values.cozystackController.disableTelemetry }} - --disable-telemetry {{- end }} + {{- if eq .Values.cozystackController.cozystackAPIKind "Deployment" }} + - --reconcile-deployment + {{- end }} diff --git a/packages/system/cozystack-controller/templates/role.yaml b/packages/system/cozystack-controller/templates/role.yaml index 96bfc9a5..734ba95b 100644 --- a/packages/system/cozystack-controller/templates/role.yaml +++ b/packages/system/cozystack-controller/templates/role.yaml @@ -5,7 +5,7 @@ metadata: namespace: cozy-system rules: - apiGroups: ["apps"] - resources: ["deployments"] + resources: ["deployments", "daemonsets"] resourceNames: ["cozystack-api"] verbs: ["patch", "update"] diff --git a/packages/system/cozystack-controller/values.yaml b/packages/system/cozystack-controller/values.yaml index f651807e..0ad7a02e 100644 --- a/packages/system/cozystack-controller/values.yaml +++ b/packages/system/cozystack-controller/values.yaml @@ -3,3 +3,4 @@ cozystackController: debug: false disableTelemetry: false cozystackVersion: "v0.37.0" + cozystackAPIKind: "DaemonSet"