Files
kamaji/controllers/datastore_controller.go
Dario Tranchitella b027e23b99 feat: enhancing concurrent reconciliations (#790)
* feat: buffered channels for generic events

Channels used for GenericEvent feeding for cross controllers triggers
are now buffered according to the --max-concurrent-tcp-reconciles: this
is required to avoid channel full errors when dealing with large
management clusters serving a sizeable amount of Tenant Control Planes.

Increasing this value will put more pressure on memory (mostly for GC)
and CPU (provisioning multiple certificates at the same time).

Signed-off-by: Dario Tranchitella <dario@tranchitella.eu>

* refactor: retrying datastore status update

Signed-off-by: Dario Tranchitella <dario@tranchitella.eu>

* feat(performance): reducing memory consumption for channel triggers

Signed-off-by: Dario Tranchitella <dario@tranchitella.eu>

* feat(datastore): reconcile events only for root object changes

Signed-off-by: Dario Tranchitella <dario@tranchitella.eu>

* feat: waiting soot manager exit before termination

This change introduces a grace period of 10 seconds before abruptly
terminating the Tenant Control Plane deployment, allowing the soot
manager to complete its exit procedure and avoid false positive errors
due to API Server being unresponsive due to user deletion.

Aim of this change is reducing the amount of false positive errors upon
mass deletion of Tenant COntrol Plane objects.

Signed-off-by: Dario Tranchitella <dario@tranchitella.eu>

* refactor: unbuffered channel with timeout

WatchesRawSource is non blocking, no need to check if channel is full.
To prevent deadlocks a WithTimeout check has been introduced.

Signed-off-by: Dario Tranchitella <dario@tranchitella.eu>

---------

Signed-off-by: Dario Tranchitella <dario@tranchitella.eu>
2025-04-23 21:00:29 +02:00

125 lines
4.6 KiB
Go

// Copyright 2022 Clastix Labs
// SPDX-License-Identifier: Apache-2.0
package controllers
import (
"context"
"github.com/pkg/errors"
k8serrors "k8s.io/apimachinery/pkg/api/errors"
"k8s.io/apimachinery/pkg/fields"
k8stypes "k8s.io/apimachinery/pkg/types"
"k8s.io/apimachinery/pkg/util/sets"
"k8s.io/client-go/util/retry"
"k8s.io/client-go/util/workqueue"
controllerruntime "sigs.k8s.io/controller-runtime"
"sigs.k8s.io/controller-runtime/pkg/builder"
"sigs.k8s.io/controller-runtime/pkg/client"
"sigs.k8s.io/controller-runtime/pkg/event"
"sigs.k8s.io/controller-runtime/pkg/handler"
"sigs.k8s.io/controller-runtime/pkg/log"
"sigs.k8s.io/controller-runtime/pkg/predicate"
"sigs.k8s.io/controller-runtime/pkg/reconcile"
kamajiv1alpha1 "github.com/clastix/kamaji/api/v1alpha1"
"github.com/clastix/kamaji/controllers/utils"
)
type DataStore struct {
Client client.Client
// TenantControlPlaneTrigger is the channel used to communicate across the controllers:
// if a Data Source is updated, we have to be sure that the reconciliation of the certificates content
// for each Tenant Control Plane is put in place properly.
TenantControlPlaneTrigger chan event.GenericEvent
}
//+kubebuilder:rbac:groups=kamaji.clastix.io,resources=datastores,verbs=get;list;watch;create;update;patch;delete
//+kubebuilder:rbac:groups=kamaji.clastix.io,resources=datastores/status,verbs=get;update;patch
func (r *DataStore) Reconcile(ctx context.Context, request reconcile.Request) (reconcile.Result, error) {
logger := log.FromContext(ctx)
var ds kamajiv1alpha1.DataStore
if err := r.Client.Get(ctx, request.NamespacedName, &ds); err != nil {
if k8serrors.IsNotFound(err) {
logger.Info("resource have been deleted, skipping")
return reconcile.Result{}, nil
}
logger.Error(err, "cannot retrieve the required resource")
return reconcile.Result{}, err
}
var tcpList kamajiv1alpha1.TenantControlPlaneList
updateErr := retry.RetryOnConflict(retry.DefaultRetry, func() error {
if lErr := r.Client.List(ctx, &tcpList, client.MatchingFieldsSelector{
Selector: fields.OneTermEqualSelector(kamajiv1alpha1.TenantControlPlaneUsedDataStoreKey, ds.GetName()),
}); lErr != nil {
return errors.Wrap(lErr, "cannot retrieve list of the Tenant Control Plane using the following instance")
}
// Updating the status with the list of Tenant Control Plane using the following Data Source
tcpSets := sets.NewString()
for _, tcp := range tcpList.Items {
tcpSets.Insert(getNamespacedName(tcp.GetNamespace(), tcp.GetName()).String())
}
ds.Status.UsedBy = tcpSets.List()
if sErr := r.Client.Status().Update(ctx, &ds); sErr != nil {
return errors.Wrap(sErr, "cannot update the status for the given instance")
}
return nil
})
if updateErr != nil {
logger.Error(updateErr, "cannot update DataStore status")
return reconcile.Result{}, updateErr
}
// Triggering the reconciliation of the Tenant Control Plane upon a Secret change
for _, tcp := range tcpList.Items {
var shrunkTCP kamajiv1alpha1.TenantControlPlane
shrunkTCP.Name = tcp.Name
shrunkTCP.Namespace = tcp.Namespace
go utils.TriggerChannel(ctx, r.TenantControlPlaneTrigger, shrunkTCP)
}
return reconcile.Result{}, nil
}
func (r *DataStore) SetupWithManager(mgr controllerruntime.Manager) error {
enqueueFn := func(tcp *kamajiv1alpha1.TenantControlPlane, limitingInterface workqueue.TypedRateLimitingInterface[reconcile.Request]) {
if dataStoreName := tcp.Status.Storage.DataStoreName; len(dataStoreName) > 0 {
limitingInterface.AddRateLimited(reconcile.Request{
NamespacedName: k8stypes.NamespacedName{
Name: dataStoreName,
},
})
}
}
//nolint:forcetypeassert
return controllerruntime.NewControllerManagedBy(mgr).
For(&kamajiv1alpha1.DataStore{}, builder.WithPredicates(
predicate.GenerationChangedPredicate{},
)).
Watches(&kamajiv1alpha1.TenantControlPlane{}, handler.Funcs{
CreateFunc: func(_ context.Context, createEvent event.TypedCreateEvent[client.Object], w workqueue.TypedRateLimitingInterface[reconcile.Request]) {
enqueueFn(createEvent.Object.(*kamajiv1alpha1.TenantControlPlane), w)
},
UpdateFunc: func(_ context.Context, updateEvent event.TypedUpdateEvent[client.Object], w workqueue.TypedRateLimitingInterface[reconcile.Request]) {
enqueueFn(updateEvent.ObjectOld.(*kamajiv1alpha1.TenantControlPlane), w)
enqueueFn(updateEvent.ObjectNew.(*kamajiv1alpha1.TenantControlPlane), w)
},
DeleteFunc: func(_ context.Context, deleteEvent event.TypedDeleteEvent[client.Object], w workqueue.TypedRateLimitingInterface[reconcile.Request]) {
enqueueFn(deleteEvent.Object.(*kamajiv1alpha1.TenantControlPlane), w)
},
}).
Complete(r)
}