From d3fb03a75242270cd75bb3f27690f73ab84831ed Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?L=C3=A9onard=20Suslian?=
<58216931+synthe102@users.noreply.github.com>
Date: Fri, 12 Dec 2025 12:10:02 +0100
Subject: [PATCH] feat: add support for multiple Datastores (#961)
* feat: add support for multiple Datastores
* docs: add guide for datastore overrides
* feat(datastore): add e2e test for dataStoreOverrides
* ci: reclaim disk space from runner to fix flaky tests
---
.github/workflows/e2e.yaml | 3 +
Makefile | 10 +-
api/v1alpha1/tenantcontrolplane_types.go | 14 ++-
api/v1alpha1/zz_generated.deepcopy.go | 20 ++++
...i.clastix.io_tenantcontrolplanes_spec.yaml | 13 +++
...kamaji.clastix.io_tenantcontrolplanes.yaml | 13 +++
controllers/resources.go | 61 +++++++++---
controllers/tenantcontrolplane_controller.go | 62 +++++++++---
docs/content/guides/datastore-overrides.md | 78 +++++++++++++++
docs/content/reference/api.md | 39 ++++++++
docs/mkdocs.yml | 1 +
e2e/tcp_datastore_overrides_test.go | 68 ++++++++++++++
internal/builders/controlplane/deployment.go | 24 +++++
.../builders/controlplane/deployment_test.go | 46 +++++++++
.../datastore/datastore_storage_config.go | 11 ++-
internal/resources/k8s_deployment_resource.go | 2 +
internal/webhook/handlers/tcp_datastore.go | 18 +++-
.../webhook/handlers/tcp_datastore_test.go | 94 +++++++++++++++++++
internal/webhook/handlers/tcp_deployment.go | 14 +++
19 files changed, 555 insertions(+), 36 deletions(-)
create mode 100644 docs/content/guides/datastore-overrides.md
create mode 100644 e2e/tcp_datastore_overrides_test.go
create mode 100644 internal/builders/controlplane/deployment_test.go
create mode 100644 internal/webhook/handlers/tcp_datastore_test.go
diff --git a/.github/workflows/e2e.yaml b/.github/workflows/e2e.yaml
index 29131e0..1edea2c 100644
--- a/.github/workflows/e2e.yaml
+++ b/.github/workflows/e2e.yaml
@@ -41,6 +41,9 @@ jobs:
- uses: actions/setup-go@v6
with:
go-version-file: go.mod
+ - name: reclaim disk space from runner
+ run: |
+ sudo rm -rf /usr/share/dotnet /usr/local/lib/android /opt/ghc /opt/hostedtoolcache/CodeQL
- run: |
sudo apt-get update
sudo apt-get install -y golang-cfssl
diff --git a/Makefile b/Makefile
index f410573..7c61fe5 100644
--- a/Makefile
+++ b/Makefile
@@ -177,7 +177,7 @@ datastore-postgres:
$(MAKE) NAME=gold _datastore-postgres
_datastore-etcd:
- $(HELM) upgrade --install etcd-$(NAME) clastix/kamaji-etcd --create-namespace -n etcd-system --set datastore.enabled=true --set fullnameOverride=etcd-$(NAME)
+	$(HELM) upgrade --install etcd-$(NAME) clastix/kamaji-etcd --create-namespace -n $(or $(NAMESPACE),etcd-system) --set datastore.enabled=true --set fullnameOverride=etcd-$(NAME) $(EXTRA_ARGS)
_datastore-nats:
$(MAKE) NAME=$(NAME) NAMESPACE=nats-system -C deploy/kine/nats nats
@@ -186,9 +186,11 @@ _datastore-nats:
datastore-etcd: helm
$(HELM) repo add clastix https://clastix.github.io/charts
$(HELM) repo update
- $(MAKE) NAME=bronze _datastore-etcd
- $(MAKE) NAME=silver _datastore-etcd
- $(MAKE) NAME=gold _datastore-etcd
+ $(MAKE) NAME=bronze NAMESPACE=etcd-system _datastore-etcd
+ $(MAKE) NAME=silver NAMESPACE=etcd-system _datastore-etcd
+ $(MAKE) NAME=gold NAMESPACE=etcd-system _datastore-etcd
+ $(MAKE) NAME=primary NAMESPACE=kamaji-system EXTRA_ARGS='--set certManager.enabled=true --set certManager.issuerRef.kind=Issuer --set certManager.issuerRef.name=kamaji-selfsigned-issuer --set selfSignedCertificates.enabled=false' _datastore-etcd
+ $(MAKE) NAME=secondary NAMESPACE=kamaji-system EXTRA_ARGS='--set certManager.enabled=true --set certManager.ca.create=false --set certManager.ca.nameOverride=etcd-primary-ca --set certManager.issuerRef.kind=Issuer --set certManager.issuerRef.name=kamaji-selfsigned-issuer --set selfSignedCertificates.enabled=false' _datastore-etcd
datastore-nats: helm
$(HELM) repo add nats https://nats-io.github.io/k8s/helm/charts/
diff --git a/api/v1alpha1/tenantcontrolplane_types.go b/api/v1alpha1/tenantcontrolplane_types.go
index 0a8037d..cda9511 100644
--- a/api/v1alpha1/tenantcontrolplane_types.go
+++ b/api/v1alpha1/tenantcontrolplane_types.go
@@ -355,6 +355,14 @@ func (p *Permissions) HasAnyLimitation() bool {
return false
}
+// DataStoreOverride defines which kubernetes resource will be stored in a dedicated datastore.
+type DataStoreOverride struct {
+ // Resource specifies which kubernetes resource to target.
+ Resource string `json:"resource,omitempty"`
+ // DataStore specifies the DataStore that should be used to store the Kubernetes data for the given Resource.
+ DataStore string `json:"dataStore,omitempty"`
+}
+
// TenantControlPlaneSpec defines the desired state of TenantControlPlane.
// +kubebuilder:validation:XValidation:rule="!has(oldSelf.dataStore) || has(self.dataStore)", message="unsetting the dataStore is not supported"
// +kubebuilder:validation:XValidation:rule="!has(oldSelf.dataStoreSchema) || has(self.dataStoreSchema)", message="unsetting the dataStoreSchema is not supported"
@@ -389,8 +397,10 @@ type TenantControlPlaneSpec struct {
// to the user to avoid clashes between different TenantControlPlanes. If not set upon creation, Kamaji will default the
// DataStoreUsername by concatenating the namespace and name of the TenantControlPlane.
// +kubebuilder:validation:XValidation:rule="self == oldSelf",message="changing the dataStoreUsername is not supported"
- DataStoreUsername string `json:"dataStoreUsername,omitempty"`
- ControlPlane ControlPlane `json:"controlPlane"`
+ DataStoreUsername string `json:"dataStoreUsername,omitempty"`
+ // DataStoreOverride defines which kubernetes resources will be stored in dedicated datastores.
+ DataStoreOverrides []DataStoreOverride `json:"dataStoreOverrides,omitempty"`
+ ControlPlane ControlPlane `json:"controlPlane"`
// Kubernetes specification for tenant control plane
Kubernetes KubernetesSpec `json:"kubernetes"`
// NetworkProfile specifies how the network is
diff --git a/api/v1alpha1/zz_generated.deepcopy.go b/api/v1alpha1/zz_generated.deepcopy.go
index 478b149..d47eb1e 100644
--- a/api/v1alpha1/zz_generated.deepcopy.go
+++ b/api/v1alpha1/zz_generated.deepcopy.go
@@ -538,6 +538,21 @@ func (in *DataStoreList) DeepCopyObject() runtime.Object {
return nil
}
+// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
+func (in *DataStoreOverride) DeepCopyInto(out *DataStoreOverride) {
+ *out = *in
+}
+
+// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new DataStoreOverride.
+func (in *DataStoreOverride) DeepCopy() *DataStoreOverride {
+ if in == nil {
+ return nil
+ }
+ out := new(DataStoreOverride)
+ in.DeepCopyInto(out)
+ return out
+}
+
// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
func (in *DataStoreSetupStatus) DeepCopyInto(out *DataStoreSetupStatus) {
*out = *in
@@ -1591,6 +1606,11 @@ func (in *TenantControlPlaneList) DeepCopyObject() runtime.Object {
func (in *TenantControlPlaneSpec) DeepCopyInto(out *TenantControlPlaneSpec) {
*out = *in
out.WritePermissions = in.WritePermissions
+ if in.DataStoreOverrides != nil {
+ in, out := &in.DataStoreOverrides, &out.DataStoreOverrides
+ *out = make([]DataStoreOverride, len(*in))
+ copy(*out, *in)
+ }
in.ControlPlane.DeepCopyInto(&out.ControlPlane)
in.Kubernetes.DeepCopyInto(&out.Kubernetes)
in.NetworkProfile.DeepCopyInto(&out.NetworkProfile)
diff --git a/charts/kamaji-crds/hack/kamaji.clastix.io_tenantcontrolplanes_spec.yaml b/charts/kamaji-crds/hack/kamaji.clastix.io_tenantcontrolplanes_spec.yaml
index 9c3508b..e07834e 100644
--- a/charts/kamaji-crds/hack/kamaji.clastix.io_tenantcontrolplanes_spec.yaml
+++ b/charts/kamaji-crds/hack/kamaji.clastix.io_tenantcontrolplanes_spec.yaml
@@ -6985,6 +6985,19 @@ versions:
Migration from one DataStore to another backed by the same Driver is possible. See: https://kamaji.clastix.io/guides/datastore-migration/
Migration from one DataStore to another backed by a different Driver is not supported.
type: string
+ dataStoreOverrides:
+ description: DataStoreOverride defines which kubernetes resources will be stored in dedicated datastores.
+ items:
+ description: DataStoreOverride defines which kubernetes resource will be stored in a dedicated datastore.
+ properties:
+ dataStore:
+ description: DataStore specifies the DataStore that should be used to store the Kubernetes data for the given Resource.
+ type: string
+ resource:
+ description: Resource specifies which kubernetes resource to target.
+ type: string
+ type: object
+ type: array
dataStoreSchema:
description: |-
DataStoreSchema allows to specify the name of the database (for relational DataStores) or the key prefix (for etcd). This
diff --git a/charts/kamaji/crds/kamaji.clastix.io_tenantcontrolplanes.yaml b/charts/kamaji/crds/kamaji.clastix.io_tenantcontrolplanes.yaml
index 18c1e86..b34de10 100644
--- a/charts/kamaji/crds/kamaji.clastix.io_tenantcontrolplanes.yaml
+++ b/charts/kamaji/crds/kamaji.clastix.io_tenantcontrolplanes.yaml
@@ -6993,6 +6993,19 @@ spec:
Migration from one DataStore to another backed by the same Driver is possible. See: https://kamaji.clastix.io/guides/datastore-migration/
Migration from one DataStore to another backed by a different Driver is not supported.
type: string
+ dataStoreOverrides:
+ description: DataStoreOverride defines which kubernetes resources will be stored in dedicated datastores.
+ items:
+ description: DataStoreOverride defines which kubernetes resource will be stored in a dedicated datastore.
+ properties:
+ dataStore:
+ description: DataStore specifies the DataStore that should be used to store the Kubernetes data for the given Resource.
+ type: string
+ resource:
+ description: Resource specifies which kubernetes resource to target.
+ type: string
+ type: object
+ type: array
dataStoreSchema:
description: |-
DataStoreSchema allows to specify the name of the database (for relational DataStores) or the key prefix (for etcd). This
diff --git a/controllers/resources.go b/controllers/resources.go
index 31385f9..f2c3737 100644
--- a/controllers/resources.go
+++ b/controllers/resources.go
@@ -26,18 +26,20 @@ import (
)
type GroupResourceBuilderConfiguration struct {
- client client.Client
- log logr.Logger
- tcpReconcilerConfig TenantControlPlaneReconcilerConfig
- tenantControlPlane kamajiv1alpha1.TenantControlPlane
- ExpirationThreshold time.Duration
- Connection datastore.Connection
- DataStore kamajiv1alpha1.DataStore
- KamajiNamespace string
- KamajiServiceAccount string
- KamajiService string
- KamajiMigrateImage string
- DiscoveryClient discovery.DiscoveryInterface
+ client client.Client
+ log logr.Logger
+ tcpReconcilerConfig TenantControlPlaneReconcilerConfig
+ tenantControlPlane kamajiv1alpha1.TenantControlPlane
+ ExpirationThreshold time.Duration
+ Connection datastore.Connection
+ DataStore kamajiv1alpha1.DataStore
+ DataStoreOverrides []builder.DataStoreOverrides
+ DataStoreOverriedsConnections map[string]datastore.Connection
+ KamajiNamespace string
+ KamajiServiceAccount string
+ KamajiService string
+ KamajiMigrateImage string
+ DiscoveryClient discovery.DiscoveryInterface
}
type GroupDeletableResourceBuilderConfiguration struct {
@@ -62,8 +64,9 @@ func GetResources(ctx context.Context, config GroupResourceBuilderConfiguration)
resources = append(resources, getKubernetesCertificatesResources(config.client, config.tcpReconcilerConfig, config.tenantControlPlane)...)
resources = append(resources, getKubeconfigResources(config.client, config.tcpReconcilerConfig, config.tenantControlPlane)...)
resources = append(resources, getKubernetesStorageResources(config.client, config.Connection, config.DataStore, config.ExpirationThreshold)...)
+ resources = append(resources, getKubernetesAdditionalStorageResources(config.client, config.DataStoreOverriedsConnections, config.DataStoreOverrides, config.ExpirationThreshold)...)
resources = append(resources, getKonnectivityServerRequirementsResources(config.client, config.ExpirationThreshold)...)
- resources = append(resources, getKubernetesDeploymentResources(config.client, config.tcpReconcilerConfig, config.DataStore)...)
+ resources = append(resources, getKubernetesDeploymentResources(config.client, config.tcpReconcilerConfig, config.DataStore, config.DataStoreOverrides)...)
resources = append(resources, getKonnectivityServerPatchResources(config.client)...)
resources = append(resources, getDataStoreMigratingCleanup(config.client, config.KamajiNamespace)...)
resources = append(resources, getKubernetesIngressResources(config.client)...)
@@ -252,12 +255,42 @@ func getKubernetesStorageResources(c client.Client, dbConnection datastore.Conne
}
}
-func getKubernetesDeploymentResources(c client.Client, tcpReconcilerConfig TenantControlPlaneReconcilerConfig, dataStore kamajiv1alpha1.DataStore) []resources.Resource {
+// getKubernetesAdditionalStorageResources returns, for each DataStore override, the MultiTenancy, Config, Setup and
+// Certificate resources built from the override's DataStore and the connection stored under its Resource key.
+func getKubernetesAdditionalStorageResources(c client.Client, dbConnections map[string]datastore.Connection, dataStoreOverrides []builder.DataStoreOverrides, threshold time.Duration) []resources.Resource {
+	res := make([]resources.Resource, 0, 4*len(dataStoreOverrides)) // four resources are appended per override
+	for _, dso := range dataStoreOverrides {
+		overrideDS, conn := dso.DataStore, dbConnections[dso.Resource] // not named "datastore": avoids shadowing the imported package
+		res = append(res,
+			&ds.MultiTenancy{DataStore: overrideDS},
+			&ds.Config{
+				Client:     c,
+				ConnString: conn.GetConnectionString(),
+				DataStore:  overrideDS,
+				IsOverride: true,
+			},
+			&ds.Setup{
+				Client:     c,
+				Connection: conn,
+				DataStore:  overrideDS,
+			},
+			&ds.Certificate{
+				Client:                  c,
+				DataStore:               overrideDS,
+				CertExpirationThreshold: threshold,
+			})
+	}
+
+	return res
+}
+
+func getKubernetesDeploymentResources(c client.Client, tcpReconcilerConfig TenantControlPlaneReconcilerConfig, dataStore kamajiv1alpha1.DataStore, dataStoreOverrides []builder.DataStoreOverrides) []resources.Resource {
return []resources.Resource{
&resources.KubernetesDeploymentResource{
Client: c,
DataStore: dataStore,
KineContainerImage: tcpReconcilerConfig.KineContainerImage,
+ DataStoreOverrides: dataStoreOverrides,
},
}
}
diff --git a/controllers/tenantcontrolplane_controller.go b/controllers/tenantcontrolplane_controller.go
index 9e830d9..1651c99 100644
--- a/controllers/tenantcontrolplane_controller.go
+++ b/controllers/tenantcontrolplane_controller.go
@@ -37,6 +37,7 @@ import (
kamajiv1alpha1 "github.com/clastix/kamaji/api/v1alpha1"
"github.com/clastix/kamaji/controllers/finalizers"
"github.com/clastix/kamaji/controllers/utils"
+ controlplanebuilder "github.com/clastix/kamaji/internal/builders/controlplane"
"github.com/clastix/kamaji/internal/datastore"
kamajierrors "github.com/clastix/kamaji/internal/errors"
"github.com/clastix/kamaji/internal/resources"
@@ -157,6 +158,25 @@ func (r *TenantControlPlaneReconciler) Reconcile(ctx context.Context, req ctrl.R
}
defer dsConnection.Close()
+ dso, err := r.dataStoreOverride(ctx, tenantControlPlane)
+ if err != nil {
+ log.Error(err, "cannot retrieve the DataStoreOverrides for the given instance")
+
+ return ctrl.Result{}, err
+ }
+ dsoConnections := make(map[string]datastore.Connection, len(dso))
+ for _, ds := range dso {
+ dsoConnection, err := datastore.NewStorageConnection(ctx, r.Client, ds.DataStore)
+ if err != nil {
+ log.Error(err, "cannot generate the DataStoreOverride connection for the given instance")
+
+ return ctrl.Result{}, err
+ }
+ defer dsoConnection.Close()
+
+ dsoConnections[ds.Resource] = dsoConnection
+ }
+
if markedToBeDeleted && controllerutil.ContainsFinalizer(tenantControlPlane, finalizers.DatastoreFinalizer) {
log.Info("marked for deletion, performing clean-up")
@@ -183,17 +203,19 @@ func (r *TenantControlPlaneReconciler) Reconcile(ctx context.Context, req ctrl.R
}
groupResourceBuilderConfiguration := GroupResourceBuilderConfiguration{
- client: r.Client,
- log: log,
- tcpReconcilerConfig: r.Config,
- tenantControlPlane: *tenantControlPlane,
- Connection: dsConnection,
- DataStore: *ds,
- KamajiNamespace: r.KamajiNamespace,
- KamajiServiceAccount: r.KamajiServiceAccount,
- KamajiService: r.KamajiService,
- KamajiMigrateImage: r.KamajiMigrateImage,
- DiscoveryClient: r.DiscoveryClient,
+ client: r.Client,
+ log: log,
+ tcpReconcilerConfig: r.Config,
+ tenantControlPlane: *tenantControlPlane,
+ Connection: dsConnection,
+ DataStore: *ds,
+ DataStoreOverrides: dso,
+ DataStoreOverriedsConnections: dsoConnections,
+ KamajiNamespace: r.KamajiNamespace,
+ KamajiServiceAccount: r.KamajiServiceAccount,
+ KamajiService: r.KamajiService,
+ KamajiMigrateImage: r.KamajiMigrateImage,
+ DiscoveryClient: r.DiscoveryClient,
}
registeredResources := GetResources(ctx, groupResourceBuilderConfiguration)
@@ -362,3 +384,21 @@ func (r *TenantControlPlaneReconciler) dataStore(ctx context.Context, tenantCont
return &ds, nil
}
+
+// dataStoreOverride fetches, by name only (no namespace is set in the lookup key), the DataStore referenced by each spec.dataStoreOverrides entry and pairs it with the entry's Resource; it fails on the first lookup error or non-etcd driver.
+func (r *TenantControlPlaneReconciler) dataStoreOverride(ctx context.Context, tenantControlPlane *kamajiv1alpha1.TenantControlPlane) ([]controlplanebuilder.DataStoreOverrides, error) {
+ datastores := make([]controlplanebuilder.DataStoreOverrides, 0, len(tenantControlPlane.Spec.DataStoreOverrides))
+ for _, dso := range tenantControlPlane.Spec.DataStoreOverrides {
+ var ds kamajiv1alpha1.DataStore
+ if err := r.Client.Get(ctx, k8stypes.NamespacedName{Name: dso.DataStore}, &ds); err != nil {
+ return nil, errors.Wrap(err, "cannot retrieve *kamajiv1alpha.DataStore object")
+ }
+ if ds.Spec.Driver != kamajiv1alpha1.EtcdDriver { // any driver other than EtcdDriver is rejected for overrides
+ return nil, errors.New("DataStoreOverrides can only use ETCD driver")
+ }
+
+ datastores = append(datastores, controlplanebuilder.DataStoreOverrides{Resource: dso.Resource, DataStore: ds})
+ }
+
+ return datastores, nil
+}
diff --git a/docs/content/guides/datastore-overrides.md b/docs/content/guides/datastore-overrides.md
new file mode 100644
index 0000000..15a77fe
--- /dev/null
+++ b/docs/content/guides/datastore-overrides.md
@@ -0,0 +1,78 @@
+# Datastore Overrides
+
+Kamaji can back different resources of the Kubernetes API server with separate etcd clusters by configuring the [`--etcd-servers-overrides`](https://kubernetes.io/docs/reference/command-line-tools-reference/kube-apiserver/#:~:text=%2D%2Detcd%2Dservers%2Doverrides%20strings) flag. This is useful for very large clusters, where resources with high churn can be stored in a dedicated etcd cluster.
+
+## Install Datastores
+
+Create a self-signed cert-manager `ClusterIssuer`.
+```bash
+echo 'apiVersion: cert-manager.io/v1
+kind: ClusterIssuer
+metadata:
+ name: self-signed
+spec:
+ selfSigned: {}
+' | kubectl apply -f -
+```
+
+Install two Datastores: a primary one, and a secondary one that will be used to store the `/events` resources.
+```bash
+ helm install etcd-primary clastix/kamaji-etcd -n kamaji-etcd --create-namespace \
+ --set selfSignedCertificates.enabled=false \
+ --set certManager.enabled=true \
+ --set certManager.issuerRef.kind=ClusterIssuer \
+ --set certManager.issuerRef.name=self-signed
+```
+
+For the secondary Datastore, use the cert-manager CA created by the `etcd-primary` helm release.
+```bash
+ helm install etcd-secondary clastix/kamaji-etcd -n kamaji-etcd --create-namespace \
+ --set selfSignedCertificates.enabled=false \
+ --set certManager.enabled=true \
+ --set certManager.ca.create=false \
+ --set certManager.ca.nameOverride=etcd-primary-kamaji-etcd-ca \
+ --set certManager.issuerRef.kind=ClusterIssuer \
+ --set certManager.issuerRef.name=self-signed
+```
+
+## Create a Tenant Control Plane
+
+Using the `spec.dataStoreOverrides` field, Datastores different from the one used in `spec.dataStore` can be used to store specific resources.
+
+```bash
+echo 'apiVersion: kamaji.clastix.io/v1alpha1
+kind: TenantControlPlane
+metadata:
+ name: k8s-133
+ labels:
+ tenant.clastix.io: k8s-133
+spec:
+ controlPlane:
+ deployment:
+ replicas: 2
+ service:
+ serviceType: LoadBalancer
+ kubernetes:
+ version: "v1.33.1"
+ kubelet:
+ cgroupfs: systemd
+ dataStore: etcd-primary-kamaji-etcd
+ dataStoreOverrides:
+ - resource: "/events" # Store events in the secondary ETCD
+ dataStore: etcd-secondary-kamaji-etcd
+ networkProfile:
+ port: 6443
+ addons:
+ coreDNS: {}
+ kubeProxy: {}
+ konnectivity:
+ server:
+ port: 8132
+ agent:
+ mode: DaemonSet
+' | kubectl apply -f -
+```
+
+## Considerations
+
+Only built-in resources can be targeted by `--etcd-servers-overrides`; it is currently not possible to target Custom Resources.
diff --git a/docs/content/reference/api.md b/docs/content/reference/api.md
index 13bbf51..4e3d61f 100644
--- a/docs/content/reference/api.md
+++ b/docs/content/reference/api.md
@@ -28500,6 +28500,13 @@ Migration from one DataStore to another backed by the same Driver is possible. S
Migration from one DataStore to another backed by a different Driver is not supported.
| Name | +Type | +Description | +Required | +
|---|---|---|---|
| dataStore | +string | +
+ DataStore specifies the DataStore that should be used to store the Kubernetes data for the given Resource. + |
+ false | +
| resource | +string | +
+ Resource specifies which kubernetes resource to target. + |
+ false | +