Compare commits

..

5 Commits

Author SHA1 Message Date
Jeff McCune
cb9b39c3ca (#53) Add Vault as an optional service on the core clusters
This patch migrates the vault component from [holos-infra][1] to a cue
based component.  Vault is optional in the reference platform, so this
patch also defines an `#OptionalServices` struct to conditionally manage
a service across multiple clusters in the platform.

The primary use case for optional services is managing a namespace to
provision and provide secrets across clusters.

[1]: https://github.com/holos-run/holos-infra/tree/v0.5.0/components/core/core/vault
2024-03-12 17:18:38 -07:00
Jeff McCune
0f34b20546 (#54) Disable helm hooks when rendering components
Pods are unnecessarily created when deploying helm based holos
components and often fail.  Prevent these test pods by disabling helm
hooks with the `--no-hooks` flag.

Closes: #54
2024-03-12 14:14:20 -07:00
Jeff McCune
0d7bbbb659 (#48) Disable pg spec.dataSource for standby cluster
Problem:
The standby cluster on k2 fails to start.  A pgbackrest pod first
restores the database from S3, then the pgha nodes try to replay the WAL
as part of the standby initialization process.  This fails because the
PGDATA directory is not empty.

Solution:
Specify the spec.dataSource field only when the cluster is configured as
a primary cluster.

Result:
Non-primary clusters are standby; they skip the pgbackrest job to
restore from S3 and move straight to patroni replaying the WAL from S3
as part of the pgha pods.

One of the two pgha pods becomes the "standby leader" and restores the
WAL from S3.  The other is a cascading standby and then restores the
same WAL from the standby leader.

After 8 minutes both pods are ready.

```
❯ k get pods
NAME                               READY   STATUS    RESTARTS   AGE
zitadel-pgbouncer-d9f8cffc-j469g   2/2     Running   0          11m
zitadel-pgbouncer-d9f8cffc-xq29g   2/2     Running   0          11m
zitadel-pgha1-27w7-0               4/4     Running   0          11m
zitadel-pgha1-c5qj-0               4/4     Running   0          11m
zitadel-repo-host-0                2/2     Running   0          11m
```
2024-03-11 17:56:47 -07:00
Jeff McCune
3f3e36bbe9 (#48) Split workload into foundation and accounts
Problem:
The k3 and k4 clusters are getting the Zitadel components which are
really only intended for the core cluster pair.

Solution:
Split the workload subtree into two, named foundation and accounts.  The
core cluster pair gets foundation+accounts while the kX clusters get
just the foundation subtree.

Result:
prod-zitadel-iam is no longer managed on k3 and k4
2024-03-11 15:20:35 -07:00
Jeff McCune
9f41478d33 (#48) Restore from Monday morning after Gary and Nate registered
Set the restore point to time="2024-03-11T17:08:58Z" level=info
msg="crunchy-pgbackrest ends" which is just after Gary and Nate
registered and were granted the cluster-admin role.
2024-03-11 10:18:45 -07:00
71 changed files with 1711 additions and 75 deletions

View File

@@ -0,0 +1,39 @@
package holos
// Controls optional feature flags for services distributed across multiple holos components.
// For example, enable issuing certificates in the provisioner cluster when an optional service is
// enabled for a workload cluster.
#OptionalService: {
// name identifies the service.
name: string
// enabled turns the service on; it defaults to false.
enabled: true | *false
// clusters selects the clusters the service applies to, keyed by cluster name.
// Each value is constrained to the matching #Platform.clusters entry.
clusters: [Name=_]: #Platform.clusters[Name]
// clusterNames lists the keys of clusters.
clusterNames: [for k, v in clusters {k}]
// namespaces holds the namespaces of the service, keyed by namespace name.
namespaces: [Name=_]: #ManagedNamespace & {
name: Name
}
// servers represents istio Gateway.spec.servers.hosts entries
// Refer to istio/gateway/gateway.cue
servers: [Name=_]: {
hosts: [...string]
// The port name is taken from the servers key.
port: name: Name
port: number: 443
port: protocol: "HTTPS"
tls: credentialName: string
tls: mode: "SIMPLE"
}
// public tls certs should align to hosts.
// The metadata.name of each certificate is taken from the certs key.
certs: [Name=_]: #Certificate & {
metadata: name: Name
}
}
// #OptionalServices is the collection of optional services keyed by service
// name. The name field of each entry is set from its key.
#OptionalServices: {
[Name=_]: #OptionalService & {
name: Name
}
}
// Unify the namespaces of every optional service into #ManagedNamespaces.
// NOTE(review): there is no enabled guard here, so the namespaces of disabled
// services are included as well — confirm this is intended.
for k, v in #OptionalServices {
#ManagedNamespaces: v.namespaces
}

View File

@@ -0,0 +1,54 @@
package holos
let CoreDomain = "core.\(#Platform.org.domain)"
let TargetNamespace = "prod-core-vault"
// Register vault as an enabled optional service on the core1 and core2 clusters.
#OptionalServices: {
vault: {
enabled: true
clusters: core1: _
clusters: core2: _
namespaces: "prod-core-vault": labels: "istio-injection": "enabled"
// Certificate for vault.<CoreDomain>, not tied to a single cluster.
certs: "vault-core": #Certificate & {
metadata: name: "vault-core"
metadata: namespace: "istio-ingress"
spec: {
commonName: "vault.\(CoreDomain)"
dnsNames: [commonName]
secretName: metadata.name
issuerRef: kind: "ClusterIssuer"
issuerRef: name: string | *"letsencrypt"
}
}
// Gateway server entry that uses the secret of the vault-core certificate.
servers: "https-vault-core": {
hosts: ["\(TargetNamespace)/vault.\(CoreDomain)"]
tls: credentialName: certs."vault-core".spec.secretName
}
// Add one cluster specific certificate and one matching gateway server entry
// for each selected cluster.
for k, v in clusters {
let obj = (Cert & {Name: "vault-core", Cluster: v.name}).APIObject
certs: "\(obj.metadata.name)": obj
servers: "https-\(obj.metadata.name)": {
hosts: [for host in obj.spec.dnsNames {"\(TargetNamespace)/\(host)"}]
tls: credentialName: obj.spec.secretName
}
}
}
}
// Cert provisions a cluster specific certificate.
let Cert = {
// Name is the base name of the certificate.
Name: string
// Cluster is the name of the cluster the certificate is issued for.
Cluster: string
// APIObject is the resulting certificate, named "<Cluster>-<Name>".
APIObject: #Certificate & {
metadata: name: "\(Cluster)-\(Name)"
metadata: namespace: string | *"istio-ingress"
spec: {
// The common name defaults to vault.<Cluster>.<CoreDomain> and is the only DNS name.
commonName: string | *"vault.\(Cluster).\(CoreDomain)"
dnsNames: [commonName]
// The secret shares the name of the certificate.
secretName: metadata.name
issuerRef: kind: "ClusterIssuer"
issuerRef: name: string | *"letsencrypt"
}
}
}

View File

@@ -8,12 +8,17 @@ let S3Secret = "pgo-s3-creds"
let ZitadelUser = _DBName
let ZitadelAdmin = "\(_DBName)-admin"
// This must be an external storage bucket for our architecture.
let BucketRepoName = "repo2"
// Restore options. Set the timestamp to a known good point in time.
// time="2024-03-11T17:08:58Z" level=info msg="crunchy-pgbackrest ends"
let RestoreOptions = ["--type=time", "--target=\"2024-03-11 17:10:00+00\""]
#KubernetesObjects & {
apiObjects: {
ExternalSecret: "pgo-s3-creds": _
PostgresCluster: db: #PostgresCluster & HighlyAvailable & {
// This must be an external storage bucket for our architecture.
let BucketRepoName = spec.backups.pgbackrest.manual.repoName
metadata: name: _DBName
metadata: namespace: #TargetNamespace
spec: {
@@ -47,36 +52,37 @@ let ZitadelAdmin = "\(_DBName)-admin"
enabled: true
}
}
// Restore from a backup
dataSource: pgbackrest: {
stanza: "db"
configuration: [{secret: name: S3Secret}]
// Restore from known good full backup taken in https://github.com/holos-run/holos/issues/48#issuecomment-1987375044
options: ["--type=time", "--target=\"2024-03-10 21:56:00+00\""]
global: {
"\(BucketRepoName)-path": "/pgbackrest/\(#TargetNamespace)/\(metadata.name)/\(BucketRepoName)"
"\(BucketRepoName)-cipher-type": "aes-256-cbc"
}
repo: {
name: BucketRepoName
s3: {
bucket: string | *"\(#Platform.org.name)-zitadel-backups"
region: string | *#Backups.s3.region
endpoint: string | *"s3.dualstack.\(region).amazonaws.com"
// Restore from backup if and only if the cluster is primary
if Cluster.primary {
dataSource: pgbackrest: {
stanza: "db"
configuration: backups.pgbackrest.configuration
// Restore from known good full backup taken
options: RestoreOptions
global: {
"\(BucketRepoName)-path": "/pgbackrest/\(#TargetNamespace)/\(metadata.name)/\(BucketRepoName)"
"\(BucketRepoName)-cipher-type": "aes-256-cbc"
}
repo: {
name: BucketRepoName
s3: backups.pgbackrest.repos[1].s3
}
}
}
// Refer to https://access.crunchydata.com/documentation/postgres-operator/latest/tutorials/backups-disaster-recovery/backups
backups: pgbackrest: {
configuration: dataSource.pgbackrest.configuration
configuration: [{secret: name: S3Secret}]
// Defines details for manual pgBackRest backup Jobs
manual: {
// Note: the repoName value must match the config keys in the S3Secret.
// This must be an external repository for backup / restore / regional failovers.
repoName: "repo2"
repoName: BucketRepoName
options: ["--type=full", ...]
}
// Defines details for performing an in-place restore using pgBackRest
restore: {
// Enables triggering a restore by annotating the postgrescluster with postgres-operator.crunchydata.com/pgbackrest-restore="$(date)"
enabled: true
repoName: BucketRepoName
}
@@ -105,7 +111,11 @@ let ZitadelAdmin = "\(_DBName)-admin"
// Full backup weekly on Sunday at 1am, differential daily at 1am every day except Sunday.
schedules: full: string | *"0 1 * * 0"
schedules: differential: string | *"0 1 * * 1-6"
s3: dataSource.pgbackrest.repo.s3
s3: {
bucket: string | *"\(#Platform.org.name)-zitadel-backups"
region: string | *#Backups.s3.region
endpoint: string | *"s3.dualstack.\(region).amazonaws.com"
}
},
]
}

View File

@@ -10,7 +10,8 @@ let Name = "zitadel"
#Kustomization: spec: targetNamespace: #TargetNamespace
#HelmChart & {
namespace: #TargetNamespace
namespace: #TargetNamespace
enableHooks: true
chart: {
name: Name
version: "7.9.0"

View File

@@ -0,0 +1,22 @@
package holos
#TargetNamespace: "default"
#InputKeys: {
project: "secrets"
component: "namespaces"
}
#KubernetesObjects & {
apiObjects: {
// #ManagedNamespaces is the set of all namespaces across all clusters in the platform.
for k, ns in #ManagedNamespaces {
Namespace: "\(ns.name)": #Namespace & {metadata: ns}
}
// #PlatformNamespaces is deprecated in favor of #ManagedNamespaces.
for ns in #PlatformNamespaces {
Namespace: "\(ns.name)": #Namespace & {metadata: ns}
}
}
}

View File

@@ -1,7 +1,8 @@
package holos
// The primary istio Gateway, named default
import "list"
// The primary istio Gateway, named default
let Name = "gateway"
#InputKeys: component: Name
@@ -31,5 +32,21 @@ let LoginCert = #PlatformCerts.login
},
]
}
for k, svc in #OptionalServices {
if svc.enabled {
if list.Contains(svc.clusterNames, #ClusterName) {
Gateway: "\(svc.name)": #Gateway & {
metadata: name: svc.name
metadata: namespace: #TargetNamespace
spec: selector: istio: "ingressgateway"
spec: servers: [for s in svc.servers {s}]
}
for k, s in svc.servers {
ExternalSecret: "\(s.tls.credentialName)": _
}
}
}
}
}
}

View File

@@ -30,5 +30,10 @@ package holos
"\(Kind)": "\(NS)/\(Name)": obj
}
}
for k, ns in #ManagedNamespaces {
let obj = #SecretStore & {_namespace: ns.name}
SecretStore: "\(ns.name)/\(obj.metadata.name)": obj
}
}
}

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,146 @@
package holos
#Values: {
// Vault Helm Chart Holos Values
global: {
enabled: true
// Istio handles this
tlsDisable: true
}
injector: enabled: false
server: {
image: {
// repository: "hashicorp/vault"
repository: "quay.io/holos/hashicorp/vault"
tag: "1.14.10"
// Overrides the default Image Pull Policy
pullPolicy: "IfNotPresent"
}
extraLabels: "sidecar.istio.io/inject": "true"
resources: requests: {
memory: "256Mi"
cpu: "2000m"
}
// limits:
// memory: 1024Mi
// cpu: 2000m
// For HA configuration and because we need to manually init the vault,
// we need to define custom readiness/liveness Probe settings
readinessProbe: {
enabled: true
path: "/v1/sys/health?standbyok=true&sealedcode=204&uninitcode=204"
}
livenessProbe: {
enabled: true
path: "/v1/sys/health?standbyok=true"
initialDelaySeconds: 60
}
// extraEnvironmentVars is a list of extra environment variables to set with
// the stateful set. These could be used to include variables required for
// auto-unseal.
// Vault validates an incomplete chain:
// https://github.com/hashicorp/vault/issues/11318
extraEnvironmentVars: {
GOMAXPROCS: "2"
} // Set to cpu limit, see https://github.com/uber-go/automaxprocs
// extraVolumes is a list of extra volumes to mount. These will be exposed
// to Vault in the path `/vault/userconfig/<name>/`.
extraVolumes: [{
type: "secret"
name: "gcpkms-creds"
}]
// This configures the Vault Statefulset to create a PVC for audit logs.
// See https://www.vaultproject.io/docs/audit/index.html to know more
auditStorage: {
enabled: true
mountPath: "/var/log/vault"
} // for compatibility with plain debian vm location.
standalone: {
enabled: false
}
ha: {
enabled: true
replicas: 3
raft: {
enabled: true
setNodeId: true
config: """
ui = true
listener \"tcp\" {
address = \"[::]:8200\"
cluster_address = \"[::]:8201\"
# mTLS is handled by the the istio sidecar
tls_disable = \"true\"
# Enable unauthenticated metrics access (necessary for Prometheus Operator)
telemetry {
unauthenticated_metrics_access = true
}
}
telemetry {
prometheus_retention_time = \"30s\"
disable_hostname = true
}
seal \"gcpckms\" {
credentials = \"/vault/userconfig/gcpkms-creds/credentials.json\"
project = \"v6-vault-f15f\"
region = \"us-west1\"
key_ring = \"vault-core\"
crypto_key = \"vault-core-unseal\"
}
# Note; the retry_join leader_api_address values come from the Stable
# Network ID feature of a Statefulset. See:
# https://kubernetes.io/docs/concepts/workloads/controllers/statefulset/#stable-network-id
storage \"raft\" {
path = \"/vault/data\"
retry_join {
leader_api_addr = \"http://vault-0.vault-internal:8200\"
leader_tls_servername = \"vault\"
}
retry_join {
leader_api_addr = \"http://vault-1.vault-internal:8200\"
leader_tls_servername = \"vault\"
}
retry_join {
leader_api_addr = \"http://vault-2.vault-internal:8200\"
leader_tls_servername = \"vault\"
}
autopilot {
cleanup_dead_servers = \"true\"
last_contact_threshold = \"200ms\"
last_contact_failure_threshold = \"10m\"
max_trailing_logs = 250000
min_quorum = 3
server_stabilization_time = \"10s\"
}
}
service_registration \"kubernetes\" {}
"""
// Vault UI (Will be exposed via the service mesh)
} // Vault UI (Will be exposed via the service mesh)
} // Vault UI (Will be exposed via the service mesh)
} // Vault UI (Will be exposed via the service mesh)// Vault UI (Will be exposed via the service mesh)
ui: {
enabled: true
serviceType: "ClusterIP"
serviceNodePort: null
externalPort: 8200
}
}

View File

@@ -0,0 +1,77 @@
package holos
import "encoding/yaml"
import "list"
let Name = "vault"
#InputKeys: component: Name
#InputKeys: project: "core"
#TargetNamespace: "\(#InstancePrefix)-\(Name)"
let Vault = #OptionalServices[Name]
if Vault.enabled {
if list.Contains(Vault.clusterNames, #ClusterName) {
#HelmChart & {
namespace: #TargetNamespace
chart: {
name: Name
version: "0.25.0"
repository: {
name: "hashicorp"
url: "https://helm.releases.hashicorp.com"
}
}
values: #Values
apiObjects: {
ExternalSecret: "gcpkms-creds": _
ExternalSecret: "vault-server-cert": _
VirtualService: "\(Name)": {
metadata: name: Name
metadata: namespace: #TargetNamespace
spec: hosts: [for cert in Vault.certs {cert.spec.commonName}]
spec: gateways: ["istio-ingress/\(Name)"]
spec: http: [
{
route: [
{
destination: host: "\(Name)-active"
destination: port: number: 8200
},
]
},
]
}
}
}
#Kustomize: {
patches: [
{
target: {
group: "apps"
version: "v1"
kind: "StatefulSet"
name: Name
}
patch: yaml.Marshal(EnvPatch)
},
]
}
// EnvPatch is the list of patch operations marshalled to YAML for the
// StatefulSet patch in #Kustomize. The operations use the JSON Patch shape
// (op, path, value): the "test" op checks that env entry 4 of container 0 is
// named VAULT_ADDR, then the "replace" op overwrites the value of that entry.
// The index is positional, so the test guards against the chart reordering
// its environment variables.
let EnvPatch = [
{
op: "test"
path: "/spec/template/spec/containers/0/env/4/name"
value: "VAULT_ADDR"
},
{
op: "replace"
path: "/spec/template/spec/containers/0/env/4/value"
value: "http://$(VAULT_K8S_POD_NAME):8200"
},
]
}
}

View File

@@ -0,0 +1,13 @@
package holos
let Vault = #OptionalServices.vault
// When vault is enabled, emit every certificate declared on the vault optional
// service as an api object keyed by kind and then by name.
if Vault.enabled {
#KubernetesObjects & {
apiObjects: {
for k, obj in Vault.certs {
"\(obj.kind)": "\(obj.metadata.name)": obj
}
}
}
}

View File

@@ -24,6 +24,14 @@ ksObjects: []
"\(Kind)": "\(ns.name)/\(Name)": obj
}
}
for k, ns in #ManagedNamespaces {
for obj in (#PlatformNamespaceObjects & {_ns: ns}).objects {
let Kind = obj.kind
let Name = obj.metadata.name
"\(Kind)": "\(ns.name)/\(Name)": obj
}
}
}
}

View File

@@ -7,25 +7,16 @@ package holos
component: "namespaces"
}
// #PlatformNamespaceObjects defines the api objects necessary for eso SecretStores in external clusters to access secrets in a given namespace in the provisioner cluster.
#PlatformNamespaceObjects: {
_ns: #PlatformNamespace
objects: [
#Namespace & {
metadata: name: _ns.name
},
]
}
#KubernetesObjects & {
apiObjects: {
// #ManagedNamespaces is the set of all namespaces across all clusters in the platform.
for k, ns in #ManagedNamespaces {
Namespace: "\(ns.name)": #Namespace & {metadata: ns}
}
// #PlatformNamespaces is deprecated in favor of #ManagedNamespaces.
for ns in #PlatformNamespaces {
for obj in (#PlatformNamespaceObjects & {_ns: ns}).objects {
let Kind = obj.kind
let Name = obj.metadata.name
"\(Kind)": "\(Name)": obj
}
Namespace: "\(ns.name)": #Namespace & {metadata: ns}
}
}
}

View File

@@ -1,32 +0,0 @@
package holos
#TargetNamespace: "default"
#InputKeys: {
project: "secrets"
component: "namespaces"
}
// #PlatformNamespaceObjects defines the api objects necessary for eso SecretStores in external clusters to access secrets in a given namespace in the provisioner cluster.
#PlatformNamespaceObjects: {
_ns: #PlatformNamespace
objects: [
#Namespace & {
metadata: _ns
},
]
}
#KubernetesObjects & {
apiObjects: {
for ns in #PlatformNamespaces {
for obj in (#PlatformNamespaceObjects & {_ns: ns}).objects {
let Kind = obj.kind
let NS = ns.name
let Name = obj.metadata.name
"\(Kind)": "\(NS)/\(Name)": obj
}
}
}
}

View File

@@ -12,7 +12,7 @@ let Privileged = {
// #PlatformNamespaces is the union of all namespaces across all cluster types. Namespaces are created in all clusters regardless of if they're
// used within the cluster or not. This is important for security and consistency with IAM, RBAC, and Secrets sync between clusters.
// Holos adopts the namespace sameness position of SIG Multicluster, refer to https://github.com/kubernetes/community/blob/dd4c8b704ef1c9c3bfd928c6fa9234276d61ad18/sig-multicluster/namespace-sameness-position-statement.md
// TODO: Deprecate in favor of #ManagedNamespaces because it is better to add fields to an object instead of adding items to a list.
#PlatformNamespaces: [
{name: "external-secrets"},
{name: "holos-system"},

View File

@@ -285,6 +285,21 @@ _apiVersion: "holos.run/v1alpha1"
}
}
// ManagedNamespace is a namespace to manage across all clusters in the holos platform.
#ManagedNamespace: {
// TODO metadata labels and annotations
name: string
labels: [string]: string
}
// #ManagedNamespaces is the union of all namespaces across all cluster types and optional services.
// Holos adopts the namespace sameness position of SIG Multicluster, refer to https://github.com/kubernetes/community/blob/dd4c8b704ef1c9c3bfd928c6fa9234276d61ad18/sig-multicluster/namespace-sameness-position-statement.md
#ManagedNamespaces: {
[Name=_]: {
name: Name
}
}
// #Backups defines backup configuration.
// TODO: Consider the best place for this, possibly as part of the site platform config. This represents the primary location for backups.
#Backups: {
@@ -393,6 +408,8 @@ _apiVersion: "holos.run/v1alpha1"
resourcesFile: ResourcesFile
// kustomizeFiles represents the files in a kustomize directory tree.
kustomizeFiles: #KustomizeFiles.Files
// enableHooks removes the --no-hooks flag from helm template
enableHooks: true | *false
}
// #KustomizeBuild is a holos component that uses plain yaml files as the source of api objects for a holos component.

View File

@@ -121,6 +121,7 @@ type HelmChart struct {
Namespace string `json:"namespace"`
Chart Chart `json:"chart"`
ValuesContent string `json:"valuesContent"`
EnableHooks bool `json:"enableHooks"`
// APIObjectMap holds the marshalled representation of api objects.
APIObjectMap apiObjectMap `json:"APIObjectMap"`
}
@@ -445,7 +446,12 @@ func runHelm(ctx context.Context, hc *HelmChart, r *Result, path holos.PathCompo
// Run charts
chart := hc.Chart
helmOut, err := util.RunCmd(ctx, "helm", "template", "--include-crds", "--values", valuesPath, "--namespace", hc.Namespace, "--kubeconfig", "/dev/null", "--version", chart.Version, chart.Release, cachedChartPath)
args := []string{"template"}
if !hc.EnableHooks {
args = append(args, "--no-hooks")
}
args = append(args, "--include-crds", "--values", valuesPath, "--namespace", hc.Namespace, "--kubeconfig", "/dev/null", "--version", chart.Version, chart.Release, cachedChartPath)
helmOut, err := util.RunCmd(ctx, "helm", args...)
if err != nil {
stderr := helmOut.Stderr.String()
lines := strings.Split(stderr, "\n")

View File

@@ -1 +1 @@
55
57

View File

@@ -1 +1 @@
4
0