mirror of
https://github.com/holos-run/holos.git
synced 2026-03-19 08:44:58 +00:00
Compare commits
5 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
d64c3e8c66 | ||
|
|
f344f97374 | ||
|
|
770088b912 | ||
|
|
cb9b39c3ca | ||
|
|
0f34b20546 |
39
docs/examples/optionalservices.cue
Normal file
39
docs/examples/optionalservices.cue
Normal file
@@ -0,0 +1,39 @@
|
||||
package holos
|
||||
|
||||
// Controls optional feature flags for services distributed across multiple holos components.
|
||||
// For example, enable issuing certificates in the provisioner cluster when an optional service is
|
||||
// enabled for a workload cluster.
|
||||
|
||||
#OptionalService: {
|
||||
name: string
|
||||
enabled: true | *false
|
||||
clusters: [Name=_]: #Platform.clusters[Name]
|
||||
clusterNames: [for k, v in clusters {k}]
|
||||
namespaces: [Name=_]: #ManagedNamespace & {
|
||||
name: Name
|
||||
}
|
||||
// servers represents istio Gateway.spec.servers.hosts entries
|
||||
// Refer to istio/gateway/gateway.cue
|
||||
servers: [Name=_]: {
|
||||
hosts: [...string]
|
||||
port: name: Name
|
||||
port: number: 443
|
||||
port: protocol: "HTTPS"
|
||||
tls: credentialName: string
|
||||
tls: mode: "SIMPLE"
|
||||
}
|
||||
// public tls certs should align to hosts.
|
||||
certs: [Name=_]: #Certificate & {
|
||||
metadata: name: Name
|
||||
}
|
||||
}
|
||||
|
||||
#OptionalServices: {
|
||||
[Name=_]: #OptionalService & {
|
||||
name: Name
|
||||
}
|
||||
}
|
||||
|
||||
for k, v in #OptionalServices {
|
||||
#ManagedNamespaces: v.namespaces
|
||||
}
|
||||
54
docs/examples/platforms/optional.site.cue
Normal file
54
docs/examples/platforms/optional.site.cue
Normal file
@@ -0,0 +1,54 @@
|
||||
package holos
|
||||
|
||||
let CoreDomain = "core.\(#Platform.org.domain)"
|
||||
let TargetNamespace = "prod-core-vault"
|
||||
|
||||
#OptionalServices: {
|
||||
vault: {
|
||||
enabled: true
|
||||
clusters: core1: _
|
||||
clusters: core2: _
|
||||
namespaces: "prod-core-vault": labels: "istio-injection": "enabled"
|
||||
certs: "vault-core": #Certificate & {
|
||||
metadata: name: "vault-core"
|
||||
metadata: namespace: "istio-ingress"
|
||||
spec: {
|
||||
commonName: "vault.\(CoreDomain)"
|
||||
dnsNames: [commonName]
|
||||
secretName: metadata.name
|
||||
issuerRef: kind: "ClusterIssuer"
|
||||
issuerRef: name: string | *"letsencrypt"
|
||||
}
|
||||
}
|
||||
servers: "https-vault-core": {
|
||||
hosts: ["\(TargetNamespace)/vault.\(CoreDomain)"]
|
||||
tls: credentialName: certs."vault-core".spec.secretName
|
||||
}
|
||||
for k, v in clusters {
|
||||
let obj = (Cert & {Name: "vault-core", Cluster: v.name}).APIObject
|
||||
certs: "\(obj.metadata.name)": obj
|
||||
servers: "https-\(obj.metadata.name)": {
|
||||
hosts: [for host in obj.spec.dnsNames {"\(TargetNamespace)/\(host)"}]
|
||||
tls: credentialName: obj.spec.secretName
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Cert provisions a cluster specific certificate.
|
||||
let Cert = {
|
||||
Name: string
|
||||
Cluster: string
|
||||
|
||||
APIObject: #Certificate & {
|
||||
metadata: name: "\(Cluster)-\(Name)"
|
||||
metadata: namespace: string | *"istio-ingress"
|
||||
spec: {
|
||||
commonName: string | *"vault.\(Cluster).\(CoreDomain)"
|
||||
dnsNames: [commonName]
|
||||
secretName: metadata.name
|
||||
issuerRef: kind: "ClusterIssuer"
|
||||
issuerRef: name: string | *"letsencrypt"
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -13,7 +13,10 @@ let BucketRepoName = "repo2"
|
||||
|
||||
// Restore options. Set the timestamp to a known good point in time.
|
||||
// time="2024-03-11T17:08:58Z" level=info msg="crunchy-pgbackrest ends"
|
||||
let RestoreOptions = ["--type=time", "--target=\"2024-03-11 17:10:00+00\""]
|
||||
// let RestoreOptions = ["--type=time", "--target=\"2024-03-11 17:10:00+00\""]
|
||||
|
||||
// Restore the most recent backup.
|
||||
let RestoreOptions = []
|
||||
|
||||
#KubernetesObjects & {
|
||||
apiObjects: {
|
||||
@@ -40,7 +43,7 @@ let RestoreOptions = ["--type=time", "--target=\"2024-03-11 17:10:00+00\""]
|
||||
replicas: 2
|
||||
dataVolumeClaimSpec: {
|
||||
accessModes: ["ReadWriteOnce"]
|
||||
resources: requests: storage: string | *"1Gi"
|
||||
resources: requests: storage: "10Gi"
|
||||
}
|
||||
}]
|
||||
standby: {
|
||||
@@ -103,7 +106,7 @@ let RestoreOptions = ["--type=time", "--target=\"2024-03-11 17:10:00+00\""]
|
||||
name: "repo1"
|
||||
volume: volumeClaimSpec: {
|
||||
accessModes: ["ReadWriteOnce"]
|
||||
resources: requests: storage: string | *"1Gi"
|
||||
resources: requests: storage: string | *"4Gi"
|
||||
}
|
||||
},
|
||||
{
|
||||
@@ -137,7 +140,7 @@ let HighlyAvailable = {
|
||||
replicas: 2
|
||||
dataVolumeClaimSpec: {
|
||||
accessModes: ["ReadWriteOnce"]
|
||||
resources: requests: storage: "1Gi"
|
||||
resources: requests: storage: string | *"10Gi"
|
||||
}
|
||||
affinity: podAntiAffinity: preferredDuringSchedulingIgnoredDuringExecution: [{
|
||||
weight: 1
|
||||
|
||||
@@ -10,7 +10,8 @@ let Name = "zitadel"
|
||||
#Kustomization: spec: targetNamespace: #TargetNamespace
|
||||
|
||||
#HelmChart & {
|
||||
namespace: #TargetNamespace
|
||||
namespace: #TargetNamespace
|
||||
enableHooks: true
|
||||
chart: {
|
||||
name: Name
|
||||
version: "7.9.0"
|
||||
|
||||
@@ -7,26 +7,16 @@ package holos
|
||||
component: "namespaces"
|
||||
}
|
||||
|
||||
// #PlatformNamespaceObjects defines the api objects necessary for eso SecretStores in external clusters to access secrets in a given namespace in the provisioner cluster.
|
||||
#PlatformNamespaceObjects: {
|
||||
_ns: #PlatformNamespace
|
||||
|
||||
objects: [
|
||||
#Namespace & {
|
||||
metadata: _ns
|
||||
},
|
||||
]
|
||||
}
|
||||
|
||||
#KubernetesObjects & {
|
||||
apiObjects: {
|
||||
// #ManagedNamespaces is the set of all namespaces across all clusters in the platform.
|
||||
for k, ns in #ManagedNamespaces {
|
||||
Namespace: "\(ns.name)": #Namespace & {metadata: ns}
|
||||
}
|
||||
|
||||
// #PlatformNamespaces is deprecated in favor of #ManagedNamespaces.
|
||||
for ns in #PlatformNamespaces {
|
||||
for obj in (#PlatformNamespaceObjects & {_ns: ns}).objects {
|
||||
let Kind = obj.kind
|
||||
let NS = ns.name
|
||||
let Name = obj.metadata.name
|
||||
"\(Kind)": "\(NS)/\(Name)": obj
|
||||
}
|
||||
Namespace: "\(ns.name)": #Namespace & {metadata: ns}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1,7 +1,8 @@
|
||||
package holos
|
||||
|
||||
// The primary istio Gateway, named default
|
||||
import "list"
|
||||
|
||||
// The primary istio Gateway, named default
|
||||
let Name = "gateway"
|
||||
|
||||
#InputKeys: component: Name
|
||||
@@ -31,5 +32,19 @@ let LoginCert = #PlatformCerts.login
|
||||
},
|
||||
]
|
||||
}
|
||||
|
||||
for k, svc in #OptionalServices {
|
||||
if svc.enabled && list.Contains(svc.clusterNames, #ClusterName) {
|
||||
Gateway: "\(svc.name)": #Gateway & {
|
||||
metadata: name: svc.name
|
||||
metadata: namespace: #TargetNamespace
|
||||
spec: selector: istio: "ingressgateway"
|
||||
spec: servers: [for s in svc.servers {s}]
|
||||
}
|
||||
for k, s in svc.servers {
|
||||
ExternalSecret: "\(s.tls.credentialName)": _
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -30,5 +30,10 @@ package holos
|
||||
"\(Kind)": "\(NS)/\(Name)": obj
|
||||
}
|
||||
}
|
||||
|
||||
for k, ns in #ManagedNamespaces {
|
||||
let obj = #SecretStore & {_namespace: ns.name}
|
||||
SecretStore: "\(ns.name)/\(obj.metadata.name)": obj
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
1262
docs/examples/platforms/reference/clusters/optional/vault/values.cue
Normal file
1262
docs/examples/platforms/reference/clusters/optional/vault/values.cue
Normal file
File diff suppressed because it is too large
Load Diff
@@ -0,0 +1,146 @@
|
||||
package holos
|
||||
|
||||
#Values: {
|
||||
|
||||
// Vault Helm Chart Holos Values
|
||||
global: {
|
||||
enabled: true
|
||||
// Istio handles this
|
||||
tlsDisable: true
|
||||
}
|
||||
|
||||
injector: enabled: false
|
||||
|
||||
server: {
|
||||
image: {
|
||||
// repository: "hashicorp/vault"
|
||||
repository: "quay.io/holos/hashicorp/vault"
|
||||
tag: "1.14.10"
|
||||
// Overrides the default Image Pull Policy
|
||||
pullPolicy: "IfNotPresent"
|
||||
}
|
||||
|
||||
extraLabels: "sidecar.istio.io/inject": "true"
|
||||
|
||||
resources: requests: {
|
||||
memory: "256Mi"
|
||||
cpu: "2000m"
|
||||
}
|
||||
// limits:
|
||||
// memory: 1024Mi
|
||||
// cpu: 2000m
|
||||
// For HA configuration and because we need to manually init the vault,
|
||||
// we need to define custom readiness/liveness Probe settings
|
||||
readinessProbe: {
|
||||
enabled: true
|
||||
path: "/v1/sys/health?standbyok=true&sealedcode=204&uninitcode=204"
|
||||
}
|
||||
livenessProbe: {
|
||||
enabled: true
|
||||
path: "/v1/sys/health?standbyok=true"
|
||||
initialDelaySeconds: 60
|
||||
}
|
||||
|
||||
// extraEnvironmentVars is a list of extra environment variables to set with
|
||||
// the stateful set. These could be used to include variables required for
|
||||
// auto-unseal.
|
||||
// Vault validates an incomplete chain:
|
||||
// https://github.com/hashicorp/vault/issues/11318
|
||||
extraEnvironmentVars: {
|
||||
GOMAXPROCS: "2"
|
||||
} // Set to cpu limit, see https://github.com/uber-go/automaxprocs
|
||||
// extraVolumes is a list of extra volumes to mount. These will be exposed
|
||||
// to Vault in the path `/vault/userconfig/<name>/`.
|
||||
extraVolumes: [{
|
||||
type: "secret"
|
||||
name: "gcpkms-creds"
|
||||
}]
|
||||
|
||||
// This configures the Vault Statefulset to create a PVC for audit logs.
|
||||
// See https://www.vaultproject.io/docs/audit/index.html to know more
|
||||
auditStorage: {
|
||||
enabled: true
|
||||
mountPath: "/var/log/vault"
|
||||
} // for compatibility with plain debian vm location.
|
||||
|
||||
standalone: {
|
||||
enabled: false
|
||||
}
|
||||
|
||||
ha: {
|
||||
enabled: true
|
||||
replicas: 3
|
||||
raft: {
|
||||
enabled: true
|
||||
setNodeId: true
|
||||
|
||||
config: """
|
||||
ui = true
|
||||
listener \"tcp\" {
|
||||
address = \"[::]:8200\"
|
||||
cluster_address = \"[::]:8201\"
|
||||
              # mTLS is handled by the istio sidecar
|
||||
tls_disable = \"true\"
|
||||
# Enable unauthenticated metrics access (necessary for Prometheus Operator)
|
||||
telemetry {
|
||||
unauthenticated_metrics_access = true
|
||||
}
|
||||
}
|
||||
|
||||
telemetry {
|
||||
prometheus_retention_time = \"30s\"
|
||||
disable_hostname = true
|
||||
}
|
||||
|
||||
seal \"gcpckms\" {
|
||||
credentials = \"/vault/userconfig/gcpkms-creds/credentials.json\"
|
||||
project = \"v6-vault-f15f\"
|
||||
region = \"us-west1\"
|
||||
key_ring = \"vault-core\"
|
||||
crypto_key = \"vault-core-unseal\"
|
||||
}
|
||||
|
||||
# Note; the retry_join leader_api_address values come from the Stable
|
||||
# Network ID feature of a Statefulset. See:
|
||||
# https://kubernetes.io/docs/concepts/workloads/controllers/statefulset/#stable-network-id
|
||||
storage \"raft\" {
|
||||
path = \"/vault/data\"
|
||||
retry_join {
|
||||
leader_api_addr = \"http://vault-0.vault-internal:8200\"
|
||||
leader_tls_servername = \"vault\"
|
||||
}
|
||||
retry_join {
|
||||
leader_api_addr = \"http://vault-1.vault-internal:8200\"
|
||||
leader_tls_servername = \"vault\"
|
||||
}
|
||||
retry_join {
|
||||
leader_api_addr = \"http://vault-2.vault-internal:8200\"
|
||||
leader_tls_servername = \"vault\"
|
||||
}
|
||||
|
||||
autopilot {
|
||||
cleanup_dead_servers = \"true\"
|
||||
last_contact_threshold = \"200ms\"
|
||||
last_contact_failure_threshold = \"10m\"
|
||||
max_trailing_logs = 250000
|
||||
min_quorum = 3
|
||||
server_stabilization_time = \"10s\"
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
service_registration \"kubernetes\" {}
|
||||
|
||||
"""
|
||||
|
||||
// Vault UI (Will be exposed via the service mesh)
|
||||
} // Vault UI (Will be exposed via the service mesh)
|
||||
} // Vault UI (Will be exposed via the service mesh)
|
||||
} // Vault UI (Will be exposed via the service mesh)// Vault UI (Will be exposed via the service mesh)
|
||||
ui: {
|
||||
enabled: true
|
||||
serviceType: "ClusterIP"
|
||||
serviceNodePort: null
|
||||
externalPort: 8200
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,75 @@
|
||||
package holos
|
||||
|
||||
import "encoding/yaml"
|
||||
|
||||
import "list"
|
||||
|
||||
let Name = "vault"
|
||||
#InputKeys: component: Name
|
||||
#InputKeys: project: "core"
|
||||
#TargetNamespace: "\(#InstancePrefix)-\(Name)"
|
||||
|
||||
let Vault = #OptionalServices[Name]
|
||||
|
||||
if Vault.enabled && list.Contains(Vault.clusterNames, #ClusterName) {
|
||||
#HelmChart & {
|
||||
namespace: #TargetNamespace
|
||||
chart: {
|
||||
name: Name
|
||||
version: "0.25.0"
|
||||
repository: {
|
||||
name: "hashicorp"
|
||||
url: "https://helm.releases.hashicorp.com"
|
||||
}
|
||||
}
|
||||
values: #Values
|
||||
|
||||
apiObjects: {
|
||||
ExternalSecret: "gcpkms-creds": _
|
||||
ExternalSecret: "vault-server-cert": _
|
||||
VirtualService: "\(Name)": {
|
||||
metadata: name: Name
|
||||
metadata: namespace: #TargetNamespace
|
||||
spec: hosts: [for cert in Vault.certs {cert.spec.commonName}]
|
||||
spec: gateways: ["istio-ingress/\(Name)"]
|
||||
spec: http: [
|
||||
{
|
||||
route: [
|
||||
{
|
||||
destination: host: "\(Name)-active"
|
||||
destination: port: number: 8200
|
||||
},
|
||||
]
|
||||
},
|
||||
]
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#Kustomize: {
|
||||
patches: [
|
||||
{
|
||||
target: {
|
||||
group: "apps"
|
||||
version: "v1"
|
||||
kind: "StatefulSet"
|
||||
name: Name
|
||||
}
|
||||
patch: yaml.Marshal(EnvPatch)
|
||||
},
|
||||
]
|
||||
}
|
||||
|
||||
let EnvPatch = [
|
||||
{
|
||||
op: "test"
|
||||
path: "/spec/template/spec/containers/0/env/4/name"
|
||||
value: "VAULT_ADDR"
|
||||
},
|
||||
{
|
||||
op: "replace"
|
||||
path: "/spec/template/spec/containers/0/env/4/value"
|
||||
value: "http://$(VAULT_K8S_POD_NAME):8200"
|
||||
},
|
||||
]
|
||||
}
|
||||
@@ -0,0 +1,13 @@
|
||||
package holos
|
||||
|
||||
let Vault = #OptionalServices.vault
|
||||
|
||||
if Vault.enabled {
|
||||
#KubernetesObjects & {
|
||||
apiObjects: {
|
||||
for k, obj in Vault.certs {
|
||||
"\(obj.kind)": "\(obj.metadata.name)": obj
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -24,6 +24,14 @@ ksObjects: []
|
||||
"\(Kind)": "\(ns.name)/\(Name)": obj
|
||||
}
|
||||
}
|
||||
|
||||
for k, ns in #ManagedNamespaces {
|
||||
for obj in (#PlatformNamespaceObjects & {_ns: ns}).objects {
|
||||
let Kind = obj.kind
|
||||
let Name = obj.metadata.name
|
||||
"\(Kind)": "\(ns.name)/\(Name)": obj
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -7,25 +7,16 @@ package holos
|
||||
component: "namespaces"
|
||||
}
|
||||
|
||||
// #PlatformNamespaceObjects defines the api objects necessary for eso SecretStores in external clusters to access secrets in a given namespace in the provisioner cluster.
|
||||
#PlatformNamespaceObjects: {
|
||||
_ns: #PlatformNamespace
|
||||
|
||||
objects: [
|
||||
#Namespace & {
|
||||
metadata: name: _ns.name
|
||||
},
|
||||
]
|
||||
}
|
||||
|
||||
#KubernetesObjects & {
|
||||
apiObjects: {
|
||||
// #ManagedNamespaces is the set of all namespaces across all clusters in the platform.
|
||||
for k, ns in #ManagedNamespaces {
|
||||
Namespace: "\(ns.name)": #Namespace & {metadata: ns}
|
||||
}
|
||||
|
||||
// #PlatformNamespaces is deprecated in favor of #ManagedNamespaces.
|
||||
for ns in #PlatformNamespaces {
|
||||
for obj in (#PlatformNamespaceObjects & {_ns: ns}).objects {
|
||||
let Kind = obj.kind
|
||||
let Name = obj.metadata.name
|
||||
"\(Kind)": "\(Name)": obj
|
||||
}
|
||||
Namespace: "\(ns.name)": #Namespace & {metadata: ns}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -12,7 +12,7 @@ let Privileged = {
|
||||
|
||||
// #PlatformNamespaces is the union of all namespaces across all cluster types. Namespaces are created in all clusters regardless of if they're
|
||||
// used within the cluster or not. This is important for security and consistency with IAM, RBAC, and Secrets sync between clusters.
|
||||
// Holos adopts the namespace sameness position of SIG Multicluster, refer to https://github.com/kubernetes/community/blob/dd4c8b704ef1c9c3bfd928c6fa9234276d61ad18/sig-multicluster/namespace-sameness-position-statement.md
|
||||
// TODO: Deprecate in favor of #ManagedNamespaces because it is better to add fields to an object instead of adding items to a list.
|
||||
#PlatformNamespaces: [
|
||||
{name: "external-secrets"},
|
||||
{name: "holos-system"},
|
||||
|
||||
@@ -285,6 +285,21 @@ _apiVersion: "holos.run/v1alpha1"
|
||||
}
|
||||
}
|
||||
|
||||
// ManagedNamespace is a namespace to manage across all clusters in the holos platform.
|
||||
#ManagedNamespace: {
|
||||
// TODO metadata labels and annotations
|
||||
name: string
|
||||
labels: [string]: string
|
||||
}
|
||||
|
||||
// #ManagedNamespaces is the union of all namespaces across all cluster types and optional services.
|
||||
// Holos adopts the namespace sameness position of SIG Multicluster, refer to https://github.com/kubernetes/community/blob/dd4c8b704ef1c9c3bfd928c6fa9234276d61ad18/sig-multicluster/namespace-sameness-position-statement.md
|
||||
#ManagedNamespaces: {
|
||||
[Name=_]: {
|
||||
name: Name
|
||||
}
|
||||
}
|
||||
|
||||
// #Backups defines backup configuration.
|
||||
// TODO: Consider the best place for this, possibly as part of the site platform config. This represents the primary location for backups.
|
||||
#Backups: {
|
||||
@@ -393,6 +408,8 @@ _apiVersion: "holos.run/v1alpha1"
|
||||
resourcesFile: ResourcesFile
|
||||
// kustomizeFiles represents the files in a kustomize directory tree.
|
||||
kustomizeFiles: #KustomizeFiles.Files
|
||||
// enableHooks removes the --no-hooks flag from helm template
|
||||
enableHooks: true | *false
|
||||
}
|
||||
|
||||
// #KustomizeBuild is a holos component that uses plain yaml files as the source of api objects for a holos component.
|
||||
|
||||
305
docs/runbooks/login/failover.md
Normal file
305
docs/runbooks/login/failover.md
Normal file
@@ -0,0 +1,305 @@
|
||||
## Overview
|
||||
|
||||
Use this run book to move the ZITADEL primary postgres database from one cluster to another. At the end of the process https://zitadel.example.com service will be live on the cluster which was previously the standby at the start of the process.
|
||||
|
||||
Downtime: ~10 minutes if executed quickly. ~60 minutes if executed slowly and deliberately.
|
||||
|
||||
## Initial State
|
||||
1. https://login.ois.run is routed to core2.
|
||||
2. `postgrescluster/zitadel` on core2 is primary.
|
||||
3. `postgrescluster/zitadel` on core1 is standby.
|
||||
4. A recent [[#Full Backup]] has been taken and is stored in S3.
|
||||
5. The standby cluster has been restored from the recent full backup.
|
||||
## Process
|
||||
1. [[#Edit Platform site config]]
|
||||
2. [[#Render core2]]
|
||||
3. [[#Demote core2]]
|
||||
4. [[#Verify core2 is standby]]
|
||||
5. [[#Render core1]]
|
||||
6. [[#Promote core1]]
|
||||
7. [[#Verify core1 is primary]]
|
||||
8. [[#Reconcile Zitadel]]
|
||||
9. [[#Cut over NLB]]
|
||||
10. [[#Test Login]]
|
||||
|
||||
## Edit Platform site config
|
||||
Change the primary cluster from core2 to core1 in `platforms/platform.site.cue`.
|
||||
```cue
|
||||
#Platform: primaryCluster: name: "core1"
|
||||
```
|
||||
## Render core2
|
||||
Configure core2 as a standby.
|
||||
```
|
||||
holos render --cluster-name=core2 ~/workspace/holos-run/holos/docs/examples/platforms/reference/clusters/accounts/...
|
||||
```
|
||||
|
||||
Expected change:
|
||||
```diff
|
||||
diff --git a/deploy/clusters/core2/components/prod-iam-postgres/prod-iam-postgres.gen.yaml b/deploy/clusters/core2/components/prod-iam-postgres/prod-iam-postgres.gen.yaml
|
||||
index c1aa8394..45daa650 100644
|
||||
--- a/deploy/clusters/core2/components/prod-iam-postgres/prod-iam-postgres.gen.yaml
|
||||
+++ b/deploy/clusters/core2/components/prod-iam-postgres/prod-iam-postgres.gen.yaml
|
||||
@@ -85,24 +85,6 @@ spec:
|
||||
name: zitadel-repl-tls
|
||||
customTLSSecret:
|
||||
name: zitadel-primary-tls
|
||||
- dataSource:
|
||||
- pgbackrest:
|
||||
- configuration:
|
||||
- - secret:
|
||||
- name: pgo-s3-creds
|
||||
- global:
|
||||
- repo2-path: /pgbackrest/prod-iam-zitadel/zitadel/repo2
|
||||
- repo2-cipher-type: aes-256-cbc
|
||||
- options:
|
||||
- - --type=time
|
||||
- - --target="2024-03-11 17:10:00+00"
|
||||
- repo:
|
||||
- name: repo2
|
||||
- s3:
|
||||
- bucket: ois-zitadel-backups
|
||||
- endpoint: s3.dualstack.us-east-2.amazonaws.com
|
||||
- region: us-east-2
|
||||
- stanza: db
|
||||
image: registry.developers.crunchydata.com/crunchydata/crunchy-postgres:ubi8-16.2-0
|
||||
instances:
|
||||
- name: pgha1
|
||||
@@ -139,7 +121,7 @@ spec:
|
||||
image: registry.developers.crunchydata.com/crunchydata/crunchy-pgbouncer:ubi8-1.21-3
|
||||
replicas: 2
|
||||
standby:
|
||||
- enabled: false
|
||||
+ enabled: true
|
||||
repoName: repo2
|
||||
users:
|
||||
- databases
|
||||
```
|
||||
## Demote core2
|
||||
|
||||
```
|
||||
10012 git add -p
|
||||
10013 git commit -m 'Make core2 a standby zitadel cluster'
|
||||
10014 git push origin HEAD
|
||||
10015 flux reconcile source git flux-system
|
||||
10016 flux get ks
|
||||
```
|
||||
|
||||
## Verify core2 is standby
|
||||
```
|
||||
k logs $(kubectl get pods -o name -l postgres-operator.crunchydata.com/role=master) | tail -2
|
||||
```
|
||||
Expected output:
|
||||
```
|
||||
2024-03-14 21:58:26,205 INFO: no action. I am (zitadel-pgha1-smw7-0), the standby leader with the lock
|
||||
```
|
||||
## Render core1
|
||||
```
|
||||
holos render --cluster-name=core1 ~/workspace/holos-run/holos/docs/examples/platforms/reference/clusters/accounts/...
|
||||
```
|
||||
Expected change:
|
||||
```diff
|
||||
diff --git a/deploy/clusters/core1/components/prod-iam-postgres/prod-iam-postgres.gen.yaml b/deploy/clusters/core1/components/prod-iam-postgres/prod-iam-postgres.gen.yaml
|
||||
index aa69814e..290c0865 100644
|
||||
--- a/deploy/clusters/core1/components/prod-iam-postgres/prod-iam-postgres.gen.yaml
|
||||
+++ b/deploy/clusters/core1/components/prod-iam-postgres/prod-iam-postgres.gen.yaml
|
||||
@@ -85,6 +85,22 @@ spec:
|
||||
name: zitadel-repl-tls
|
||||
customTLSSecret:
|
||||
name: zitadel-primary-tls
|
||||
+ dataSource:
|
||||
+ pgbackrest:
|
||||
+ configuration:
|
||||
+ - secret:
|
||||
+ name: pgo-s3-creds
|
||||
+ global:
|
||||
+ repo2-path: /pgbackrest/prod-iam-zitadel/zitadel/repo2
|
||||
+ repo2-cipher-type: aes-256-cbc
|
||||
+ options: []
|
||||
+ repo:
|
||||
+ name: repo2
|
||||
+ s3:
|
||||
+ bucket: ois-zitadel-backups
|
||||
+ endpoint: s3.dualstack.us-east-2.amazonaws.com
|
||||
+ region: us-east-2
|
||||
+ stanza: db
|
||||
image: registry.developers.crunchydata.com/crunchydata/crunchy-postgres:ubi8-16.2-0
|
||||
instances:
|
||||
- name: pgha1
|
||||
@@ -121,7 +137,7 @@ spec:
|
||||
image: registry.developers.crunchydata.com/crunchydata/crunchy-pgbouncer:ubi8-1.21-3
|
||||
replicas: 2
|
||||
standby:
|
||||
- enabled: true
|
||||
+ enabled: false
|
||||
repoName: repo2
|
||||
users:
|
||||
- databases:
|
||||
```
|
||||
|
||||
## Promote core1
|
||||
```
|
||||
flux reconcile source git flux-system
|
||||
flux reconcile ks prod-iam-postgres
|
||||
```
|
||||
## Verify core1 is primary
|
||||
```
|
||||
k logs $(kubectl get pods -o name -l postgres-operator.crunchydata.com/role=master) | tail -1
|
||||
```
|
||||
Expected output:
|
||||
```
|
||||
2024-03-14 22:05:01,159 INFO: no action. I am (zitadel-pgha1-xl2b-0), the leader with the lock
|
||||
```
|
||||
## Reconcile Zitadel
|
||||
Now that the database is back up, reconcile zitadel.
|
||||
This is pretty brutal but it works:
|
||||
```
|
||||
❯ flux delete ks prod-iam-zitadel
|
||||
Are you sure you want to delete this kustomization: y
|
||||
► deleting kustomization prod-iam-zitadel in flux-system namespace
|
||||
✔ kustomization deleted
|
||||
```
|
||||
```
|
||||
❯ k apply --server-side=true -f deploy/clusters/core1/holos/components/prod-iam-zitadel-kustomization.gen.yaml
|
||||
kustomization.kustomize.toolkit.fluxcd.io/prod-iam-zitadel serverside-applied
|
||||
```
|
||||
ZITADEL should come up READY 2/2
|
||||
```
|
||||
❯ k get pods
|
||||
NAME READY STATUS RESTARTS AGE
|
||||
zitadel-76776f747f-94wft 2/2 Running 2 (30s ago) 34s
|
||||
zitadel-76776f747f-fl24c 2/2 Running 1 (30s ago) 34s
|
||||
zitadel-76776f747f-xx2w7 2/2 Running 2 (30s ago) 34s
|
||||
zitadel-backup-49c7-h25cj 0/1 Completed 0 3m23s
|
||||
zitadel-init-td2sh 0/1 Completed 0 34s
|
||||
zitadel-pgbouncer-d9f8cffc-2vwk5 2/2 Running 0 22m
|
||||
zitadel-pgbouncer-d9f8cffc-rrrhd 2/2 Running 0 22m
|
||||
zitadel-pgha1-4npq-0 4/4 Running 0 3m21s
|
||||
zitadel-pgha1-xl2b-0 4/4 Running 0 3m43s
|
||||
zitadel-repo-host-0 2/2 Running 0 22m
|
||||
zitadel-setup-b7zkx 0/1 Completed 1 34s
|
||||
zitadel-test-connection 0/1 Error 0 34s
|
||||
```
|
||||
|
||||
## Cut over NLB
|
||||
Finally, cut over the NLB config to move to core1.
|
||||
This is specific to our metal clusters.
|
||||
Apply with: `git push puppet +HEAD:production -o run=netlb`
|
||||
```diff
|
||||
diff --git a/site-modules/profile/templates/haproxy/haproxy.cfg.epp b/site-modules/profile/templates/haproxy/haproxy.cfg.epp
|
||||
index b358a70..3ee40db 100644
|
||||
--- a/site-modules/profile/templates/haproxy/haproxy.cfg.epp
|
||||
+++ b/site-modules/profile/templates/haproxy/haproxy.cfg.epp
|
||||
@@ -225,7 +225,7 @@ frontend sni-ingress from tcp-ingress
|
||||
use_backend core2-https if { req.ssl_sni -i web.holos.run }
|
||||
use_backend core2-https if { req.ssl_sni -i -m end .web.holos.run }
|
||||
# Identity provider is routed to one cluster at a time
|
||||
- use_backend core2-https if { req.ssl_sni -i login.ois.run }
|
||||
+ use_backend core1-https if { req.ssl_sni -i login.ois.run }
|
||||
# Holos Dev Environments
|
||||
use_backend k1-https if { req.ssl_sni -i holos.wtf }
|
||||
use_backend k1-https if { req.ssl_sni -i -m end .holos.wtf }
|
||||
@@ -358,7 +358,7 @@ frontend http-ingress from http-ingress
|
||||
bind 65.102.23.41:80
|
||||
bind 2602:41:6617:2ec0::80:1d:80 v6only
|
||||
# Zitadel is active on one cluster at a time
|
||||
- use_backend core2-http if { hdr(host) -i login.ois.run }
|
||||
+ use_backend core1-http if { hdr(host) -i login.ois.run }
|
||||
# Vault is active on core1 or core2
|
||||
use_backend core2-http if { hdr(host) -i vault.core.ois.run }
|
||||
# Infrastructure Dex OIDC ID issuer is active on core1 or core2
|
||||
|
||||
```
|
||||
## Test Login
|
||||
```
|
||||
kubectl oidc-login get-token \
|
||||
--oidc-extra-scope=openid \
|
||||
--oidc-extra-scope=email \
|
||||
--oidc-extra-scope=profile \
|
||||
--oidc-extra-scope=groups \
|
||||
--oidc-extra-scope=offline_access \
|
||||
--oidc-extra-scope=urn:zitadel:iam:org:domain:primary:openinfrastructure.co \
|
||||
--oidc-issuer-url=https://login.ois.run \
|
||||
--oidc-client-id=${CLIENT_ID:-257714027772314751@holos_platform} \
|
||||
--oidc-use-pkce \
|
||||
--force-refresh > ~/.kube/token.json
|
||||
```
|
||||
|
||||
```
|
||||
jq -r .status.token ~/.kube/token.json | cut -d. -f2 | base64 -d | jq
|
||||
```
|
||||
|
||||
```json
|
||||
{
|
||||
"amr": [
|
||||
"mfa",
|
||||
"user"
|
||||
],
|
||||
"at_hash": "5nUq6kXWT8GheCNvDNdyow",
|
||||
"aud": [
|
||||
"257714027772314751@holos_platform",
|
||||
"257713952794870157"
|
||||
],
|
||||
"auth_time": 1710172186,
|
||||
"azp": "257714027772314751@holos_platform",
|
||||
"client_id": "257714027772314751@holos_platform",
|
||||
"email": "jeff@openinfrastructure.co",
|
||||
"email_verified": true,
|
||||
"exp": 1710497614,
|
||||
"family_name": "McCune",
|
||||
"given_name": "Jeff",
|
||||
"groups": [
|
||||
"prod-cluster-admin"
|
||||
],
|
||||
"iat": 1710454404,
|
||||
"iss": "https://login.ois.run",
|
||||
"locale": null,
|
||||
"name": "Jeff McCune",
|
||||
"nickname": "Jeff",
|
||||
"preferred_username": "jeff@openinfrastructure.co",
|
||||
"sub": "257712562366383231",
|
||||
"updated_at": 1710105084,
|
||||
"urn:zitadel:iam:org:domain:primary": "openinfrastructure.co",
|
||||
"urn:zitadel:iam:org:project:257713952794870157:roles": {
|
||||
"prod-cluster-admin": {
|
||||
"257712562366317695": "openinfrastructure.co"
|
||||
}
|
||||
},
|
||||
"urn:zitadel:iam:org:project:roles": {
|
||||
"prod-cluster-admin": {
|
||||
"257712562366317695": "openinfrastructure.co"
|
||||
}
|
||||
}
|
||||
}
|
||||
```
|
||||
## Tasks
|
||||
### Identify the leader
|
||||
Same command for primary and standby clusters.
|
||||
```
|
||||
k get pods -o name -l postgres-operator.crunchydata.com/role=master
|
||||
```
|
||||
### Full Backup
|
||||
On the primary cluster:
|
||||
```
|
||||
kubectl -n prod-iam-zitadel annotate postgrescluster zitadel postgres-operator.crunchydata.com/pgbackrest-backup="$(date)" --overwrite
|
||||
```
|
||||
Watch the progress:
|
||||
```
|
||||
k -n prod-iam-zitadel logs -l postgres-operator.crunchydata.com/pgbackrest-backup=manual -f
|
||||
```
|
||||
Expected output:
|
||||
```
|
||||
time="2024-03-14T18:36:16Z" level=info msg="crunchy-pgbackrest starts"
|
||||
time="2024-03-14T18:36:16Z" level=info msg="debug flag set to false"
|
||||
time="2024-03-14T18:36:16Z" level=info msg="backrest backup command requested"
|
||||
time="2024-03-14T18:36:16Z" level=info msg="command to execute is [pgbackrest backup --stanza=db --repo=2 --type=full]"
|
||||
time="2024-03-14T18:39:11Z" level=info msg="output=[]"
|
||||
time="2024-03-14T18:39:11Z" level=info msg="stderr=[]"
|
||||
time="2024-03-14T18:39:11Z" level=info msg="crunchy-pgbackrest ends"
|
||||
```
|
||||
|
||||
## Troubleshooting
|
||||
Full backup doesn't start
|
||||
Check the controller logs. If you see `Switchover failed` errors, try editing the number of postgrescluster replicas down to 1 then back up to 2. Then try a backup again.
|
||||
```
|
||||
Switchover failed, details: 503, Switchover failed\n" version=5.5.1-0-amd64
|
||||
```
|
||||
@@ -121,6 +121,7 @@ type HelmChart struct {
|
||||
Namespace string `json:"namespace"`
|
||||
Chart Chart `json:"chart"`
|
||||
ValuesContent string `json:"valuesContent"`
|
||||
EnableHooks bool `json:"enableHooks"`
|
||||
// APIObjectMap holds the marshalled representation of api objects.
|
||||
APIObjectMap apiObjectMap `json:"APIObjectMap"`
|
||||
}
|
||||
@@ -445,7 +446,12 @@ func runHelm(ctx context.Context, hc *HelmChart, r *Result, path holos.PathCompo
|
||||
|
||||
// Run charts
|
||||
chart := hc.Chart
|
||||
helmOut, err := util.RunCmd(ctx, "helm", "template", "--include-crds", "--values", valuesPath, "--namespace", hc.Namespace, "--kubeconfig", "/dev/null", "--version", chart.Version, chart.Release, cachedChartPath)
|
||||
args := []string{"template"}
|
||||
if !hc.EnableHooks {
|
||||
args = append(args, "--no-hooks")
|
||||
}
|
||||
args = append(args, "--include-crds", "--values", valuesPath, "--namespace", hc.Namespace, "--kubeconfig", "/dev/null", "--version", chart.Version, chart.Release, cachedChartPath)
|
||||
helmOut, err := util.RunCmd(ctx, "helm", args...)
|
||||
if err != nil {
|
||||
stderr := helmOut.Stderr.String()
|
||||
lines := strings.Split(stderr, "\n")
|
||||
|
||||
@@ -1 +1 @@
|
||||
56
|
||||
58
|
||||
|
||||
Reference in New Issue
Block a user