bootstrap controlplane

Serge Logvinov
2022-12-26 12:23:15 +02:00
parent a77e0242d2
commit 90c0d299c7
21 changed files with 890 additions and 613 deletions

View File

@@ -3,4 +3,4 @@ _cfgs/
templates/controlplane.yaml
*.patch
#
gcloud.json
*.json

View File

@@ -1,29 +1,51 @@
create-lb:
ENDPOINT:=${shell terraform output -raw controlplane_endpoint 2>/dev/null}
ifeq ($(ENDPOINT),)
ENDPOINT := 127.0.0.1
endif
help:
@awk 'BEGIN {FS = ":.*?## "} /^[0-9a-zA-Z_-]+:.*?## / {sub("\\\\n",sprintf("\n%22c"," "), $$2);printf "\033[36m%-30s\033[0m %s\n", $$1, $$2}' $(MAKEFILE_LIST)
create-network: ## Create networks
cd prepare && terraform init && terraform apply -auto-approve
create-lb: ## Create loadbalancer
terraform init
terraform apply -auto-approve -target=google_compute_address.api
create-config:
talosctl gen config --output-dir _cfgs --with-docs=false --with-examples=false talos-k8s-hezner https://127.0.0.1:6443
create-config: ## Generate talos configs
talosctl gen config --output-dir _cfgs --with-docs=false --with-examples=false talos-k8s-gcp https://127.0.0.1:6443
talosctl --talosconfig _cfgs/talosconfig config endpoint ${ENDPOINT}
create-templates:
@yq ea -P '. as $$item ireduce ({}; . * $$item )' _cfgs/controlplane.yaml templates/controlplane.yaml.tpl > templates/controlplane.yaml
@echo 'podSubnets: "10.32.0.0/12"' > _cfgs/tfstate.vars
@echo 'serviceSubnets: "10.200.0.0/22"' >> _cfgs/tfstate.vars
@yq eval '.cluster.network.dnsDomain' _cfgs/init.yaml | awk '{ print "domain: "$$1}' >> _cfgs/tfstate.vars
@yq eval '.cluster.clusterName' _cfgs/init.yaml | awk '{ print "cluster_name: "$$1}' >> _cfgs/tfstate.vars
@yq eval '.machine.token' _cfgs/init.yaml | awk '{ print "tokenmachine: "$$1}' >> _cfgs/tfstate.vars
@yq eval '.cluster.token' _cfgs/init.yaml | awk '{ print "token: "$$1}' >> _cfgs/tfstate.vars
@yq eval '.cluster.ca.crt' _cfgs/init.yaml | awk '{ print "ca: "$$1}' >> _cfgs/tfstate.vars
@echo 'podSubnets: "10.32.0.0/12,fd00:10:32::/102"' > _cfgs/tfstate.vars
@echo 'serviceSubnets: "10.200.0.0/22,fd40:10:200::/112"' >> _cfgs/tfstate.vars
@echo 'nodeSubnets: "172.16.0.0/12"' >> _cfgs/tfstate.vars
@echo 'apiDomain: api.cluster.local' >> _cfgs/tfstate.vars
@yq eval '.cluster.network.dnsDomain' _cfgs/controlplane.yaml | awk '{ print "domain: "$$1}' >> _cfgs/tfstate.vars
@yq eval '.cluster.clusterName' _cfgs/controlplane.yaml | awk '{ print "clusterName: "$$1}' >> _cfgs/tfstate.vars
@yq eval '.cluster.id' _cfgs/controlplane.yaml | awk '{ print "clusterID: "$$1}' >> _cfgs/tfstate.vars
@yq eval '.cluster.secret' _cfgs/controlplane.yaml | awk '{ print "clusterSecret: "$$1}'>> _cfgs/tfstate.vars
@yq eval '.machine.token' _cfgs/controlplane.yaml | awk '{ print "tokenMachine: "$$1}' >> _cfgs/tfstate.vars
@yq eval '.machine.ca.crt' _cfgs/controlplane.yaml | awk '{ print "caMachine: "$$1}' >> _cfgs/tfstate.vars
@yq eval '.cluster.token' _cfgs/controlplane.yaml | awk '{ print "token: "$$1}' >> _cfgs/tfstate.vars
@yq eval '.cluster.ca.crt' _cfgs/controlplane.yaml | awk '{ print "ca: "$$1}' >> _cfgs/tfstate.vars
@yq eval -j '{"kubernetes": .}' _cfgs/tfstate.vars > terraform.tfvars.json
@yq eval -o=json '{"kubernetes": .}' _cfgs/tfstate.vars > terraform.tfvars.json
create-controlplane:
create-controlplane-bootstrap:
talosctl --talosconfig _cfgs/talosconfig config endpoint ${ENDPOINT}
talosctl --talosconfig _cfgs/talosconfig --nodes ${ENDPOINT} bootstrap
create-controlplane: ## Bootstrap controlplane node
terraform apply -auto-approve -target=null_resource.controlplane
create-infrastructure:
cd modules/worker && terraform init
terraform apply
create-kubeconfig: ## Download kubeconfig
talosctl --talosconfig _cfgs/talosconfig --nodes ${ENDPOINT} kubeconfig .
kubectl --kubeconfig=kubeconfig config set clusters.talos-k8s-gcp.server https://${ENDPOINT}:6443
kubectl --kubeconfig=kubeconfig config set-context --current --namespace=kube-system
create-deployments:
helm template --namespace=kube-system --version=1.12.4 -f deployments/cilium.yaml cilium \

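Taken together, the new targets sketch an end-to-end bootstrap roughly like this (the ordering around the bootstrap step is inferred, not spelled out in the Makefile):

    make create-network                 # prepare/: VPC and subnets
    make create-lb                      # reserve the controlplane API address
    make create-config                  # talosctl gen config into _cfgs/
    make create-templates               # merge configs, emit terraform.tfvars.json
    make create-controlplane            # apply null_resource.controlplane
    make create-controlplane-bootstrap  # talosctl bootstrap via ENDPOINT
    make create-kubeconfig              # fetch kubeconfig, point it at ENDPOINT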
View File

@@ -1,14 +1,12 @@
provider "google" {
project = var.project_id
region = var.region
credentials = "gcloud.json"
project = local.project
region = local.region
}
provider "google-beta" {
project = var.project_id
region = var.region
credentials = "gcloud.json"
}
# provider "google-beta" {
# project = var.project_id
# region = var.region
# }
data "google_client_config" "default" {}
# data "google_client_config" "default" {}
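With the hardcoded credentials = "gcloud.json" gone, the provider presumably falls back to Application Default Credentials; a sketch of setting those up locally:

    gcloud auth application-default login
    # or keep the key file without referencing it in HCL:
    export GOOGLE_APPLICATION_CREDENTIALS=$PWD/gcloud.json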

View File

@@ -1,13 +1,19 @@
data "google_client_openid_userinfo" "terraform" {}
resource "google_os_login_ssh_public_key" "terraform" {
project = var.project_id
user = data.google_client_openid_userinfo.terraform.email
key = file("~/.ssh/terraform.pub")
data "google_compute_image" "talos" {
project = local.project
family = "talos-amd64"
}
data "google_compute_image" "talos" {
project = var.project_id
family = "talos"
resource "google_compute_health_check" "instance" {
name = "${local.cluster_name}-instance-health-check"
timeout_sec = 5
check_interval_sec = 30
healthy_threshold = 1
unhealthy_threshold = 10
tcp_health_check {
port = "50000"
}
}
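The new health check probes the Talos API (apid) on port 50000; a quick manual equivalent against a node (the IP is a placeholder):

    nc -zv 172.16.0.231 50000   # same TCP probe the health check performs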

View File

@@ -60,7 +60,7 @@ data:
# Enable IPv6 addressing. If enabled, all endpoints are allocated an IPv6
# address.
enable-ipv6: "true"
enable-ipv6: "false"
# Users who wish to specify their own custom CNI configuration file must set
# custom-cni-conf to "true", otherwise Cilium may overwrite the configuration.
custom-cni-conf: "false"
@@ -156,7 +156,6 @@ data:
enable-l2-neigh-discovery: "true"
arping-refresh-period: "30s"
k8s-require-ipv4-pod-cidr: "true"
k8s-require-ipv6-pod-cidr: "true"
enable-endpoint-health-checking: "true"
enable-health-checking: "true"
enable-well-known-identities: "false"
@@ -185,6 +184,8 @@ data:
tofqdns-max-deferred-connection-deletes: "10000"
tofqdns-min-ttl: "3600"
tofqdns-proxy-response-max-delay: "100ms"
mtu: "1300"
agent-not-ready-taint-key: "node.cilium.io/agent-not-ready"
---
# Source: cilium/templates/cilium-agent/clusterrole.yaml
@@ -812,7 +813,7 @@ spec:
metadata:
annotations:
# ensure pods roll when configmap updates
cilium.io/cilium-configmap-checksum: "c3ffdb3de5df1007b50c84e0af5ba77bc44d069f56d62d3232573a21084f2f80"
cilium.io/cilium-configmap-checksum: "16ad070d5b63bf306024e40a412abddb22eac12ab78ce8b3f821045c84af75f5"
labels:
io.cilium/app: operator
name: cilium-operator

View File

@@ -23,6 +23,8 @@ localRedirectPolicy: true
tunnel: "vxlan"
autoDirectNodeRoutes: false
devices: [eth+]
extraConfig:
mtu: "1300"
healthChecking: true
@@ -33,14 +35,14 @@ ipam:
mode: "kubernetes"
k8s:
requireIPv4PodCIDR: true
requireIPv6PodCIDR: true
# requireIPv6PodCIDR: true
bpf:
masquerade: false
ipv4:
enabled: true
ipv6:
enabled: true
enabled: false
hostServices:
enabled: true
hostPort:

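These values (vxlan tunnel, MTU 1300, IPv6 off) imply the rendered manifest was regenerated; a sketch of the render step, assuming the standard cilium Helm repo and chart (the create-deployments recipe above is truncated):

    helm repo add cilium https://helm.cilium.io/
    helm template --namespace=kube-system --version=1.12.4 \
      -f deployments/cilium.yaml cilium cilium/cilium \
      > deployments/cilium-result.yaml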
View File

@@ -0,0 +1,208 @@
apiVersion: v1
kind: ServiceAccount
metadata:
name: gcp-cloud-controller-manager
namespace: kube-system
---
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRole
metadata:
labels:
addonmanager.kubernetes.io/mode: Reconcile
name: system:gcp-cloud-controller-manager
rules:
- apiGroups:
- ""
- events.k8s.io
resources:
- events
verbs:
- create
- patch
- update
- apiGroups:
- coordination.k8s.io
resources:
- leases
verbs:
- create
- apiGroups:
- coordination.k8s.io
resourceNames:
- cloud-controller-manager
resources:
- leases
verbs:
- get
- update
- apiGroups:
- ""
resources:
- endpoints
- serviceaccounts
verbs:
- create
- get
- update
- apiGroups:
- ""
resources:
- nodes
verbs:
- get
- update
- apiGroups:
- ""
resources:
- namespaces
verbs:
- get
- apiGroups:
- ""
resources:
- nodes/status
verbs:
- patch
- update
- apiGroups:
- ""
resources:
- secrets
verbs:
- create
- delete
- get
- update
- apiGroups:
- authentication.k8s.io
resources:
- tokenreviews
verbs:
- create
- apiGroups:
- '*'
resources:
- '*'
verbs:
- list
- watch
- apiGroups:
- ""
resources:
- serviceaccounts/token
verbs:
- create
- apiGroups:
- ""
resources:
- services/status
verbs:
- patch
- update
---
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRoleBinding
metadata:
name: system:gcp-cloud-controller-manager
roleRef:
apiGroup: rbac.authorization.k8s.io
kind: ClusterRole
name: system:gcp-cloud-controller-manager
subjects:
- kind: ServiceAccount
name: gcp-cloud-controller-manager
namespace: kube-system
---
apiVersion: rbac.authorization.k8s.io/v1
kind: RoleBinding
metadata:
name: system:gcp-cloud-controller-manager:extension-apiserver-authentication-reader
namespace: kube-system
roleRef:
apiGroup: rbac.authorization.k8s.io
kind: Role
name: extension-apiserver-authentication-reader
subjects:
- kind: ServiceAccount
name: gcp-cloud-controller-manager
namespace: kube-system
---
apiVersion: apps/v1
kind: Deployment
metadata:
labels:
k8s-app: cloud-controller-manager
tier: control-plane
name: cloud-controller-manager
namespace: kube-system
spec:
progressDeadlineSeconds: 600
replicas: 1
revisionHistoryLimit: 10
selector:
matchLabels:
k8s-app: cloud-controller-manager
strategy:
rollingUpdate:
maxSurge: 25%
maxUnavailable: 25%
type: RollingUpdate
template:
metadata:
creationTimestamp: null
labels:
k8s-app: cloud-controller-manager
tier: control-plane
spec:
containers:
- args:
- --bind-address=127.0.0.1
- --cloud-provider=gce
- --cloud-config=/etc/gce/gce.conf
- --allocate-node-cidrs=false
- --cluster-cidr=10.64.0.0/12
- --controllers=cloud-node-lifecycle
- --port=0
- --use-service-account-credentials
- -v=2
command:
- /bin/gcp-cloud-controller-manager
image: quay.io/openshift/origin-gcp-cloud-controller-manager:4.10.0
imagePullPolicy: IfNotPresent
name: cloud-controller-manager
resources:
requests:
cpu: 50m
terminationMessagePath: /dev/termination-log
terminationMessagePolicy: File
volumeMounts:
- mountPath: /etc/gce
name: config
readOnly: true
dnsPolicy: ClusterFirst
hostNetwork: true
nodeSelector:
node-role.kubernetes.io/control-plane: ""
node.cloudprovider.kubernetes.io/platform: gcp
priorityClassName: system-cluster-critical
restartPolicy: Always
schedulerName: default-scheduler
securityContext:
runAsNonRoot: true
runAsUser: 65521
seccompProfile:
type: RuntimeDefault
serviceAccount: gcp-cloud-controller-manager
serviceAccountName: gcp-cloud-controller-manager
terminationGracePeriodSeconds: 30
tolerations:
- effect: NoSchedule
key: node.cloudprovider.kubernetes.io/uninitialized
value: "true"
- effect: NoSchedule
key: node-role.kubernetes.io/control-plane
volumes:
- configMap:
defaultMode: 420
name: gcp-cloud-controller-manager
name: config
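Once this manifest is applied, two generic sanity checks for the controller rollout:

    kubectl -n kube-system rollout status deployment/cloud-controller-manager
    kubectl -n kube-system logs deployment/cloud-controller-manager | head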

View File

@@ -34,3 +34,21 @@ resource "google_project_iam_binding" "terraform_networksAdmin" {
# expression = "request.time < timestamp(\"2023-01-30T22:00:00.000Z\")"
# }
}
resource "google_project_iam_binding" "terraform_saAdmin" {
project = var.project
role = "roles/iam.serviceAccountAdmin"
members = [
"serviceAccount:${google_service_account.terraform.email}",
]
}
resource "google_project_iam_binding" "terraform_iamAdmin" {
project = var.project
role = "roles/iam.securityAdmin"
members = [
"serviceAccount:${google_service_account.terraform.email}",
]
}
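To confirm the new bindings landed on the terraform service account (PROJECT_ID is a placeholder):

    gcloud projects get-iam-policy PROJECT_ID \
      --flatten="bindings[].members" \
      --filter="bindings.members:terraform" \
      --format="table(bindings.role)"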

View File

@@ -1,79 +1,64 @@
resource "google_compute_address" "controlplane" {
count = max(lookup(var.controlplane, "count", 0), length(var.zones))
project = var.project_id
region = var.region
name = "${var.cluster_name}-master-${count.index + 1}"
description = "Local ${var.cluster_name}-master-${count.index + 1} ip"
address_type = "INTERNAL"
address = cidrhost(cidrsubnet(var.network_cidr, 8, 0), 231 + count.index)
subnetwork = "core"
purpose = "GCE_ENDPOINT"
locals {
controlplane_labels = "cloud.google.com/gke-boot-disk=pd-ssd"
}
resource "google_compute_instance_from_template" "controlplane" {
count = lookup(var.controlplane, "count", 0)
name = "master-${count.index + 1}"
project = var.project_id
zone = element(var.zones, count.index)
module "controlplane" {
source = "./modules/controlplane"
network_interface {
network = var.network
network_ip = google_compute_address.controlplane[count.index].address
subnetwork = "core"
access_config {
network_tier = "STANDARD"
}
alias_ip_range = count.index == 0 ? [{
ip_cidr_range = "${google_compute_address.lbv4_local.address}/32"
subnetwork_range_name = ""
}] : []
}
for_each = toset(local.zones)
name = "${local.cluster_name}-controlplane-${each.value}"
project = local.project
region = local.region
zone = each.value
cluster_name = local.cluster_name
source_instance_template = google_compute_instance_template.controlplane.id
depends_on = [
google_compute_instance_template.controlplane
]
lifecycle {
ignore_changes = [
source_instance_template,
labels
]
}
kubernetes = merge(var.kubernetes, {
lbv4_local = google_compute_address.lbv4_local.address
nodeSubnets = local.network_controlplane.ip_cidr_range
region = local.region
zone = each.key
project = local.project
network = local.network
})
controlplane = try(var.controlplane[each.key], {})
network = local.network_controlplane
subnetwork = local.network_controlplane.name
network_cidr = cidrsubnet(local.network_controlplane.ip_cidr_range, 6, 1 + index(local.zones, each.value))
instance_template = google_compute_instance_template.controlplane.id
}
resource "google_compute_instance_template" "controlplane" {
name_prefix = "${var.cluster_name}-master-"
project = var.project_id
region = var.region
machine_type = lookup(var.controlplane, "type", "e2-standard-2")
# min_cpu_platform = ""
name_prefix = "${local.cluster_name}-controlplane-"
project = local.project
region = local.region
machine_type = "e2-medium"
tags = concat(var.tags, ["${var.cluster_name}-infra", "${var.cluster_name}-master"])
tags = concat(var.tags, ["${local.cluster_name}-common", "${local.cluster_name}-controlplane"])
labels = {
label = "controlplane"
}
# metadata = {
# ssh-keys = "debian:${file("~/.ssh/terraform.pub")}"
# }
# metadata_startup_script = "apt-get install -y nginx"
metadata = {
cluster-name = local.cluster_name
cluster-location = local.region
kube-labels = local.controlplane_labels
}
disk {
boot = true
auto_delete = true
disk_size_gb = 16
disk_size_gb = 30
disk_type = "pd-ssd"
resource_policies = []
source_image = data.google_compute_image.talos.self_link
labels = { label = "controlplane" }
labels = { label = "${local.cluster_name}-controlplane" }
}
network_interface {
network = var.network
subnetwork = "core"
network = local.network_controlplane.network
subnetwork = local.network_controlplane.name
stack_type = "IPV4_IPV6"
access_config {
network_tier = "STANDARD"
}
@@ -90,33 +75,12 @@ resource "google_compute_instance_template" "controlplane" {
enable_vtpm = true
}
service_account {
email = google_service_account.controlplane.email
scopes = ["cloud-platform"]
}
lifecycle {
create_before_destroy = "true"
}
}
# resource "local_file" "controlplane" {
# count = lookup(var.controlplane, "count", 0)
# content = templatefile("${path.module}/templates/controlplane.yaml",
# merge(var.kubernetes, {
# name = "master-${count.index + 1}"
# type = count.index == 0 ? "init" : "controlplane"
# ipv4_local = google_compute_address.controlplane[count.index].address
# ipv4 = google_compute_instance_from_template.controlplane[count.index].network_interface[0].access_config[0].nat_ip
# lbv4_local = google_compute_address.lbv4_local.address
# lbv4 = google_compute_instance_from_template.controlplane[count.index].network_interface[0].access_config[0].nat_ip
# })
# )
# filename = "_cfgs/controlplane-${count.index + 1}.yaml"
# file_permission = "0640"
# depends_on = [google_compute_instance_from_template.controlplane]
# }
# resource "null_resource" "controlplane" {
# count = lookup(var.controlplane, "count", 0)
# provisioner "local-exec" {
# command = "sleep 60 && talosctl apply-config --insecure --nodes ${google_compute_instance_from_template.controlplane[count.index].network_interface[0].access_config[0].nat_ip} --file _cfgs/controlplane-${count.index + 1}.yaml"
# }
# depends_on = [google_compute_instance_from_template.controlplane, local_file.controlplane]
# }
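With controlplanes now created through a per-zone module keyed by zone, targeted applies need the map key (the zone here is an example matching the defaults in variables.tf):

    terraform apply -target='module.controlplane["europe-north1-a"]'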

gcp-zonal/instances-sa.tf Normal file
View File

@@ -0,0 +1,51 @@
resource "google_service_account" "controlplane" {
account_id = "controlplane"
display_name = "A service account for controlplane instances"
}
resource "google_project_iam_member" "ccm" {
project = local.project
role = "projects/${local.project}/roles/KubeCCM"
member = "serviceAccount:${google_service_account.controlplane.email}"
}
# resource "google_service_account" "csi" {
# account_id = "csi-driver"
# display_name = "A service account for csi-driver"
# }
# resource "google_project_iam_member" "csi" {
# project = local.project
# role = "projects/${local.project}/roles/KubeCsiDriver"
# member = "serviceAccount:${google_service_account.csi.email}"
# }
# resource "google_project_iam_member" "csi_storageAdmin" {
# project = local.project
# role = "roles/compute.storageAdmin"
# member = "serviceAccount:${google_service_account.csi.email}"
# }
# resource "google_project_iam_member" "csi_serviceAccountUser" {
# project = local.project
# role = "roles/iam.serviceAccountUser"
# member = "serviceAccount:${google_service_account.csi.email}"
# }
# resource "google_service_account" "autoscaler" {
# account_id = "cluster-autoscale"
# display_name = "A service account for cluster-autoscale"
# }
# resource "google_project_iam_member" "autoscaler" {
# project = local.project
# role = "projects/${local.project}/roles/KubeClusterAutoscaler"
# member = "serviceAccount:${google_service_account.autoscaler.email}"
# }
# resource "google_project_iam_member" "autoscaler_roles" {
# project = local.project
# role = "roles/viewer"
# member = "serviceAccount:${google_service_account.autoscaler.email}"
# }
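Two hedged checks that the service account and the custom KubeCCM role referenced above actually exist (PROJECT_ID is a placeholder):

    gcloud iam service-accounts describe controlplane@PROJECT_ID.iam.gserviceaccount.com
    gcloud iam roles describe KubeCCM --project PROJECT_ID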

View File

@@ -1,139 +0,0 @@
resource "google_compute_region_instance_group_manager" "web" {
name = "${var.cluster_name}-web-mig"
project = var.project_id
region = var.region
distribution_policy_zones = var.zones
base_instance_name = "${var.cluster_name}-web"
version {
instance_template = google_compute_instance_template.web["all"].id
}
update_policy {
type = "OPPORTUNISTIC"
instance_redistribution_type = "PROACTIVE"
minimal_action = "REPLACE"
replacement_method = "SUBSTITUTE"
}
target_pools = []
target_size = lookup(var.instances["all"], "web_count", 0)
wait_for_instances = false
named_port {
name = "http"
port = "80"
}
named_port {
name = "https"
port = "443"
}
lifecycle {
create_before_destroy = true
}
}
resource "google_compute_instance_group_manager" "web" {
for_each = { for k, v in var.instances : k => v if contains(var.zones, "${var.region}-${k}") }
name = "${var.cluster_name}-web-${each.key}-mig"
project = var.project_id
zone = "${var.region}-${each.key}"
base_instance_name = "${var.cluster_name}-web-${each.key}"
version {
instance_template = google_compute_instance_template.web[each.key].id
}
update_policy {
type = "OPPORTUNISTIC"
minimal_action = "REPLACE"
max_unavailable_fixed = 1
replacement_method = "SUBSTITUTE"
}
named_port {
name = "http"
port = "80"
}
named_port {
name = "https"
port = "443"
}
target_pools = []
target_size = lookup(each.value, "web_count", 0)
wait_for_instances = false
lifecycle {
create_before_destroy = true
}
}
resource "google_compute_instance_template" "web" {
for_each = var.instances
name_prefix = "${var.cluster_name}-web-${each.key}-"
project = var.project_id
region = var.region
machine_type = lookup(each.value, "web_instance_type", "e2-standard-2")
# min_cpu_platform = ""
tags = concat(var.tags, ["${var.cluster_name}-infra", "${var.cluster_name}-web"])
labels = {
label = "web"
}
metadata = {
user-data = templatefile("${path.module}/templates/worker.yaml.tpl",
merge(var.kubernetes, {
lbv4 = google_compute_address.lbv4_local.address
})
)
}
disk {
boot = true
auto_delete = true
disk_size_gb = 16
disk_type = "pd-balanced" // pd-ssd
resource_policies = []
source_image = data.google_compute_image.talos.self_link
labels = { label = "web" }
}
network_interface {
network = var.network
subnetwork = "core"
access_config {
network_tier = "STANDARD"
}
}
scheduling {
automatic_restart = true
on_host_maintenance = "MIGRATE"
}
shielded_instance_config {
enable_integrity_monitoring = true
enable_secure_boot = false
enable_vtpm = true
}
lifecycle {
create_before_destroy = "true"
}
}
resource "local_file" "web" {
content = templatefile("${path.module}/templates/worker.yaml.tpl",
merge(var.kubernetes, {
lbv4 = google_compute_address.lbv4_local.address
})
)
filename = "${path.module}/_cfgs/worker-0.yaml"
file_permission = "0640"
depends_on = [google_compute_region_instance_group_manager.web]
}

View File

@@ -0,0 +1,106 @@
resource "google_compute_instance_group_manager" "worker" {
for_each = { for k, v in var.instances : k => v if k != "all" }
name = "${local.cluster_name}-worker-${each.key}-mig"
description = "${local.cluster_name}-worker terraform group"
project = local.project
zone = each.key
base_instance_name = "worker-${each.key}"
version {
instance_template = google_compute_instance_template.worker[each.key].id
}
update_policy {
type = "OPPORTUNISTIC"
minimal_action = "RESTART"
max_surge_fixed = 1
max_unavailable_fixed = 1
replacement_method = "SUBSTITUTE"
}
# auto_healing_policies {
# health_check = google_compute_health_check.instance.id
# initial_delay_sec = 300
# }
target_pools = []
target_size = lookup(each.value, "worker_count", 0)
wait_for_instances = false
lifecycle {
create_before_destroy = true
ignore_changes = [
base_instance_name,
target_size,
]
}
}
locals {
worker_labels = "cloud.google.com/gke-boot-disk=pd-balanced,project.io/node-pool=worker"
}
resource "google_compute_instance_template" "worker" {
for_each = { for k, v in var.instances : k => v if k != "all" }
name_prefix = "${local.cluster_name}-worker-${each.key}-"
description = "${local.cluster_name}-worker terraform template"
project = local.project
region = local.region
machine_type = lookup(each.value, "worker_type", "e2-standard-2")
tags = concat(var.tags, ["${local.cluster_name}-common", "${local.cluster_name}-worker"])
labels = {
label = "${local.cluster_name}-worker"
}
metadata = {
cluster-name = local.cluster_name
cluster-location = local.region
kube-labels = local.worker_labels
kube-env = "AUTOSCALER_ENV_VARS: node_labels=${local.worker_labels};os=linux;os_distribution=cos"
user-data = templatefile("${path.module}/templates/worker.yaml.tpl",
merge(var.kubernetes, {
lbv4 = google_compute_address.lbv4_local.address
nodeSubnets = each.key == "all" ? local.network_controlplane.ip_cidr_range : local.networks[each.key].ip_cidr_range
labels = local.worker_labels
})
)
}
disk {
boot = true
auto_delete = true
disk_size_gb = 64
disk_type = "pd-balanced"
resource_policies = []
source_image = data.google_compute_image.talos.self_link
labels = { label = "${local.cluster_name}-worker" }
}
can_ip_forward = true
network_interface {
network = local.network_controlplane.network
subnetwork = each.key == "all" ? local.network_controlplane.name : local.networks[each.key].self_link
stack_type = "IPV4_IPV6"
access_config {
network_tier = "STANDARD"
}
}
scheduling {
automatic_restart = true
on_host_maintenance = "MIGRATE"
}
shielded_instance_config {
enable_integrity_monitoring = true
enable_secure_boot = false
enable_vtpm = true
}
lifecycle {
create_before_destroy = "true"
}
}

View File

@@ -1,105 +0,0 @@
resource "google_compute_region_instance_group_manager" "worker" {
name = "${var.cluster_name}-worker-mig"
project = var.project_id
region = var.region
distribution_policy_zones = var.zones
base_instance_name = "${var.cluster_name}-worker"
version {
instance_template = google_compute_instance_template.worker["all"].id
}
update_policy {
type = "OPPORTUNISTIC"
instance_redistribution_type = "PROACTIVE"
minimal_action = "REPLACE"
replacement_method = "SUBSTITUTE"
}
target_pools = []
target_size = lookup(var.instances["all"], "worker_count", 0)
wait_for_instances = false
lifecycle {
create_before_destroy = true
}
}
resource "google_compute_instance_group_manager" "worker" {
for_each = { for k, v in var.instances : k => v if contains(var.zones, "${var.region}-${k}") }
name = "${var.cluster_name}-worker-${each.key}-mig"
project = var.project_id
zone = "${var.region}-${each.key}"
base_instance_name = "${var.cluster_name}-worker-${each.key}"
version {
instance_template = google_compute_instance_template.worker[each.key].id
}
update_policy {
type = "OPPORTUNISTIC"
minimal_action = "REPLACE"
max_unavailable_fixed = 1
replacement_method = "SUBSTITUTE"
}
target_pools = []
target_size = lookup(each.value, "worker_count", 0)
wait_for_instances = false
lifecycle {
create_before_destroy = true
}
}
resource "google_compute_instance_template" "worker" {
for_each = var.instances
name_prefix = "${var.cluster_name}-worker-${each.key}-"
project = var.project_id
region = var.region
machine_type = lookup(each.value, "worker_instance_type", "e2-standard-2")
# min_cpu_platform = ""
tags = concat(var.tags, ["${var.cluster_name}-infra", "${var.cluster_name}-worker"])
labels = {
label = "worker"
}
metadata = {
user-data = templatefile("${path.module}/templates/worker.yaml.tpl",
merge(var.kubernetes, {
lbv4 = google_compute_address.lbv4_local.address
})
)
}
disk {
boot = true
auto_delete = true
disk_size_gb = 16
disk_type = "pd-balanced" // pd-ssd
resource_policies = []
source_image = data.google_compute_image.talos.self_link
labels = { label = "web" }
}
network_interface {
network = var.network
subnetwork = "core"
}
scheduling {
automatic_restart = true
on_host_maintenance = "MIGRATE"
}
shielded_instance_config {
enable_integrity_monitoring = true
enable_secure_boot = false
enable_vtpm = true
}
lifecycle {
create_before_destroy = "true"
}
}

View File

@@ -0,0 +1,83 @@
resource "google_compute_address" "local" {
count = max(lookup(var.controlplane, "count", 0), 3)
project = var.project
region = var.region
name = "${var.name}-${count.index + 1}"
description = "Local ${var.name}-${count.index + 1} ip"
address_type = "INTERNAL"
address = cidrhost(var.network_cidr, count.index)
subnetwork = var.subnetwork
purpose = "GCE_ENDPOINT"
}
resource "google_compute_instance_from_template" "controlplane" {
count = lookup(var.controlplane, "count", 0)
name = "controlplane-${var.zone}-${count.index + 1}"
project = var.project
zone = var.zone
machine_type = lookup(var.controlplane, "type", "e2-medium")
can_ip_forward = true
network_interface {
network = var.network.network
network_ip = google_compute_address.local[count.index].address
subnetwork = var.subnetwork
stack_type = "IPV4_IPV6"
access_config {
network_tier = "STANDARD"
}
}
source_instance_template = var.instance_template
lifecycle {
ignore_changes = [
attached_disk,
machine_type,
source_instance_template,
metadata,
labels
]
}
}
resource "google_compute_instance_group" "controlplane" {
project = var.project
name = "${var.cluster_name}-controlplane-${var.zone}"
zone = var.zone
network = var.network.network
instances = google_compute_instance_from_template.controlplane[*].self_link
named_port {
name = "https"
port = "6443"
}
depends_on = [google_compute_instance_from_template.controlplane]
}
resource "local_sensitive_file" "controlplane" {
count = lookup(var.controlplane, "count", 0)
content = templatefile("templates/controlplane.yaml",
merge(var.kubernetes, {
name = "controlplane-${var.zone}-${count.index + 1}"
ipv4 = google_compute_instance_from_template.controlplane[count.index].network_interface[0].access_config[0].nat_ip
ipv4_local = google_compute_address.local[count.index].address
})
)
filename = "_cfgs/controlplane-${var.zone}-${count.index + 1}.yaml"
file_permission = "0600"
depends_on = [google_compute_instance_from_template.controlplane]
}
resource "null_resource" "controlplane" {
count = lookup(var.controlplane, "count", 0)
provisioner "local-exec" {
command = "sleep 60 && talosctl apply-config --insecure --nodes ${google_compute_instance_from_template.controlplane[count.index].network_interface[0].access_config[0].nat_ip} --file ${local_sensitive_file.controlplane[count.index].filename}"
}
depends_on = [google_compute_instance_from_template.controlplane, local_sensitive_file.controlplane]
}
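The null_resource pushes the rendered config once the instance is reachable; the manual equivalent for a single node (IP and zone are placeholders):

    talosctl apply-config --insecure \
      --nodes 34.88.0.10 \
      --file _cfgs/controlplane-europe-north1-a-1.yaml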

View File

@@ -0,0 +1,12 @@
output "controlplane" {
value = google_compute_instance_from_template.controlplane
}
output "controlplane_endpoints" {
value = lookup(var.controlplane, "count", 0) > 0 ? google_compute_instance_from_template.controlplane[0].network_interface[0].access_config[0].nat_ip : ""
}
output "instance_group_id" {
value = google_compute_instance_group.controlplane.id
}
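These module outputs are flattened into the root controlplane_endpoint output, which the Makefile's ENDPOINT variable reads; to inspect it from the root module:

    terraform output -raw controlplane_endpoint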

View File

@@ -0,0 +1,69 @@
variable "project" {
description = "The project ID to host"
}
variable "cluster_name" {
description = "The cluster name"
}
variable "region" {
description = "The region to host"
}
variable "zone" {
description = "The zone to host"
}
variable "name" {
description = "The host name"
}
variable "network" {
description = "The VPC network created to host"
}
variable "subnetwork" {
description = "The VPC subnetwork created to host"
default = "core"
}
variable "network_cidr" {
description = "Local subnet rfc1918"
default = "172.16.0.0/16"
}
variable "tags" {
description = "Tags of resources"
type = list(string)
default = [
"develop"
]
}
variable "kubernetes" {
type = map(string)
default = {
podSubnets = "10.32.0.0/12"
serviceSubnets = "10.200.0.0/22"
domain = "cluster.local"
cluster_name = "malta"
tokenmachine = ""
token = ""
ca = ""
}
sensitive = true
}
variable "instance_template" {
description = "Instance template"
}
variable "controlplane" {
description = "Map of controlplane params"
type = map(any)
default = {
count = 0,
type = "e2-small",
}
}

View File

@@ -1,130 +1,61 @@
# resource "google_compute_address" "api" {
# project = var.project_id
# region = var.region
# name = "${var.cluster_name}-controlplane"
# description = "External ${var.cluster_name}-controlplane lb ip"
# address_type = "EXTERNAL"
# network_tier = "STANDARD"
# }
resource "google_compute_address" "lbv4_local" {
project = var.project_id
region = var.region
name = "${var.cluster_name}-master-lbv4"
description = "Local ${var.cluster_name}-master-lbv4 ip"
project = local.project
region = local.region
name = "${local.cluster_name}-controlplane-lbv4"
description = "Local ${local.cluster_name}-controlplane-lbv4 ip"
address_type = "INTERNAL"
address = cidrhost(cidrsubnet(var.network_cidr, 8, 0), 230)
subnetwork = "core"
address = cidrhost(local.network_controlplane.ip_cidr_range, 230)
subnetwork = local.network_controlplane.name
purpose = "GCE_ENDPOINT"
}
# resource "google_compute_forwarding_rule" "controlplane" {
# project = var.project_id
# name = "${var.cluster_name}-controlplane"
# region = var.region
# load_balancing_scheme = "INTERNAL"
# backend_service = google_compute_region_backend_service.controlplane.self_link
# ip_address = google_compute_address.lbv4_local.address
# ports = ["6443","50000"]
# ip_protocol = "TCP"
# network_tier = "STANDARD"
# }
resource "google_compute_forwarding_rule" "controlplane" {
project = local.project
name = "${local.cluster_name}-controlplane"
region = local.region
load_balancing_scheme = "INTERNAL"
backend_service = google_compute_region_backend_service.controlplane.self_link
network = local.network_controlplane.network
subnetwork = local.network_controlplane.name
ip_address = google_compute_address.lbv4_local.address
ports = ["6443"]
ip_protocol = "TCP"
network_tier = "PREMIUM"
}
# resource "google_compute_region_backend_service" "controlplane" {
# name = "${var.cluster_name}-controlplane"
# region = var.region
# health_checks = [google_compute_region_health_check.controlplane.id]
# load_balancing_scheme = "INTERNAL"
# protocol = "TCP"
# project = var.project_id
resource "google_compute_region_backend_service" "controlplane" {
project = local.project
name = "${local.cluster_name}-controlplane"
region = local.region
health_checks = [google_compute_region_health_check.controlplane.self_link]
load_balancing_scheme = "INTERNAL"
protocol = "TCP"
# dynamic "backend" {
# for_each = google_compute_instance_group.controlplane
# content {
# group = backend.value.id
# }
# }
# }
connection_draining_timeout_sec = 300
session_affinity = "NONE"
# resource "google_compute_region_health_check" "controlplane" {
# name = "${var.cluster_name}-controlplane-health-check"
# region = var.region
# check_interval_sec = 15
# timeout_sec = 5
# healthy_threshold = 1
# unhealthy_threshold = 2
dynamic "backend" {
for_each = module.controlplane
content {
balancing_mode = "CONNECTION"
group = backend.value.instance_group_id
}
}
}
# https_health_check {
# port = "6443"
# request_path = "/readyz"
# }
# }
resource "google_compute_region_health_check" "controlplane" {
name = "${local.cluster_name}-controlplane-health-check"
region = local.region
check_interval_sec = 15
timeout_sec = 5
healthy_threshold = 1
unhealthy_threshold = 2
# resource "google_compute_instance_group" "controlplane" {
# count = lookup(var.controlplane, "count", 0)
# project = var.project_id
# name = "${var.cluster_name}-controlplane-${element(var.zones, count.index)}"
# zone = element(var.zones, count.index)
# instances = [
# google_compute_instance_from_template.controlplane[count.index].id,
# ]
# named_port {
# name = "talos"
# port = "50000"
# }
# named_port {
# name = "https"
# port = "6443"
# }
# }
# resource "google_compute_forwarding_rule" "web" {
# project = var.project_id
# name = "${var.cluster_name}-web"
# region = var.region
# load_balancing_scheme = "EXTERNAL"
# backend_service = google_compute_region_backend_service.web.self_link
# ip_address = google_compute_address.api.address
# ports = ["80","443"]
# ip_protocol = "TCP"
# network_tier = "STANDARD"
# }
# resource "google_compute_region_backend_service" "web" {
# name = "${var.cluster_name}-web"
# region = var.region
# health_checks = [google_compute_region_health_check.web.id]
# load_balancing_scheme = "EXTERNAL"
# protocol = "TCP"
# project = var.project_id
# backend {
# group = google_compute_region_instance_group_manager.web.instance_group
# }
# dynamic "backend" {
# for_each = google_compute_instance_group_manager.web
# content {
# group = backend.value.instance_group
# }
# }
# }
# resource "google_compute_region_health_check" "web" {
# name = "${var.cluster_name}-web-health-check"
# region = var.region
# check_interval_sec = 15
# timeout_sec = 5
# healthy_threshold = 1
# unhealthy_threshold = 2
# http_health_check {
# port = "80"
# # request_path = "/healthz"
# request_path = "/"
# }
# }
tcp_health_check {
port = "6443"
}
log_config {
enable = false
}
}

gcp-zonal/outputs.tf Normal file
View File

@@ -0,0 +1,4 @@
output "controlplane_endpoint" {
value = try(flatten([for c in module.controlplane : c.controlplane_endpoints])[0], "")
}

View File

@@ -2,32 +2,40 @@ version: v1alpha1
debug: false
persist: true
machine:
type: ${type}
type: controlplane
certSANs:
- "${lbv4}"
- "${lbv4_local}"
- "${ipv4}"
- "${ipv4_local}"
- ${lbv4_local}
- ${apiDomain}
features:
kubernetesTalosAPIAccess:
enabled: true
allowedRoles:
- os:reader
allowedKubernetesNamespaces:
- kube-system
kubelet:
extraArgs:
node-ip: "${ipv4_local}"
rotate-server-certificates: true
clusterDNS:
- 169.254.2.53
- ${cidrhost(split(",",serviceSubnets)[0], 10)}
nodeIP:
validSubnets: ${format("%#v",split(",",nodeSubnets))}
network:
hostname: "${name}"
interfaces:
- interface: eth0
dhcp: true
- interface: eth0
cidr: ${lbv4_local}/32
- interface: eth0
cidr: ${lbv4}/32
- interface: lo
addresses:
- ${lbv4_local}
- ${ipv4}
- interface: dummy0
cidr: "169.254.2.53/32"
- interface: dummy0
cidr: "fd00::169:254:2:53/128"
addresses:
- 169.254.2.53/32
extraHostEntries:
- ip: ${ipv4_local}
aliases:
- ${apiDomain}
install:
disk: /dev/sda
bootloader: true
wipe: false
extraKernelArgs:
- elevator=noop
@@ -35,37 +43,92 @@ machine:
net.core.somaxconn: 65535
net.core.netdev_max_backlog: 4096
systemDiskEncryption:
state:
provider: luks2
keys:
- nodeID: {}
slot: 0
ephemeral:
provider: luks2
keys:
- nodeID: {}
slot: 0
options:
- no_read_workqueue
- no_write_workqueue
cluster:
id: ${clusterID}
secret: ${clusterSecret}
controlPlane:
endpoint: https://${lbv4}:6443
endpoint: https://${apiDomain}:6443
clusterName: ${clusterName}
discovery:
enabled: true
network:
dnsDomain: ${domain}
podSubnets: ${format("[%s]",podSubnets)}
serviceSubnets: ${format("[%s]",serviceSubnets)}
podSubnets: ${format("%#v",split(",",podSubnets))}
serviceSubnets: ${format("%#v",split(",",serviceSubnets))}
cni:
name: custom
urls:
- https://raw.githubusercontent.com/sergelogvinov/terraform-talos/main/gcp-zonal/deployments/cilium_result.yaml
- https://raw.githubusercontent.com/sergelogvinov/terraform-talos/main/gcp-zonal/deployments/cilium-result.yaml
proxy:
disabled: true
mode: ipvs
apiServer:
certSANs:
- "${lbv4_local}"
- "${lbv4}"
- "${ipv4_local}"
- "${ipv4}"
controllerManager: {}
- ${lbv4_local}
- ${apiDomain}
admissionControl:
- name: PodSecurity
configuration:
apiVersion: pod-security.admission.config.k8s.io/v1alpha1
defaults:
audit: restricted
audit-version: latest
enforce: baseline
enforce-version: latest
warn: restricted
warn-version: latest
exemptions:
namespaces:
- kube-system
- ingress-nginx
- monitoring
- local-path-storage
- local-lvm
runtimeClasses: []
usernames: []
kind: PodSecurityConfiguration
controllerManager:
extraArgs:
node-cidr-mask-size-ipv4: 24
node-cidr-mask-size-ipv6: 112
scheduler: {}
etcd: {}
etcd:
advertisedSubnets:
- ${ipv4_local}/32
listenSubnets:
- ${ipv4_local}/32
inlineManifests:
- name: gcp-cloud-controller-config
contents: |-
apiVersion: v1
kind: ConfigMap
metadata:
name: gcp-cloud-controller-manager
namespace: kube-system
data:
gce.conf: |
[global]
project-id = ${project}
network-name = ${network}
externalCloudProvider:
enabled: true
manifests:
- https://raw.githubusercontent.com/siderolabs/talos-cloud-controller-manager/main/docs/deploy/cloud-controller-manager.yml
- https://raw.githubusercontent.com/sergelogvinov/terraform-talos/main/gcp-zonal/deployments/kubelet-serving-cert-approver.yaml
- https://raw.githubusercontent.com/sergelogvinov/terraform-talos/main/gcp-zonal/deployments/metrics-server.yaml
- https://raw.githubusercontent.com/sergelogvinov/terraform-talos/main/gcp-zonal/deployments/local-path-storage.yaml
- https://raw.githubusercontent.com/sergelogvinov/terraform-talos/main/gcp-zonal/deployments/coredns-local.yaml
- https://raw.githubusercontent.com/sergelogvinov/terraform-talos/main/gcp-zonal/deployments/ingress-ns.yaml
- https://raw.githubusercontent.com/sergelogvinov/terraform-talos/main/gcp-zonal/deployments/ingress-result.yaml
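A rendered controlplane config can be sanity-checked before it is pushed (cloud is the validation mode that seems right for GCE images):

    talosctl validate --config templates/controlplane.yaml --mode cloud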

View File

@@ -3,44 +3,60 @@ debug: false
persist: true
machine:
type: worker
token: ${tokenmachine}
token: ${tokenMachine}
certSANs: []
kubelet:
extraArgs:
cloud-provider: external
rotate-server-certificates: true
node-labels: "${labels}"
clusterDNS:
- 169.254.2.53
- ${cidrhost(split(",",serviceSubnets)[0], 10)}
nodeIP:
validSubnets: ${format("%#v",split(",",nodeSubnets))}
network:
interfaces:
- interface: eth0
dhcp: true
- interface: dummy0
cidr: "169.254.2.53/32"
- interface: dummy0
cidr: "fd00::169:254:2:53/128"
extraHostEntries:
- ip: ${lbv4}
aliases:
- ${apiDomain}
install:
wipe: false
extraKernelArgs:
- elevator=noop
sysctls:
net.core.somaxconn: 65535
net.core.netdev_max_backlog: 4096
install:
disk: /dev/sda
extraKernelArgs:
- elevator=noop
bootloader: true
wipe: true
systemDiskEncryption:
state:
provider: luks2
keys:
- nodeID: {}
slot: 0
ephemeral:
provider: luks2
keys:
- nodeID: {}
slot: 0
options:
- no_read_workqueue
- no_write_workqueue
cluster:
id: ${clusterID}
secret: ${clusterSecret}
controlPlane:
endpoint: https://${lbv4}:6443
clusterName: ${cluster_name}
endpoint: https://${apiDomain}:6443
clusterName: ${clusterName}
discovery:
enabled: true
network:
dnsDomain: ${domain}
serviceSubnets: ${format("%#v",split(",",serviceSubnets))}
proxy:
mode: ipvs
disabled: true
token: ${token}
ca:
crt: ${ca}
key: ""

View File

@@ -1,72 +1,79 @@
variable "project_id" {
description = "The project ID to host the cluster in"
data "terraform_remote_state" "prepare" {
backend = "local"
config = {
path = "${path.module}/prepare/terraform.tfstate"
}
}
variable "cluster_name" {
description = "A default cluster name"
default = "talos"
}
locals {
project = data.terraform_remote_state.prepare.outputs.project
region = data.terraform_remote_state.prepare.outputs.region
zones = data.terraform_remote_state.prepare.outputs.zones
variable "region" {
description = "The region to host the cluster in"
}
cluster_name = data.terraform_remote_state.prepare.outputs.cluster_name
variable "zones" {
type = list(string)
description = "The zone to host the cluster in (required if is a zonal cluster)"
network = data.terraform_remote_state.prepare.outputs.network
network_controlplane = data.terraform_remote_state.prepare.outputs.network_controlplane
networks = data.terraform_remote_state.prepare.outputs.networks
}
variable "kubernetes" {
type = map(string)
default = {
podSubnets = "10.32.0.0/12"
serviceSubnets = "10.200.0.0/22"
podSubnets = "10.32.0.0/12,fd40:10:32::/102"
serviceSubnets = "10.200.0.0/22,fd40:10:200::/112"
domain = "cluster.local"
cluster_name = "talos-k8s-hezner"
tokenmachine = ""
apiDomain = "api.cluster.local"
clusterName = "talos-k8s-gcp"
clusterID = ""
clusterSecret = ""
tokenMachine = ""
caMachine = ""
token = ""
ca = ""
}
sensitive = true
sensitive = true
}
variable "network" {
description = "The VPC network created to host the cluster in"
default = "production"
variable "controlplane" {
description = "Controlplane config"
type = map(any)
default = {
"europe-north1-a" = {
count = 0,
type = "e2-medium",
},
"europe-north1-b" = {
count = 0,
type = "e2-medium",
},
}
}
variable "network_cidr" {
description = "Local subnet rfc1918"
default = "172.16.0.0/16"
}
variable "whitelist_web" {
description = "Cloudflare subnets"
default = [
"173.245.48.0/20",
"103.21.244.0/22",
"103.22.200.0/22",
"103.31.4.0/22",
"141.101.64.0/18",
"108.162.192.0/18",
"190.93.240.0/20",
"188.114.96.0/20",
"197.234.240.0/22",
"198.41.128.0/17",
"162.158.0.0/15",
"172.64.0.0/13",
"131.0.72.0/22",
"104.16.0.0/13",
"104.24.0.0/14",
]
}
variable "whitelist_admin" {
description = "Cloudflare subnets"
default = [
"0.0.0.0/0",
]
variable "instances" {
description = "Map of region's properties"
type = map(any)
default = {
"europe-north1-a" = {
web_count = 0,
web_type = "e2-small",
worker_count = 0,
worker_type = "e2-small",
},
"europe-north1-b" = {
web_count = 0,
web_type = "e2-small",
worker_count = 0,
worker_type = "e2-small",
}
"europe-north1-c" = {
web_count = 0,
web_type = "e2-small",
worker_count = 0,
worker_type = "e2-small",
}
}
}
variable "tags" {
@@ -76,43 +83,3 @@ variable "tags" {
"develop"
]
}
variable "controlplane" {
description = "Count of controlplanes"
type = map(any)
default = {
count = 0,
type = "e2-small"
}
}
variable "instances" {
description = "Map of region's properties"
type = map(any)
default = {
"a" = {
web_count = 0,
web_instance_type = "e2-small",
worker_count = 0,
worker_instance_type = "e2-small",
},
"b" = {
web_count = 0,
web_instance_type = "e2-small",
worker_count = 0,
worker_instance_type = "e2-small",
}
"c" = {
web_count = 0,
web_instance_type = "e2-small",
worker_count = 0,
worker_instance_type = "e2-small",
}
"all" = {
web_count = 0,
web_instance_type = "e2-small",
worker_count = 0,
worker_instance_type = "e2-small",
}
}
}
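All per-zone counts default to 0, so nothing is created until they are overridden; a sketch via a CLI var (the zone key mirrors the defaults above):

    terraform apply -auto-approve \
      -var 'controlplane={"europe-north1-a"={count=1,type="e2-medium"}}'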