Merge pull request #238 from Telecominfraproject/WIFI-13206-Ressurect-github-self-hosted-runners

Wifi 13206 resurrect GitHub self hosted runners
This commit is contained in:
Carsten Schafer
2023-12-07 09:39:00 -05:00
committed by GitHub
8 changed files with 137 additions and 130 deletions

View File

@@ -34,7 +34,7 @@ The scripts should work on MacOS and Linux (as of yet untested).
## Scaling nodegroups
Set the desiredCapacity for the nodegroup in cluster.CLUSTER_NAME.yaml and run:
Set CLUSTER_NODES in your env\_FILE and also set the desiredCapacity for the nodegroup in cluster.CLUSTER_NAME.yaml and run:
```bash
source env\_FILE
eksctl scale nodegroup -f cluster.$CLUSTER_NAME.yaml

View File

@@ -98,7 +98,7 @@ nodeGroups:
amiFamily: AmazonLinux2
minSize: 3
maxSize: 8
desiredCapacity: 4
desiredCapacity: 6
volumeSize: 100
ssh: # import public key from file
allow: true

View File

@@ -5,7 +5,7 @@ export CLUSTER_DOMAIN="lab.wlan.tip.build"
export CLUSTER_ZONE_ID="Z09534373UTXT2L1YL912"
export CLUSTER_INSTANCE_TYPE="c5.xlarge"
export CLUSTER_NAME="tip-wlan-main"
export CLUSTER_NODES=4
export CLUSTER_NODES=6
export CLUSTER_MIN_NODES=3
export CLUSTER_MAX_NODES=8
export CLUSTER_VOLUME_SIZE=100

View File

@@ -52,7 +52,8 @@ releases:
1. Set credentials that are required to connect to Kubernetes cluster
2. (optional) If you are going to use environment with secrets, make sure that you also have credentials required for access to AWS KMS key
3. Run `helmfile --environment $ENVIRONMENT diff` to see changes that would be applied
4. If everything is correct, run `helmfile --environment $ENVIRONMENT apply` to apply the changes
3. Run `./predeploy.sh` to install pre-requisites.
4. Run `helmfile --environment $ENVIRONMENT diff` to see changes that would be applied
5. If everything is correct, run `helmfile --environment $ENVIRONMENT apply` to apply the changes
If you would like to limit which releases are affected, you may use labels. For example, if you want to see changes that would be done only to **influxdb** release in **amazon-cicd** environment, you may run `helmfile --environment amazon-cicd --selector app=influxdb diff`

View File

@@ -1,7 +1,7 @@
podAnnotations:
iam.amazonaws.com/role: arn:aws:iam::{{ .Environment.Values.eks.accountID }}:role/tip-wlan-main-external-dns
iam.amazonaws.com/role: arn:aws:iam::{{ .Environment.Values.eks.accountID }}:role/{{ .Environment.Values.eks.clusterName }}-external-dns
aws:
region: us-east-1
region: {{ .Environment.Values.eks.dnsRegion }}
evaluateTargetHealth: false
domainFilters:
- {{ .Environment.Values.domain }}
@@ -14,5 +14,7 @@ extraArgs:
txtOwnerId: /hostedzone/{{ .Environment.Values.eks.hostedZoneId }}
policy: sync
serviceAccount:
create: false
name: {{ .Environment.Values.eks.clusterName }}-external-dns-sa
annotations:
eks.amazonaws.com/role-arn: "arn:aws:iam::{{ .Environment.Values.eks.accountID }}:role/{{ .Environment.Values.eks.clusterName }}-external-dns"

View File

@@ -1,55 +0,0 @@
version: v0.139.7
dependencies:
- name: actions-runner-controller
repository: https://actions-runner-controller.github.io/actions-runner-controller
version: 0.19.1
- name: aws-load-balancer-controller
repository: https://aws.github.io/eks-charts
version: 1.4.2
- name: aws-node-termination-handler
repository: https://aws.github.io/eks-charts
version: 0.16.0
- name: cert-manager
repository: https://charts.jetstack.io
version: v1.6.1
- name: cluster-autoscaler
repository: https://kubernetes.github.io/autoscaler/
version: 9.11.0
- name: core-dump-handler
repository: https://ibm.github.io/core-dump-handler
version: v8.6.0
- name: elasticsearch-curator
repository: https://charts.helm.sh/stable
version: 2.2.3
- name: external-dns
repository: https://charts.bitnami.com/bitnami
version: 6.12.2
- name: fluentd-elasticsearch
repository: https://kokuwaio.github.io/helm-charts
version: 13.1.0
- name: influxdb2
repository: https://helm.influxdata.com
version: 2.0.3
- name: ingress-nginx
repository: https://kubernetes.github.io/ingress-nginx
version: 4.2.0
- name: kube-prometheus-stack
repository: https://prometheus-community.github.io/helm-charts
version: 41.5.1
- name: kubernetes-dashboard
repository: https://kubernetes.github.io/dashboard/
version: 5.0.5
- name: logstash
repository: https://helm.elastic.co
version: 7.16.2
- name: metrics-server
repository: https://charts.bitnami.com/bitnami
version: 6.2.4
- name: prometheus-es-exporter
repository: https://braedon.github.io/helm
version: 0.2.0
- name: tigera-operator
repository: https://projectcalico.docs.tigera.io/charts
version: v3.22.2
digest: sha256:ff437e159bbd9894d11d568de89d4ac6bc2b9e91e12d153a70148ea05c4e8729
generated: "2022-12-21T13:28:30.610389163+01:00"

View File

@@ -1,3 +1,73 @@
# Helmfile environment definitions. A single environment, amazon-cicd, is
# declared here. `secrets` lists the secret values files loaded for it and
# `values` holds the plain settings that the templates and releases read
# through .Environment.Values / .Values.
environments:
amazon-cicd:
secrets:
- secrets/influxdb.yaml
- secrets/sso.yaml
- secrets/alertmanager.yaml
- secrets/actions-runner-controller.yaml
- secrets/ucentral-ap-firmware-logstash.yaml
- secrets/core-dump-handler.yaml
values:
# EKS/AWS identifiers interpolated into IAM role ARNs, service account
# names and load balancer certificate annotations.
- eks:
clusterName: tip-wlan-main
region: ap-south-1
# Separate from `region`: the external-dns values use this as the AWS region.
dnsRegion: us-east-1
accountID: 289708231103
hostedZoneId: cicd
certificateARNCICD: arn:aws:acm:ap-south-1:289708231103:certificate/2cc8c764-11fd-411d-bf7d-a93f488f3f6c
certificateARNLab: arn:aws:acm:ap-south-1:289708231103:certificate/5281be14-c5e6-45c5-8d5c-e8660c76fbcb
- monitoring:
namespace: monitoring
publicNamespaces: ['openwifi-qa01', 'openwifi-dev01', 'openwifi-demo']
- domain: lab.wlan.tip.build
- storageClass: gp2
# Per-component switches. Releases reference these through their
# `condition:` key (e.g. `condition: ingress.enabled`).
# Entries marked TODO are switched off with the previous `enabled: true`
# kept as a comment - presumably to be re-enabled later.
- autoscaler:
#TODO:
#enabled: true
enabled: false
- ingress:
enabled: true
- elastic:
#TODO:
#enabled: true
enabled: false
- kibana:
#TODO:
#enabled: true
enabled: false
- logstash:
#TODO:
#enabled: true
enabled: false
- prometheus:
#TODO:
#enabled: true
enabled: false
- k8s-dashboard:
enabled: true
- metrics-server:
enabled: true
- external-dns:
enabled: true
- alb-ingress:
enabled: true
- node-termination-handler:
enabled: true
- influxdb:
#skip this one for now
enabled: false
- actions-runner-controller:
enabled: true
- cert-manager:
enabled: true
- calico:
enabled: true
- core-dump-handler:
#skip this one for now
enabled: false
---
repositories:
- name: stable
url: https://charts.helm.sh/stable
@@ -30,61 +100,6 @@ repositories:
- name: core-dump-handler
url: https://ibm.github.io/core-dump-handler
environments:
amazon-cicd:
secrets:
- secrets/influxdb.yaml
- secrets/sso.yaml
- secrets/alertmanager.yaml
- secrets/actions-runner-controller.yaml
- secrets/ucentral-ap-firmware-logstash.yaml
- secrets/core-dump-handler.yaml
values:
- eks:
clusterName: tip-wlan-main
region: us-east-2
accountID: 289708231103
hostedZoneId: cicd
certificateARNCICD: arn:aws:acm:us-east-2:289708231103:certificate/bfa89c7a-5b64-4a8a-bcfe-ffec655b5285
certificateARNLab: arn:aws:acm:us-east-2:289708231103:certificate/510429bd-1a3d-4c43-90ce-8e340795a888
- monitoring:
namespace: monitoring
publicNamespaces: ['openwifi-qa01', 'openwifi-dev01']
- domain: lab.wlan.tip.build
- storageClass: gp2
- autoscaler:
enabled: true
- ingress:
enabled: true
- elastic:
enabled: true
- kibana:
enabled: true
- logstash:
enabled: true
- prometheus:
enabled: true
- k8s-dashboard:
enabled: true
- metrics-server:
enabled: true
- external-dns:
enabled: true
- alb-ingress:
enabled: true
- node-termination-handler:
enabled: true
- influxdb:
enabled: true
- actions-runner-controller:
enabled: true
- cert-manager:
enabled: true
- calico:
enabled: true
- core-dump-handler:
enabled: true
helmDefaults:
force: false
timeout: 300
@@ -108,7 +123,7 @@ templates:
releases:
- name: cluster-autoscaler
condition: autoscaler.enabled
<<: *default
<<: *default
<<: *cluster-autoscaler
chart: autoscaler/cluster-autoscaler
version: 9.11.0
@@ -132,29 +147,46 @@ releases:
condition: ingress.enabled
<<: *default
chart: nginx/ingress-nginx
version: 4.2.0
#version: 4.2.0
version: 4.8.2
labels:
role: setup
group: system
app: ingress
values:
- controller:
# TEST:
ingressClassResource:
name: nginx-sso
ingressClass: nginx-sso
#ingressClass: nginx
service:
annotations:
service.beta.kubernetes.io/aws-load-balancer-ssl-cert: {{ .Environment.Values.eks.certificateARNLab }}
service.beta.kubernetes.io/aws-load-balancer-ssl-ports: https
service.beta.kubernetes.io/aws-load-balancer-type: elb
service.beta.kubernetes.io/aws-load-balancer-backend-protocol: tcp
#MAYBE TRY THESE:
#service.beta.kubernetes.io/aws-load-balancer-name: apps-ingress
#service.beta.kubernetes.io/aws-load-balancer-type: external
#service.beta.kubernetes.io/aws-load-balancer-scheme: internet-facing
#service.beta.kubernetes.io/aws-load-balancer-nlb-target-type: ip
#service.beta.kubernetes.io/aws-load-balancer-healthcheck-protocol: http
#service.beta.kubernetes.io/aws-load-balancer-healthcheck-path: /healthz
#service.beta.kubernetes.io/aws-load-balancer-healthcheck-port: 10254
targetPorts:
http: http
https: http
publishService:
enabled: true
metrics:
enabled: true
#TODO:
#enabled: true
enabled: false
serviceMonitor:
enabled: true
#TODO:
#enabled: true
enabled: false
additionalLabels:
release: prometheus-operator
- defaultBackend:
@@ -405,7 +437,6 @@ releases:
repository: tip-tip-wlan-cloud-ucentral.jfrog.io/kube-state-metrics
tag: v2.6.0-tip20221103
- name: prometheus-operator-helper
condition: prometheus.enabled
namespace: {{ .Environment.Values.monitoring.namespace }}
@@ -641,6 +672,17 @@ releases:
client_id => "openwifi-qa01"
add_field => { "instance" => "openwifi-qa01" }
}
kafka {
bootstrap_servers => "kafka-headless.openwifi-demo.svc.cluster.local:9092"
topics => ["state", "healthcheck", "device_event_queue"]
tags => ["openwifi-kafka"]
#codec => json
decorate_events => true
auto_offset_reset => "latest"
client_id => "openwifi-demo"
add_field => { "instance" => "openwifi-demo" }
}
}
filter {
@@ -815,6 +857,7 @@ releases:
externalPort: 80
- protocolHttp: true
- ingress:
className: nginx-sso
enabled: true
paths:
- /
@@ -848,13 +891,15 @@ releases:
<<: *default
condition: alb-ingress.enabled
chart: eks/aws-load-balancer-controller
version: 1.4.2
version: 1.6.1
labels:
role: setup
group: system
app: aws-load-balancer-controller
values:
- serviceAccount:
create: false
name: {{ .Values.eks.clusterName }}-alb-ingress-sa
annotations:
eks.amazonaws.com/role-arn: arn:aws:iam::{{ .Values.eks.accountID }}:role/{{ .Values.eks.clusterName }}-alb-ingress
clusterName: {{ .Values.eks.clusterName }}
@@ -867,7 +912,7 @@ releases:
<<: *default
condition: node-termination-handler.enabled
chart: eks/aws-node-termination-handler
version: 0.16.0
version: 0.21.0
labels:
role: setup
group: system
@@ -963,12 +1008,13 @@ releases:
chart: charts/actions-runner-controller-addon
labels:
app: actions-runner-controller
disableValidation: true
- name: cert-manager
condition: cert-manager.enabled
namespace: kube-system
namespace: cert-manager
chart: jetstack/cert-manager
version: v1.6.1
version: v1.13.0
labels:
app: cert-manager
values:
@@ -982,17 +1028,21 @@ releases:
cpu: 500m
memory: 150Mi
- name: calico
condition: calico.enabled
chart: projectcalico/tigera-operator
version: v3.22.2
namespace: kube-system
version: v3.26.1
namespace: tigera-operator
disableValidation: true
labels:
app: calico
values:
- installation:
kubernetesProvider: EKS
- name: github-actions-network-policies
condition: calico.enabled
#condition: calico.enabled
condition: actions-runner-controller.enabled
namespace: actions-runner-controller
chart: charts/github-actions-network-policies
labels:

View File

@@ -0,0 +1,9 @@
#!/bin/bash
# Pre-deployment helper: applies the aws-load-balancer-controller CRDs with
# kustomize (`kubectl apply -k`) straight from the aws/eks-charts repository.
# Uses whatever cluster the current kubectl context points at.

# Trace each command as it is executed.
set -x
# only run on a clean initially created cluster for CRDs
# NOTE(review): ref=master is unpinned, so the CRDs applied can change over
# time - consider pinning to a ref matching the deployed chart version.
kubectl apply -k "github.com/aws/eks-charts/stable/aws-load-balancer-controller/crds?ref=master"
# Disabled: manual helm install of the Calico tigera-operator chart (v3.26.1)
# into the tigera-operator namespace, left here commented out.
#helm install calico projectcalico/tigera-operator \
# --namespace tigera-operator \
# --create-namespace \
# --version v3.26.1 \
# -f charts/tigera-operator/values.json