wlan-toolsmith/helmfile/cloud-sdk/helmfile.yaml

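# Helm chart repositories referenced by the releases below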
repositories:
- name: stable
url: https://charts.helm.sh/stable
- name: kokuwa
url: https://kokuwaio.github.io/helm-charts
- name: nginx
url: https://kubernetes.github.io/ingress-nginx
- name: eks
url: https://aws.github.io/eks-charts
- name: elastic
url: https://helm.elastic.co
- name: kubernetes-dashboard
url: https://kubernetes.github.io/dashboard/
- name: autoscaler
url: https://kubernetes.github.io/autoscaler/
- name: bitnami
url: https://charts.bitnami.com/bitnami
- name: influxdata
url: https://helm.influxdata.com
- name: actions-runner-controller
url: https://actions-runner-controller.github.io/actions-runner-controller
- name: jetstack
url: https://charts.jetstack.io
- name: prometheus-community
url: https://prometheus-community.github.io/helm-charts
- name: projectcalico
url: https://projectcalico.docs.tigera.io/charts
- name: braedon
url: https://braedon.github.io/helm
- name: core-dump-handler
url: https://ibm.github.io/core-dump-handler
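# per-environment values and secrets (azure, amazon-cicd)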
environments:
azure:
values:
- monitoring:
namespace: monitoring
- domain: tip.4c74356b41.com
- storageClass: default
- autoscaler:
enabled: true
- ingress:
enabled: true
- elastic:
enabled: true
- kibana:
enabled: true
- prometheus:
enabled: true
- external-dns:
enabled: true
amazon-cicd:
secrets:
- secrets/influxdb.yaml
- secrets/sso.yaml
- secrets/alertmanager.yaml
- secrets/actions-runner-controller.yaml
- secrets/ucentral-ap-firmware-logstash.yaml
- secrets/core-dump-handler.yaml
values:
- eks:
clusterName: tip-wlan-main
region: us-east-2
accountID: 289708231103
hostedZoneId: cicd
certificateARNCICD: arn:aws:acm:us-east-2:289708231103:certificate/bfa89c7a-5b64-4a8a-bcfe-ffec655b5285
certificateARNLab: arn:aws:acm:us-east-2:289708231103:certificate/510429bd-1a3d-4c43-90ce-8e340795a888
- monitoring:
namespace: monitoring
publicNamespaces: ['openwifi-qa01', 'openwifi-dev01']
- domain: lab.wlan.tip.build
- storageClass: gp2
- autoscaler:
enabled: true
- ingress:
enabled: true
- elastic:
enabled: true
- kibana:
enabled: true
- logstash:
enabled: true
- prometheus:
enabled: true
- k8s-dashboard:
enabled: true
- metrics-server:
enabled: true
- external-dns:
enabled: true
- alb-ingress:
enabled: true
- node-termination-handler:
enabled: true
- influxdb:
enabled: true
- actions-runner-controller:
enabled: true
- cert-manager:
enabled: true
- calico:
enabled: true
- core-dump-handler:
enabled: true
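# defaults applied to every helm invocation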
helmDefaults:
force: false
timeout: 300
# these options don't seem to work
# wait: false
# recreatePods: true
# verify: true
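# reusable YAML anchors shared by the releases below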
templates:
default: &default
namespace: kube-system
missingFileHandler: Warn
cluster-autoscaler: &cluster-autoscaler
values:
- envs/common/cluster-autoscaler.yaml.gotmpl
external-dns: &external-dns
values:
- envs/common/external-dns.yaml.gotmpl
# core setup
releases:
- name: cluster-autoscaler
condition: autoscaler.enabled
<<: *default
<<: *cluster-autoscaler
chart: autoscaler/cluster-autoscaler
version: 9.11.0
labels:
role: setup
group: system
app: autoscaler
- name: external-dns
condition: external-dns.enabled
<<: *default
<<: *external-dns
chart: bitnami/external-dns
version: 6.1.0
labels:
role: setup
group: system
app: external-dns
- name: nginx-ingress
condition: ingress.enabled
<<: *default
chart: nginx/ingress-nginx
version: 4.2.0
labels:
role: setup
group: system
app: ingress
values:
- controller:
ingressClass: nginx-sso
service:
annotations:
service.beta.kubernetes.io/aws-load-balancer-ssl-cert: {{ .Environment.Values.eks.certificateARNLab }}
service.beta.kubernetes.io/aws-load-balancer-ssl-ports: https
service.beta.kubernetes.io/aws-load-balancer-type: elb
service.beta.kubernetes.io/aws-load-balancer-backend-protocol: tcp
targetPorts:
http: http
https: http
publishService:
enabled: true
metrics:
enabled: true
serviceMonitor:
enabled: true
additionalLabels:
release: prometheus-operator
- defaultBackend:
enabled: true
# monitoring
- name: prometheus-operator
condition: prometheus.enabled
namespace: {{ .Environment.Values.monitoring.namespace }}
chart: prometheus-community/kube-prometheus-stack
version: 41.5.1
labels:
role: setup
group: monitoring
app: prometheus-operator
values:
- nameOverride: prometheus-operator
- prometheus:
enabled: true
prometheusSpec:
retention: 31d
resources:
requests:
memory: 1400Mi
limits:
memory: 3000Mi
storageSpec:
volumeClaimTemplate:
spec:
storageClassName: gp2
accessModes: ["ReadWriteOnce"]
resources:
requests:
storage: 50Gi
ingress:
enabled: true
annotations:
nginx.ingress.kubernetes.io/auth-url: "https://$host/oauth2/auth"
nginx.ingress.kubernetes.io/auth-signin: "https://$host/oauth2/start?rd=$escaped_request_uri"
kubernetes.io/ingress.class: nginx-sso
hosts:
- prometheus.{{ .Environment.Values.domain }}
- additionalPrometheusRulesMap:
testbed:
groups:
- name: Testbed alerts
rules:
- alert: Waiting pod
expr: sum(kube_pod_container_status_waiting_reason{namespace=~".*nola.*", reason!="ContainerCreating"}) by (namespace, pod) > 0
for: 5m
labels:
severity: warning
area: testbed
namespace: "{{`{{ $labels.namespace }}`}}"
pod: "{{`{{ $labels.pod }}`}}"
annotations:
title: Pod *{{`{{ $labels.namespace }}/{{ $labels.pod }}`}}* is in a waiting state
description: The pod {{`{{ $labels.namespace }}/{{ $labels.pod }}`}} has been in a waiting state for at least 5 minutes. Please check for image pull issues.
- alert: Node not ready
expr: sum(kube_node_status_condition{condition="Ready", status!="true"}) by (node) > 0
for: 60m
labels:
severity: error
area: testbed
node: "{{`{{ $labels.node }}`}}"
annotations:
title: Node {{`{{ $labels.node }}`}} is not becoming ready
description: The cluster node {{`{{ $labels.node }}`}} has not become ready for at least 60 minutes. Please contact the cluster administrators.
- alert: Pod OOM killed
expr: (kube_pod_container_status_restarts_total - kube_pod_container_status_restarts_total offset 10m >= 1) and ignoring (reason) min_over_time(kube_pod_container_status_last_terminated_reason{reason="OOMKilled"}[10m]) == 1
for: 0m
labels:
severity: error
area: testbed
namespace: "{{`{{ $labels.namespace }}`}}"
pod: "{{`{{ $labels.pod }}`}}"
reason: "{{`{{ $labels.reason }}`}}"
annotations:
title: Pod *{{`{{ $labels.namespace }}/{{ $labels.pod }}`}}* has been OOM killed
description: The pod *{{`{{ $labels.namespace }}/{{ $labels.pod }}`}}* was killed because it reached its memory limit. Investigate its memory usage or increase the limit to prevent this.
- alert: Pod exited with a segfault
expr: (kube_pod_container_status_restarts_total - kube_pod_container_status_restarts_total offset 10m >= 1) and min_over_time(kube_pod_container_status_last_terminated_exitcode[10m]) == 139
for: 0m
labels:
severity: error
area: testbed
namespace: "{{`{{ $labels.namespace }}`}}"
pod: "{{`{{ $labels.pod }}`}}"
reason: "{{`{{ $labels.reason }}`}}"
annotations:
title: Pod *{{`{{ $labels.namespace }}/{{ $labels.pod }}`}}* exited with a segfault
description: The pod *{{`{{ $labels.namespace }}/{{ $labels.pod }}`}}* exited with a segmentation fault; please examine the core dump.
- alert: Node low on memory
expr: node_memory_MemAvailable_bytes / node_memory_MemTotal_bytes * 100 < 5
for: 1m
labels:
severity: warning
area: testbed
annotations:
title: Node {{`{{ $labels.instance }}`}} is running very low on memory
description: The cluster node {{`{{ $labels.instance }}`}} has less than 5% memory available.
- alert: Pod stuck in crash loop
expr: increase(kube_pod_container_status_restarts_total[1m]) > 3
for: 2m
labels:
severity: error
area: testbed
namespace: "{{`{{ $labels.namespace }}`}}"
pod: "{{`{{ $labels.pod }}`}}"
annotations:
title: Pod *{{`{{ $labels.namespace }}/{{ $labels.pod }}`}}* is crash looping
description: "The pod *{{`{{ $labels.namespace }}/{{ $labels.pod }}`}}* is crash looping. Please check its logs to see what is going on."
- alert: Pod restarted many times
expr: kube_pod_container_status_restarts_total > 5
labels:
severity: warning
area: testbed
namespace: "{{`{{ $labels.namespace }}`}}"
pod: "{{`{{ $labels.pod }}`}}"
annotations:
title: Pod *{{`{{ $labels.namespace }}/{{ $labels.pod }}`}}* was restarted many times
description: "The pod *{{`{{ $labels.namespace }}/{{ $labels.pod }}`}}* was restarted more than 5 times. Please check its logs to see what is going on."
- alert: PVC running out of space
expr: kubelet_volume_stats_available_bytes / kubelet_volume_stats_capacity_bytes * 100 < 20
labels:
severity: warning
area: testbed
namespace: "{{`{{ $labels.namespace }}`}}"
persistentvolumeclaim: "{{`{{ $labels.persistentvolumeclaim }}`}}"
annotations:
title: PVC *{{`{{ $labels.namespace }}/{{ $labels.persistentvolumeclaim }}`}}* has less than 20% free storage
description: "The PVC *{{`{{ $labels.namespace }}/{{ $labels.persistentvolumeclaim }}`}}* only has {{`{{ $value }}`}}% capacity left. Please increase its size or clean it up."
- alert: ElasticSearch new assert_violation errors found
expr: rate(assert_violation_errors_hits[1m]) > 0
labels:
severity: warning
area: testbed
service: elasticsearch
namespace: "{{`{{ $labels.namespace }}`}}"
annotations:
title: New ElasticSearch logs with assertion violations found (WIFI-9824)
description: New ElasticSearch log entries containing an assertion violation were found (see WIFI-9824).
- grafana:
image:
repository: grafana/grafana
tag: 8.5.13
grafana.ini:
users:
viewers_can_edit: true
auth:
disable_login_form: true
disable_signout_menu: true
auth.anonymous:
enabled: true
org_role: Viewer
testFramework:
enabled: false
ingress:
enabled: true
annotations:
nginx.ingress.kubernetes.io/auth-url: "https://$host/oauth2/auth"
nginx.ingress.kubernetes.io/auth-signin: "https://$host/oauth2/start?rd=$escaped_request_uri"
kubernetes.io/ingress.class: nginx-sso
hosts:
- grafana.{{ .Environment.Values.domain }}
dashboardProviders:
dashboardproviders.yaml:
apiVersion: 1
providers:
- name: 'default'
orgId: 1
folder: imported
type: file
disableDeletion: false
editable: true
options:
path: /var/lib/grafana/dashboards/default
dashboards:
default:
performance:
url: https://raw.githubusercontent.com/Telecominfraproject/wlan-testing/master/grafana/performance.json
qaDebugging:
url: https://raw.githubusercontent.com/Telecominfraproject/wlan-cloud-ucentral-analytics/main/grafana/qa-debugging.json
datasources:
datasources.yaml:
apiVersion: 1
datasources:
- name: Prometheus
type: prometheus
url: http://prometheus-operated:9090
access: proxy
isDefault: false
- name: InfluxDB
type: influxdb
access: proxy
url: https://influx.cicd.{{ .Environment.Values.domain }}
user: tip
secureJsonData:
token: {{ .Environment.Values.influxdb.adminUser.token }}
password: {{ .Environment.Values.influxdb.adminUser.password }}
jsonData:
version: Flux
organization: tip
defaultBucket: tip-cicd
- name: ES
type: elasticsearch
access: proxy
url: http://elasticsearch-client.monitoring.svc:9200
database: logstash-*
isDefault: false
jsonData:
esVersion: 6
timeField: '@timestamp'
logMessageField: message
logLevelField: fields.level
- alertmanager:
config:
global:
resolve_timeout: 1m
slack_api_url: {{ .Environment.Values.alertmanager.slack_api_url }}
route:
receiver: "null"
routes:
- match:
area: testbed
receiver: "slack-notifications"
receivers:
- name: "null"
- name: "slack-notifications"
slack_configs:
- channel: "#open-wifi-testbed-alerts"
send_resolved: true
icon_url: https://avatars3.githubusercontent.com/u/3380462
title: |-
{{- readFile "alertmanager-templates/title.tpl" | nindent 14 }}
text: >-
{{- readFile "alertmanager-templates/text.tpl" | nindent 14 }}
title_link: ""
- kube-state-metrics:
image:
repository: tip-tip-wlan-cloud-ucentral.jfrog.io/kube-state-metrics
tag: v2.6.0-tip20221103
- name: prometheus-operator-helper
condition: prometheus.enabled
namespace: {{ .Environment.Values.monitoring.namespace }}
chart: charts/standalone-monitoring
labels:
role: setup
group: monitoring
app: prometheus-operator
sub: helper
values:
- monitoring:
namespace: {{ .Environment.Values.monitoring.namespace }}
domain: {{ .Environment.Values.domain }}
- proxy:
namespace: kube-system
- name: prometheus-operator-ingress-auth
condition: prometheus.enabled
namespace: kube-system
chart: charts/sso
labels:
role: setup
group: monitoring
app: prometheus-operator
sub: oAuth
values:
- monitoring:
domain: example.com
namespace: {{ .Environment.Values.monitoring.namespace }}
- oidc:
issuerUrl: {{ .Environment.Values.sso.oidc.issuerUrl }}
clientId: {{ .Environment.Values.sso.oidc.clientId }}
clientSecret: {{ .Environment.Values.sso.oidc.clientSecret }}
cookieSecret: {{ .Environment.Values.sso.oidc.cookieSecret }}
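# logging stack (fluentd, elasticsearch, curator, kibana, logstash)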
- name: fluentd
condition: elastic.enabled
namespace: {{ .Environment.Values.monitoring.namespace }}
chart: kokuwa/fluentd-elasticsearch
version: 13.1.0
labels:
role: setup
group: monitoring
app: fluentd
values:
- elasticsearch:
serviceAccount:
create: true
hosts:
- elasticsearch-client.{{ .Environment.Values.monitoring.namespace }}.svc.cluster.local
- resources:
requests:
cpu: 100m
memory: 200Mi
limits:
cpu: 500m
memory: 300Mi
- tolerations:
- effect: NoSchedule
operator: Exists
- name: elasticsearch
condition: elastic.enabled
namespace: {{ .Environment.Values.monitoring.namespace }}
chart: charts/elasticsearch
labels:
role: setup
group: monitoring
app: elasticsearch
values:
- image:
repository: "docker.elastic.co/elasticsearch/elasticsearch"
tag: "6.8.22"
- client:
resources:
limits:
memory: 2Gi
requests:
memory: 1024Mi
heapSize: "1024m"
- master:
resources:
limits:
cpu: 200m
memory: 1000Mi
requests:
cpu: 200m
memory: 800Mi
- data:
persistence:
size: 650Gi
resources:
limits:
cpu: 3
memory: 4Gi
requests:
cpu: 1500m
memory: 4Gi
heapSize: "2048m"
readinessProbe:
timeoutSeconds: 30
- name: elasticsearch-curator
condition: elastic.enabled
namespace: {{ .Environment.Values.monitoring.namespace }}
chart: stable/elasticsearch-curator
labels:
role: setup
group: monitoring
app: elasticsearch
values:
- configMaps:
config_yml: |-
client:
hosts:
- http://elasticsearch-client.{{ .Environment.Values.monitoring.namespace }}.svc.cluster.local:9200
action_file_yml: |-
actions:
1:
action: delete_indices
description: "Delete old indices"
options:
ignore_empty_list: True
continue_if_exception: True
timeout_override: 300
filters:
- filtertype: pattern
kind: prefix
value: 'logstash-'
- filtertype: age
source: name
direction: older
timestring: '%Y.%m.%d'
unit: days
unit_count: 30
- name: kibana
condition: kibana.enabled
namespace: {{ .Environment.Values.monitoring.namespace }}
chart: charts/kibana
labels:
role: setup
group: monitoring
app: kibana
values:
- testFramework:
enabled: false
- image:
repository: "docker.elastic.co/kibana/kibana"
tag: "6.8.6"
- files:
kibana.yml:
elasticsearch.hosts: http://elasticsearch-client.{{ .Environment.Values.monitoring.namespace }}.svc.cluster.local:9200
- ingress:
enabled: true
annotations:
nginx.ingress.kubernetes.io/auth-url: "https://$host/oauth2/auth"
nginx.ingress.kubernetes.io/auth-signin: "https://$host/oauth2/start?rd=$escaped_request_uri"
kubernetes.io/ingress.class: nginx-sso
hosts:
- kibana.{{ .Environment.Values.domain }}
- lifecycle:
postStart:
exec:
command:
- bash
- -c
- |
#!/bin/bash
# Configure the default index pattern
TEMPLATE_NAME="logstash"
INDEX_PATTERN="logstash-*"
KIBANA_URL=http://localhost:5601
# Wait until service is ready
while [[ "$(curl -s -o /dev/null -w '%{http_code}\n' $KIBANA_URL/app/kibana)" != "200" ]]; do sleep 1; done
# Create the default index pattern in Kibana
curl -X POST -v $KIBANA_URL/api/saved_objects/index-pattern/$TEMPLATE_NAME \
-H 'kbn-xsrf: true' -H 'Content-Type: application/json' \
-d '{"attributes": {"title": "'$INDEX_PATTERN'"}}'
- dashboardImport:
enabled: true
timeout: 60
basePath: ""
dashboards:
k8s-container-logs: |
{{- readFile "kibana-dashboards/k8s-container-logs.json" | nindent 10 }}
- name: logstash
condition: logstash.enabled
namespace: {{ .Environment.Values.monitoring.namespace }}
chart: elastic/logstash
version: 7.16.2
labels:
role: setup
group: monitoring
app: logstash
values:
- image: docker.elastic.co/logstash/logstash
- replicas: 1
- resources:
limits:
memory: 2Gi
requests:
memory: 2Gi
- logstashConfig:
logstash.yml: |
http.host: 0.0.0.0
log.level: warn
- logstashPipeline:
logstash.conf: "" # override default pipeline
ucentral-kafka.conf: |
input {
kafka {
bootstrap_servers => "kafka-headless.openwifi-dev01.svc.cluster.local:9092"
topics => ["state", "healthcheck", "device_event_queue"]
tags => ["openwifi-kafka"]
#codec => json
decorate_events => true
auto_offset_reset => "latest"
client_id => "openwifi-dev01"
add_field => { "instance" => "openwifi-dev01" }
}
kafka {
bootstrap_servers => "kafka-headless.openwifi-qa01.svc.cluster.local:9092"
topics => ["state", "healthcheck", "device_event_queue"]
tags => ["openwifi-kafka"]
decorate_events => true
auto_offset_reset => "latest"
client_id => "openwifi-qa01"
add_field => { "instance" => "openwifi-qa01" }
}
}
filter {
if "openwifi-kafka" in [tags] {
mutate { copy => { "[@metadata][kafka]" => "kafka" } }
}
}
filter {
if "openwifi-kafka" in [tags] {
json {
source => "message"
remove_field => [ "message" ]
}
}
}
filter {
if ([payload][state][unit][load]) {
mutate {
add_field => { "[payload][state][unit][load1]" => "%{[payload][state][unit][load][0]}" }
add_field => { "[payload][state][unit][load5]" => "%{[payload][state][unit][load][1]}" }
add_field => { "[payload][state][unit][load15]" => "%{[payload][state][unit][load][2]}" }
remove_field => [ "[payload][state][unit][load]" ]
}
mutate {
convert => {
"[payload][state][unit][load1]" => "integer"
"[payload][state][unit][load5]" => "integer"
"[payload][state][unit][load15]" => "integer"
}
}
}
}
output {
if "openwifi-kafka" in [tags] {
elasticsearch {
hosts => "http://elasticsearch-client.{{ .Environment.Values.monitoring.namespace }}.svc.cluster.local:9200"
index => "logstash-ucentral-%{+YYYY.MM.dd}"
}
}
}
ucentral-ap-firmware.conf: |
input {
s3 {
access_key_id => "{{ .Environment.Values.ucentral_ap_firmware_logstash.access_key_id }}"
secret_access_key => "{{ .Environment.Values.ucentral_ap_firmware_logstash.secret_access_key }}"
bucket => "ucentral-ap-firmware-logs"
delete => true
region => "us-east-1"
tags => ["ucentral-ap-firmware"]
additional_settings => {
force_path_style => true
follow_redirects => false
}
}
}
filter {
if "ucentral-ap-firmware" in [tags] {
grok {
match => { "message" => "%{S3_ACCESS_LOG}" }
remove_field => ["message"]
}
date {
match => [ "timestamp", "dd/MMM/yyyy:HH:mm:ss Z" ]
remove_field => ["timestamp"]
}
geoip {
source => "clientip"
}
if [operation] == "REST.GET.OBJECT" and [key] =~ /\.bin$/ {
grok {
match => { "key" => "%{UPLOAD_DATE:upload_date}-%{AP_MODEL:ap_model}-%{GREEDYDATA:branch_name}-%{GIT_REV:git_rev}-%{WORD:suffix}.%{WORD:file_extension}" }
pattern_definitions => {
"UPLOAD_DATE" => "%{YEAR}%{MONTHNUM}%{MONTHDAY}"
"AP_MODEL" => "[a-z0-9]+_[a-z0-9]+(-|_)?[a-z0-9]+(-|_)?[a-z0-9]+"
"GIT_REV" => "[a-z0-9]{4,8}"
}
add_field => { "timestamp_clientip" => "%{@timestamp} %{clientip}" }
}
}
}
}
output {
if "ucentral-ap-firmware" in [tags] {
elasticsearch {
hosts => "http://elasticsearch-client.{{ .Environment.Values.monitoring.namespace }}.svc.cluster.local:9200"
index => "logstash-ucentral-ap-firmware-%{+YYYY.MM.dd}"
}
}
}
syslog.conf: |
input {
syslog {
tags => ["ucentral-syslog"]
port => 5514
grok_pattern => "(?:<%{POSINT:priority}>%{SYSLOGLINE}|%{MONTH} %{MONTHDAY} %{TIME} %{DATA:docker.container_name}/%{DATA:github.run_number}\[%{INT:undefined_number}\]: %{GREEDYDATA:message})"
}
}
filter {
if ([undefined_number]) {
mutate {
remove_field => [ "undefined_number" ]
}
}
}
output {
if "ucentral-syslog" in [tags] {
elasticsearch {
hosts => "http://elasticsearch-client.{{ .Environment.Values.monitoring.namespace }}.svc.cluster.local:9200"
index => "logstash-%{+YYYY.MM.dd}"
document_type => "_doc"
}
}
}
- service:
type: LoadBalancer
annotations:
service.beta.kubernetes.io/aws-load-balancer-type: "nlb-ip"
service.beta.kubernetes.io/aws-load-balancer-internal: "true"
service.beta.kubernetes.io/aws-load-balancer-private-ipv4-addresses: "10.10.10.40,10.10.11.40,10.10.12.40"
ports:
- name: syslog
port: 514
targetPort: 5514
protocol: TCP
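# Kubernetes dashboard and metrics-server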
- name: k8s-dashboard-roles
condition: k8s-dashboard.enabled
namespace: {{ .Environment.Values.monitoring.namespace }}
chart: charts/k8s-dashboard-roles
labels:
role: setup
group: monitoring
app: k8s-dashboard
- name: k8s-dashboard
condition: k8s-dashboard.enabled
namespace: {{ .Environment.Values.monitoring.namespace }}
chart: kubernetes-dashboard/kubernetes-dashboard
version: 5.0.5
labels:
role: setup
group: monitoring
app: k8s-dashboard
values:
- settings:
defaultNamespace: {{ index .Environment.Values.monitoring.publicNamespaces 0 }}
namespaceFallbackList: {{ .Environment.Values.monitoring.publicNamespaces | toJson }}
itemsPerPage: 25
clusterName: TIP WLAN CI/CD
- extraArgs:
- --enable-skip-login
- --system-banner=Welcome to the TIP WLAN CI/CD Kubernetes cluster. If you are missing your namespace in the above select box, please <a href="https://telecominfraproject.atlassian.net/browse/WIFI">create a ticket</a>.
- rbac:
create: false
clusterRoleMetrics: true
clusterReadOnlyRole: false
- service:
type: NodePort
externalPort: 80
- protocolHttp: true
- ingress:
enabled: true
paths:
- /
- /*
annotations:
#alb.ingress.kubernetes.io/actions.ssl-redirect: '{"Type": "redirect", "RedirectConfig": { "Protocol": "HTTPS", "Port": "443", "StatusCode": "HTTP_302"}}'
#alb.ingress.kubernetes.io/group.name: wlan-cicd
#alb.ingress.kubernetes.io/listen-ports: '[{"HTTP": 80}, {"HTTPS": 443}]'
#alb.ingress.kubernetes.io/scheme: internet-facing
#kubernetes.io/ingress.class: alb
nginx.ingress.kubernetes.io/auth-url: "https://$host/oauth2/auth"
nginx.ingress.kubernetes.io/auth-signin: "https://$host/oauth2/start?rd=$escaped_request_uri"
kubernetes.io/ingress.class: nginx-sso
hosts:
- k8s-dashboard.{{ .Environment.Values.domain }}
- name: metrics-server
condition: metrics-server.enabled
namespace: {{ .Environment.Values.monitoring.namespace }}
chart: bitnami/metrics-server
version: 5.10.13
labels:
role: setup
group: monitoring
app: metrics-server
values:
- apiService:
create: true
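# AWS controllers (ALB ingress, node termination handling)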
- name: aws-load-balancer-controller
<<: *default
condition: alb-ingress.enabled
chart: eks/aws-load-balancer-controller
version: 1.4.2
labels:
role: setup
group: system
app: aws-load-balancer-controller
values:
- serviceAccount:
annotations:
eks.amazonaws.com/role-arn: arn:aws:iam::{{ .Values.eks.accountID }}:role/{{ .Values.eks.clusterName }}-alb-ingress
clusterName: {{ .Values.eks.clusterName }}
enableShield: false
enableWaf: false
enableWafv2: false
logLevel: info
- name: aws-node-termination-handler
<<: *default
condition: node-termination-handler.enabled
chart: eks/aws-node-termination-handler
version: 0.16.0
labels:
role: setup
group: system
app: node-termination-handler
values:
- deleteLocalData: true
- ignoreDaemonSets: true
- podTerminationGracePeriod: -1 # use values defined in Pod
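# load-testing tooling (InfluxDB for CI/CD metrics)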
- name: influxdb
namespace: test-bss
chart: influxdata/influxdb2
version: 2.0.3
condition: influxdb.enabled
labels:
role: setup
group: load-testing
app: influxdb
task: tools-133
values:
- image:
tag: 2.0.6-alpine
- adminUser:
organization: tip
bucket: tip-cicd
user: tip
password: {{ .Environment.Values.influxdb.adminUser.password }}
token: {{ .Environment.Values.influxdb.adminUser.token }}
retention_policy: "0s"
- persistence:
storageClass: gp2
size: 10Gi
- service:
type: NodePort
- resources:
limits:
cpu: 500m
memory: 500Mi
requests:
cpu: 500m
memory: 500Mi
- ingress:
enabled: true
annotations:
kubernetes.io/ingress.class: alb
alb.ingress.kubernetes.io/scheme: internet-facing
alb.ingress.kubernetes.io/group.name: test-bss-load-testing
alb.ingress.kubernetes.io/certificate-arn: {{ .Environment.Values.eks.certificateARNCICD }}
alb.ingress.kubernetes.io/listen-ports: '[{"HTTP": 80}, {"HTTPS": 443}]'
alb.ingress.kubernetes.io/actions.ssl-redirect: '{"Type": "redirect", "RedirectConfig": { "Protocol": "HTTPS", "Port": "443", "StatusCode": "HTTP_302"}}'
alb.ingress.kubernetes.io/healthcheck-path: /health
external-dns.alpha.kubernetes.io/hostname: influx.cicd.{{ .Environment.Values.domain }}
hostname: influx.cicd.{{ .Environment.Values.domain }}
path: "/*"
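# self-hosted GitHub Actions runner controller and its webhook receiver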
- name: actions-runner-controller
condition: actions-runner-controller.enabled
namespace: actions-runner-controller
chart: actions-runner-controller/actions-runner-controller
version: 0.19.1
labels:
app: actions-runner-controller
values:
- authSecret:
create: true
github_token: {{ .Environment.Values.actions_runner_controller.github_token }}
- image:
pullPolicy: IfNotPresent
- githubWebhookServer:
enabled: true
secret:
github_webhook_secret_token: {{ .Environment.Values.actions_runner_controller.webhook_secret }}
service:
type: NodePort
ingress:
enabled: true
hosts:
- host: "ghac-webhook.cicd.lab.wlan.tip.build"
paths:
- path: /*
pathType: ImplementationSpecific
annotations:
kubernetes.io/ingress.class: alb
alb.ingress.kubernetes.io/scheme: internet-facing
alb.ingress.kubernetes.io/group.name: wlan-cicd
alb.ingress.kubernetes.io/certificate-arn: {{ .Environment.Values.eks.certificateARNCICD }}
alb.ingress.kubernetes.io/listen-ports: '[{"HTTP": 80}, {"HTTPS": 443}]'
alb.ingress.kubernetes.io/actions.ssl-redirect: '{"Type": "redirect", "RedirectConfig": { "Protocol": "HTTPS", "Port": "443", "StatusCode": "HTTP_302"}}'
- name: actions-runner-controller-addon
condition: actions-runner-controller.enabled
namespace: actions-runner-controller
chart: charts/actions-runner-controller-addon
labels:
app: actions-runner-controller
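# TLS certificate management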
- name: cert-manager
condition: cert-manager.enabled
namespace: kube-system
chart: jetstack/cert-manager
version: v1.6.1
labels:
app: cert-manager
values:
- installCRDs: true
- webhook:
resources:
requests:
cpu: 500m
memory: 150Mi
limits:
cpu: 500m
memory: 150Mi
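# cluster networking (Calico) and network policies for the GitHub Actions runners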
- name: calico
condition: calico.enabled
chart: projectcalico/tigera-operator
version: v3.22.2
namespace: kube-system
labels:
app: calico
- name: github-actions-network-policies
condition: calico.enabled
namespace: actions-runner-controller
chart: charts/github-actions-network-policies
labels:
role: setup
group: networking
app: github-actions-network-policies
sub1: calico
sub2: actions-runner-controller-addon
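# exports Elasticsearch query hit counts as Prometheus metrics; the assert_violation_errors query backs the ElasticSearch alert above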
- name: elasticsearch-exporter
condition: elastic.enabled
namespace: {{ .Environment.Values.monitoring.namespace }}
chart: braedon/prometheus-es-exporter
version: 0.2.0
labels:
group: monitoring
app: elasticsearch-exporter
values:
- image:
tag: 0.14.1
- elasticsearch:
cluster: elasticsearch-client.{{ .Environment.Values.monitoring.namespace }}.svc.cluster.local:9200
queries: |-
[DEFAULT]
QueryIntervalSecs = 15
QueryTimeoutSecs = 10
QueryIndices = _all
QueryOnError = drop
QueryOnMissing = drop
[query_assert_violation_errors]
QueryOnError = preserve
QueryOnMissing = zero
QueryJson = {
"query": {
"bool": {
"filter": {
"bool": {
"must": [
{
"match": {
"kubernetes.namespace_name.keyword": "openwifi-qa01"
}
},
{
"match": {
"kubernetes.container_name.keyword": "owgw"
}
}
]
}
},
"must": {
"match": {
"message": "Assertion violation: !_pStream"
}
}
}
},
"aggs": {
"agg_terms_kubernetes.container_name.keyword": {
"terms": {
"field": "kubernetes.container_name.keyword"
}
}
}
}
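# collects core dumps from crashed containers and uploads them to the openwifi-core-dumps S3 bucket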
- name: core-dump-handler
condition: core-dump-handler.enabled
chart: core-dump-handler/core-dump-handler
version: v8.6.0
namespace: ibm-observe
labels:
app: core-dump-handler
values:
- daemonset:
s3BucketName: openwifi-core-dumps
s3AccessKey: {{ .Environment.Values.core_dump_handler.s3_access_key }}
s3Secret: {{ .Environment.Values.core_dump_handler.s3_secret }}
s3Region: us-east-1
includeCrioExe: true
vendor: rhel7