# helmfile.yaml (mirror of https://github.com/Telecominfraproject/wlan-toolsmith.git)
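#
# This helmfile describes the cluster tooling for the TIP WLAN CI/CD
# environments: chart repositories, per-environment values and secrets, and
# the releases deployed from them. Assuming a standard helmfile setup, a
# typical invocation would look like the following (both commands are
# illustrative; the selector labels are defined per release under `labels:`):
#
#   helmfile --environment amazon-cicd apply
#   helmfile --environment amazon-cicd --selector group=monitoring sync
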
repositories:
- name: stable
  url: https://charts.helm.sh/stable
- name: kokuwa
  url: https://kokuwaio.github.io/helm-charts
- name: nginx
  url: https://kubernetes.github.io/ingress-nginx
- name: eks
  url: https://aws.github.io/eks-charts
- name: elastic
  url: https://helm.elastic.co
- name: kubernetes-dashboard
  url: https://kubernetes.github.io/dashboard/
- name: autoscaler
  url: https://kubernetes.github.io/autoscaler/
- name: bitnami
  url: https://charts.bitnami.com/bitnami
- name: influxdata
  url: https://helm.influxdata.com
- name: actions-runner-controller
  url: https://actions-runner-controller.github.io/actions-runner-controller
- name: jetstack
  url: https://charts.jetstack.io
- name: prometheus-community
  url: https://prometheus-community.github.io/helm-charts
- name: projectcalico
  url: https://projectcalico.docs.tigera.io/charts
- name: braedon
  url: https://braedon.github.io/helm
- name: core-dump-handler
  url: https://ibm.github.io/core-dump-handler

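# Environments select which releases are enabled and provide per-cluster
# values. Entries under `secrets:` are encrypted value files, presumably
# decrypted through the helm-secrets plugin as is usual for helmfile.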
environments:
  azure:
    values:
    - monitoring:
        namespace: monitoring
    - domain: tip.4c74356b41.com
    - storageClass: default
    - autoscaler:
        enabled: true
    - ingress:
        enabled: true
    - elastic:
        enabled: true
    - kibana:
        enabled: true
    - prometheus:
        enabled: true
    - external-dns:
        enabled: true

  amazon-cicd:
    secrets:
    - secrets/influxdb.yaml
    - secrets/sso.yaml
    - secrets/alertmanager.yaml
    - secrets/actions-runner-controller.yaml
    - secrets/ucentral-ap-firmware-logstash.yaml
    - secrets/core-dump-handler.yaml
    values:
    - eks:
        clusterName: tip-wlan-main
        region: us-east-2
        accountID: 289708231103
        hostedZoneId: cicd
        certificateARNCICD: arn:aws:acm:us-east-2:289708231103:certificate/bfa89c7a-5b64-4a8a-bcfe-ffec655b5285
        certificateARNLab: arn:aws:acm:us-east-2:289708231103:certificate/510429bd-1a3d-4c43-90ce-8e340795a888
    - monitoring:
        namespace: monitoring
        publicNamespaces: ['openwifi-qa01', 'openwifi-dev01']
    - domain: lab.wlan.tip.build
    - storageClass: gp2
    - autoscaler:
        enabled: true
    - ingress:
        enabled: true
    - elastic:
        enabled: true
    - kibana:
        enabled: true
    - logstash:
        enabled: true
    - prometheus:
        enabled: true
    - k8s-dashboard:
        enabled: true
    - metrics-server:
        enabled: true
    - external-dns:
        enabled: true
    - alb-ingress:
        enabled: true
    - node-termination-handler:
        enabled: true
    - influxdb:
        enabled: true
    - actions-runner-controller:
        enabled: true
    - cert-manager:
        enabled: true
    - calico:
        enabled: true
    - core-dump-handler:
        enabled: true

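# Defaults applied to every release below; `timeout` is the per-operation
# Helm timeout in seconds.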
helmDefaults:
  force: false
  timeout: 300
  # these don't seem to work:
  # wait: false
  # recreatePods: true
  # verify: true

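# Reusable release fragments: each entry defines a YAML anchor that releases
# merge in with `<<: *anchor` (e.g. `*default` sets the namespace and the
# missing-file behaviour).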
templates:
  default: &default
    namespace: kube-system
    missingFileHandler: Warn
  cluster-autoscaler: &cluster-autoscaler
    values:
    - envs/common/cluster-autoscaler.yaml.gotmpl
  external-dns: &external-dns
    values:
    - envs/common/external-dns.yaml.gotmpl

# core setup
releases:
- name: cluster-autoscaler
  condition: autoscaler.enabled
  <<: *default
  <<: *cluster-autoscaler
  chart: autoscaler/cluster-autoscaler
  version: 9.11.0
  labels:
    role: setup
    group: system
    app: autoscaler

- name: external-dns
  condition: external-dns.enabled
  <<: *default
  <<: *external-dns
  chart: bitnami/external-dns
  version: 6.1.0
  labels:
    role: setup
    group: system
    app: external-dns

- name: nginx-ingress
  condition: ingress.enabled
  <<: *default
  chart: nginx/ingress-nginx
  version: 4.2.0
  labels:
    role: setup
    group: system
    app: ingress
  values:
  - controller:
      ingressClass: nginx-sso
      service:
        annotations:
          service.beta.kubernetes.io/aws-load-balancer-ssl-cert: {{ .Environment.Values.eks.certificateARNLab }}
          service.beta.kubernetes.io/aws-load-balancer-ssl-ports: https
          service.beta.kubernetes.io/aws-load-balancer-type: elb
          service.beta.kubernetes.io/aws-load-balancer-backend-protocol: tcp
        targetPorts:
          http: http
          https: http
      publishService:
        enabled: true
      metrics:
        enabled: true
        serviceMonitor:
          enabled: true
          additionalLabels:
            release: prometheus-operator
  - defaultBackend:
      enabled: true

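# The `nginx-sso` ingress class terminates TLS at the ELB and fronts the
# dashboards below (Prometheus, Grafana, Kibana, k8s-dashboard) behind the
# OIDC auth service deployed by the `prometheus-operator-ingress-auth`
# release (apparently oauth2-proxy, judging by the /oauth2/* endpoints).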
# monitoring
- name: prometheus-operator
  condition: prometheus.enabled
  namespace: {{ .Environment.Values.monitoring.namespace }}
  chart: prometheus-community/kube-prometheus-stack
  version: 41.5.1
  labels:
    role: setup
    group: monitoring
    app: prometheus-operator
  values:
  - nameOverride: prometheus-operator
  - prometheus:
      enabled: true
      prometheusSpec:
        retention: 31d
        resources:
          requests:
            memory: 1400Mi
          limits:
            memory: 3000Mi
        storageSpec:
          volumeClaimTemplate:
            spec:
              storageClassName: gp2
              accessModes: ["ReadWriteOnce"]
              resources:
                requests:
                  storage: 50Gi
      ingress:
        enabled: true
        annotations:
          nginx.ingress.kubernetes.io/auth-url: "https://$host/oauth2/auth"
          nginx.ingress.kubernetes.io/auth-signin: "https://$host/oauth2/start?rd=$escaped_request_uri"
          kubernetes.io/ingress.class: nginx-sso
        hosts:
        - prometheus.{{ .Environment.Values.domain }}
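  # Note on templating: helmfile renders this file with Go templates before
  # YAML parsing, so the Prometheus label references below are wrapped in
  # backtick blocks ( {{`...`}} ), which pass the inner text through
  # literally to the rendered values.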
  - additionalPrometheusRulesMap:
      testbed:
        groups:
        - name: Testbed alerts
          rules:
          - alert: Waiting pod
            expr: sum(kube_pod_container_status_waiting_reason{namespace=~".*nola.*", reason!="ContainerCreating"}) by (namespace, pod) > 0
            for: 5m
            labels:
              severity: warning
              area: testbed
              namespace: "{{`{{ $labels.namespace }}`}}"
              pod: "{{`{{ $labels.pod }}`}}"
            annotations:
              title: Pod *{{`{{ $labels.namespace }}/{{ $labels.pod }}`}}* is in waiting state
              description: The pod {{`{{ $labels.namespace }}/{{ $labels.pod }}`}} has been in a waiting state for at least 5 minutes. Please check for image pull issues.
          - alert: Node not ready
            expr: sum(kube_node_status_condition{condition="Ready", status!="true"}) by (node) > 0
            for: 60m
            labels:
              severity: error
              area: testbed
              node: "{{`{{ $labels.node }}`}}"
            annotations:
              title: Node {{`{{ $labels.node }}`}} not becoming ready
              description: The cluster node {{`{{ $labels.node }}`}} has not become ready for 60 minutes. Please contact the cluster administrators.
          - alert: Pod OOM killed
            expr: (kube_pod_container_status_restarts_total - kube_pod_container_status_restarts_total offset 10m >= 1) and ignoring (reason) min_over_time(kube_pod_container_status_last_terminated_reason{reason="OOMKilled"}[10m]) == 1
            for: 0m
            labels:
              severity: error
              area: testbed
              namespace: "{{`{{ $labels.namespace }}`}}"
              pod: "{{`{{ $labels.pod }}`}}"
              reason: "{{`{{ $labels.reason }}`}}"
            annotations:
              title: Pod *{{`{{ $labels.namespace }}/{{ $labels.pod }}`}}* has been OOM killed
              description: The pod *{{`{{ $labels.namespace }}/{{ $labels.pod }}`}}* has been killed because it reached its memory limit. Investigate the memory usage or increase the limit to prevent this.
          - alert: Pod exited with a segfault
            expr: (kube_pod_container_status_restarts_total - kube_pod_container_status_restarts_total offset 10m >= 1) and min_over_time(kube_pod_container_status_last_terminated_exitcode[10m]) == 139
            for: 0m
            labels:
              severity: error
              area: testbed
              namespace: "{{`{{ $labels.namespace }}`}}"
              pod: "{{`{{ $labels.pod }}`}}"
              reason: "{{`{{ $labels.reason }}`}}"
            annotations:
              title: Pod *{{`{{ $labels.namespace }}/{{ $labels.pod }}`}}* exited with a segfault
              description: The pod *{{`{{ $labels.namespace }}/{{ $labels.pod }}`}}* exited with a segmentation fault; please examine the coredump.
          - alert: Node low on memory
            expr: node_memory_MemAvailable_bytes / node_memory_MemTotal_bytes * 100 < 5
            for: 1m
            labels:
              severity: warning
              area: testbed
            annotations:
              title: Node {{`{{ $labels.instance }}`}} has very little memory capacity left
              description: The cluster node {{`{{ $labels.instance }}`}} has less than 5% memory available.
          - alert: Pod stuck in crash loop
            expr: increase(kube_pod_container_status_restarts_total[1m]) > 3
            for: 2m
            labels:
              severity: error
              area: testbed
              namespace: "{{`{{ $labels.namespace }}`}}"
              pod: "{{`{{ $labels.pod }}`}}"
            annotations:
              title: Pod *{{`{{ $labels.namespace }}/{{ $labels.pod }}`}}* is crash looping
              description: "The pod *{{`{{ $labels.namespace }}/{{ $labels.pod }}`}}* is crash looping. Please check its logs to see what is going on."
          - alert: Pod restarted many times
            expr: kube_pod_container_status_restarts_total > 5
            labels:
              severity: warning
              area: testbed
              namespace: "{{`{{ $labels.namespace }}`}}"
              pod: "{{`{{ $labels.pod }}`}}"
            annotations:
              title: Pod *{{`{{ $labels.namespace }}/{{ $labels.pod }}`}}* was restarted many times
              description: "The pod *{{`{{ $labels.namespace }}/{{ $labels.pod }}`}}* was restarted more than 5 times. Please check its logs to see what is going on."
          - alert: PVC running out of space
            expr: kubelet_volume_stats_available_bytes / kubelet_volume_stats_capacity_bytes * 100 < 20
            labels:
              severity: warning
              area: testbed
              namespace: "{{`{{ $labels.namespace }}`}}"
              persistentvolumeclaim: "{{`{{ $labels.persistentvolumeclaim }}`}}"
            annotations:
              title: PVC *{{`{{ $labels.namespace }}/{{ $labels.persistentvolumeclaim }}`}}* has less than 20% free storage
              description: "The PVC *{{`{{ $labels.namespace }}/{{ $labels.persistentvolumeclaim }}`}}* only has {{`{{ $value }}`}}% capacity left. Please increase its size or clean it up."
          - alert: ElasticSearch new assert_violation errors found
            expr: rate(assert_violation_errors_hits[1m]) > 0
            labels:
              severity: warning
              area: testbed
              service: elasticsearch
              namespace: "{{`{{ $labels.namespace }}`}}"
            annotations:
              title: New ElasticSearch logs found with Assertion violation (WIFI-9824)
              description: New ElasticSearch logs found with Assertion violation (WIFI-9824)

  - grafana:
      image:
        repository: grafana/grafana
        tag: 8.5.13
      grafana.ini:
        users:
          viewers_can_edit: true
        auth:
          disable_login_form: true
          disable_signout_menu: true
        auth.anonymous:
          enabled: true
          org_role: Viewer
      testFramework:
        enabled: false
      ingress:
        enabled: true
        annotations:
          nginx.ingress.kubernetes.io/auth-url: "https://$host/oauth2/auth"
          nginx.ingress.kubernetes.io/auth-signin: "https://$host/oauth2/start?rd=$escaped_request_uri"
          kubernetes.io/ingress.class: nginx-sso
        hosts:
        - grafana.{{ .Environment.Values.domain }}
      dashboardProviders:
        dashboardproviders.yaml:
          apiVersion: 1
          providers:
          - name: 'default'
            orgId: 1
            folder: imported
            type: file
            disableDeletion: false
            editable: true
            options:
              path: /var/lib/grafana/dashboards/default

      dashboards:
        default:
          performance:
            url: https://raw.githubusercontent.com/Telecominfraproject/wlan-testing/master/grafana/performance.json
          qaDebugging:
            url: https://raw.githubusercontent.com/Telecominfraproject/wlan-cloud-ucentral-analytics/main/grafana/qa-debugging.json

      datasources:
        datasources.yaml:
          apiVersion: 1
          datasources:
          - name: Prometheus
            type: prometheus
            url: http://prometheus-operated:9090
            access: proxy
            isDefault: false
          - name: InfluxDB
            type: influxdb
            access: proxy
            url: https://influx.cicd.{{ .Environment.Values.domain }}
            user: tip
            secureJsonData:
              token: {{ .Environment.Values.influxdb.adminUser.token }}
              password: {{ .Environment.Values.influxdb.adminUser.password }}
            jsonData:
              version: Flux
              organization: tip
              defaultBucket: tip-cicd
          - name: ES
            type: elasticsearch
            access: proxy
            url: http://elasticsearch-client.monitoring.svc:9200
            database: logstash-*
            isDefault: false
            jsonData:
              esVersion: 6
              timeField: '@timestamp'
              logMessageField: message
              logLevelField: fields.level

  - alertmanager:
      config:
        global:
          resolve_timeout: 1m
          slack_api_url: {{ .Environment.Values.alertmanager.slack_api_url }}

        route:
          receiver: "null"
          routes:
          - match:
              area: testbed
            receiver: "slack-notifications"

        receivers:
        - name: "null"
        - name: "slack-notifications"
          slack_configs:
          - channel: "#open-wifi-testbed-alerts"
            send_resolved: true
            icon_url: https://avatars3.githubusercontent.com/u/3380462
            title: |-
              {{- readFile "alertmanager-templates/title.tpl" | nindent 14 }}
            text: >-
              {{- readFile "alertmanager-templates/text.tpl" | nindent 14 }}
            title_link: ""

  - kube-state-metrics:
      image:
        repository: tip-tip-wlan-cloud-ucentral.jfrog.io/kube-state-metrics
        tag: v2.6.0-tip20221103

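# Charts referenced as `charts/<name>` (as opposed to `<repo>/<name>`) are
# local charts shipped alongside this helmfile in the repository.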
- name: prometheus-operator-helper
  condition: prometheus.enabled
  namespace: {{ .Environment.Values.monitoring.namespace }}
  chart: charts/standalone-monitoring
  labels:
    role: setup
    group: monitoring
    app: prometheus-operator
    sub: helper
  values:
  - monitoring:
      namespace: {{ .Environment.Values.monitoring.namespace }}
      domain: {{ .Environment.Values.domain }}
  - proxy:
      namespace: kube-system

- name: prometheus-operator-ingress-auth
  condition: prometheus.enabled
  namespace: kube-system
  chart: charts/sso
  labels:
    role: setup
    group: monitoring
    app: prometheus-operator
    sub: oAuth
  values:
  - monitoring:
      domain: example.com
      namespace: {{ .Environment.Values.monitoring.namespace }}
  - oidc:
      issuerUrl: {{ .Environment.Values.sso.oidc.issuerUrl }}
      clientId: {{ .Environment.Values.sso.oidc.clientId }}
      clientSecret: {{ .Environment.Values.sso.oidc.clientSecret }}
      cookieSecret: {{ .Environment.Values.sso.oidc.cookieSecret }}

- name: fluentd
  condition: elastic.enabled
  namespace: {{ .Environment.Values.monitoring.namespace }}
  chart: kokuwa/fluentd-elasticsearch
  version: 13.1.0
  labels:
    role: setup
    group: monitoring
    app: fluentd
  values:
  - elasticsearch:
      serviceAccount:
        create: true
      hosts:
      - elasticsearch-client.{{ .Environment.Values.monitoring.namespace }}.svc.cluster.local
  - resources:
      requests:
        cpu: 100m
        memory: 200Mi
      limits:
        cpu: 500m
        memory: 300Mi
  - tolerations:
    - effect: NoSchedule
      operator: Exists

- name: elasticsearch
  condition: elastic.enabled
  namespace: {{ .Environment.Values.monitoring.namespace }}
  chart: charts/elasticsearch
  labels:
    role: setup
    group: monitoring
    app: elasticsearch
  values:
  - image:
      repository: "docker.elastic.co/elasticsearch/elasticsearch"
      tag: "6.8.22"
  - client:
      resources:
        limits:
          memory: 2Gi
        requests:
          memory: 1024Mi
      heapSize: "1024m"

  - master:
      resources:
        limits:
          cpu: 200m
          memory: 1000Mi
        requests:
          cpu: 200m
          memory: 800Mi

  - data:
      persistence:
        size: 650Gi
      resources:
        limits:
          cpu: 3
          memory: 4Gi
        requests:
          cpu: 1500m
          memory: 4Gi
      heapSize: "2048m"
      readinessProbe:
        timeoutSeconds: 30

- name: elasticsearch-curator
  condition: elastic.enabled
  namespace: {{ .Environment.Values.monitoring.namespace }}
  chart: stable/elasticsearch-curator
  labels:
    role: setup
    group: monitoring
    app: elasticsearch
  values:
  - configMaps:
      config_yml: |-
        client:
          hosts:
          - http://elasticsearch-client.{{ .Environment.Values.monitoring.namespace }}.svc.cluster.local:9200
      action_file_yml: |-
        actions:
          1:
            action: delete_indices
            description: "Delete old indices"
            options:
              ignore_empty_list: True
              continue_if_exception: True
              timeout_override: 300
            filters:
            - filtertype: pattern
              kind: prefix
              value: 'logstash-'
            - filtertype: age
              source: name
              direction: older
              timestring: '%Y.%m.%d'
              unit: days
              unit_count: 30

- name: kibana
  condition: kibana.enabled
  namespace: {{ .Environment.Values.monitoring.namespace }}
  chart: charts/kibana
  labels:
    role: setup
    group: monitoring
    app: kibana
  values:
  - testFramework:
      enabled: false
  - image:
      repository: "docker.elastic.co/kibana/kibana"
      tag: "6.8.6"
  - files:
      kibana.yml:
        elasticsearch.hosts: http://elasticsearch-client.{{ .Environment.Values.monitoring.namespace }}.svc.cluster.local:9200
  - ingress:
      enabled: true
      annotations:
        nginx.ingress.kubernetes.io/auth-url: "https://$host/oauth2/auth"
        nginx.ingress.kubernetes.io/auth-signin: "https://$host/oauth2/start?rd=$escaped_request_uri"
        kubernetes.io/ingress.class: nginx-sso
      hosts:
      - kibana.{{ .Environment.Values.domain }}
  - lifecycle:
      postStart:
        exec:
          command:
          - bash
          - -c
          - |
            #!/bin/bash
            # Configure the index pattern
            TEMPLATE_NAME="logstash"
            INDEX_PATTERN="logstash-*"
            KIBANA_URL=http://localhost:5601

            # Wait until the service is ready
            while [[ "$(curl -s -o /dev/null -w '%{http_code}\n' $KIBANA_URL/app/kibana)" != "200" ]]; do sleep 1; done

            # Apply the default index pattern to Kibana
            curl -X POST -v $KIBANA_URL/api/saved_objects/index-pattern/$TEMPLATE_NAME \
              -H 'kbn-xsrf: true' -H 'Content-Type: application/json' \
              -d '{"attributes": {"title": "'$INDEX_PATTERN'"}}'
  - dashboardImport:
      enabled: true
      timeout: 60
      basePath: ""
      dashboards:
        k8s-container-logs: |
          {{- readFile "kibana-dashboards/k8s-container-logs.json" | nindent 10 }}

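# Log shipping: the logstash release below tails the OpenWiFi Kafka topics
# (state, healthcheck, device_event_queue) from the dev and QA namespaces,
# parses S3 access logs for the ucentral-ap-firmware bucket, and receives
# syslog on an internal NLB; everything is indexed into the
# elasticsearch-client service above.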
- name: logstash
  condition: logstash.enabled
  namespace: {{ .Environment.Values.monitoring.namespace }}
  chart: elastic/logstash
  version: 7.16.2
  labels:
    role: setup
    group: monitoring
    app: logstash
  values:
  - image: docker.elastic.co/logstash/logstash
  - replicas: 1
  - resources:
      limits:
        memory: 2Gi
      requests:
        memory: 2Gi
  - logstashConfig:
      logstash.yml: |
        http.host: 0.0.0.0
        log.level: warn
  - logstashPipeline:
      logstash.conf: "" # override default pipeline
      ucentral-kafka.conf: |
        input {
          kafka {
            bootstrap_servers => "kafka-headless.openwifi-dev01.svc.cluster.local:9092"
            topics => ["state", "healthcheck", "device_event_queue"]
            tags => ["openwifi-kafka"]
            #codec => json
            decorate_events => true
            auto_offset_reset => "latest"
            client_id => "openwifi-dev01"
            add_field => { "instance" => "openwifi-dev01" }
          }

          kafka {
            bootstrap_servers => "kafka-headless.openwifi-qa01.svc.cluster.local:9092"
            topics => ["state", "healthcheck", "device_event_queue"]
            tags => ["openwifi-kafka"]
            decorate_events => true
            auto_offset_reset => "latest"
            client_id => "openwifi-qa01"
            add_field => { "instance" => "openwifi-qa01" }
          }
        }

        filter {
          if "openwifi-kafka" in [tags] {
            mutate { copy => { "[@metadata][kafka]" => "kafka" } }
          }
        }

        filter {
          if "openwifi-kafka" in [tags] {
            json {
              source => "message"
              remove_field => [ "message" ]
            }
          }
        }

        filter {
          if ([payload][state][unit][load]) {
            mutate {
              add_field => { "[payload][state][unit][load1]" => "%{[payload][state][unit][load][0]}" }
              add_field => { "[payload][state][unit][load5]" => "%{[payload][state][unit][load][1]}" }
              add_field => { "[payload][state][unit][load15]" => "%{[payload][state][unit][load][2]}" }
              remove_field => [ "[payload][state][unit][load]" ]
            }

            mutate {
              convert => {
                "[payload][state][unit][load1]" => "integer"
                "[payload][state][unit][load5]" => "integer"
                "[payload][state][unit][load15]" => "integer"
              }
            }
          }
        }

        output {
          if "openwifi-kafka" in [tags] {
            elasticsearch {
              hosts => "http://elasticsearch-client.{{ .Environment.Values.monitoring.namespace }}.svc.cluster.local:9200"
              index => "logstash-ucentral-%{+YYYY.MM.dd}"
            }
          }
        }

      ucentral-ap-firmware.conf: |
        input {
          s3 {
            access_key_id => "{{ .Environment.Values.ucentral_ap_firmware_logstash.access_key_id }}"
            secret_access_key => "{{ .Environment.Values.ucentral_ap_firmware_logstash.secret_access_key }}"
            bucket => "ucentral-ap-firmware-logs"
            delete => true
            region => "us-east-1"
            tags => ["ucentral-ap-firmware"]
            additional_settings => {
              force_path_style => true
              follow_redirects => false
            }
          }
        }

        filter {
          if "ucentral-ap-firmware" in [tags] {
            grok {
              match => { "message" => "%{S3_ACCESS_LOG}" }
              remove_field => ["message"]
            }

            date {
              match => [ "timestamp", "dd/MMM/yyyy:HH:mm:ss Z" ]
              remove_field => ["timestamp"]
            }

            geoip {
              source => "clientip"
            }

            if [operation] == "REST.GET.OBJECT" and [key] =~ /.*\.bin$/ {
              grok {
                match => { "key" => "%{UPLOAD_DATE:upload_date}-%{AP_MODEL:ap_model}-%{GREEDYDATA:branch_name}-%{GIT_REV:git_rev}-%{WORD:suffix}.%{WORD:file_extension}" }
                pattern_definitions => {
                  "UPLOAD_DATE" => "%{YEAR}%{MONTHNUM}%{MONTHDAY}"
                  "AP_MODEL" => "[a-z0-9]+_[a-z0-9]+(-|_)?[a-z0-9]+(-|_)?[a-z0-9]+"
                  "GIT_REV" => "[a-z0-9]{4,8}"
                }
                add_field => { "timestamp_clientip" => "%{@timestamp} %{clientip}" }
              }
            }
          }
        }

        output {
          if "ucentral-ap-firmware" in [tags] {
            elasticsearch {
              hosts => "http://elasticsearch-client.{{ .Environment.Values.monitoring.namespace }}.svc.cluster.local:9200"
              index => "logstash-ucentral-ap-firmware-%{+YYYY.MM.dd}"
            }
          }
        }

      syslog.conf: |
        input {
          syslog {
            tags => ["ucentral-syslog"]
            port => 5514
            grok_pattern => "(?:<%{POSINT:priority}>%{SYSLOGLINE}|%{MONTH} %{MONTHDAY} %{TIME} %{DATA:docker.container_name}/%{DATA:github.run_number}\[%{INT:undefined_number}\]: %{GREEDYDATA:message})"
          }
        }

        filter {
          if ([undefined_number]) {
            mutate {
              remove_field => [ "undefined_number" ]
            }
          }
        }

        output {
          if "ucentral-syslog" in [tags] {
            elasticsearch {
              hosts => "http://elasticsearch-client.{{ .Environment.Values.monitoring.namespace }}.svc.cluster.local:9200"
              index => "logstash-%{+YYYY.MM.dd}"
              document_type => "_doc"
            }
          }
        }
  - service:
      type: LoadBalancer
      annotations:
        service.beta.kubernetes.io/aws-load-balancer-type: "nlb-ip"
        service.beta.kubernetes.io/aws-load-balancer-internal: "true"
        service.beta.kubernetes.io/aws-load-balancer-private-ipv4-addresses: "10.10.10.40,10.10.11.40,10.10.12.40"
      ports:
      - name: syslog
        port: 514
        targetPort: 5514
        protocol: TCP

- name: k8s-dashboard-roles
  condition: k8s-dashboard.enabled
  namespace: {{ .Environment.Values.monitoring.namespace }}
  chart: charts/k8s-dashboard-roles
  labels:
    role: setup
    group: monitoring
    app: k8s-dashboard

- name: k8s-dashboard
  condition: k8s-dashboard.enabled
  namespace: {{ .Environment.Values.monitoring.namespace }}
  chart: kubernetes-dashboard/kubernetes-dashboard
  version: 5.0.5
  labels:
    role: setup
    group: monitoring
    app: k8s-dashboard
  values:
  - settings:
      defaultNamespace: {{ index .Environment.Values.monitoring.publicNamespaces 0 }}
      namespaceFallbackList: {{ .Environment.Values.monitoring.publicNamespaces | toJson }}
      itemsPerPage: 25
      clusterName: TIP WLAN CI/CD
  - extraArgs:
    - --enable-skip-login
    - --system-banner=Welcome to the TIP WLAN CI/CD Kubernetes cluster. If you are missing your namespace in the above select box, please <a href="https://telecominfraproject.atlassian.net/browse/WIFI">create a ticket</a>.
  - rbac:
      create: false
      clusterRoleMetrics: true
      clusterReadOnlyRole: false
  - service:
      type: NodePort
      externalPort: 80
  - protocolHttp: true
  - ingress:
      enabled: true
      paths:
      - /
      - /*
      annotations:
        #alb.ingress.kubernetes.io/actions.ssl-redirect: '{"Type": "redirect", "RedirectConfig": { "Protocol": "HTTPS", "Port": "443", "StatusCode": "HTTP_302"}}'
        #alb.ingress.kubernetes.io/group.name: wlan-cicd
        #alb.ingress.kubernetes.io/listen-ports: '[{"HTTP": 80}, {"HTTPS": 443}]'
        #alb.ingress.kubernetes.io/scheme: internet-facing
        #kubernetes.io/ingress.class: alb
        nginx.ingress.kubernetes.io/auth-url: "https://$host/oauth2/auth"
        nginx.ingress.kubernetes.io/auth-signin: "https://$host/oauth2/start?rd=$escaped_request_uri"
        kubernetes.io/ingress.class: nginx-sso
      hosts:
      - k8s-dashboard.{{ .Environment.Values.domain }}

- name: metrics-server
  condition: metrics-server.enabled
  namespace: {{ .Environment.Values.monitoring.namespace }}
  chart: bitnami/metrics-server
  version: 5.10.13
  labels:
    role: setup
    group: monitoring
    app: metrics-server
  values:
  - apiService:
      create: true

- name: aws-load-balancer-controller
  <<: *default
  condition: alb-ingress.enabled
  chart: eks/aws-load-balancer-controller
  version: 1.4.2
  labels:
    role: setup
    group: system
    app: aws-load-balancer-controller
  values:
  - serviceAccount:
      annotations:
        eks.amazonaws.com/role-arn: arn:aws:iam::{{ .Values.eks.accountID }}:role/{{ .Values.eks.clusterName }}-alb-ingress
    clusterName: {{ .Values.eks.clusterName }}
    enableShield: false
    enableWaf: false
    enableWafv2: false
    logLevel: info

- name: aws-node-termination-handler
  <<: *default
  condition: node-termination-handler.enabled
  chart: eks/aws-node-termination-handler
  version: 0.16.0
  labels:
    role: setup
    group: system
    app: node-termination-handler
  values:
  - deleteLocalData: true
  - ignoreDaemonSets: true
  - podTerminationGracePeriod: -1 # use the values defined in the Pod

- name: influxdb
  namespace: test-bss
  chart: influxdata/influxdb2
  version: 2.0.3
  condition: influxdb.enabled
  labels:
    role: setup
    group: load-testing
    app: influxdb
    task: tools-133
  values:
  - image:
      tag: 2.0.6-alpine
  - adminUser:
      organization: tip
      bucket: tip-cicd
      user: tip
      password: {{ .Environment.Values.influxdb.adminUser.password }}
      token: {{ .Environment.Values.influxdb.adminUser.token }}
      retention_policy: "0s"
  - persistence:
      storageClass: gp2
      size: 10Gi
  - service:
      type: NodePort
  - resources:
      limits:
        cpu: 500m
        memory: 500Mi
      requests:
        cpu: 500m
        memory: 500Mi
  - ingress:
      enabled: true
      annotations:
        kubernetes.io/ingress.class: alb
        alb.ingress.kubernetes.io/scheme: internet-facing
        alb.ingress.kubernetes.io/group.name: test-bss-load-testing
        alb.ingress.kubernetes.io/certificate-arn: {{ .Environment.Values.eks.certificateARNCICD }}
        alb.ingress.kubernetes.io/listen-ports: '[{"HTTP": 80}, {"HTTPS": 443}]'
        alb.ingress.kubernetes.io/actions.ssl-redirect: '{"Type": "redirect", "RedirectConfig": { "Protocol": "HTTPS", "Port": "443", "StatusCode": "HTTP_302"}}'
        alb.ingress.kubernetes.io/healthcheck-path: /health
        external-dns.alpha.kubernetes.io/hostname: influx.cicd.{{ .Environment.Values.domain }}
      hostname: influx.cicd.{{ .Environment.Values.domain }}
      path: "/*"

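# CI runners: actions-runner-controller provisions self-hosted GitHub Actions
# runners; the webhook server below lets GitHub push workflow events into the
# cluster for webhook-driven runner scaling.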
- name: actions-runner-controller
  condition: actions-runner-controller.enabled
  namespace: actions-runner-controller
  chart: actions-runner-controller/actions-runner-controller
  version: 0.19.1
  labels:
    app: actions-runner-controller
  values:
  - authSecret:
      create: true
      github_token: {{ .Environment.Values.actions_runner_controller.github_token }}
  - image:
      pullPolicy: IfNotPresent
  - githubWebhookServer:
      enabled: true
      secret:
        github_webhook_secret_token: {{ .Environment.Values.actions_runner_controller.webhook_secret }}
      service:
        type: NodePort
      ingress:
        enabled: true
        hosts:
        - host: "ghac-webhook.cicd.lab.wlan.tip.build"
          paths:
          - path: /*
            pathType: ImplementationSpecific
        annotations:
          kubernetes.io/ingress.class: alb
          alb.ingress.kubernetes.io/scheme: internet-facing
          alb.ingress.kubernetes.io/group.name: wlan-cicd
          alb.ingress.kubernetes.io/certificate-arn: {{ .Environment.Values.eks.certificateARNCICD }}
          alb.ingress.kubernetes.io/listen-ports: '[{"HTTP": 80}, {"HTTPS": 443}]'
          alb.ingress.kubernetes.io/actions.ssl-redirect: '{"Type": "redirect", "RedirectConfig": { "Protocol": "HTTPS", "Port": "443", "StatusCode": "HTTP_302"}}'

- name: actions-runner-controller-addon
  condition: actions-runner-controller.enabled
  namespace: actions-runner-controller
  chart: charts/actions-runner-controller-addon
  labels:
    app: actions-runner-controller

- name: cert-manager
  condition: cert-manager.enabled
  namespace: kube-system
  chart: jetstack/cert-manager
  version: v1.6.1
  labels:
    app: cert-manager
  values:
  - installCRDs: true
  - webhook:
      resources:
        requests:
          cpu: 500m
          memory: 150Mi
        limits:
          cpu: 500m
          memory: 150Mi

- name: calico
  condition: calico.enabled
  chart: projectcalico/tigera-operator
  version: v3.22.2
  namespace: kube-system
  labels:
    app: calico

- name: github-actions-network-policies
  condition: calico.enabled
  namespace: actions-runner-controller
  chart: charts/github-actions-network-policies
  labels:
    role: setup
    group: networking
    app: github-actions-network-policies
    sub1: calico
    sub2: actions-runner-controller-addon

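# prometheus-es-exporter runs the query configured below against
# Elasticsearch and exposes the hit count as a Prometheus metric named after
# the query section (here `assert_violation_errors_hits`), which the
# "ElasticSearch new assert_violation errors found" alert above keys off.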
- name: elasticsearch-exporter
  condition: elastic.enabled
  namespace: {{ .Environment.Values.monitoring.namespace }}
  chart: braedon/prometheus-es-exporter
  version: 0.2.0
  labels:
    group: monitoring
    app: elasticsearch-exporter
  values:
  - image:
      tag: 0.14.1
  - elasticsearch:
      cluster: elasticsearch-client.{{ .Environment.Values.monitoring.namespace }}.svc.cluster.local:9200
      queries: |-
        [DEFAULT]
        QueryIntervalSecs = 15
        QueryTimeoutSecs = 10
        QueryIndices = _all
        QueryOnError = drop
        QueryOnMissing = drop

        [query_assert_violation_errors]
        QueryOnError = preserve
        QueryOnMissing = zero
        QueryJson = {
            "query": {
              "bool": {
                "filter": {
                  "bool": {
                    "must": [
                      {
                        "match": {
                          "kubernetes.namespace_name.keyword": "openwifi-qa01"
                        }
                      },
                      {
                        "match": {
                          "kubernetes.container_name.keyword": "owgw"
                        }
                      }
                    ]
                  }
                },
                "must": {
                  "match": {
                    "message": "Assertion violation: !_pStream"
                  }
                }
              }
            },
            "aggs": {
              "agg_terms_kubernetes.container_name.keyword": {
                "terms": {
                  "field": "kubernetes.container_name.keyword"
                }
              }
            }
          }

- name: core-dump-handler
  condition: core-dump-handler.enabled
  chart: core-dump-handler/core-dump-handler
  version: v8.6.0
  namespace: ibm-observe
  labels:
    app: core-dump-handler
  values:
  - daemonset:
      s3BucketName: openwifi-core-dumps
      s3AccessKey: {{ .Environment.Values.core_dump_handler.s3_access_key }}
      s3Secret: {{ .Environment.Values.core_dump_handler.s3_secret }}
      s3Region: us-east-1
      includeCrioExe: true
      vendor: rhel7