Files
kubernetes/cluster/addons/fluentd-gcp/fluentd-gcp-ds.yaml
Bryan Moyles 32c2bfadfd A large set of improvements to the Stackdriver components.
Metadata Agent Improvements
Bump metadata agent version to 0.2-0.0.21-1.
Expand the metadata agent's access to all API groups.
Remove metadata agent config maps in favor of command line flags.
Update the metadata agent's liveness probe to a new /healthz handler.

Logging Agent Improvements
Bump logging agent version to 0.2-1.5.33-1-k8s-1.
Appropriately set log severity for k8s_container.
Fix detect exceptions plugin to analyze message field instead of log field.
Fix detect exceptions plugin to analyze streams based on local resource id.
Disable the metadata agent for monitored resource construction in logging.
Disable timestamp adjustment in logs to optimize performance.
Reduce logging agent buffer chunk limit to 512k to optimize performance.
2018-08-06 11:26:35 -04:00

120 lines
4.4 KiB
YAML

apiVersion: extensions/v1beta1
kind: DaemonSet
metadata:
name: fluentd-gcp-{{ fluentd_gcp_yaml_version }}
namespace: kube-system
labels:
k8s-app: fluentd-gcp
kubernetes.io/cluster-service: "true"
addonmanager.kubernetes.io/mode: Reconcile
version: {{ fluentd_gcp_yaml_version }}
spec:
updateStrategy:
type: RollingUpdate
template:
metadata:
labels:
k8s-app: fluentd-gcp
kubernetes.io/cluster-service: "true"
version: {{ fluentd_gcp_yaml_version }}
# This annotation ensures that fluentd does not get evicted if the node
# supports critical pod annotation based priority scheme.
# Note that this does not guarantee admission on the nodes (#40573).
annotations:
scheduler.alpha.kubernetes.io/critical-pod: ''
spec:
priorityClassName: system-node-critical
serviceAccountName: fluentd-gcp
dnsPolicy: Default
containers:
- name: fluentd-gcp
image: gcr.io/stackdriver-agents/stackdriver-logging-agent:{{ fluentd_gcp_version }}
volumeMounts:
- name: varlog
mountPath: /var/log
- name: varlibdockercontainers
mountPath: /var/lib/docker/containers
readOnly: true
- name: config-volume
mountPath: /etc/google-fluentd/config.d
env:
- name: NODE_NAME
valueFrom:
fieldRef:
apiVersion: v1
fieldPath: spec.nodeName
- name: STACKDRIVER_METADATA_AGENT_URL
value: http://$(NODE_NAME):8799
# Liveness probe is aimed to help in situarions where fluentd
# silently hangs for no apparent reasons until manual restart.
# The idea of this probe is that if fluentd is not queueing or
# flushing chunks for 5 minutes, something is not right. If
# you want to change the fluentd configuration, reducing amount of
# logs fluentd collects, consider changing the threshold or turning
# liveness probe off completely.
livenessProbe:
initialDelaySeconds: 600
periodSeconds: 60
exec:
command:
- '/bin/sh'
- '-c'
- >
LIVENESS_THRESHOLD_SECONDS=${LIVENESS_THRESHOLD_SECONDS:-300};
STUCK_THRESHOLD_SECONDS=${LIVENESS_THRESHOLD_SECONDS:-900};
if [ ! -e /var/log/fluentd-buffers ];
then
exit 1;
fi;
touch -d "${STUCK_THRESHOLD_SECONDS} seconds ago" /tmp/marker-stuck;
if [[ -z "$(find /var/log/fluentd-buffers -type f -newer /tmp/marker-stuck -print -quit)" ]];
then
rm -rf /var/log/fluentd-buffers;
exit 1;
fi;
touch -d "${LIVENESS_THRESHOLD_SECONDS} seconds ago" /tmp/marker-liveness;
if [[ -z "$(find /var/log/fluentd-buffers -type f -newer /tmp/marker-liveness -print -quit)" ]];
then
exit 1;
fi;
# BEGIN_PROMETHEUS_TO_SD
- name: prometheus-to-sd-exporter
image: k8s.gcr.io/prometheus-to-sd:v0.2.4
command:
- /monitor
- --stackdriver-prefix={{ prometheus_to_sd_prefix }}/addons
- --api-override={{ prometheus_to_sd_endpoint }}
- --source=fluentd:http://localhost:24231?whitelisted=stackdriver_successful_requests_count,stackdriver_failed_requests_count,stackdriver_ingested_entries_count,stackdriver_dropped_entries_count
- --pod-id=$(POD_NAME)
- --namespace-id=$(POD_NAMESPACE)
env:
- name: POD_NAME
valueFrom:
fieldRef:
fieldPath: metadata.name
- name: POD_NAMESPACE
valueFrom:
fieldRef:
fieldPath: metadata.namespace
# END_PROMETHEUS_TO_SD
nodeSelector:
beta.kubernetes.io/fluentd-ds-ready: "true"
tolerations:
- key: "node.alpha.kubernetes.io/ismaster"
effect: "NoSchedule"
- operator: "Exists"
effect: "NoExecute"
- operator: "Exists"
effect: "NoSchedule"
terminationGracePeriodSeconds: 60
volumes:
- name: varlog
hostPath:
path: /var/log
- name: varlibdockercontainers
hostPath:
path: /var/lib/docker/containers
- name: config-volume
configMap:
name: {{ fluentd_gcp_configmap_name }}-v1.2.5