Compare commits

...

34 Commits

Author SHA1 Message Date
kongfei605
71944c64b4 Merge pull request #65 from flashcatcloud/kongfei_develop
fix redis mode compare problem
2022-08-11 15:52:09 +08:00
kongfei
219beae857 fix redis mode compare problem 2022-08-11 15:35:37 +08:00
kongfei605
180552c1cb Merge pull request #63 from flashcatcloud/kongfei_develop
update to 5.10.3
2022-08-11 14:15:25 +08:00
kongfei
436a216b38 update to 5.10.3 2022-08-11 14:10:46 +08:00
kongfei605
005a673cfa Merge pull request #62 from LinkMaq/bugfix-61
fixed(categraf): Empty docker_socket when kubernetes runtime is not d…
2022-08-10 07:10:24 +08:00
kongfei605
055ffb3002 Merge pull request #60 from LinkMaq/bugfix-54
fixed(database): add subPath for my.cnf  #54
2022-08-10 07:05:17 +08:00
LinkMaq
584a00f668 fixed(categraf): Empty docker_socket when kubernetes runtime is not docker 2022-08-10 00:53:16 +08:00
LinkMaq
c697ffa2dc fixed(database): add subPath for my.cnf #54 2022-08-09 23:41:54 +08:00
kongfei605
6e09f7b074 Merge pull request #59 from flashcatcloud/kongfei_develop
v5.10.2
2022-08-08 10:35:42 +08:00
kongfei
b3cc4112db update to v5.10.2 2022-08-08 10:33:34 +08:00
kongfei
67f43473d7 v5.10.2 2022-08-08 10:32:36 +08:00
kongfei605
36588e5e3d Merge pull request #58 from xiaoziv/fix-nserver-hostname
Update deployment.yaml
2022-08-06 20:40:39 +08:00
xiaoziv
93740aa23f Update deployment.yaml
nserver设置多副本时候,指定hostname会导致多副本的hostname相同,导致规则的分配出现问题
2022-08-05 14:34:10 +08:00
kongfei605
6872d2e5fe Merge pull request #57 from flashcatcloud/kongfei_develop
upgrade v5.10.1
2022-08-03 20:39:27 +08:00
kongfei
fa2abf8f33 upgrade v5.10.1 2022-08-03 20:39:02 +08:00
kongfei605
6ddb9be9a0 Merge pull request #55 from flashcatcloud/kongfei_develop
v5.9.8
2022-07-29 06:26:30 +08:00
kongfei
8fe0978add v5.9.8 2022-07-29 06:25:57 +08:00
kongfei605
8603f7d667 Merge pull request #53 from flashcatcloud/kongfei_develop
v5.9.7
2022-07-27 16:14:17 +08:00
kongfei
d7bb2eb5ea v5.9.7 2022-07-27 16:04:38 +08:00
kongfei605
3464091f5b Merge pull request #52 from flashcatcloud/kongfei_develop
fix dsn only support internal service port
2022-07-25 06:55:45 +08:00
kongfei
5348a4ab1f fix dsn only support internal service port 2022-07-25 06:54:28 +08:00
kongfei605
80c36d00d1 Merge pull request #51 from xiaoziv/fix-database-bug
Fix database url bug
2022-07-25 06:47:44 +08:00
xiaoziv
8b68709276 Update daemonset.yaml 2022-07-22 21:42:37 +08:00
xiaoziv
90acdccee9 Update conf-cm.yaml 2022-07-22 21:39:13 +08:00
xiaoziv
0a14c37633 Update conf-cm.yaml 2022-07-22 21:37:29 +08:00
kongfei605
ee40d6f865 Merge pull request #48 from xiaoziv/pull-secret-support
support imagePullSecrets for n9e & prometheus
2022-07-20 20:29:05 +08:00
xiaoziv
c0e0d614bc Update deployment.yaml
remove nginx additional imagePullSecrets config
2022-07-20 20:06:23 +08:00
kongfei605
6a4bc21e6e Merge pull request #49 from xiaoziv/sql-add-recording-rule-table
Update a-n9e.sql
2022-07-20 19:25:43 +08:00
xiaoziv
27cfa3dd62 Update a-n9e.sql
add recording rule
2022-07-20 18:49:15 +08:00
xiaoziv
07ee7ed200 support imagePullSecrets for n9e & prometheus 2022-07-20 16:20:16 +08:00
kongfei605
47bad75a42 Merge pull request #46 from flashcatcloud/kongfei_develop
v5.9.6
2022-07-09 15:51:57 +08:00
kongfei
ca4877f1a2 v5.9.6 2022-07-09 15:51:03 +08:00
kongfei605
5aa37e029d Merge pull request #45 from flashcatcloud/kongfei_develop
pull n9e image from flashcatcloud
2022-07-06 18:16:53 +08:00
kongfei
253467398f pull n9e image from flashcatcloud 2022-07-06 18:16:07 +08:00
15 changed files with 1912 additions and 1542 deletions

View File

@@ -27,7 +27,7 @@ sources:
maintainers:
- email: contact-us@flashcat.cloud
name: flashcatcloud
version: 0.1.2
version: 0.1.3
apiVersion: v1
appVersion: 5.9.4
appVersion: 5.9.6
icon: https://raw.githubusercontent.com/flashcatcloud/n9e-helm/master/n9e-icon.png

File diff suppressed because it is too large Load Diff

232
dashboards/n9e_server.json Normal file
View File

@@ -0,0 +1,232 @@
{
"name": "夜莺大盘",
"tags": "",
"configs": {
"var": [],
"panels": [
{
"targets": [
{
"refId": "A",
"expr": "rate(n9e_server_samples_received_total[1m])"
}
],
"name": "每秒接收的数据点个数",
"options": {
"tooltip": {
"mode": "all",
"sort": "none"
},
"legend": {
"displayMode": "hidden"
},
"standardOptions": {},
"thresholds": {}
},
"custom": {
"drawStyle": "lines",
"lineInterpolation": "smooth",
"fillOpacity": 0.5,
"stack": "off"
},
"version": "2.0.0",
"type": "timeseries",
"layout": {
"h": 4,
"w": 12,
"x": 0,
"y": 0,
"i": "53fcb9dc-23f9-41e0-bc5e-121eed14c3a4",
"isResizable": true
},
"id": "53fcb9dc-23f9-41e0-bc5e-121eed14c3a4"
},
{
"targets": [
{
"refId": "A",
"expr": "rate(n9e_server_alerts_total[10m])"
}
],
"name": "每秒产生的告警事件个数",
"options": {
"tooltip": {
"mode": "all",
"sort": "none"
},
"legend": {
"displayMode": "hidden"
},
"standardOptions": {},
"thresholds": {}
},
"custom": {
"drawStyle": "lines",
"lineInterpolation": "smooth",
"fillOpacity": 0.5,
"stack": "off"
},
"version": "2.0.0",
"type": "timeseries",
"layout": {
"h": 4,
"w": 12,
"x": 12,
"y": 0,
"i": "47fc6252-9cc8-4b53-8e27-0c5c59a47269",
"isResizable": true
},
"id": "f70dcb8b-b58b-4ef9-9e48-f230d9e17140"
},
{
"targets": [
{
"refId": "A",
"expr": "n9e_server_alert_queue_size"
}
],
"name": "告警事件内存队列长度",
"options": {
"tooltip": {
"mode": "all",
"sort": "none"
},
"legend": {
"displayMode": "hidden"
},
"standardOptions": {},
"thresholds": {}
},
"custom": {
"drawStyle": "lines",
"lineInterpolation": "smooth",
"fillOpacity": 0.5,
"stack": "off"
},
"version": "2.0.0",
"type": "timeseries",
"layout": {
"h": 4,
"w": 12,
"x": 0,
"y": 4,
"i": "ad1af16c-de0c-45f4-8875-cea4e85d51d0",
"isResizable": true
},
"id": "caf23e58-d907-42b0-9ed6-722c8c6f3c5f"
},
{
"targets": [
{
"refId": "A",
"expr": "n9e_server_http_request_duration_seconds_sum/n9e_server_http_request_duration_seconds_count"
}
],
"name": "数据接收接口平均响应时间(单位:秒)",
"options": {
"tooltip": {
"mode": "all",
"sort": "desc"
},
"legend": {
"displayMode": "hidden"
},
"standardOptions": {},
"thresholds": {}
},
"custom": {
"drawStyle": "lines",
"lineInterpolation": "smooth",
"fillOpacity": 0.5,
"stack": "noraml"
},
"version": "2.0.0",
"type": "timeseries",
"layout": {
"h": 4,
"w": 12,
"x": 12,
"y": 4,
"i": "64c3abc2-404c-4462-a82f-c109a21dac91",
"isResizable": true
},
"id": "6b8d2db1-efca-4b9e-b429-57a9d2272bc5"
},
{
"targets": [
{
"refId": "A",
"expr": "n9e_server_sample_queue_size"
}
],
"name": "内存数据队列长度",
"options": {
"tooltip": {
"mode": "all",
"sort": "desc"
},
"legend": {
"displayMode": "hidden"
},
"standardOptions": {},
"thresholds": {}
},
"custom": {
"drawStyle": "lines",
"lineInterpolation": "smooth",
"fillOpacity": 0.5,
"stack": "off"
},
"version": "2.0.0",
"type": "timeseries",
"layout": {
"h": 4,
"w": 12,
"x": 0,
"y": 8,
"i": "1c7da942-58c2-40dc-b42f-983e4a35b89b",
"isResizable": true
},
"id": "bd41677d-40d3-482e-bb6e-fbd25df46d87"
},
{
"targets": [
{
"refId": "A",
"expr": "avg(n9e_server_forward_duration_seconds_sum/n9e_server_forward_duration_seconds_count)"
}
],
"name": "数据发往TSDB平均耗时单位",
"options": {
"tooltip": {
"mode": "all",
"sort": "desc"
},
"legend": {
"displayMode": "hidden"
},
"standardOptions": {},
"thresholds": {}
},
"custom": {
"drawStyle": "lines",
"lineInterpolation": "smooth",
"fillOpacity": 0.5,
"stack": "noraml"
},
"version": "2.0.0",
"type": "timeseries",
"layout": {
"h": 4,
"w": 12,
"x": 12,
"y": 8,
"i": "eed94a0b-954f-48ac-82e5-a2eada1c8a3d",
"isResizable": true
},
"id": "c8642e72-f384-46a5-8410-1e6be2953c3c"
}
],
"version": "2.0.0"
}
}

View File

@@ -123,7 +123,10 @@ insert into `role_operation`(role_name, operation) values('Standard', '/job-tpls
insert into `role_operation`(role_name, operation) values('Standard', '/job-tasks');
insert into `role_operation`(role_name, operation) values('Standard', '/job-tasks/add');
insert into `role_operation`(role_name, operation) values('Standard', '/job-tasks/put');
insert into `role_operation`(role_name, operation) values('Standard', '/recording-rules');
insert into `role_operation`(role_name, operation) values('Standard', '/recording-rules/add');
insert into `role_operation`(role_name, operation) values('Standard', '/recording-rules/put');
insert into `role_operation`(role_name, operation) values('Standard', '/recording-rules/del');
-- for alert_rule | collect_rule | mute | dashboard grouping
CREATE TABLE `busi_group` (
@@ -341,6 +344,25 @@ CREATE TABLE `metric_view` (
insert into metric_view(name, cate, configs) values('Host View', 0, '{"filters":[{"oper":"=","label":"__name__","value":"cpu_usage_idle"}],"dynamicLabels":[],"dimensionLabels":[{"label":"ident","value":""}]}');
-- recording_rule: one row per recording rule.
-- Per the column comments below, `prom_ql` holds the PromQL expression, `name` is the
-- new metric name, and `append_tags` holds space-separated key=value tags.
-- `disabled` is a flag (0 = enabled, 1 = disabled); `group_id` defaults to 0.
-- Secondary indexes exist on `group_id` and `update_at`.
-- NOTE(review): the unit of `prom_eval_interval` is not stated here (presumably seconds) — confirm.
-- NOTE(review): `create_at` / `update_at` are bigint, presumably Unix timestamps — confirm.
CREATE TABLE `recording_rule` (
`id` bigint unsigned not null auto_increment,
`group_id` bigint not null default '0' comment 'group_id',
`cluster` varchar(128) not null,
`name` varchar(255) not null comment 'new metric name',
`note` varchar(255) not null comment 'rule note',
`disabled` tinyint(1) not null comment '0:enabled 1:disabled',
`prom_ql` varchar(8192) not null comment 'promql',
`prom_eval_interval` int not null comment 'evaluate interval',
`append_tags` varchar(255) default '' comment 'split by space: service=n9e mod=api',
`create_at` bigint default '0',
`create_by` varchar(64) default '',
`update_at` bigint default '0',
`update_by` varchar(64) default '',
PRIMARY KEY (`id`),
KEY `group_id` (`group_id`),
KEY `update_at` (`update_at`)
) ENGINE=InnoDB DEFAULT CHARSET = utf8mb4;
CREATE TABLE `alert_aggr_view` (
`id` bigint unsigned not null auto_increment,
`name` varchar(191) not null default '',
@@ -380,6 +402,7 @@ CREATE TABLE `alert_cur_event` (
`notify_cur_number` int not null default 0 comment '',
`target_ident` varchar(191) not null default '' comment 'target ident, also in tags',
`target_note` varchar(191) not null default '' comment 'target note',
`first_trigger_time` bigint,
`trigger_time` bigint not null,
`trigger_value` varchar(255) not null,
`tags` varchar(1024) not null default '' comment 'merge data_tags rule_tags, split by ,,',
@@ -414,6 +437,7 @@ CREATE TABLE `alert_his_event` (
`notify_cur_number` int not null default 0 comment '',
`target_ident` varchar(191) not null default '' comment 'target ident, also in tags',
`target_note` varchar(191) not null default '' comment 'target note',
`first_trigger_time` bigint,
`trigger_time` bigint not null,
`trigger_value` varchar(255) not null,
`recover_time` bigint not null default 0,

View File

@@ -240,6 +240,12 @@ app: "{{ template "nightingale.name" . }}"
{{- end }}
{{- end -}}
{{/*
nightingale.redis.mode renders the Redis mode string.
It yields the literal "standalone" when .Values.redis.type equals "internal";
otherwise it yields .Values.redis.external.mode.
The template reads .Values, so callers must pass the root context,
e.g. {{ template "nightingale.redis.mode" . }}.
*/}}
{{- define "nightingale.redis.mode" -}}
{{- with .Values.redis }}
{{- ternary "standalone" .external.mode (eq .type "internal") }}
{{- end }}
{{- end -}}
/*scheme://[redis:password@]host:port[/master_set]*/
{{- define "nightingale.redis.url" -}}
{{- with .Values.redis }}

View File

@@ -15,7 +15,6 @@
#
*/}}
{{- if eq .Values.categraf.type "internal" -}}
{{- $categraf := .Values.persistence.persistentVolumeClaim.telgraf -}}
apiVersion: apps/v1
kind: DaemonSet
metadata:
@@ -38,6 +37,10 @@ spec:
{{ toYaml .Values.categraf.podAnnotations | indent 8 }}
{{- end }}
spec:
{{- with .Values.imagePullSecrets }}
imagePullSecrets:
{{- toYaml . | nindent 8 }}
{{- end }}
affinity:
nodeAffinity:
requiredDuringSchedulingIgnoredDuringExecution:
@@ -93,8 +96,10 @@ spec:
name: input-net
- mountPath: /etc/categraf/conf/input.netstat
name: input-netstat
{{- if and ( eq .Values.categraf.type "internal") ( .Values.categraf.internal.docker_socket) }}
- mountPath: /etc/categraf/conf/input.docker
name: input-docker
{{- end }}
- mountPath: /etc/categraf/conf/input.kubernetes
name: input-kubernetes
- mountPath: /etc/categraf/conf/input.prometheus
@@ -115,8 +120,10 @@ spec:
- mountPath: /hostfs
name: hostrofs
readOnly: true
{{- if and ( eq .Values.categraf.type "internal") ( .Values.categraf.internal.docker_socket) }}
- name: docker-socket
mountPath: {{ trimPrefix "unix://" .Values.categraf.internal.docker_socket }}
{{- end }}
volumes:
- name: categraf-config
configMap:
@@ -144,9 +151,11 @@ spec:
- name: input-netstat
configMap:
name: input-netstat
{{- if and ( eq .Values.categraf.type "internal") ( .Values.categraf.internal.docker_socket) }}
- name: input-docker
configMap:
name: input-docker
{{- end }}
- name: input-kubernetes
configMap:
name: input-kubernetes
@@ -174,8 +183,10 @@ spec:
- name: hostroutmp
hostPath:
path: /var/run/utmp
{{- if and ( eq .Values.categraf.type "internal") ( .Values.categraf.internal.docker_socket) }}
- name: docker-socket
hostPath:
path: {{ trimPrefix "unix://" .Values.categraf.internal.docker_socket }}
type: Socket
{{- end }}
{{- end -}}

View File

@@ -15,6 +15,7 @@
#
*/}}
{{- if eq .Values.categraf.type "internal" -}}
{{- if .Values.categraf.internal.docker_socket -}}
apiVersion: v1
kind: ConfigMap
metadata:
@@ -22,4 +23,5 @@ metadata:
data:
{{ (.Files.Glob "categraf/conf/input.docker/*.toml").AsConfig | indent 2 }}
{{- end -}}
{{- end -}}

View File

@@ -73,6 +73,7 @@ spec:
- mountPath: /var/lib/mysql/
name: database-data
- mountPath: /etc/my.cnf
subPath: my.cnf
name: database-config
- mountPath: /docker-entrypoint-initdb.d
name: database-initdb-config

View File

@@ -87,8 +87,9 @@ data:
[Redis]
Address = "{{ template "nightingale.redis.addr" . }}"
Password = "{{ template "nightingale.redis.password" . }}"
{{- /* The helper reads .Values.redis, so the root context "." must be passed; without it dot is nil and rendering fails. */}}
RedisType = "{{ template "nightingale.redis.mode" . }}"
[DB]
DSN="{{ template "nightingale.database.username" . }}:{{ template "nightingale.database.rawPassword" . }}@tcp({{ template "nightingale.database" . }}:{{ template "nightingale.database.servicePort" . }})/{{ template "nightingale.database.name" . }}?charset=utf8mb4&parseTime=True&loc=Local&allowNativePasswords=true"
DSN="{{ template "nightingale.database.username" . }}:{{ template "nightingale.database.rawPassword" . }}@tcp({{ template "nightingale.database.host" . }}:{{ template "nightingale.database.port" . }})/{{ template "nightingale.database.name" . }}?charset=utf8mb4&parseTime=True&loc=Local&allowNativePasswords=true"
Debug = false
DBType = "mysql"
MaxLifetime = 7200
@@ -116,8 +117,8 @@ data:
Url = "http://{{ template "nightingale.prometheus.host" . }}:{{ template "nightingale.prometheus.servicePort" . }}/api/v1/write"
BasicAuthUser = "{{ template "nightingale.prometheus.username" . }}"
BasicAuthPass = "{{ template "nightingale.prometheus.rawPassword" . }}"
Timeout = 30000
DialTimeout = 10000
Timeout = 10000
DialTimeout = 3000
TLSHandshakeTimeout = 30000
ExpectContinueTimeout = 1000
IdleConnTimeout = 90000

View File

@@ -38,6 +38,10 @@ spec:
{{ toYaml .Values.nserver.podAnnotations | indent 8 }}
{{- end }}
spec:
{{- with .Values.imagePullSecrets }}
imagePullSecrets:
{{- toYaml . | nindent 8 }}
{{- end }}
containers:
- args:
- /app/n9e
@@ -62,7 +66,7 @@ spec:
name: nserver-template
- mountPath: /app/etc/script
name: nserver-script
hostname: nserver
# hostname: nserver
restartPolicy: Always
volumes:
- name: nserver-config

View File

@@ -27,6 +27,8 @@ data:
MetricsYamlFile = "/app/etc/metrics.yaml"
BuiltinAlertsDir = "/app/etc/alerts"
BuiltinDashboardsDir = "/app/etc/dashboards"
ClustersFrom = "config"
ClustersFromAPIs = []
[[NotifyChannels]]
Label = "邮箱"
Key = "email"
@@ -69,6 +71,10 @@ data:
AccessExpired = 1500
RefreshExpired = 10080
RedisKeyPrefix = "/jwt/"
[ProxyAuth]
Enable = false
HeaderUserNameKey = "X-User-Name"
DefaultRoles = ["Standard"]
[BasicAuth]
user001 = "ccc26da7b9aba533cbb263a36c07dcc5"
[AnonymousAccess]
@@ -105,8 +111,9 @@ data:
[Redis]
Address = "{{ template "nightingale.redis.addr" . }}"
Password = "{{ template "nightingale.redis.password" . }}"
RedisType = "{{ template "nightingale.redis.mode" . }}"
[DB]
DSN = "{{ template "nightingale.database.username" . }}:{{ template "nightingale.database.rawPassword" . }}@tcp({{ template "nightingale.database" . }}:{{ template "nightingale.database.servicePort" . }})/{{ template "nightingale.database.name" . }}?charset=utf8mb4&parseTime=True&loc=Local&allowNativePasswords=true"
DSN = "{{ template "nightingale.database.username" . }}:{{ template "nightingale.database.rawPassword" . }}@tcp({{ template "nightingale.database.host" . }}:{{ template "nightingale.database.port" . }})/{{ template "nightingale.database.name" . }}?charset=utf8mb4&parseTime=True&loc=Local&allowNativePasswords=true"
Debug = true
DBType = "mysql"
MaxLifetime = 7200
@@ -119,17 +126,15 @@ data:
BasicAuthUser = ""
BasicAuthPass = ""
Timeout = 30000
DialTimeout = 10000
TLSHandshakeTimeout = 30000
ExpectContinueTimeout = 1000
IdleConnTimeout = 90000
KeepAlive = 30000
MaxConnsPerHost = 0
MaxIdleConns = 100
DialTimeout = 3000
MaxIdleConnsPerHost = 100
[Ibex]
Address = "http://ibex:10090"
BasicAuthUser = "ibex"
BasicAuthPass = "ibex"
Timeout = 3000
[TargetMetrics]
TargetUp = '''max(max_over_time(target_up{ident=~"(%s)"}[%dm])) by (ident)'''
LoadPerCore = '''max(max_over_time(system_load_norm_1{ident=~"(%s)"}[%dm])) by (ident)'''
MemUtil = '''100-max(max_over_time(mem_available_percent{ident=~"(%s)"}[%dm])) by (ident)'''
{{- end -}}

View File

@@ -38,6 +38,10 @@ spec:
{{ toYaml .Values.nwebapi.podAnnotations | indent 8 }}
{{- end }}
spec:
{{- with .Values.imagePullSecrets }}
imagePullSecrets:
{{- toYaml . | nindent 8 }}
{{- end }}
containers:
- args:
- /app/n9e

View File

@@ -36,6 +36,10 @@ spec:
{{ include "nightingale.labels" . | indent 8 }}
component: prometheus
spec:
{{- with .Values.imagePullSecrets }}
imagePullSecrets:
{{- toYaml . | nindent 8 }}
{{- end }}
containers:
- args:
- --config.file=/etc/prometheus/prometheus.yml

26
tpl/README.md Normal file
View File

@@ -0,0 +1,26 @@
# 告警消息模版文件
模版中可以使用的变量参考`AlertCurEvent`对象
模版语法如何使用可以参考[html/template](https://pkg.go.dev/html/template)
## 如何在告警模版中添加监控详情url
假设web的地址是http://127.0.0.1:18000/, 实际使用时用web地址替换该地址
在监控模版中添加以下行:
* dingtalk / wecom / feishu
```markdown
[监控详情](http://127.0.0.1:18000/metric/explorer?promql={{ .PromQl | escape }})
```
* mailbody
```html
<tr>
<th>监控详情:</th>
<td>
<a href="http://127.0.0.1:18000/metric/explorer?promql={{ .PromQl | escape }}" target="_blank">点击查看</a>
</td>
</tr>
```

View File

@@ -162,6 +162,7 @@ redis:
addr: "192.168.0.2:6379"
sentinelMasterSet: ""
password: ""
mode: "standalone"
podAnnotations: {}
prometheus:
@@ -192,11 +193,14 @@ categraf:
automountServiceAccountToken: false
image:
repository: flashcatcloud/categraf
tag: v0.1.12
tag: latest
nodeSelector: {}
tolerations: []
affinity: {}
priorityClassName:
## Param: categraf.internal.docker_socket Desc: the path of the docker socket on the kubelet node.
## "unix:///var/run/docker.sock" is the default; if your kubernetes runtime is containerd or another non-docker runtime, set this variable to an empty string.
## docker_socket: ""
docker_socket: unix:///var/run/docker.sock
external:
host: "192.168.0.3"
@@ -211,8 +215,8 @@ nwebapi:
serviceAccountName: ""
automountServiceAccountToken: false
image:
repository: docker.io/ulric2019/nightingale
tag: 5.9.4
repository: flashcatcloud/nightingale
tag: 5.10.3
nodeSelector: {}
tolerations: []
affinity: {}
@@ -229,8 +233,8 @@ nserver:
serviceAccountName: ""
automountServiceAccountToken: false
image:
repository: docker.io/ulric2019/nightingale
tag: 5.9.4
repository: flashcatcloud/nightingale
tag: 5.10.3
nodeSelector: {}
tolerations: []
affinity: {}