Compare commits

...

1178 Commits

Author SHA1 Message Date
Yening Qin
959b0389c6 refactor: udpate workflow (#3028) 2026-01-11 19:37:09 +08:00
ning
3d8f1b3ef5 refactor: update database init 2026-01-09 17:25:16 +08:00
ning
ce838036ad refactor: database init 2026-01-09 17:19:16 +08:00
ning
578ac096e5 docs: fix dash tpl 2026-01-09 15:45:51 +08:00
promalert
48ee6117e9 chore: execute goimports to format the code (#3031)
Signed-off-by: promalert <promalert@outlook.com>
2026-01-07 16:54:35 +08:00
ning
5afd6a60e9 update site info 2026-01-06 15:22:36 +08:00
ning
37372ae9ea refactor: notify channel weight 2026-01-06 10:56:36 +08:00
Snowykami
48e7c34ebf feat: notify channel support JIRA Issue and JSM Alert (#2989) 2026-01-06 10:31:02 +08:00
ning
acd0ec4bef refactor: update change-pass api 2026-01-05 19:35:32 +08:00
ning
c1ad946bc5 Merge branch 'main' of github.com:ccfos/nightingale 2026-01-05 18:02:27 +08:00
ning
4c2affc7da refactor: notify record add status code 2026-01-05 18:01:50 +08:00
SenCoder
273d282beb feat: alert eval sql support go template (#3024) 2026-01-05 15:47:55 +08:00
Yening Qin
3e86656381 feat: prom support mtls (#3029) 2026-01-05 10:40:07 +08:00
Yening Qin
f942772d2b refactor: builtin metrics add params (#3027) 2026-01-04 14:11:30 +08:00
ning
fbc0c22d7a Merge branch 'main' of github.com:ccfos/nightingale 2026-01-04 10:18:21 +08:00
ning
abd452a6df fix: migrate BuiltinPayload 2026-01-03 23:19:22 +08:00
Ulric Qin
47f05627d9 refactor the start command of nightingale 2026-01-03 08:30:21 +08:00
Ulric Qin
edd8e2a3db Merge branch 'main' of https://github.com/ccfos/nightingale 2026-01-03 08:20:17 +08:00
Ulric Qin
c4ca2920ef no need to init Admin operations 2026-01-03 08:20:10 +08:00
ning
afc8d7d21c fix: query data 2025-12-30 19:25:24 +08:00
ning
c0e13e2870 update ds perm check 2025-12-30 16:47:42 +08:00
ning
4f186a71ba fix: datasource delete 2025-12-30 16:35:36 +08:00
ning
104c275f2d refactor: doris datasource conf 2025-12-29 20:36:17 +08:00
pioneerlfn
2ba7a970e8 fix: doris exec sql timeout unit: s -> ms (#3019) 2025-12-29 16:08:45 +08:00
ning
c98241b3fd fix: search view api 2025-12-26 14:47:57 +08:00
ning
b30caf625b refactor: save view check name 2025-12-26 12:11:12 +08:00
SenCoder
32e8b961c2 refactor: add args parameter to CallIbex 2025-12-25 14:51:27 +08:00
ning
2ff0a8fdbb Merge branch 'main' of github.com:ccfos/nightingale 2025-12-25 14:42:33 +08:00
ning
7ff74d0948 fix: es query use ASCII control character as label separator to avoid truncation when user data contains -- 2025-12-25 14:42:16 +08:00
pioneerlfn
da58d825c0 refactor: doris add method showIndexes (#3011) 2025-12-25 11:48:29 +08:00
SenCoder
0014b77c4d refactor: change canDoIbex func to public (#3010) 2025-12-24 21:21:01 +08:00
Yening Qin
fc7fdde2d5 feat: support search view save (#3009) 2025-12-24 17:52:45 +08:00
pioneerlfn
61b63fc75c fix: doris query logs with interval (#3008) 2025-12-24 12:14:12 +08:00
pioneerlfn
80f564ec63 refactor: doris query data (#3003) 2025-12-22 16:04:36 +08:00
Ulric Qin
203c2a885b eval log: use Warn level when error message is not blank 2025-12-22 14:39:07 +08:00
ning
9bee3e1379 fix: vlogs query 2025-12-20 19:52:29 +08:00
ning
c214580e87 refactor: trim prom param 2025-12-18 19:45:06 +08:00
ning
f6faed0659 fix: webhook connection leak 2025-12-17 18:07:43 +08:00
ning
990819d6c1 fix: webhook connection leak 2025-12-17 18:01:28 +08:00
SenCoder
5fff517cce refactor: callibex return task Id (#2994) 2025-12-12 18:45:09 +08:00
ning
db1bb34277 refactor: index pattern delete check 2025-12-11 15:31:49 +08:00
ning
81e37c9ed4 refactor: removes invisible characters from user contacts 2025-12-11 14:52:57 +08:00
pioneerlfn
27ec6a2d04 fix: doris macro/time (#2990) 2025-12-11 12:27:54 +08:00
ning
372a8cff2f refactor: builtin tpl add 2025-12-08 16:53:58 +08:00
Yening Qin
68850800ed feat: support victorialogs alert (#2988) 2025-12-05 14:51:58 +08:00
Busyster996
717f7f1c4b docs: dockerfile add jinja2 (#2982) 2025-12-05 14:47:58 +08:00
Snowykami
82e1e715ad fix: remove elasticsearch version validate (#2986) 2025-12-05 14:47:10 +08:00
ning
d1058639fc fix: event concurrent map writes 2025-12-04 20:16:21 +08:00
ning
709eda93a8 fix: edge get datasource config 2025-12-03 11:41:20 +08:00
SenCoder
48e69449c5 fix: edge panic (#2983) 2025-12-01 21:47:08 +08:00
SenCoder
e5218bdba0 refactor: add config OpenRSA in Center.RSA. if open, settings/auth will encode in api "/v1/n9e/datasources" (#2981) 2025-11-29 13:33:21 +08:00
ning
543b334e64 refactor: update process handle log 2025-11-28 11:51:57 +08:00
ning
3644200488 Merge branch 'main' of github.com:ccfos/nightingale 2025-11-27 21:04:22 +08:00
ning
ceddf1f552 docs: update alert_rule model 2025-11-27 20:59:24 +08:00
SenCoder
faa4c4f438 refactor: es datasource QueryMapData (#2978) 2025-11-27 20:16:06 +08:00
laiwei
4f8b6157a3 Revise CCF ODTC and Nightingale project descriptions of ZH 2025-11-25 20:13:18 +08:00
ning
7fd7040c7f refactor: update gomod 2025-11-24 14:48:02 +08:00
jie210
7fa1a41437 feat: sso support dingtalk (#2968) 2025-11-24 14:19:23 +08:00
ning
f7b406078f refactor: user group api 2025-11-21 11:30:11 +08:00
dependabot[bot]
f6b10403d9 chore(deps): bump golang.org/x/crypto from 0.32.0 to 0.45.0 (#2966)
Bumps [golang.org/x/crypto](https://github.com/golang/crypto) from 0.32.0 to 0.45.0.
- [Commits](https://github.com/golang/crypto/compare/v0.32.0...v0.45.0)

---
updated-dependencies:
- dependency-name: golang.org/x/crypto
  dependency-version: 0.45.0
  dependency-type: indirect
...

Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
2025-11-20 19:22:52 +08:00
cyberslack_lee
f4ce0bccfc chore: fix error typos (#2958) 2025-11-20 19:20:46 +08:00
Snowykami
f26ce4487d feat: notify config support target biz group filter (#2964) 2025-11-20 17:02:40 +08:00
Ulric Qin
9f31f3b57d update integrations: delete unnecessary docs 2025-11-17 16:04:37 +08:00
co63oc
c7a97a9767 fix nvidia_smi.toml (#2962) 2025-11-17 15:56:47 +08:00
Ulric Qin
f94068e611 Merge branch 'main' of https://github.com/ccfos/nightingale 2025-11-17 10:39:44 +08:00
Ulric Qin
2cd5edf691 fix typo in zookeeper dashboard 2025-11-17 10:39:30 +08:00
cyberslack_lee
0ffc67f35f chore: fix typos (#2951) 2025-11-14 22:16:45 +08:00
co63oc
6dc5ac47b7 chore: fix some words (#2952) 2025-11-14 22:14:58 +08:00
cyberslack_lee
2526440efa chore: fix typo error (#2950) 2025-11-14 22:13:30 +08:00
cyberslack_lee
2f8b8fad62 chore: fix typos (#2948) 2025-11-14 22:12:25 +08:00
ning
9c19201c13 Merge branch 'main' of github.com:ccfos/nightingale 2025-11-14 18:27:34 +08:00
ning
4758c14a46 refactor: validate query_configs length to prevent data truncation in recording_rule 2025-11-14 18:26:32 +08:00
Snowykami
2e54ab8c2f fix: builtin metrics pagination total (#2953) 2025-11-14 18:03:42 +08:00
ning
67f79c2f88 refactor: optimize init salt 2025-11-14 16:37:15 +08:00
ning
749ae70bd7 refactor: optimize db query 2025-11-14 16:25:31 +08:00
ning
e2dba9b3d3 Merge branch 'main' of github.com:ccfos/nightingale 2025-11-14 15:59:04 +08:00
ning
2228842b2f refactor: update migrate BuiltinPayloads 2025-11-14 14:59:49 +08:00
Snowykami
38fe37a286 feat: add secure connection for ClickHouse native and http (#2945) 2025-11-14 14:18:00 +08:00
ning
7daf1e8c43 Merge branch 'main' of github.com:ccfos/nightingale 2025-11-14 11:54:20 +08:00
ning
8706ded776 refactor: es query data time range 2025-11-14 11:51:19 +08:00
Snowykami
f637078dd9 fix: skip processing pagerduty_integration_ids in GetNotifyConfigParams (#2949) 2025-11-14 11:10:50 +08:00
ning
8aa7b1060d chore: fix typos 2025-11-14 11:09:23 +08:00
co63oc
18634a33b2 chore: fix some words (#2946) 2025-11-14 10:29:52 +08:00
cyberslack_lee
7ed1b80759 chore: fix typos (#2947) 2025-11-14 10:21:22 +08:00
ning
3d240704f6 Merge branch 'main' of github.com:ccfos/nightingale 2025-11-13 20:27:32 +08:00
ning
ce0322bbd7 refactor: update message tpl, add domain 2025-11-13 20:25:45 +08:00
ning
66f62ca8c5 docs: update message tpl 2025-11-13 19:39:36 +08:00
cyberslack_lee
d11d73f6bc chore: fix typos about HostUpdteTime (#2942) 2025-11-13 17:18:46 +08:00
cyberslack_lee
dee1fe2d61 chore: fix typos about NotificaitonRecord to NotificationRecord (#2943) 2025-11-13 17:12:43 +08:00
ning
b3da24f18a refactor: update builtin payload api 2025-11-13 16:54:14 +08:00
ning
29ea4f6ed2 refactor: board add note 2025-11-13 16:49:30 +08:00
co63oc
5272b11efc chore: fix typos (#2941) 2025-11-13 11:40:22 +08:00
Snowykami
c322601138 feat: clickhouse protocol support (#2938) 2025-11-13 11:37:53 +08:00
ning
f1357d6f33 Merge branch 'main' of github.com:ccfos/nightingale 2025-11-12 20:10:54 +08:00
co63oc
728d70c707 fix repo path (#2933) 2025-11-12 19:46:29 +08:00
ning
bf93932b22 refactor: update board api 2025-11-12 19:12:06 +08:00
co63oc
57581be350 chore: fix typos mesurement (#2934) 2025-11-12 16:25:22 +08:00
Snowykami
5793f089f6 feat: pagerduty support (#2930) 2025-11-12 16:24:19 +08:00
ning
fa49449588 fix: user phone encrypt 2025-11-11 16:47:49 +08:00
ning
876f1d1084 fix prom datasource update 2025-11-10 10:51:44 +08:00
ning
678830be37 refactor: add /builtin-component service api 2025-11-10 10:29:14 +08:00
ning
5e30f3a00d Merge branch 'main' of github.com:ccfos/nightingale 2025-11-06 01:39:58 +08:00
ning
7f1eefd033 fix: dsn 2025-11-05 15:08:08 +08:00
Wenyu Su
c8dd26ca4c fix(notify): http callback retry interval in millisecond (#2928) 2025-11-04 18:11:30 +08:00
ning
37c57e66ea refactor: optimize db dsn 2025-11-04 15:09:03 +08:00
ning
878e940325 docs: update migrate.sql 2025-11-03 19:16:00 +08:00
Ulric Qin
cbc715305d Merge branch 'main' of https://github.com/ccfos/nightingale 2025-10-31 18:35:39 +08:00
Ulric Qin
5011766c70 delete single config dir 2025-10-31 18:35:32 +08:00
Snowykami
b3ed8a1e8c fix: elasticsearch v9 range query compatibility (#2692) (#2922) 2025-10-31 15:05:45 +08:00
Ulric Qin
814ded90b6 remove no use configuration: EnableAutoMigrate and add etc/single/config.toml 2025-10-31 11:38:17 +08:00
ning
43e89040eb refactor: update oidc logout 2025-10-31 10:26:41 +08:00
710leo
3d339fe03c update issue-translator 2025-10-30 22:50:53 +08:00
710leo
7618858912 update issue-translator 2025-10-30 22:24:49 +08:00
710leo
15b4ef8611 update issue-translator 2025-10-30 22:19:20 +08:00
710leo
5083a5cc96 add issue-translator 2025-10-30 22:10:09 +08:00
Yening Qin
d51e83d7d4 feat: alert rule add event process (#2921) 2025-10-28 16:18:12 +08:00
ning
601d4f0c95 update built in 2025-10-28 15:42:37 +08:00
Ulric Qin
90fac12953 update readme 2025-10-26 22:25:16 +08:00
ning
19d76824d9 update user group api 2025-10-25 16:45:31 +08:00
ning
1341554bbc refactor: optimize push data log 2025-10-19 12:12:06 +08:00
Ulric Qin
fd3ce338cb Merge branch 'main' of https://github.com/ccfos/nightingale 2025-10-15 17:21:58 +08:00
Ulric Qin
b8f36ce3cb add more prometheus rule severity choices 2025-10-15 17:21:52 +08:00
ning
037112a9e6 refactor: update alert mute api 2025-10-15 15:19:01 +08:00
zjxpsetp
c6e75d31a1 update jmx dashboard to support multi gc mode 2025-10-15 12:41:01 +08:00
zjxpsetp
bd24f5b056 Merge remote-tracking branch 'origin/main'
# Conflicts:
#	integrations/Java/dashboards/jmx_by_kubernetes.json
2025-10-15 12:36:14 +08:00
zjxpsetp
89551c8edb update jmx dashboard to support multi gc mode 2025-10-15 12:29:11 +08:00
ning
042b44940d docs: update i18n 2025-10-14 17:38:25 +08:00
ning
8cd8674848 refactor: optimize alert mute match 2025-10-14 16:59:57 +08:00
ning
7bb6ac8a03 refactor: update alert mute sync 2025-10-14 16:45:15 +08:00
ning
76b35276af refactor: update event api 2025-10-13 20:24:52 +08:00
SaladDay
439a21b784 feat: add expired filter for alert mute rules (#2907) 2025-10-13 14:04:32 +08:00
Yening Qin
47e70a2dba fix: sql order (#2908) 2025-10-13 12:18:16 +08:00
Yening Qin
16b3cb1abc feat: support encrypt user phone (#2906)
* feature: add a new configuration option to encrypt user phone numbers in the database (#2902)

Co-authored-by: yuanzaiping_dxm <yuanzaiping@duxiaoman.com>
---------

Co-authored-by: zaipingY <30775871+zaipingy@users.noreply.github.com>
Co-authored-by: yuanzaiping_dxm <yuanzaiping@duxiaoman.com>
2025-10-11 14:37:14 +08:00
ning
32995c1b2d fix: event pipeline insert by PostgreSQL 2025-09-23 16:40:27 +08:00
ning
b4fa36fa0e refactor: update message tpl 2025-09-22 18:18:19 +08:00
ning
f412f82eb8 fix: event value is inf 2025-09-19 19:32:40 +08:00
ning
9da1cd506b fix: datasource sync 2025-09-19 17:41:57 +08:00
ning
99ea838863 refactor: optimize query target 2025-09-19 15:00:07 +08:00
ning
7feb003b72 refactor: change feishucard body 2025-09-19 10:37:58 +08:00
ning
b0a053361f refactor: change log 2025-09-17 20:28:22 +08:00
ning
959f75394b refactor: alert rule api 2025-09-17 12:10:43 +08:00
ning
03e95973b2 refactor: message tpl api 2025-09-16 20:13:25 +08:00
ning
e890705167 refactor: event notify 2025-09-16 19:24:05 +08:00
ning
6716f1bdf1 refactor: update event notify 2025-09-15 20:19:18 +08:00
ning
739b9406a4 Merge branch 'main' of github.com:ccfos/nightingale 2025-09-15 17:37:38 +08:00
ning
77f280d1cc fix: event delete api 2025-09-15 17:37:25 +08:00
pioneerlfn
04fe1b9dd6 for slice, when marshal, return [] instead null (#2878) 2025-09-15 17:28:57 +08:00
ning
552758e0e1 refactor: pushgw writer support async 2025-09-14 15:00:40 +08:00
ning
68bc474c1b refactor: update message tpl 2025-09-11 20:38:17 +08:00
ning
f692035deb refactor: change datasource log 2025-09-11 16:14:59 +08:00
ning
eb441353c3 refactor: message tpl 2025-09-11 15:33:02 +08:00
ning
b606b22ae6 fix: opensearch alert 2025-09-10 22:24:50 +08:00
ning
1de0428860 docs: update init.sql 2025-09-09 18:51:23 +08:00
ning
3d0c288c9f refactor: event api 2025-09-09 16:36:40 +08:00
ning
343814a802 refactor: notify rule update 2025-09-09 11:05:58 +08:00
ning
12e2761467 fix: dingtalk message tpl 2025-09-09 10:18:08 +08:00
Yening Qin
0edd5ee772 refactor: event notify (#2869) 2025-09-08 15:04:33 +08:00
ning
5e430cedc7 fix: build, router_target miss idents 2025-09-03 14:17:15 +08:00
ning
a791a9901e refactor: push ts to kafka 2025-09-03 12:17:32 +08:00
totoro
222cdd76f0 refactor : es support search_after (#2859) 2025-09-03 12:12:07 +08:00
arch3754
ed4e3937e0 feat: add target update (#2853) 2025-09-03 12:05:38 +08:00
Yening Qin
60f9e1c48e refactor: dscache sync add datasource process hook (#2792) 2025-09-02 18:12:36 +08:00
Ulric Qin
276dfe7372 update linux dash 2025-09-02 08:57:36 +08:00
Ulric Qin
4a6dacbe30 add host table ng 2025-09-02 08:54:54 +08:00
Ulric Qin
48eebba11a update linux dashboard 2025-09-02 08:52:58 +08:00
ning
eca82e5ec2 change ops update 2025-09-01 14:24:17 +08:00
Yening Qin
21478fcf3d fix: send http notify retry (#2849) 2025-08-30 01:57:32 +08:00
ulricqin
a87c856299 fix: call flashduty to push event (#2848) 2025-08-30 00:06:53 +08:00
ning
ba035a446d refactor: change some log 2025-08-29 16:32:57 +08:00
ning
bf840e6bb2 docs: update dashboard tpl 2025-08-29 10:14:59 +08:00
ning
cd01092aed refactor: update alert rule import api 2025-08-28 19:44:05 +08:00
ning
e202fd50c8 refactor: datasource api 2025-08-28 16:48:04 +08:00
ning
f0e5062485 refactor: optimize edge ident update 2025-08-28 16:24:33 +08:00
ning
861fe96de5 add UpdateDBTargetTimestampDisable 2025-08-27 19:12:56 +08:00
Ulric Qin
5b66ada96d Merge branch 'main' of https://github.com/ccfos/nightingale 2025-08-27 10:06:19 +08:00
Ulric Qin
d5a98debff upgrade ibex 2025-08-27 10:06:11 +08:00
ning
4977052a67 Merge branch 'main' of github.com:ccfos/nightingale 2025-08-22 15:03:17 +08:00
ning
dcc461e587 refactor: push writer support sync 2025-08-22 15:00:49 +08:00
Ulric Qin
f5ce1733bb update minio dashboard 2025-08-21 18:58:47 +08:00
Ulric Qin
436cf25409 Merge branch 'main' of https://github.com/ccfos/nightingale 2025-08-21 10:34:56 +08:00
Ulric Qin
038f68b0b7 add minio dashboard for new version 2025-08-21 10:34:50 +08:00
ning
96ef1895b7 refactor: event_script_notify_result log add stdin 2025-08-20 14:24:28 +08:00
zjxpsetp
eeaa7b46f1 update es dashboard for categraf version bigger than 0.3.102 2025-08-19 23:43:54 +08:00
zjxpsetp
dc525352f1 Merge remote-tracking branch 'origin/main' 2025-08-19 17:36:12 +08:00
zjxpsetp
98a3fe9375 update jmx dashboard in kubernetes 2025-08-19 17:35:55 +08:00
ning
74b0f802ec Merge branch 'main' of github.com:ccfos/nightingale 2025-08-18 11:19:07 +08:00
ning
85bd3148d5 refactor: add update db metric 2025-08-18 11:18:52 +08:00
ning
0931fa9603 fix: target update ts 2025-08-18 11:18:39 +08:00
zjxpsetp
65cdb2da9e 更新 jmx 的仪表盘,新的jmx Exporter 指标和之前有一些差别 2025-08-17 17:23:39 +08:00
ning
9ad6514af6 refactor: ds query api 2025-08-13 11:50:01 +08:00
ning
302c6549e4 refactor: ds query api 2025-08-13 11:12:57 +08:00
ning
a3122270e6 refactor: ds query api 2025-08-13 11:02:00 +08:00
Yening Qin
1245c453bb refactor: send flashduty (#2824) 2025-08-12 20:55:23 +08:00
Ulric Qin
9c5ccf0c8f fix: update update_at when batch-updating-rules 2025-08-06 20:21:57 +08:00
Ulric Qin
cd468af250 refactor batch updating rules 2025-08-06 17:16:02 +08:00
Ulric Qin
2d3449c0ec code refactor for batch updating 2025-08-06 15:45:36 +08:00
ning
e15bdbce92 refactor: optimize import prom alert rule 2025-08-06 11:09:53 +08:00
ning
3890243d42 fix: new mysql db client 2025-08-04 18:40:04 +08:00
ning
37fb4ee867 add case-insensitive search for builtin payload filtering 2025-08-04 16:31:28 +08:00
ning
6db63eafc1 refactor: change import prom rule 2025-08-01 19:00:06 +08:00
ning
1e9cbfc316 fix: event query log 2025-08-01 16:47:14 +08:00
ning
4f95554fe3 refactor: update msg tpl 2025-07-31 18:08:12 +08:00
ning
8eba9aa92f refactor: update msg tpl 2025-07-31 15:31:29 +08:00
ning
6ba74b8e21 fix: pgsql cross database query 2025-07-31 11:51:27 +08:00
ning
8ea4632681 refactor: update duty user sync 2025-07-28 14:36:08 +08:00
ning
f958f27de1 fix: AlertRuleExists 2025-07-27 13:28:46 +08:00
ning
1bdfa3e032 refactor: update TargetDel 2025-07-27 12:46:22 +08:00
ning
143880cd46 Merge branch 'main' of github.com:ccfos/nightingale 2025-07-25 13:21:55 +08:00
ning
38f0b4f1bb refactor: modify add loki api resp 2025-07-25 13:01:27 +08:00
dependabot[bot]
2bccd5be99 build(deps): bump golang.org/x/oauth2 from 0.23.0 to 0.27.0 (#2793)
Bumps [golang.org/x/oauth2](https://github.com/golang/oauth2) from 0.23.0 to 0.27.0.
- [Commits](https://github.com/golang/oauth2/compare/v0.23.0...v0.27.0)

---
updated-dependencies:
- dependency-name: golang.org/x/oauth2
  dependency-version: 0.27.0
  dependency-type: direct:production
...

Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
2025-07-24 21:21:29 +08:00
ning
7b328b3eaa refactor: update prom rule import 2025-07-24 21:17:12 +08:00
Ulric Qin
8bd5b90e94 fix: support old rule format when importing 2025-07-23 09:36:28 +08:00
ning
96629e284f refactor: add email notify record 2025-07-17 11:45:58 +08:00
ning
67d2875690 fix: update record rule datasource ids api 2025-07-15 16:29:30 +08:00
ning
238895a1f8 refactor: init tpl 2025-07-15 15:52:38 +08:00
ning
fb341b645d refactor: sub alert add host filter 2025-07-15 14:46:09 +08:00
Haobo Zhang
2d84fd8cf3 fix: ai summary customize parameter parse from interface (#2788) 2025-07-14 14:54:15 +08:00
ning
2611f87c41 refactor: drop builtin_components idx_ident 2025-07-11 19:12:24 +08:00
ning
a5b7aa7a26 refactor: drop builtin_components idx_ident 2025-07-11 18:57:03 +08:00
ning
0714a0f8f1 refactor: change log level 2025-07-11 16:41:14 +08:00
ning
063cc750e1 refactor: update notify channel api 2025-07-11 12:25:08 +08:00
ning
b2a912d72f refactor: log level 2025-07-11 12:06:18 +08:00
ning
4ba745f442 fix: alert rule batch update notify rule 2025-07-11 11:38:09 +08:00
smx_Morgan
fa7d46ecad fix: compatible user_token table with postgresql (#2785) 2025-07-10 11:03:33 +08:00
pioneerlfn
a5a43df44f refactor: doris search sql (#2778)
* doris:support search sql with macro

* Update doris.go

---------

Co-authored-by: Yening Qin <710leo@gmail.com>
2025-07-09 21:33:17 +08:00
smx_Morgan
fbf1d68b84 fix: update postgresql init sql (#2784) 2025-07-09 20:53:56 +08:00
ulricqin
ca712f62a4 fix execution of notify script (#2769) 2025-07-06 08:40:13 +08:00
ulricqin
84ee14d21e add img (#2767) 2025-07-03 19:48:39 +08:00
ning
c9cf1cfdd2 refactor: change alert rule update api 2025-07-02 20:32:56 +08:00
ning
9d1c01107f refactor: builtin tpl 2025-07-02 20:12:43 +08:00
ning
7ea31b5c6d refactor: builtin tpl 2025-07-02 19:37:25 +08:00
ning
e8e1c67cc8 refactor: event notify filter 2025-07-02 15:44:02 +08:00
ning
8079bcd288 docs: enbale token auth 2025-07-01 18:38:28 +08:00
ning
33b178ce82 refactor: roles api 2025-07-01 15:37:42 +08:00
ning
28c9cd7b43 refactor: change eval for duration check 2025-06-30 15:10:20 +08:00
ning
b771e8a3e8 refactor: change eval for duration check 2025-06-30 12:06:48 +08:00
Yening Qin
4945e98200 refactor: builtin tpl gets api (#2760) 2025-06-27 19:45:28 +08:00
ning
a938ea3e56 docs: update migrate.sql 2025-06-26 18:44:55 +08:00
ning
25c339025b Merge branch 'main' of github.com:ccfos/nightingale 2025-06-25 18:05:46 +08:00
ning
bb0ee35275 refactor: optimize notify rule check api 2025-06-25 18:05:34 +08:00
Ulric Qin
0fc54ad173 add some icon 2025-06-25 16:57:59 +08:00
Ulric Qin
1f95e2df94 Merge branch 'main' of https://github.com/ccfos/nightingale 2025-06-25 16:52:00 +08:00
Ulric Qin
d2969f34ef update READE. set en version as default 2025-06-25 16:51:52 +08:00
Yening Qin
d9a34959dc feat: support doris and opensearch alert (#2758) 2025-06-25 16:34:08 +08:00
smx_Morgan
bc6ff7f4ba fix : add offset to es date_histogam (#2757) 2025-06-25 15:37:51 +08:00
Asklv
514913a97a feat: support deletion in datasource series. (#2753) 2025-06-23 19:20:50 +08:00
ning
affc610b7b refactor: change rulename tag handling in alert processing 2025-06-23 16:04:37 +08:00
ning
a098d5d39c refactor: update subscribe api 2025-06-20 18:01:01 +08:00
ning
05c3f1e0e4 refactor: update message tpl 2025-06-20 16:31:30 +08:00
ning
d5740164f2 refactor: update mute tryrun api 2025-06-20 14:42:10 +08:00
ning
8c2383c410 refactor: api add i18n 2025-06-20 14:31:03 +08:00
ning
9af024fb99 refactor: api add i18n 2025-06-20 14:23:03 +08:00
ning
12f3cc21e1 refactor: change rule test api 2025-06-19 16:18:30 +08:00
Asklv
0b3bb54eb4 feat: add time cost in alert history. (#2744)
Signed-off-by: Asklv <boironic@gmail.com>
2025-06-18 21:57:53 +08:00
Yening Qin
da813e2b0c refactor: optimize event notify (#2750) 2025-06-18 18:39:33 +08:00
Ulric Qin
50fa2499b7 add some alert_eval log 2025-06-18 15:15:56 +08:00
Ulric Qin
2c5ae5b3a9 delete some info log 2025-06-18 15:14:58 +08:00
ning
522932aeb4 refactor: api auth check 2025-06-17 20:57:32 +08:00
Yening Qin
35ac0ddea5 fix: api for agent auth (#2749) 2025-06-17 20:52:55 +08:00
ning
26fa750309 refactor: event process test api 2025-06-17 11:58:39 +08:00
710leo
1eba607aeb feat: add install date api 2025-06-16 22:46:47 +08:00
ning
6aadd159af fix: optimize api for agent auth 2025-06-16 20:22:24 +08:00
xtan
b6ad87523e feat: support redis password encryption (#2739) 2025-06-16 20:11:28 +08:00
Yening Qin
ea5b6845de refactor: optimize event processor (#2742) 2025-06-16 16:46:53 +08:00
Yening Qin
5ba5096da2 feat: add mute and sub rule tryrun api (#2737) 2025-06-13 18:08:46 +08:00
Yening Qin
85786d985d feat: add ai summary event processor (#2734)
Co-authored-by: Haobo Zhang <43698160+haobo8@users.noreply.github.com>
2025-06-12 11:33:59 +08:00
Yening Qin
cff211364a feat: support postgresql alert (#2732) 2025-06-11 17:43:34 +08:00
Ulric Qin
0190b2b432 Merge branch 'main' of https://github.com/ccfos/nightingale 2025-06-11 11:50:47 +08:00
Ulric Qin
d8081129f1 replace blank in append_tags 2025-06-11 11:50:37 +08:00
ning
66d4d0c494 refactor: event api perm check 2025-06-11 11:37:18 +08:00
ning
d936d57863 refactor: event api perm check 2025-06-11 11:30:27 +08:00
ning
d819691b78 refactor: change event processor api log 2025-06-10 16:54:52 +08:00
ning
6f0b415821 refactor: mysql set default maxQueryRows 2025-06-09 17:36:33 +08:00
ning
f482efd9ce refactor: add alert rule func 2025-06-09 10:13:14 +08:00
ning
b39d5a742e refactor: event pipline tryrun api 2025-06-08 23:03:03 +08:00
ning
59c3d62c6b refactor: mysql datasource param 2025-06-06 19:23:49 +08:00
ning
624ae125d5 Merge branch 'main' of github.com:ccfos/nightingale 2025-06-06 19:08:10 +08:00
ning
b9c822b220 refactor: mysql datasource param 2025-06-06 19:07:42 +08:00
smx_Morgan
c13baf3a9d refactor : add smtp notify test (#2723) 2025-06-06 18:07:49 +08:00
ning
bc46ff1912 fix: original_tags is nil 2025-06-06 17:38:27 +08:00
ning
2f7c76c275 refactor: message tpl add 2025-06-06 15:45:25 +08:00
Yening Qin
1edf305952 feat: support mysql alert (#2725) 2025-06-06 15:26:22 +08:00
Ulric Qin
c026a6d2b2 update README 2025-06-06 08:47:19 +08:00
smx_Morgan
1853e89f7c feat: add alert history events delete api (#2720) 2025-06-05 19:02:46 +08:00
zjxpsetp
a41a00fba3 Merge remote-tracking branch 'origin/main' 2025-06-05 00:00:36 +08:00
zjxpsetp
ceb9a1d7ff update JAVA for jvm dashboard by opentelementry 2025-06-04 23:58:26 +08:00
710leo
0b5223acdb docs: update postgres sql 2025-06-04 23:02:30 +08:00
710leo
4b63c6b4b1 refactor: change event_pipeline column type 2025-06-04 22:51:19 +08:00
zjxpsetp
edd024306a update JAVA for jvm dashboard by opentelementry 2025-06-03 23:43:35 +08:00
ning
cddf5e7d37 refactor: event list api 2025-06-03 18:59:25 +08:00
ning
f07baa276e docs: update sql 2025-06-03 18:54:04 +08:00
Ulric Qin
2c2d5004f4 Merge branch 'main' of https://github.com/ccfos/nightingale 2025-06-03 11:27:44 +08:00
Ulric Qin
9982666e44 update dashboard 2025-06-03 11:27:36 +08:00
ning
2b448f738c refactor: change role ops 2025-06-02 09:34:20 +08:00
ning
e4c258de8e refactor: change user ops 2025-06-02 09:24:37 +08:00
Ulric Qin
4f128a9b44 rename Null to Others in active events page 2025-05-30 12:19:09 +08:00
Ulric Qin
deb85b9c68 update README img 2025-05-30 08:51:00 +08:00
ning
1b84324147 refactor: rm blockEventNotify 2025-05-29 21:49:57 +08:00
ning
c73b66848e fix: cur event api 2025-05-29 20:30:39 +08:00
ning
cd74442819 refactor: add UpdateBy field assignment in alertMuteAdd function 2025-05-29 17:15:58 +08:00
ning
252a8284f9 refactor: update datasource 2025-05-29 11:02:47 +08:00
ning
7d2e998078 refactor: merge 2025-05-29 10:30:54 +08:00
Yening Qin
69582bacdf feat: add source token api 2025-05-29 10:05:49 +08:00
ning
1bede4eeb8 refactor: build event tags 2025-05-28 16:09:53 +08:00
ning
16ed81020a Merge branch 'main' of github.com:ccfos/nightingale 2025-05-28 14:15:37 +08:00
ning
7b020ae238 refactor: datasource init add recover 2025-05-28 14:15:21 +08:00
ning
05eabcf00d refactor: handle ibex 2025-05-28 14:15:07 +08:00
ning
e316842022 fix: ibex after event relabel 2025-05-28 14:14:51 +08:00
Ulric Qin
8b3c4749aa Merge branch 'main' of https://github.com/ccfos/nightingale 2025-05-27 20:17:37 +08:00
Ulric Qin
16be04c3e9 use s3 as default card severity 2025-05-27 20:17:30 +08:00
ning
ccbadba9ff refactor: update send duty 2025-05-27 20:07:28 +08:00
ning
ce5bf2e473 refactor: event processor 2025-05-27 19:44:31 +08:00
Ulric Qin
80cdf9d0bb if eventcard.Severity < 1: set it to 1 2025-05-27 19:34:03 +08:00
ning
7514086ae6 fix: different notify channel use notify script 2025-05-27 14:27:00 +08:00
ning
116f8b1590 Merge branch 'main' of github.com:ccfos/nightingale 2025-05-27 14:14:31 +08:00
ning
0fb4e4b723 refactor: add eval duration 2025-05-27 14:14:16 +08:00
710leo
07fb427eea refactor: update relabel processor 2025-05-26 23:40:27 +08:00
ulricqin
d8f8fed95f Update README.md 2025-05-26 10:21:33 +08:00
ulricqin
f2e0ec10f7 更新 README.md 2025-05-25 13:09:37 +08:00
ulricqin
db467a8811 更新 README.md 2025-05-25 13:05:03 +08:00
Ulric Qin
b839bd3e16 code refactor 2025-05-24 21:45:47 +08:00
Ulric Qin
8033ca590b update README 2025-05-24 21:20:06 +08:00
Ulric Qin
0974f33d16 update README 2025-05-23 19:05:32 +08:00
Ulric Qin
d52a19b1f7 update README 2025-05-23 18:59:18 +08:00
Ulric Qin
f11c4dc87d Merge branch 'main' of https://github.com/ccfos/nightingale 2025-05-23 18:57:22 +08:00
Ulric Qin
d7f3bc8841 update README 2025-05-23 18:57:05 +08:00
ning
2ae8c35a50 refactor: change notify rule list sort 2025-05-23 16:00:27 +08:00
ning
da0697c5ce refactor: event api and event processors 2025-05-23 15:45:09 +08:00
ning
2eff1159e5 refactor: event add notify rule ids 2025-05-23 13:19:48 +08:00
ning
6c19c0adf4 refactor: update AlertCurEvent 2025-05-22 19:28:35 +08:00
ning
5e5525ef57 refactor: update AlertCurEvent 2025-05-22 19:00:57 +08:00
ning
58c2a3cc71 update event db2fe 2025-05-22 17:11:11 +08:00
ning
cef6d5fe49 refactor: alert_aggr_view delete format 2025-05-22 16:32:15 +08:00
ulricqin
49cda8b58a modify alerting aggr verify rules (#2694) 2025-05-22 15:45:36 +08:00
ning
d6a585ccbd refactor: update cur event api 2025-05-21 20:29:50 +08:00
ning
764c254833 fix: AlertAggrView update 2025-05-21 20:11:13 +08:00
ning
c427abdfa3 fix: AlertAggrView update 2025-05-21 20:04:28 +08:00
shardingHe
3749f62adc docs: add config for ntp (#2690) 2025-05-21 16:25:21 +08:00
Yening Qin
f932f93a94 feat: add new processors (#2688) 2025-05-20 18:27:07 +08:00
smx_Morgan
5bbc432db0 feat : add event_Ids to alert-cur-events/list (#2681) 2025-05-20 15:55:45 +08:00
Yening Qin
0712baa6e1 refactor: change TimeSpanMuteStrategy (#2686) 2025-05-20 15:51:37 +08:00
ning
b4d595d5f5 docs: update ops 2025-05-19 17:40:56 +08:00
Yening Qin
95090055e0 refactor: change redis cli timeout (#2684) 2025-05-19 11:12:46 +08:00
smx_Morgan
880b92bf36 fix: telegram notify channel template (#2683) 2025-05-17 21:42:41 +08:00
Yening Qin
744eb44f19 feat: add event pipelines (#2682) 2025-05-16 14:50:13 +08:00
Ulric Qin
6ddc78ea11 refactor n9e-v8 dashboard 2025-05-15 09:56:47 +08:00
Ulric Qin
823568081b update n9e-v8 dashboard 2025-05-15 08:42:56 +08:00
Ulric Qin
2f8e63f821 add some metrics to observe redis operations 2025-05-15 08:27:39 +08:00
Ulric Qin
bdc9fa4638 update target's update_at one by one 2025-05-15 08:01:11 +08:00
Ulric Qin
9e1d69c8b0 refactor pushgw metrics 2025-05-15 07:52:39 +08:00
Ulric Qin
85d8607be8 add some panel for n9e-v8 dashboard 2025-05-15 07:31:27 +08:00
Ulric Qin
ec6a4f134a update target's timestamp in redis support batch 2025-05-15 06:21:29 +08:00
Ulric Qin
798f9e5536 Merge branch 'main' of https://github.com/ccfos/nightingale 2025-05-15 04:43:39 +08:00
Ulric Qin
92095ea89c fix categraf-detail-dashboard: add filter for promql 2025-05-15 04:43:33 +08:00
Yening Qin
eb85c9c78b feat: add alert mute test function 2025-05-14 21:08:26 +08:00
Ulric Qin
bd8bf1cf9e use topk in linux-overview dashboard 2025-05-14 15:53:09 +08:00
Ulric Qin
b27ddf45cf Merge branch 'main' of https://github.com/ccfos/nightingale 2025-05-14 15:17:10 +08:00
Ulric Qin
c8e004ba51 update n9e_v8 dashboard 2025-05-14 15:16:54 +08:00
Yening Qin
eb330f00b2 feat: embedded product api (#2671) 2025-05-14 14:55:50 +08:00
Yening Qin
49d61bbd5d refactor: merge to main (#2670) 2025-05-14 14:46:05 +08:00
Ulric Qin
407a1b61a5 refactor linux dashboard 2025-05-14 11:58:28 +08:00
Ulric Qin
bc8a6f61be refactor node-exporter dashboard 2025-05-14 11:48:23 +08:00
Ulric Qin
94cd9796bf rename some dashboards of Linux 2025-05-13 20:45:24 +08:00
Ulric Qin
c3ee0143b2 refactor os dashboards 2025-05-13 20:43:14 +08:00
Ulric Qin
10d4faae4e refactor os dashboard 2025-05-13 20:21:38 +08:00
Yening Qin
ffac81a2ef fix: alert rule verify (#2668) 2025-05-13 18:53:08 +08:00
Yening Qin
d8d1a454b3 fix: default ds id update (#2664) 2025-05-13 15:39:42 +08:00
Yening Qin
94f9818fd2 docs: update k8s dashboards and fix alert rule name check (#2663) 2025-05-13 14:59:38 +08:00
Asklv
a5d820ddb3 fix: api panic when gomail dial tcp failed. (#2661) 2025-05-12 20:08:37 +08:00
smx_Morgan
da0224d010 fix: Solved the problem of NaN value of prom not parsing json (#2652) 2025-05-12 18:28:57 +08:00
Yening Qin
4a399a23c0 refactor: change log query api 2025-05-12 15:42:18 +08:00
Ulric Qin
95ecc61834 refactor ops and i18n 2025-05-08 18:43:57 +08:00
Ulric Qin
f72e29677f refactor test case 2025-05-08 17:41:28 +08:00
Ulric Qin
f876eb02e2 fix multi role_operation 2025-04-28 17:20:58 +08:00
Ulric Qin
cdcadefb03 Merge branch 'main' of https://github.com/ccfos/nightingale 2025-04-28 17:19:17 +08:00
Ulric Qin
582a3981fb delete Admon role_operation 2025-04-28 17:19:03 +08:00
smx_Morgan
8081c48450 fix :record rule name change is not synchronized (#2636) 2025-04-28 15:14:37 +08:00
Yening Qin
5e7541215a refactor: es add offset query and add es-index-pattern ops 2025-04-27 18:54:36 +08:00
ning
e95b5428b2 Merge branch 'main' of github.com:ccfos/nightingale 2025-04-25 23:32:39 +08:00
ning
8a47088d97 refactor: update datasource api 2025-04-25 23:32:23 +08:00
Ulric Qin
05ba5caf8a code refactor 2025-04-25 19:21:57 +08:00
Ulric Qin
dc7752c2af code refactor 2025-04-25 19:18:39 +08:00
smx_Morgan
a828603406 Fix: Fixed the issue of group synchronization flashduty (#2628) 2025-04-25 15:34:08 +08:00
Yening Qin
c5c4e00ab8 refactor: change query api (#2626) 2025-04-24 14:20:36 +08:00
ning
770e15db39 refactor: datasource model add identifier 2025-04-23 16:13:36 +08:00
ning
5096117b45 refactor: update api for agent auth 2025-04-23 15:53:40 +08:00
Yening Qin
dd3b68e4ab refactor: change notify channel api auth 2025-04-23 15:47:20 +08:00
Yening Qin
85947c08a8 refactor: sync user info to duty (#2615) 2025-04-18 17:07:54 +08:00
Ulric Qin
3f3c815171 set QueueWaterMark to 0.1 2025-04-17 19:30:21 +08:00
smx_Morgan
08f82e899a feat: support user get by emails and phones (#2613) 2025-04-16 19:07:07 +08:00
bowen
043628d4eb feat: add usernames query param for /api/n9e/users 2025-04-16 12:10:39 +08:00
smx_Morgan
ba33512d22 fix: prevent incorrect board matches caused by implicit type casting 2025-04-15 13:21:58 +08:00
YR Chen
a7cf658c1d refactor: allow valid Go template as rule name (#2549)
* fix: allow valid Go template as rule name

* feat: add back `str.Dangerous` check for texts in template

* Update models/alert_rule.go

Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com>

---------

Co-authored-by: Yening Qin <710leo@gmail.com>
Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com>
2025-04-14 18:28:42 +08:00
smx_Morgan
b62e6fda04 feat: return value add "avtivate" when get alert-mute (#2599) 2025-04-14 18:09:32 +08:00
Yening Qin
6243f9a05c refactor: update es event index (#2602) 2025-04-14 14:46:15 +08:00
Yening Qin
e8962b5646 refactor: change query tpl func (#2597) 2025-04-11 16:10:25 +08:00
ning
97a4ee2764 es_index_pattern add note 2025-04-10 17:28:06 +08:00
rechardwang
2fdb80f314 docs: add chinese verion rabbitmq dashboard 2025-04-08 22:07:24 +08:00
710leo
c0ab672cf7 change createTokens 2025-04-03 00:19:33 +08:00
Yening Qin
7664c15121 refactor: change get ids (#2583) 2025-04-02 18:50:01 +08:00
Ulric Qin
4059a2022c add logic: SingleLogin 2025-04-02 15:10:31 +08:00
smx_Morgan
e7263680a8 refactor: targets get api 2025-04-01 20:56:40 +08:00
alingse
4a67f7a108 fix: call errors.WithMessage with a nil value error err after check another error decryptErr (#2572) 2025-04-01 17:33:25 +08:00
ning
04ca6c5fd5 fix: auto migrate add cross_cluster_enabled of es_index_pattern 2025-03-27 15:45:47 +08:00
Ulric Qin
747211c78f fix builtin node-exporter-alerting-rules 2025-03-27 08:35:56 +08:00
ning
bf54fac1e8 refactor: change send script notify log 2025-03-26 16:09:35 +08:00
ning
76117ae440 refactor: change send script notify log 2025-03-26 16:00:02 +08:00
Yening Qin
9ad02075c6 refactor: es query add IgnoreUnavailable(true) (#2566) 2025-03-26 11:05:45 +08:00
ning
6d27ff673f docs: add migrate sql 2025-03-25 15:15:53 +08:00
kugarocks
ee4e2b3f7d chore: delete rule worker severity var (#2539) 2025-03-21 18:54:26 +08:00
smx_Morgan
e6de301c65 feat: verify host before add task (#2544)
Co-authored-by: Yening Qin <710leo@gmail.com>
2025-03-21 18:48:06 +08:00
Ulric Qin
d4f5871fba Merge branch 'main' of github.com:ccfos/nightingale 2025-03-21 18:39:05 +08:00
Ulric Qin
c2e61f3741 delete no use configurations in docker directory 2025-03-21 18:38:46 +08:00
Yening Qin
d26df3b331 refactor: es query add log (#2560) 2025-03-21 18:37:45 +08:00
Yening Qin
391c674d21 refactor: add metrics and change crontab (#2559) 2025-03-21 16:29:08 +08:00
Ulric Qin
b95457ee9c update n9e-v8 dashboard 2025-03-20 19:06:43 +08:00
Ulric Qin
09179b004c refactor pushgw stats 2025-03-20 18:54:23 +08:00
Ulric Qin
274de9b994 delete no use code 2025-03-20 18:42:06 +08:00
Ulric Qin
7fcb9f7e4a add n9e.v8 dashboard 2025-03-20 18:38:06 +08:00
Ulric Qin
06ca3c2579 refactor default settings of QueueNumber 2025-03-20 17:25:17 +08:00
Ulric Qin
68509a9ed4 refactor default settings of QueueNumber 2025-03-20 17:20:35 +08:00
Ulric Qin
ea88def18c refactor configurations 2025-03-20 17:16:52 +08:00
Ulric Qin
a22fded16f refactor pushgw queue logic 2025-03-20 17:12:13 +08:00
Ulric Qin
490dc62dad fix: init pushgw http transport 2025-03-20 13:43:58 +08:00
Ulric Qin
47dbe5f2e2 Merge branch 'main' of github.com:ccfos/nightingale 2025-03-20 11:41:46 +08:00
Ulric Qin
596ee8b26d refactor writer http transport initilization 2025-03-20 11:41:19 +08:00
Yening Qin
677bf50293 refactor: change migrate bg (#2550) 2025-03-18 23:16:03 +08:00
ning
99cc397290 fix: email notify channel send muti people 2025-03-17 15:42:09 +08:00
ning
938299a539 fix: edge panic where query with var 2025-03-17 15:20:07 +08:00
ning
f44964c876 refactor: change notify rule test api 2025-03-17 14:45:36 +08:00
ning
f284baf139 feat: add callback notify channel 2025-03-17 14:37:06 +08:00
Yening Qin
17495c8e01 feat: add slack notify channel (#2543) 2025-03-17 12:22:17 +08:00
ning
58100f9924 Merge branch 'main' of github.com:ccfos/nightingale 2025-03-14 17:37:11 +08:00
ning
13a7d64499 docs: init sql add index 2025-03-14 17:36:58 +08:00
kugarocks
94102e8fbc typo: rename relaodTpls to reloadTpls (#2541) 2025-03-14 17:18:07 +08:00
smx_Morgan
2d6e066d54 feat: allow clone alert rules (#2542) 2025-03-14 17:06:46 +08:00
Ulric Qin
a553aa5f78 refactor pushgw: add more queue for metric prefix 2025-03-14 17:03:50 +08:00
ning
4a50ae9ef1 add comment 2025-03-14 12:27:13 +08:00
ning
a86f5d7996 refactor: change queue full log 2025-03-14 11:54:25 +08:00
小炒肉
728af57d8e add feishuapp notify channel (#2537)
Co-authored-by: zhihuanzhu <zhihuanzhu@deeproute.ai>
Co-authored-by: Yening Qin <710leo@gmail.com>
2025-03-13 19:47:38 +08:00
ning
5c02fc64b8 fix: get user contacts 2025-03-13 12:15:52 +08:00
ning
d890476e5a docs: update go.mod 2025-03-13 10:29:15 +08:00
ning
c2af8b1064 refactor: delete script stdin json.sendto 2025-03-12 20:08:28 +08:00
ning
e64629dafd Merge branch 'main' of github.com:ccfos/nightingale 2025-03-12 19:57:41 +08:00
ning
9bcddf3457 fix ali sms notify 2025-03-12 19:57:27 +08:00
ningblue
2ea820645a docs: update migrate.sql (#2535) 2025-03-12 16:36:45 +08:00
Ulric Qin
70b7ed35b4 Merge branch 'main' of github.com:ccfos/nightingale 2025-03-12 12:23:42 +08:00
Ulric Qin
b4603dc012 code refactor 2025-03-12 12:23:28 +08:00
ning
9360433f96 change notify channel init 2025-03-11 21:02:06 +08:00
ning
3346a4aa29 update notify channel init 2025-03-11 19:29:29 +08:00
ning
7c7a560c55 add git commit -m 2025-03-11 18:18:39 +08:00
ning
88c5a7bbef add channel idents list api 2025-03-11 18:14:46 +08:00
ning
76654b64e7 refactor: add notify_channel_ident 2025-03-11 18:04:32 +08:00
ning
4648b16106 chore: ignore front/statik/statik.go 2025-03-11 16:56:34 +08:00
ning
0bec5b55c5 change user.ExtractToken 2025-03-11 16:49:24 +08:00
ning
3744e396c6 refactor: notify test api 2025-03-11 15:30:02 +08:00
ning
947365c5f3 refactor: change msg tpl 2025-03-11 15:27:52 +08:00
ning
71f8d6b1cb refactor: send http 2025-03-11 15:13:12 +08:00
ning
15a263f525 refactor: notify test api 2025-03-11 12:21:20 +08:00
ning
f3cc0e5b57 fix: load event from db 2025-03-11 11:30:19 +08:00
ning
6e15c88e26 refactor: change feishu tpl 2025-03-11 10:40:50 +08:00
710leo
ed37299118 refactor: send script 2025-03-11 00:19:03 +08:00
710leo
ec7c72d68c add jsonMarshal tpl func 2025-03-11 00:15:37 +08:00
710leo
20e986091b refactor: tpl init 2025-03-11 00:02:55 +08:00
710leo
f78e92f253 refactor: NotifyContact api 2025-03-10 23:46:58 +08:00
ning
94d6c3a075 fix: aliyun sms and voice send 2025-03-10 23:19:01 +08:00
ning
b830622cbf fix: notify rule get user group 2025-03-10 19:59:11 +08:00
ning
ba63f512c3 refactor: send script 2025-03-10 19:39:12 +08:00
ning
c3db7d0d51 refactor: migrate and alert_subscribe notify rule 2025-03-10 16:43:50 +08:00
ning
c0d0d48a83 fix: get notify rule server api 2025-03-10 13:16:40 +08:00
ning
e22103ff7f refactor: change msg tpl 2025-03-09 23:55:02 +08:00
ning
31362e41d5 refactor: notify test 2025-03-08 19:40:27 +08:00
Yening Qin
00b502579d feat: add Discord notifications (#2519)
Co-authored-by: smx_Morgan <86641888+smx-Morgan@users.noreply.github.com>
2025-03-08 19:13:05 +08:00
Yening Qin
52d032b6f5 refactor: optimize dingtalk html (#2522) 2025-03-08 19:09:10 +08:00
Yening Qin
9026736acb refactor: support dingtalk ats by phone (#2521) 2025-03-08 12:41:27 +08:00
NinaLua
8ceea820db chore: change some comments (#2517) 2025-03-07 19:05:35 +08:00
Yening Qin
0686ea4fe7 fix add self metric (#2515) 2025-03-07 14:38:37 +08:00
Yening Qin
d1ea3ed450 refactor: change notify tpl init (#2513) 2025-03-07 12:29:31 +08:00
Yening Qin
0c6558f92f change notify channel init (#2511) 2025-03-07 11:54:10 +08:00
Yening Qin
446da9b8cb refactor: add self metrics (#2509) 2025-03-07 10:43:10 +08:00
710leo
8612a53ded refactor: event notify 2025-03-05 21:13:07 +08:00
Yening Qin
52b7890eac refactor: update notify channel api (#2508) 2025-03-05 18:03:06 +08:00
Yening Qin
0166405069 add contact perm (#2506) 2025-03-05 11:28:43 +08:00
Yening Qin
863b2f6659 refactor: change some noitfy api (#2505) 2025-03-04 20:35:31 +08:00
ning
e39cdabd8d refactor notify test api 2025-03-04 19:01:17 +08:00
ning
a5b4b09619 add service api 2025-03-04 17:49:56 +08:00
ning
8690a28619 refactor: notify rule add api 2025-03-04 16:37:34 +08:00
ning
0142cc36e6 refactor: notify rule api perm 2025-03-04 15:42:38 +08:00
ning
1fbe0889f6 refactor: index_pattern query 2025-03-04 14:46:32 +08:00
Yening Qin
f384a9a235 alert rule support index pattern (#2491) 2025-03-04 14:32:27 +08:00
ning
2d21249856 update ops 2025-03-04 14:28:04 +08:00
ning
69e58f53f3 refactor: alert subscribe api 2025-03-04 14:09:02 +08:00
Yening Qin
ab41eb58fa refactor notify rule (#2501) 2025-03-03 20:50:46 +08:00
Yening Qin
7fd415d7f7 feat: support notify rule (#2500)
Co-authored-by: flashbo <36443248+lwb0214@users.noreply.github.com>
Co-authored-by: Xu Bin <140785332+Reditiny@users.noreply.github.com>
2025-03-03 19:59:01 +08:00
smx_Morgan
f7401b7b40 refactor: read config disable_integration_init to decide skip init integration (#2492) 2025-02-26 19:22:49 +08:00
ning
ef0430052a refactor: ensure proper UTF-8 character boundaries when truncating script output 2025-02-25 12:10:13 +08:00
ning
ab49b13596 fix: add recording rule 2025-02-25 11:20:01 +08:00
Yening Qin
b727c36b2a refactor: async alert notification record (#2474) (#2482)
Co-authored-by: smx_Morgan <86641888+smx-Morgan@users.noreply.github.com>
2025-02-24 14:13:12 +08:00
smx_Morgan
154c44b63e feat: support recording script alert results (#2469) 2025-02-18 13:26:42 +08:00
ning
91a8afbf1c docs: update config.toml add token auth 2025-02-14 15:00:20 +08:00
Ulric Qin
a7207cf4e1 set ibex.Enable to true by default 2025-02-13 16:36:10 +08:00
Yening Qin
bd6d1cf88d feat: support push data to kafka (#2463)
Co-authored-by: flashbo <36443248+lwb0214@users.noreply.github.com>
2025-02-12 15:47:00 +08:00
ning
12382b3b0e refactor: change new user token 2025-02-11 15:30:25 +08:00
ning
4803fa628b refactor: add user token api 2025-02-11 14:54:27 +08:00
ning
992f62cbf5 refactor: add token last used time 2025-02-11 10:58:28 +08:00
ning
3cb6d65bd1 refactor: add token last used time 2025-02-10 20:41:45 +08:00
ning
a0ec09669f refactor: target delete api 2025-02-10 17:36:29 +08:00
ning
82855d9c68 refactor: optimize poster pkg 2025-02-08 18:30:39 +08:00
ning
56d3031a6e refactor: change alert rule api 2025-02-08 14:46:02 +08:00
ning
22e9c99e46 refactor: add log 2025-02-08 14:32:48 +08:00
ning
200117b8b2 refactor: add log 2025-02-08 14:21:33 +08:00
ulricqin
836caabee8 Update host_generic_categraf_all.json 2025-02-08 11:54:11 +08:00
ulricqin
65ddd8c724 Update host_generic_categraf_all.json 2025-02-08 11:46:02 +08:00
ning
bb7556c75a docs: add migrate sql 2025-02-07 17:11:11 +08:00
ning
b83f118f1b refactor: delete user token 2025-02-06 20:15:10 +08:00
ning
9e0f0581d6 refactor: user token sync 2025-02-06 16:40:14 +08:00
Yening Qin
250c737174 feat: api support token auth (#2453)
Co-authored-by: Xu Bin <140785332+Reditiny@users.noreply.github.com>
2025-02-06 16:01:57 +08:00
Xu Bin
cdf8140e3c fix: alert check when var contains .* 2025-02-05 20:02:27 +08:00
flashbo
f8d7e84ca0 refactor: query target (#2424) 2025-02-05 19:40:58 +08:00
ulricqin
542a98e708 Update README_en.md 2025-01-25 22:07:18 +08:00
ulricqin
fb8ee0be72 Update README_en.md 2025-01-25 22:06:36 +08:00
ulricqin
a4e9349dfd Update LICENSE COPYRIGHT 2025-01-25 21:49:49 +08:00
ning
8df3ff0f03 update ops desc 2025-01-24 17:08:58 +08:00
ning
a5d38d63ca update ops 2025-01-24 17:01:16 +08:00
ning
9cf147faf1 Merge branch 'main' of github.com:ccfos/nightingale 2025-01-23 18:11:32 +08:00
ning
0dd3d0e29d fix: es query delay 2025-01-23 18:11:18 +08:00
VicLai
9e95ab951a Update README.md 2025-01-23 16:18:13 +08:00
ning
2482ef45fb docs: remove dash tpl 2025-01-23 15:39:55 +08:00
ning
d33f1f1bdb docs: update ops 2025-01-22 15:03:53 +08:00
Yening Qin
0a9439446f refactor: optimize ops (#2447)
Co-authored-by: Xu Bin <140785332+Reditiny@users.noreply.github.com>
2025-01-21 20:50:30 +08:00
Yening Qin
8d4137c5bb feat: es support nodata alert (#2445) 2025-01-21 18:01:50 +08:00
ulricqin
caabbba251 更新 README.md 2025-01-21 09:17:58 +08:00
ulricqin
3d21a5c426 更新 README.md 2025-01-21 08:48:15 +08:00
ning
e928363e5d docs: rename es metrics cate 2025-01-20 18:09:25 +08:00
Ulric Qin
6879181f00 refactor linux dashboards 2025-01-17 16:39:23 +08:00
Ulric Qin
a8808c5262 update img url 2025-01-17 11:32:11 +08:00
Yening Qin
9253145aad refactor: optimize recv and push data (#2437) 2025-01-16 17:23:14 +08:00
ulricqin
1968e13da6 更新 README.md 2025-01-15 08:04:50 +08:00
ulricqin
88d075ba13 更新 README.md 2025-01-14 21:33:49 +08:00
Yening Qin
562da5a73f refactor: data recv and push to queue (#2427) 2025-01-14 12:08:03 +08:00
Yening Qin
9780e1ee8f refactor: change metric type filter (#2425) 2025-01-10 17:32:22 +08:00
Yening Qin
db050ec781 filter metric type (#2423) 2025-01-10 15:11:41 +08:00
ning
6a31521b62 fix: es group by int 2025-01-09 12:09:48 +08:00
ning
61512857a5 refactor: builtin_components 2025-01-08 20:48:37 +08:00
ning
cb56037ef8 refactor: change eval sleep 2025-01-08 19:40:04 +08:00
ning
2ebd64dfa0 Merge branch 'main' of github.com:ccfos/nightingale 2025-01-08 11:28:58 +08:00
ning
4d2ffdf096 refactor: change builtin component model 2025-01-08 11:24:50 +08:00
kongfei605
1915701ce0 chore: default interval for cloudwatch collecting (#2418) 2025-01-07 10:35:53 +08:00
kongfei605
7fd9cd5a3d chore: update tpl and readme for cloudwatch (#2417) 2025-01-06 19:13:45 +08:00
ning
0e2f386419 refactor: change tdengine 2025-01-06 17:29:58 +08:00
ning
b96b08fb9e refactor: change tdengine 2025-01-06 17:22:22 +08:00
ning
eebd1021de refactor: change tdengine 2025-01-06 17:04:46 +08:00
ning
ef61a4cfa7 refactor: change tdengine 2025-01-06 16:50:29 +08:00
ning
2563d2891d feat: add cron pattern validation for alert rule 2025-01-06 11:45:43 +08:00
ning
6ae8ef0d9f feat: add random delay before starting alert rule worker 2025-01-06 11:21:26 +08:00
Yening Qin
38adbefe9c feat: dashboard support add annotations (#2416) 2025-01-05 17:46:17 +08:00
Yening Qin
3f5e0c056d feat: support elasticsearch alert (#2400)
Co-authored-by: Xu Bin <140785332+Reditiny@users.noreply.github.com>
2025-01-05 17:44:17 +08:00
flashbo
b0131a3799 feat: support setting builtin component to disabled (#2406) 2025-01-02 11:11:54 +08:00
Yening Qin
cbb03a7c63 refactor: optimize enum type for alert rule with var
Co-authored-by: Xu Bin <140785332+Reditiny@users.noreply.github.com>
2024-12-28 22:18:39 +08:00
ning
080d412124 docs: add sql 2024-12-26 14:29:50 +08:00
ning
752e02f32d docs: add sql 2024-12-26 14:26:15 +08:00
CRISPpp
e05d59d72a refactor: update target update group part (#2388) 2024-12-25 15:35:19 +08:00
Yening Qin
854e30551a fix: docker compose of postgres init error (#2370) (#2392)
Co-authored-by: CRISPpp <78430796+CRISPpp@users.noreply.github.com>
2024-12-24 15:55:03 +08:00
Yening Qin
0b6dc5beba refactor get user from context (#2391)
Co-authored-by: flashbo <36443248+lwb0214@users.noreply.github.com>
2024-12-24 15:52:55 +08:00
ning
8685a95fa5 Merge branch 'main' of github.com:ccfos/nightingale 2024-12-24 15:40:22 +08:00
ning
7ca7fd8d66 refactor: event set AnnotationsJSON 2024-12-24 15:40:08 +08:00
Yening Qin
1b5dc81b6c fix: the dedup logic when adding tags to target (#2386)
Co-authored-by: flashbo <36443248+lwb0214@users.noreply.github.com>
2024-12-24 11:48:58 +08:00
Ulric Qin
04495f0892 set ignore_host to true 2024-12-20 18:10:48 +08:00
Yening Qin
8158ce1b90 refactor: global webhook add env proxy (#2375)
Co-authored-by: Xu Bin <140785332+Reditiny@users.noreply.github.com>
2024-12-20 14:21:47 +08:00
Yening Qin
a43952e168 refactor: es_index_pattern add cross_cluster_enabled (#2372) 2024-12-19 14:12:27 +08:00
Yening Qin
5702fc81d0 refactor: group delete check (#2368)
Co-authored-by: Xu Bin <140785332+Reditiny@users.noreply.github.com>
2024-12-18 17:00:18 +08:00
Xu Bin
7cc65a2ca7 refactor: add id for configsGetAll (#2361) 2024-12-16 20:38:53 +08:00
ning
7bb6c6541a chore: uodate gomod 2024-12-15 19:42:00 +08:00
ning
8b4cfe65e3 Merge branch 'main' of github.com:ccfos/nightingale 2024-12-13 10:56:27 +08:00
ning
7227de8c22 docs: update migrate.sql 2024-12-13 10:56:15 +08:00
CRISPpp
069e267af8 docs: update sqlite.sql (#2356) 2024-12-13 10:18:59 +08:00
ning
7c5c9a95c3 refactor: change sqlite driver 2024-12-12 21:32:51 +08:00
ning
e3da7f344b docs: update goreleaser.yaml 2024-12-12 21:12:57 +08:00
Yening Qin
dd741a177f docs: rename es integration 2024-12-12 19:27:36 +08:00
ning
4fdd25f020 docs: set HTTP.APIForService.Enable to false 2024-12-12 19:24:07 +08:00
Yening Qin
62350bfbc6 fix: alert rule with var (#2357) 2024-12-12 16:59:09 +08:00
CRISPpp
5ee1baaf07 feat: add config dir and config file check (#2350)
Co-authored-by: Yening Qin <710leo@gmail.com>
2024-12-12 13:24:11 +08:00
Xu Bin
fa12889f06 fix: alert rule check with var when not exact match (#2354) 2024-12-12 11:04:48 +08:00
Yening Qin
39306a5bf0 refactor: optimize webhook send (#2352) 2024-12-11 17:51:20 +08:00
ning
0aea38e564 refactor: write queue limit 2024-12-10 21:03:55 +08:00
CRISPpp
45e9253b2a feat: add global metric write rate control (#2347) 2024-12-10 20:43:02 +08:00
CRISPpp
9385ca9931 feat: add pre check for deleting busi_group (#2346) 2024-12-09 20:32:46 +08:00
ning
fdd3d14871 docs: change default db type to sqlite 2024-12-06 21:04:10 +08:00
Yening Qin
e890034c19 feat: auto init db (#2345)
Co-authored-by: CRISPpp <78430796+CRISPpp@users.noreply.github.com>
2024-12-06 20:32:17 +08:00
Yening Qin
3aaab9e6ad fix: event prom eval interval (#2343) 2024-12-06 20:24:49 +08:00
CRISPpp
7f7d707cfc fix: role_operation abnormal count (#2338) 2024-12-06 16:31:47 +08:00
Xu Bin
98402e9f8a fix: quotation mark for alert rule var (#2339) 2024-12-06 16:07:47 +08:00
Xu Bin
017094fd78 fix: var support for aggregate function (#2334) 2024-12-06 11:57:51 +08:00
Yening Qin
8b6b896362 feat: redis support miniredis type (#2337)
Co-authored-by: CRISPpp <78430796+CRISPpp@users.noreply.github.com>
2024-12-06 10:46:05 +08:00
ning
acaa00cfb6 refactor: migrate add more log 2024-12-05 17:55:27 +08:00
flashbo
87f3d8595d fix: targets filter logic (#2333) 2024-12-05 14:31:57 +08:00
flashbo
42791a374d feat: targets support sorting by time (#2331) 2024-12-05 14:20:30 +08:00
kongfei605
3855c25805 chore: update dashboards for mongodb (#2332) 2024-12-04 16:19:21 +08:00
Xu Bin
10ec0ccbd1 fix: alert rule cron eval (#2330) 2024-12-03 16:58:30 +08:00
flashbo
94cf304222 refactor: improve target bind bg logic (#2327) 2024-12-03 15:00:25 +08:00
ning
994de4635a docs: update n9e.sql 2024-12-02 20:18:32 +08:00
ning
9a0013a406 docs: update n9e.sql 2024-12-02 09:16:50 +08:00
CRISPpp
6dcd5dd01e docs: complete initialization n9e.sql (#2325) 2024-11-29 18:52:00 +08:00
ning
70126e3aec refactor: sync.map clear 2024-11-29 18:23:45 +08:00
ning
767482d358 refactor: optimize prom query 2024-11-28 15:58:31 +08:00
YangHgRi
9a46106cc0 refactor: specify parameter type in function to improve type safety and clarity (#2317) 2024-11-26 20:55:49 +08:00
Yening Qin
da9ea67cee feat: alert rule annotation support prom query template func (#2314)
Co-authored-by: Xu Bin <140785332+Reditiny@users.noreply.github.com>
2024-11-22 16:58:00 +08:00
6666walnut
c13ecd780b fix: get busi-groups api err (#2313)
Co-authored-by: wangjing17 <wangjing17@foundersc.com>
2024-11-22 16:55:32 +08:00
ning
cab37c796a refactor: target_busi_group set utf8mb4_general_ci 2024-11-22 13:28:36 +08:00
ning
078578772b refactor: change builtin component logo type 2024-11-21 20:22:37 +08:00
Yening Qin
31883ec844 refactor: alert rule support cron (#2309)
Co-authored-by: Xu Bin <140785332+Reditiny@users.noreply.github.com>
2024-11-21 13:14:19 +08:00
ning
6100cd084a refactor: event recovery notify 2024-11-21 11:00:30 +08:00
Yening Qin
b82e260d65 feat: alert rule support var (#2307)
Co-authored-by: Xu Bin <140785332+Reditiny@users.noreply.github.com>
2024-11-20 20:34:33 +08:00
ning
3983386af3 fix: get notify-record api err 2024-11-20 20:02:38 +08:00
Ulric Qin
83f2054062 lock version of prom 2024-11-20 17:49:21 +08:00
Ulric Qin
83e0b3cb98 Merge branch 'main' of github.com:ccfos/nightingale 2024-11-20 17:23:29 +08:00
Ulric Qin
f6bfa17e2e update init sql 2024-11-20 17:23:17 +08:00
flashbo
3d8019b738 refactor: event notify record (#2296) 2024-11-19 20:25:17 +08:00
Ulric Qin
ee1be71be6 Merge branch 'main' of github.com:ccfos/nightingale 2024-11-19 19:47:14 +08:00
Ulric Qin
7f2fb459bb update mysql and redis dashboard 2024-11-19 19:47:00 +08:00
ning
fde6a9c75e refactor: change is_recovered type 2024-11-19 19:23:21 +08:00
Ulric Qin
a2b506e263 add input.redis in docker-compose 2024-11-19 17:28:56 +08:00
Ulric Qin
30024a4951 add redis dashboard 2024-11-19 17:26:33 +08:00
Ulric Qin
2c3996812a remove global labels: source=categraf 2024-11-19 17:24:10 +08:00
Ulric Qin
51d35900f2 add input.mysql in docker-compose/etc-categraf 2024-11-19 17:05:46 +08:00
Ulric Qin
852fd2ea6e add field is_recovered when call ibex 2024-11-19 16:49:28 +08:00
Ulric Qin
e1a57217ab update mysql dashboard 2024-11-19 11:28:22 +08:00
Ulric Qin
1e7dad1a67 Merge branch 'main' of github.com:ccfos/nightingale 2024-11-19 11:26:40 +08:00
Ulric Qin
534e40ad63 add mysql dashboard 2024-11-19 11:26:27 +08:00
CRISPpp
15daa3826c feat: add console log with n9e address and root username/passwd when init root (#2302) 2024-11-19 10:31:41 +08:00
ning
d5efb5b6d4 update go.mod 2024-11-19 10:29:39 +08:00
ning
7ebd776881 docs: update doris dashboard tpl 2024-11-18 20:03:29 +08:00
Ulric Qin
0e5cda1cee support proxy when call center 2024-11-18 16:04:15 +08:00
Ulric Qin
64dad19377 Merge branch 'main' of github.com:ccfos/nightingale 2024-11-18 16:01:12 +08:00
Ulric Qin
48f199f8f5 sender support ProxyFromEnvironment 2024-11-18 16:00:56 +08:00
Yening Qin
f7e4df7415 refactor: self monitor metric (#2285)
Co-authored-by: Xu Bin <140785332+Reditiny@users.noreply.github.com>
2024-11-18 11:46:34 +08:00
ning
37fe01ab54 docs: update workflows 2024-11-15 17:45:17 +08:00
ning
cbfe661bce docs: update go version in mod 2024-11-15 17:35:54 +08:00
Yening Qin
890c12f0d4 feat: alert rule query add unit (#2299)
Co-authored-by: Xu Bin <140785332+Reditiny@users.noreply.github.com>
2024-11-15 17:09:41 +08:00
Yening Qin
643c6c997c fix: proxy parse url (#2297) 2024-11-15 16:37:01 +08:00
robin
b201836b40 fix:create user info for notify_tpl (#2292) 2024-11-15 11:57:23 +08:00
robin
b5eced1540 docs: add doris template (#2260) 2024-11-14 22:50:45 +08:00
Yening Qin
a13004eab7 feat: allow override global webhook (#2257)
Co-authored-by: flashbo <36443248+lwb0214@users.noreply.github.com>
2024-11-14 22:35:23 +08:00
Yening Qin
a0c56548e5 refactor: migrate label (#2293) 2024-11-14 22:25:20 +08:00
ning
e3d97386a8 refactor: dash tpl uuid 2024-11-14 22:14:18 +08:00
ning
051b0ca045 Merge branch 'main' of github.com:ccfos/nightingale 2024-11-14 19:18:20 +08:00
ning
2941ced011 fix: import builtin board 2024-11-14 19:15:02 +08:00
Ulric Qin
97d6908edd fix mongodb dashboard 2024-11-14 18:28:52 +08:00
710leo
c7117b9461 fix: proxy api parse url 2024-11-13 23:00:49 +08:00
Yening Qin
78417b1d5b refactor: optimize rule datasource set (#2288)
Co-authored-by: Xu Bin <140785332+Reditiny@users.noreply.github.com>
2024-11-13 20:28:50 +08:00
Yening Qin
79f3404810 refactor event notify (#2287) 2024-11-13 19:50:11 +08:00
ning
81e51c60eb refactor: subscribe add check 2024-11-12 20:26:56 +08:00
shardingHe
af9cd55ca5 docs: add metrics config for oracle (#2276)
Co-authored-by: shardingHe <wangzihe@flashcat.cloud>
2024-11-12 13:59:35 +08:00
710leo
d4afdb2b6e refactor: change log 2024-11-06 22:34:30 +08:00
flashbo
2befc8b0f1 refactor: migrate bg label (#2269) 2024-11-06 21:48:29 +08:00
Yening Qin
14fd2eb26d refactor: update tdengine query (#2270) 2024-11-06 20:27:21 +08:00
ning
0a938518d7 refactor: target_busi_group table name 2024-11-06 13:00:35 +08:00
ning
0eed5afa7e refactor: update target_busi_group character 2024-11-05 14:46:41 +08:00
Yening Qin
f82eaf0a1f refactor: optimize tdentine (#2262) 2024-11-04 17:33:18 +08:00
ning
f03278d68d refactor: append tags 2024-11-04 16:43:39 +08:00
shardingHe
7d1e143f60 docs: sync configurations for bind & ldap (#2253)
Co-authored-by: shardingHe <wangzihe@flashcat.cloud>
2024-11-02 16:49:49 +08:00
ning
078a0c7b1c refactor: prom query log 2024-11-01 15:28:23 +08:00
flashbo
d9cac65a18 refactor: improve prom_rule import (#2251) 2024-10-30 14:28:00 +08:00
ning
dd025ca87c refactor: migrate db and host_miss tag append 2024-10-30 14:20:16 +08:00
ning
04734b8940 Merge branch 'main' of github.com:ccfos/nightingale 2024-10-29 12:09:50 +08:00
ning
bf7bcf4196 docs: update notify tpl 2024-10-29 12:09:26 +08:00
ulricqin
16195abb89 Update docker-compose.yaml 2024-10-29 12:08:40 +08:00
ning
3f4891d65d refactor: event queue push 2024-10-28 20:51:21 +08:00
Yening Qin
102549c6a1 refactor: webhook send event (#2248)
Co-authored-by: Xu Bin <140785332+Reditiny@users.noreply.github.com>
2024-10-28 20:33:29 +08:00
Yening Qin
5213b1d7f1 refactor: es update config (#2247)
Co-authored-by: flashbo <36443248+lwb0214@users.noreply.github.com>
2024-10-28 20:32:45 +08:00
Yening Qin
24de97fb1e refactor: update default engine name (#2245) 2024-10-28 15:50:52 +08:00
ning
9c2cf679e0 refactor: center set default engine_name 2024-10-28 13:37:55 +08:00
Yening Qin
2aa4941010 refactor: optimize recover notify(#2242)
Co-authored-by: Xu Bin <140785332+Reditiny@users.noreply.github.com>
2024-10-25 16:53:44 +08:00
flashbo
a812f14442 refactor: record notify for callback (#2231) 2024-10-25 16:50:12 +08:00
flashbo
4fb7e8e2b5 refactor: fill group names in target (#2241) 2024-10-25 16:30:09 +08:00
ulricqin
113ad67104 Update README.md 2024-10-25 12:10:28 +08:00
flashbo
49d843540a refactor: add ExtraInfoMap in alert event (#2240) 2024-10-25 11:03:56 +08:00
Yening Qin
21f0e3310f fix: event relabel when target_label is blank (#2228)
Co-authored-by: Xu Bin <140785332+Reditiny@users.noreply.github.com>
2024-10-24 14:09:41 +08:00
ulricqin
31b3434e87 Update README.md 2024-10-22 14:19:33 +08:00
ning
2576a0f815 fix: edge get all configs 2024-10-21 19:30:13 +08:00
ning
0ac4bc7421 docs: update linux dashboard tpl 2024-10-21 18:07:52 +08:00
ning
95e6ea98f4 refactor: prom client query api add retry 2024-10-21 17:57:31 +08:00
ning
dc60c74c0d docs: update automq dashboard tpl 2024-10-21 16:50:36 +08:00
ning
a15adc196d docs: update linux dashboard tpl 2024-10-21 16:35:53 +08:00
ning
f89ef04e85 refactor: optimize code robustness 2024-10-21 14:54:48 +08:00
Yening Qin
f55cd9b32e feat: config access log in web (#2227) 2024-10-21 12:11:19 +08:00
Xu Bin
305a898f8b feat: alert recover ckeck (#2226) 2024-10-21 12:07:54 +08:00
Yening Qin
60c31d8eb2 feat: support query set opration (#2225) 2024-10-20 21:18:12 +08:00
ning
7da49a8c68 refactor: update go.mod 2024-10-20 14:04:31 +08:00
flashbo
65b1410b09 refactor: support output logs to one file (#2209) 2024-10-20 14:02:44 +08:00
ning
3901671c0e docs: update n9e.sql 2024-10-18 15:24:33 +08:00
Xu Bin
9c02937e81 refactor: alert mute retain (#2223) 2024-10-18 12:08:31 +08:00
flashbo
0a255ee33a fix: unbind bgids when delete target (#2219) 2024-10-16 10:00:08 +08:00
Xu Bin
8dc198b4b1 fix: smtp update (#2213) 2024-10-12 11:37:14 +08:00
Yening Qin
9696f63a71 rename tpl name 2024-10-11 16:23:57 +08:00
Xu Bin
03f56f73b4 feat: ldap support multi basecn (#2198) 2024-10-08 16:06:21 +08:00
Ulric Qin
7b415c91af update qrcode 2024-10-08 15:40:34 +08:00
flashbo
2abf089444 feat: rule list add user nickname (#2201) 2024-10-08 15:25:25 +08:00
mt
e504dab359 fix: update router_alert_cur_event.go (#2210) 2024-10-03 00:27:31 +08:00
710leo
989ed62e8d refactor: update GetAnomalyPoint 2024-09-29 19:34:25 +08:00
nl594
b7197d10eb docs: add new ipmi dashboard (#2204)
* add new ipmi dashboard

* Update IPMI_by_prometheus.json

---------

Co-authored-by: niulong <niulong@xylink.com>
Co-authored-by: Yening Qin <710leo@gmail.com>
2024-09-29 13:24:56 +08:00
Xu Bin
f4de256388 refactor: target delete hook (#2202) 2024-09-27 15:43:57 +08:00
Xu Bin
3f5126923f feat: get build payload by UUID (#2203) 2024-09-27 15:43:18 +08:00
flashbo
5d3e70bc4c refactor: datasouce support force save (#2200) 2024-09-27 14:40:48 +08:00
710leo
bb2c5202ad Merge branch 'main' of github.com:ccfos/nightingale 2024-09-27 14:26:48 +08:00
710leo
3acf3d7bf9 refactor: migrate target bg 2024-09-27 14:26:35 +08:00
shardingHe
a79810b15d add deployment & statefulset dashboard (#2196)
Co-authored-by: shardingHe <wangzihe@flashcat.cloud>
2024-09-26 17:38:47 +08:00
710leo
f61cb532f8 Merge branch 'main' of github.com:ccfos/nightingale 2024-09-26 15:49:28 +08:00
710leo
34a5a752f4 refactor: update aconf check 2024-09-26 15:49:14 +08:00
Ulric Qin
9be3deeebd update wechat qrcode 2024-09-26 10:14:16 +08:00
710leo
2ceed84120 fix: host alert host filter by busigroup 2024-09-25 15:07:54 +08:00
710leo
8fbe257090 docs: update i18n 2024-09-24 16:27:51 +08:00
710leo
ae35d780c6 refactor: update busigroup del api 2024-09-24 15:49:14 +08:00
710leo
4d2cdfce53 optimize target fill group 2024-09-24 15:29:51 +08:00
710leo
a0e4d0d46e refactor: target bind api 2024-09-24 15:20:21 +08:00
710leo
dd07d04e2f refactor: update target api 2024-09-24 14:37:27 +08:00
710leo
61203e8b75 feat: add boards api 2024-09-24 10:27:43 +08:00
710leo
f24bc53c94 refactor: update target bind group api 2024-09-23 13:13:09 +08:00
710leo
ef6abe3fdc refactor: update target bind api 2024-09-22 23:00:32 +08:00
710leo
461361d3d0 fix: heartbeat api auth check for n9e-edge 2024-09-22 21:05:31 +08:00
710leo
52b3afbd97 fix: recovery event tags map split 2024-09-22 19:14:24 +08:00
710leo
652439bb85 Merge branch 'main' of github.com:ccfos/nightingale 2024-09-22 00:33:40 +08:00
710leo
6f0c13d4e7 fix: edge target cache 2024-09-22 00:33:28 +08:00
ulricqin
c9f46bad02 Remove duplicate fields UseTLS 2024-09-21 20:44:02 +08:00
710leo
75146f3626 docs: add target_busi_group sql 2024-09-20 18:14:15 +08:00
710leo
50aafbd73d refactor: update target query 2024-09-20 18:09:07 +08:00
710leo
b975cb3c9d refactor: update append_labels 2024-09-20 16:24:47 +08:00
flashbo
11deb4ba26 feat: host bind muti group (#2185) 2024-09-19 20:32:08 +08:00
flashbo
ec927297d6 feat:support query alert event by rule id (#2179) 2024-09-19 11:04:14 +08:00
Yening Qin
f476d7cd63 fix: incorrect content in feishucard when sending a large number of messages (#2180) 2024-09-18 18:00:13 +08:00
ulricqin
410f3bbceb Update README.md wechat qrcode 2024-09-18 08:13:42 +08:00
cui fliter
2ad53d6862 refactor: make uids in NotifyTarget (#2169) 2024-09-13 19:26:18 +08:00
710leo
fc392e4af1 docs: update linux metrics tpl 2024-09-13 19:10:33 +08:00
fangpsh
9c83c7881a docs: update oom_kill alert rule tpl (#2170)
Co-authored-by: fangpsh <fangpsh@zego.im>
2024-09-13 19:07:08 +08:00
flashbo
f1259d1dff refactor: alert rule callback url dedup (#2165) 2024-09-13 16:24:04 +08:00
Yening Qin
d9d59b3205 fix: recording rule update (#2168) 2024-09-13 16:20:48 +08:00
Ulric Qin
d11cfb0278 Merge branch 'main' of github.com:ccfos/nightingale 2024-09-09 11:49:37 +08:00
Ulric Qin
5adcfc6eaa update README 2024-09-09 11:49:26 +08:00
710leo
037152ad72 refactor: update alert-cur-event api 2024-09-03 18:17:28 +08:00
Ulric Qin
2de304d4f2 move sqlite.sql to docker dir 2024-09-03 17:51:35 +08:00
Ulric Qin
03c56d048f modify column trigger_value to text 2024-09-03 17:50:13 +08:00
Ulric Qin
1cddb4eca0 Merge branch 'main' of github.com:ccfos/nightingale 2024-09-03 17:46:57 +08:00
Ulric Qin
2dc033944d bugfix InitBuiltinPayloads 2024-09-03 17:46:43 +08:00
flashbo
63e6c78e71 feat: targets support multi idents query (#2119) 2024-09-03 15:32:05 +08:00
Ulric Qin
e1f04eebe7 update README 2024-08-30 17:58:49 +08:00
Yening Qin
ce17e09f66 feat: notify event add target info (#2137)
* fix: tpl center update (#2125)
* put target into alert cur event (#2128)

---------

Co-authored-by: Xu Bin <140785332+Reditiny@users.noreply.github.com>
Co-authored-by: flashbo <36443248+lwb0214@users.noreply.github.com>
2024-08-30 16:28:31 +08:00
710leo
c98c1d3b90 docs: update sql 2024-08-30 16:19:50 +08:00
710leo
ae3218e6d5 fix: target query api 2024-08-30 11:27:16 +08:00
Yening Qin
7497cc0f28 refactor: update alert rule clone api (#2126) 2024-08-28 20:15:25 +08:00
710leo
96c4cc7c98 refactor: import prom rule api 2024-08-28 15:54:39 +08:00
710leo
1f7314f6b4 refactor: sub rule event enable run notify script 2024-08-27 15:48:09 +08:00
yangkaa
86d478a0d4 fix: update notify template failed (#2117)
Signed-off-by: yangk <yangk@goodrain.com>
2024-08-27 11:26:05 +08:00
710leo
b45023630f merge main 2024-08-27 11:14:58 +08:00
710leo
2177049487 refactor: update target tag api 2024-08-27 11:14:20 +08:00
shardingHe
d3d1e7019f docs: update jvm dashboards (SpringBoot Actuator) (#2121) 2024-08-27 10:46:02 +08:00
710leo
f2ad0b9594 refactor: update targets api 2024-08-26 20:34:04 +08:00
Yening Qin
9c79233b3c feat: target add host tags (#2120)
* update target tags (#2091)

---------

Co-authored-by: flashbo <36443248+lwb0214@users.noreply.github.com>
2024-08-26 19:41:38 +08:00
710leo
9ea5de1257 refactor: users get api 2024-08-26 11:56:57 +08:00
ning
3ec97665ac refactor: set e.FirstTriggerTime 2024-08-22 20:18:19 +08:00
ning
bb4eeca2ab code refactor 2024-08-22 15:38:47 +08:00
ning
cc6a5be27f Merge branch 'main' of github.com:ccfos/nightingale 2024-08-22 15:02:32 +08:00
ning
630df8a954 fix: recover event when prom_for_duration is 0 2024-08-22 15:02:26 +08:00
ning
e28ab6368b fix: recover event when prom_for_duration is 0 2024-08-22 14:30:30 +08:00
ulricqin
751c78be4b Update a-n9e.sql 2024-08-22 10:01:04 +08:00
Xu Bin
5311bf90d5 feat: trigger set support operation (#2107) 2024-08-21 20:16:31 +08:00
ning
c464689c6a Merge branch 'main' of github.com:ccfos/nightingale 2024-08-21 19:56:10 +08:00
ning
442426be38 fix: event first trigger time 2024-08-21 19:55:56 +08:00
Yening Qin
9a28139d43 refactor: hostmeta add config (#2112) 2024-08-20 15:26:13 +08:00
710leo
25b768188f refactor: matchTag add strings.TrimSpace 2024-08-19 21:46:22 +08:00
ning
b794b62960 refactor: RecoverAlertCurEventFromDb 2024-08-19 17:33:40 +08:00
flashbo
d7e00a5a49 refactor: import promethues alert rule (improve api) (#2104) 2024-08-16 11:23:53 +08:00
Xu Bin
19e6cfe7d2 feat: generic are supported in alert rule calculation formulas (#2097) 2024-08-15 17:26:24 +08:00
shardingHe
63baa7b6f3 docs: remove process-exporter alerts & dashboards (#2102)
Co-authored-by: shardingHe <wangzihe@flashcat.cloud>
2024-08-15 16:57:41 +08:00
Vicla
407fc90677 update readme_en (#2099) 2024-08-14 12:20:11 +08:00
Dan218
7da4c99d92 feat: Provide Target.AfterFind to automatically calculated fields for Target (#2073) 2024-08-13 19:16:40 +08:00
Xu Bin
6b46e7e83f feat: support clone rules by idents (#2095) 2024-08-13 19:12:31 +08:00
ning
514ccd5f90 Merge branch 'main' of github.com:ccfos/nightingale 2024-08-13 15:11:52 +08:00
ning
4565b80717 fix: edge delete event 2024-08-13 15:11:40 +08:00
Ulric Qin
2bac6588c4 Merge branch 'main' of github.com:ccfos/nightingale 2024-08-13 11:25:42 +08:00
Ulric Qin
fc293cb01c use node_uname_info as dashboard var filter 2024-08-13 11:25:30 +08:00
ning
73f9548242 Merge branch 'main' of github.com:ccfos/nightingale 2024-08-13 10:59:39 +08:00
ning
7c91e51c08 fix: edge record notify 2024-08-13 10:59:17 +08:00
qinguoyi
a4867c406d fix:get configcache return nil need exit (#2094) 2024-08-12 19:52:22 +08:00
qinguoyi
bfea83ae75 fix: alert_cur_event return unmarshal json err (#2090) 2024-08-12 13:06:58 +08:00
Yening Qin
7a2832c377 fix: process recover duration (#2092) 2024-08-12 13:04:08 +08:00
ning
3f6c54a712 refactor: subscribe not run ibex and script 2024-08-09 17:42:48 +08:00
Yening Qin
1bb590ce6d feat: support record event notify detail (#2088)
* feat: record alert notification (#2045)

* record notification

---------

Co-authored-by: wenbo <bupt.lwb@gmail.com>
Co-authored-by: wenbo <1027758873@qq.com>
2024-08-09 17:06:49 +08:00
ning
656326458f refactor: event add task tpl name 2024-08-08 22:41:51 +08:00
Yening Qin
c6ab3ad2b3 feat: alert rule support import promethues alert rule (#2080) (#2085)
Co-authored-by: flashbo <36443248+lwb0214@users.noreply.github.com>
2024-08-07 16:05:10 +08:00
Yening Qin
d050cf72e9 Update elasticsearch_by_categraf.json 2024-08-06 16:42:49 +08:00
ning
084cc1893e docs: update migrate sql 2024-08-06 14:54:40 +08:00
Yening Qin
cd01123b59 feat: alert rule support add task tpl (#2079) 2024-08-05 17:54:23 +08:00
ning
23ce84d41c refactor: optimize event relabel process 2024-08-05 11:43:21 +08:00
Yening Qin
4764cc2419 feat: alert rule batch update support annotations (#2074)
* feat: batch update annotation (#2072)

* fix: annotations_del

---------

Co-authored-by: Xu Bin <140785332+Reditiny@users.noreply.github.com>
2024-08-02 13:05:11 +08:00
ning
da66401576 docs: update tpl doc 2024-08-02 11:17:53 +08:00
ning
0024c9d99c docs: add migrate sql 2024-08-01 18:56:16 +08:00
flashbo
96d3b48f10 feat: target add os type (#2071) 2024-08-01 17:08:59 +08:00
Yening Qin
6a0e7a810f refactor: webhook notify support batch send events (#2070) 2024-08-01 15:25:39 +08:00
Yening Qin
5b2513b7a1 feat: support lark and larkcard notify channel (#2061)
* feat: support lark notify channel (#2056)

Co-authored-by: flashbo <36443248+lwb0214@users.noreply.github.com>
Co-authored-by: wenbo <1027758873@qq.com>
2024-07-27 21:21:43 +08:00
ning
7cec16eaf0 update center router init 2024-07-26 14:59:40 +08:00
ning
17dbb3ec77 code refactor 2024-07-25 12:06:10 +08:00
ning
00822c8404 refactor: add ibex enable check 2024-07-25 11:39:43 +08:00
ning
55de30d6c7 refactor: update mute rule api 2024-07-24 11:37:38 +08:00
Yening Qin
8b7dbed27e refactor: modify heartbeat api (#2051) 2024-07-24 11:23:56 +08:00
Dan218
71b8fa27d0 feat: Provide optional style for buildTargetWhere (#2038) 2024-07-24 11:12:17 +08:00
ning
31174d719e refactor: event relabel 2024-07-22 11:45:17 +08:00
ning
5b5bb22ffd fix: event relable process tagsmap 2024-07-22 10:46:29 +08:00
ning
e98fe9ea2e refactor: HandleTSFunc 2024-07-21 15:28:06 +08:00
ning
32e9ded393 refactor: server-clusters api perm 2024-07-21 11:04:35 +08:00
ning
8293ca20be refactor: assets file support md 2024-07-18 15:07:47 +08:00
Yening Qin
6c4ddfc349 refactor: update languageDetector (#2043) 2024-07-18 14:13:48 +08:00
ning
cd0c478515 refactor: event relabel add default value 2024-07-17 22:48:50 +08:00
Yening Qin
2cd25ac0e5 fix: optimize event recovery inhibit (#2042) 2024-07-17 22:30:31 +08:00
ning
bb99ba3d1c update sql 2024-07-17 11:57:20 +08:00
Yening Qin
64405dca5d feat: alert event support relabel (#2041) 2024-07-17 10:30:29 +08:00
ulricqin
69ea9ca8f8 Update README.md 2024-07-17 09:39:00 +08:00
ulricqin
41d0f2fcda Update README.md 2024-07-17 09:36:30 +08:00
710leo
93df1c0fbc docs: add perm point 2024-07-16 23:44:30 +08:00
flashbo
86e952788d refactor: targets get api support backend sorting (#2034)
Co-authored-by: wenbo <bupt.lwb@gmail.com>
2024-07-16 23:38:04 +08:00
ning
e890f2616f refactor: change webhook sleep time 2024-07-13 14:38:32 +08:00
yanli
6c2ee584e5 refactor: MetricDesc defaults to Chinese (#2032) 2024-07-12 21:50:51 +08:00
Dan218
5f07fc3010 Feat: Add skip Verify Insecure ssl/tls in sendWebhook (#2030) 2024-07-12 10:38:33 +08:00
ning
20fa310ba9 refactor: sync team to duty 2024-07-08 17:54:59 +08:00
ning
0e3b08be9a feat: ldap support defaultTeams 2024-07-08 17:35:39 +08:00
ning
b7d971d7c8 refactor: add alert rule pure api 2024-07-08 17:10:31 +08:00
ning
4373ae7f0b code refactor 2024-07-05 10:40:27 +08:00
dependabot[bot]
053325a691 build(deps): bump golang.org/x/image from 0.13.0 to 0.18.0 (#2017) 2024-07-04 17:56:36 +08:00
ning
c54267aa3a refactor: webhook support retry 2024-07-04 17:49:16 +08:00
ning
74dc430886 add migrate sql 2024-07-04 15:45:22 +08:00
Yening Qin
dc79ee4687 feat: recording rule support cron pattern (#2025) 2024-07-04 11:23:48 +08:00
shardingHe
e154c946e6 docs: add dashboard for aliyun-mysql (#2020)
* add dashboard for aliyun-mysql

* Update mysql.json

---------

Co-authored-by: shardingHe <wangzihe@flashcat.cloud>
Co-authored-by: ulricqin <ulricqin@qq.com>
2024-07-03 11:41:50 +08:00
ning
08bfc0b388 refactor: add callbak log 2024-07-01 15:15:19 +08:00
ning
5338270aef feat: encrypt pass 2024-06-27 17:07:19 +08:00
Ulric Qin
00550ba2c7 add redis dashboard 2024-06-27 10:55:23 +08:00
Ulric Qin
c58bec23bf login fail count 2024-06-26 17:04:22 +08:00
ning
a5b77be0ab fix: recovered event id 2024-06-26 11:57:27 +08:00
Yening Qin
f529681c35 fix: embedded-dashboards api perm (#2012) 2024-06-25 18:13:13 +08:00
Ulric Qin
e3042dd6d5 Merge branch 'main' of github.com:ccfos/nightingale 2024-06-25 17:39:48 +08:00
Ulric Qin
1ebab4fcb0 add mysql dashboard 2024-06-25 17:39:15 +08:00
ning
ccf38b6da7 docs: update clickhouse integrations 2024-06-25 16:48:12 +08:00
Ulric Qin
9a0a687727 Merge branch 'main' of github.com:ccfos/nightingale 2024-06-25 16:42:59 +08:00
Ulric Qin
d00510978d add mysql dashboard 2024-06-25 16:42:40 +08:00
ning
9b478d98fd refactor: checkout heartbeat global label 2024-06-25 15:09:29 +08:00
ning
4845ca5bdb refactor: update compose sql 2024-06-22 00:33:45 +08:00
Yening Qin
a844d2b091 fix: use postgresql (#2008) 2024-06-21 18:13:58 +08:00
Ulric Qin
69ca7f3b93 validate heartbeat hostname 2024-06-21 17:51:44 +08:00
Ulric Qin
b9c6c33ceb refactor extractIdentFromTimeSeries 2024-06-21 17:43:43 +08:00
Ulric Qin
5099d3c040 add ignore_host querystring 2024-06-21 12:36:45 +08:00
Ulric Qin
e34f8ac701 Merge branch 'main' of github.com:ccfos/nightingale 2024-06-21 12:25:08 +08:00
Ulric Qin
ab82a6f910 modify ignore_ident logic 2024-06-21 12:24:54 +08:00
小炒肉
57f8bd3612 feat: callbackURL Parse Template (#2001)
Co-authored-by: zhihuanzhu <zhihuanzhu@deeproute.ai>
2024-06-19 14:46:06 +08:00
ning
8ab96e2cea refactor: add more mute log 2024-06-17 19:17:51 +08:00
ning
0a2e23c285 refactor: update users api 2024-06-17 17:09:33 +08:00
小炒肉
5c1d4077e2 fix: feishucard ats (#1997)
Co-authored-by: zhihuanzhu <zhihuanzhu@deeproute.ai>
2024-06-17 15:14:27 +08:00
Zoe
2a46d9f98e docs: add clickhouse alerts,dashboards,metrics (#1994) 2024-06-17 11:28:17 +08:00
Ulric Qin
ce5c213593 Merge branch 'main' of github.com:ccfos/nightingale 2024-06-14 19:03:01 +08:00
ning
771a8d121b refactor: change recovery event last_eval_time 2024-06-14 16:13:44 +08:00
Ulric Qin
af88b0e283 Merge branch 'main' of github.com:ccfos/nightingale 2024-06-14 15:27:24 +08:00
Ulric Qin
8e5d7f2a5b update dingtalk tpl 2024-06-14 15:27:06 +08:00
Yening Qin
1a22211a5d feat: oidc support default team (#1995) 2024-06-14 12:00:21 +08:00
Yening Qin
0a0049c6fb feat: callback support send event to im and remove alert subrule callback resend (#1992)
* feat: Callback operation adds IM connection function (#1984)

* refactor: change alert sub callback

---------

Co-authored-by: Yang Zhiyan <101268302+Yziyan@users.noreply.github.com>
2024-06-14 00:38:02 +08:00
Ulric Qin
1b56ebe62e Merge branch 'main' of github.com:ccfos/nightingale 2024-06-13 15:54:39 +08:00
Ulric Qin
a5e92b95b0 add link in github issue template 2024-06-13 15:54:24 +08:00
ulricqin
8e9d06d43e Update README.md 2024-06-13 15:03:04 +08:00
Ulric Qin
ab289de785 update github issue template 2024-06-13 12:32:47 +08:00
Ulric Qin
8667b7743a Merge branch 'main' of github.com:ccfos/nightingale 2024-06-13 12:21:10 +08:00
Ulric Qin
45b9436f69 update github issue template 2024-06-13 12:20:56 +08:00
ning
3d03bcf329 docs: add perm point 2024-06-11 19:10:04 +08:00
ning
1851601889 refactor: get usergroups service api 2024-06-07 20:07:13 +08:00
ning
fa9745decf refactor: update event api 2024-06-06 17:55:08 +08:00
ning
6f007deeaa refactor: change get list api 2024-06-06 16:42:17 +08:00
ning
8fad705065 fix: edge alert use ibex 2024-06-06 16:07:18 +08:00
ning
675076779e refactor: ibex migrate add charset 2024-06-06 12:13:22 +08:00
710leo
b9e78eee22 docs: change action 2024-06-05 22:26:26 +08:00
710leo
2219584abb docs: change action 2024-06-05 22:16:42 +08:00
710leo
ebe31fd6bc docs: change action 2024-06-05 22:12:12 +08:00
nīng
95ca69e170 docs: change action 2024-06-05 22:04:56 +08:00
nīng
ef1b5d8d16 docs: change action 2024-06-05 21:51:23 +08:00
ning
5b375cf037 docs: change action 2024-06-05 19:42:39 +08:00
ning
108b729cae Merge branch 'main' of github.com:ccfos/nightingale 2024-06-05 18:11:44 +08:00
ning
a385972fa9 refactor: add some i18n 2024-06-05 18:11:31 +08:00
yuweizzz
98a0a9d94c feat: support sqlite (#1978)
* demo sqlite
2024-06-05 17:28:56 +08:00
ning
c79eec648d fix: n9e-edge ibex 2024-06-05 17:12:52 +08:00
Yening Qin
603eadd1f2 feat: alert event support recovery value (#1982)
* feature: the alert response event supports query recovery values (#1975)

* refactor: rule note use

---------

Co-authored-by: Yang Zhiyan <101268302+Yziyan@users.noreply.github.com>
2024-06-05 17:01:31 +08:00
Yening Qin
61a2f552be refactor: integration init (#1981) 2024-06-05 15:14:01 +08:00
ning
e3453328a7 refactor: integration init 2024-06-03 11:54:29 +08:00
ning
4424a6b89c refactor: get event list api 2024-06-03 11:12:32 +08:00
ning
9fdb2f0753 refactor: get event list api 2024-06-03 10:53:30 +08:00
ning
3d358e367f refactor: get event list api 2024-06-03 10:47:36 +08:00
Ulric Qin
5264874628 Update automq metrics 2024-06-03 10:43:24 +08:00
Ulric Qin
e0a3ff248c update Linux integration's markdown 2024-06-03 10:24:02 +08:00
Ulric Qin
1fecf78ede update Linux alerting rules 2024-06-03 09:46:33 +08:00
Ulric Qin
839b45904b Merge branch 'main' of github.com:ccfos/nightingale 2024-06-03 09:23:08 +08:00
Ulric Qin
cd0f43f808 add Automq alerts 2024-06-03 09:22:41 +08:00
ning
8047f3deee refactor: get event api 2024-05-31 19:08:28 +08:00
ning
f209ed5bee refactor: get event api 2024-05-31 17:20:27 +08:00
Ulric Qin
8c61d8c14d Update AutoMQ dashboards 2024-05-31 15:47:43 +08:00
Ulric Qin
f7372b1c3b update AutomMQ markdown 2024-05-31 14:13:35 +08:00
Ulric Qin
a39ced86aa add markdown for automq 2024-05-31 12:15:51 +08:00
Ulric Qin
f365b7db2a Merge branch 'main' of github.com:ccfos/nightingale 2024-05-31 11:41:44 +08:00
Ulric Qin
7eaec13b6c add metrics for AutoMQ 2024-05-31 11:41:30 +08:00
ulricqin
2e824a165e Update README.md 2024-05-31 10:28:34 +08:00
ulricqin
f2909b6029 Update README.md 2024-05-31 10:27:56 +08:00
Ulric Qin
a543a5ad09 update automq dashboard: cluster_overview.json 2024-05-30 21:07:22 +08:00
Ulric Qin
2ee34bf1f9 Merge branch 'main' of github.com:ccfos/nightingale 2024-05-30 21:01:34 +08:00
Ulric Qin
4623622dd0 add Detailed metrics dashboard for Automq 2024-05-30 21:01:20 +08:00
ning
4f259137e5 Merge branch 'main' of github.com:ccfos/nightingale 2024-05-30 20:30:10 +08:00
ning
75f1e8a80b refactor: event list api 2024-05-30 20:29:56 +08:00
Ulric Qin
3648d8dc45 add Automq dashboards 2024-05-30 20:28:34 +08:00
Ulric Qin
8c90d7ab33 Merge branch 'main' of github.com:ccfos/nightingale 2024-05-30 19:59:03 +08:00
Ulric Qin
c6ac3fb959 add AutoMQ Dashboards 2024-05-30 19:58:48 +08:00
ning
ce854b3166 docs: change some sql 2024-05-30 17:54:37 +08:00
ning
a2be5230fa docs: change some sql 2024-05-30 17:53:45 +08:00
ning
21276a77b6 docs: change some sql 2024-05-30 17:52:11 +08:00
Yening Qin
cffd012ec6 feat: user add last_avtive_time (#1974) 2024-05-30 17:44:32 +08:00
ning
a9ebdad1cd docs: change sql 2024-05-30 17:36:33 +08:00
ning
785c577728 docs: change sql 2024-05-30 17:36:17 +08:00
ning
0e2a66570e fix: edge host miss alert 2024-05-30 16:57:04 +08:00
Ulric Qin
76583a6227 add automq icon 2024-05-30 16:15:12 +08:00
Yening Qin
48e0e1a9f8 feat: add integration tpl center (#1973) 2024-05-30 15:42:09 +08:00
Yang Zhiyan
17bb7fa468 feat: support event list view only by business group (#1969) 2024-05-30 15:33:43 +08:00
ulricqin
fc2638680a Update oracle_alert.json 2024-05-30 07:24:36 +08:00
ulricqin
e01a899ae1 Update README.md 2024-05-30 07:18:22 +08:00
ning
07c1ef6bd4 docs: add some sql 2024-05-28 15:37:04 +08:00
ning
bfa7059098 docs: add some sql 2024-05-28 15:31:23 +08:00
laiwei
096a2d3675 add nvidia gpu metrics dashboard 2024-05-24 14:02:14 +08:00
ning
2232733922 fix: delete target service api 2024-05-23 17:03:52 +08:00
ning
b15f638688 refactor: code format 2024-05-23 16:43:25 +08:00
ning
4f818e3642 Merge branch 'main' of github.com:ccfos/nightingale 2024-05-23 11:06:50 +08:00
ning
638c62da2f refactor: automatically generate jwt signing key 2024-05-23 11:06:33 +08:00
shardingHe
e1a9c995c2 docs: merge the metric data from metric.toml into oracle.toml (#1962)
Co-authored-by: shardingHe <wangzihe@flashcat.cloud>
2024-05-23 11:01:58 +08:00
Yang Zhiyan
1898675075 optimize: Optimize targets-related interfaces (#1961) 2024-05-23 10:27:09 +08:00
Resurgence72
ce7f0272d8 对 prometheus 2.50.0 版本引入的 NewPossibleNonCounterInfo warnings 做适配 (#1939) 2024-05-21 17:29:33 +08:00
赵尚
93159f07fd refactor: change the task time limit from 1 day to 5 days. (#1959) 2024-05-21 15:32:45 +08:00
Yening Qin
7d410baa2d refactor: recovery event support inhibit (#1958) 2024-05-20 20:35:35 +08:00
Ulric Qin
20b30c3e2c update ping metrics 2024-05-20 10:26:18 +08:00
Ulric Qin
8805bf6598 fix typo of logout router 2024-05-20 10:23:08 +08:00
Ulric Qin
fe6a64dae8 Merge branch 'main' of github.com:ccfos/nightingale 2024-05-20 10:22:39 +08:00
Ulric Qin
2c564a2c58 add cdn metrics doc 2024-05-20 10:22:16 +08:00
ulricqin
ae3c13224d Update host_generic_categraf.json 2024-05-17 10:52:22 +08:00
ulricqin
9a4015f13f Update host_generic_categraf.json 2024-05-16 17:46:48 +08:00
Yening Qin
274ca09551 Update switch branch.json 2024-05-16 13:44:08 +08:00
ulricqin
3d9b4fc14e Update categraf-procstat.json 2024-05-16 11:38:53 +08:00
ning
07436a5e0d refactor: change event order 2024-05-16 11:15:18 +08:00
ning
f7b2f1acb9 refactor: change event order 2024-05-16 11:08:35 +08:00
ning
4f4287030a docs: update snmp board tpl 2024-05-15 22:19:14 +08:00
ning
e25e712c48 refactor: change boards clone api 2024-05-15 22:16:16 +08:00
ning
66951d7e77 refactor: change boards clone api 2024-05-14 14:24:32 +08:00
ulricqin
f5ff27cd18 Create host_generic_categraf.json 2024-05-13 18:03:54 +08:00
ning
9e3f6e6285 refactor: add create user verify 2024-05-13 17:25:08 +08:00
ning
48e3df2cb4 refactor: new ldap conn 2024-05-13 17:07:18 +08:00
Yening Qin
ac5d69dba4 feat: ldap support role mapping (#1948)
* feature: LDAP implements role mapping capabilities (#1932)

* feature: Implement the team mapping function (#1934)

* refactor: ldap login add timeout

---------

Co-authored-by: Ciusyan <101268302+Yziyan@users.noreply.github.com>
Co-authored-by: ciusyan <yangzhiyan_i@didiglobal.com>
2024-05-13 16:56:19 +08:00
Ulric Qin
597351c424 code refactor 2024-05-13 10:39:27 +08:00
Ulric Qin
1f6b2e341a update README 2024-05-13 10:28:19 +08:00
ulricqin
035752ace2 Update README.md 2024-05-13 10:15:55 +08:00
ulricqin
60a1437207 Update README.md 2024-05-13 10:12:58 +08:00
ulricqin
e31414bc8c Update README.md 2024-05-13 10:12:08 +08:00
ning
785a294845 refactor: update event.TriggerValue 2024-05-11 11:17:38 +08:00
ning
98933eee34 docs: update sql 2024-05-10 16:32:05 +08:00
ulricqin
20905810d7 Delete integrations/Netstat/metrics directory 2024-05-10 15:30:08 +08:00
ulricqin
c1bde83639 Delete integrations/Kernel_Vmstat/metrics directory 2024-05-10 15:29:30 +08:00
ulricqin
782a0e9616 Delete integrations/Processes/metrics directory 2024-05-10 15:28:50 +08:00
ning
6a3720bc8b docs: update ops 2024-05-10 14:19:49 +08:00
ning
de252359d6 Merge branch 'main' of github.com:ccfos/nightingale 2024-05-10 14:11:54 +08:00
ning
deb313ca3d refactor: change server and server clusters routes to include permission check 2024-05-10 14:11:32 +08:00
yang xiaokai
d119de56be docs: creating SNMP monitoring for Cisco like switches (#1945)
* Add files via upload

* Add files via upload

* Update and rename DCN.toml to Cisco.toml

---------

Co-authored-by: Yening Qin <710leo@gmail.com>
2024-05-10 13:27:24 +08:00
tuogege
f05417fa23 docs: fix wrong table name about 'WriteRelabels' (#1942) 2024-05-10 11:56:28 +08:00
ning
9ab2eb591f docs: update integration 2024-05-10 10:55:22 +08:00
Yening Qin
3f476d770f feat: add builtin metrics (#1944) 2024-05-10 10:41:51 +08:00
ning
ced6759686 Merge branch 'main' of github.com:ccfos/nightingale 2024-05-09 16:48:43 +08:00
ning
eba3014c59 fix: alert engine rebuild hash 2024-05-09 16:48:29 +08:00
ulricqin
3aeb4e16e9 Update webhook.go. Refactor Host header settings 2024-05-09 15:54:37 +08:00
ning
3b62722251 refactor: change server and server clusters routes to include permission check 2024-05-09 11:42:09 +08:00
shardingHe
fb1cc4868e feat: add user variable for sso(decrypted). (#1936)
* add user variable for sso(decrypted).
---------

Co-authored-by: shardingHe <wangzihe@flashcat.cloud>
2024-05-08 14:10:25 +08:00
ning
4a0dcf0dbf refactor: add forbidden status check for empty group id list 2024-05-08 11:59:13 +08:00
ning
4f913f146e Remove table prefix from all config files for consistency 2024-05-07 21:04:12 +08:00
Ulric Qin
533560f432 Merge branch 'main' of github.com:ccfos/nightingale 2024-05-07 20:03:52 +08:00
Ulric Qin
cf7b479a1b update integration metrics 2024-05-07 20:01:41 +08:00
Yening Qin
2e4c29a0de docs: delete integrations/Ping/collect/ping2.toml 2024-05-07 12:21:43 +08:00
Ulric Qin
6f0ceb94c6 Merge branch 'main' of github.com:ccfos/nightingale 2024-04-29 16:57:08 +08:00
Ulric Qin
800d7ba04b update integrations metrics 2024-04-29 16:56:56 +08:00
Thomas Zhao
fb6a6d2b93 Resolve a problem of pushgw WriteRrelabel not working actually (#1928)
* fix: timeSeries is not updated after relabeled

* fix: GaugeSampleQueueSize defined but not registered

---------

Co-authored-by: zhaotuo <zhaotuo@mail.jj.cn>
2024-04-29 10:22:17 +08:00
ning
cf2b19ae90 Update the size of callbacks and runbook_url columns to varchar(4096) in the alert_rule table 2024-04-28 11:56:52 +08:00
Ulric Qin
fb1cc93613 Merge branch 'main' of github.com:ccfos/nightingale 2024-04-26 19:27:07 +08:00
Ulric Qin
c2bba796c2 add some integrations metrics 2024-04-26 19:26:53 +08:00
ning
a02bf83842 fix: query busigroup error by checking if t is not nil before accessing its GroupId property 2024-04-25 17:18:50 +08:00
ning
cd9f129e2d docs: remove memory metric reference from dashboard descriptions 2024-04-22 17:30:03 +08:00
dependabot[bot]
e85c80bdcf build(deps): bump golang.org/x/net from 0.17.0 to 0.23.0 (#1921)
Bumps [golang.org/x/net](https://github.com/golang/net) from 0.17.0 to 0.23.0.
- [Commits](https://github.com/golang/net/compare/v0.17.0...v0.23.0)

---
updated-dependencies:
- dependency-name: golang.org/x/net
  dependency-type: indirect
...

Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
2024-04-22 10:52:44 +08:00
Ciusyan
7e83e0c482 Add fields to the users interface and fix email sending bugs (#1919)
* fix: SMTP configuration error may not exit the current sending email goroutine

* optimize: Add user information fields

---------

Co-authored-by: ciusyan <yangzhiyan_i@didiglobal.com>
2024-04-22 10:45:23 +08:00
ning
92ac3125f3 refactor: change ibex version 2024-04-19 00:05:20 +08:00
ning
a61feca369 Merge branch 'main' of github.com:ccfos/nightingale 2024-04-17 17:32:06 +08:00
ning
8b0b811919 Refactor redundant code for initializing Redis storage in alert/alert.go and cmd/edge/edge.go 2024-04-17 17:31:53 +08:00
Ulric Qin
8742526c7f check configDir exists 2024-04-17 17:29:49 +08:00
ning
ee757cfd92 Enable PProf for profiling and monitoring in config.toml 2024-04-17 16:54:00 +08:00
ulricqin
b12cfea379 Update edge.toml 2024-04-17 16:34:27 +08:00
kongfei605
45365e3e03 chore: update dashboards for ipmi (#1913) 2024-04-16 19:44:08 +08:00
kongfei605
1b676eefd2 Merge pull request #1912 from shardingHe/sync_dashboard_impi_and_sqlserver
docs: sync ipmi & sqlserver dashboards
2024-04-16 13:36:32 +08:00
shardingHe
0092dc44fd sync ipmi & sqlserver dashboards
update ipmi collect config
2024-04-16 13:05:12 +08:00
shardingHe
4941b376f3 feat: update buildIn dashboard custom (#1911) 2024-04-12 15:06:31 +08:00
ulricqin
e46813cd17 Update docker-compose.yaml 2024-04-12 10:02:48 +08:00
ning
58ebd224c2 refactor: change datasource api 2024-04-09 15:06:56 +08:00
ning
95ece6e16f refactor: update endpoint for deleting a datasource to remove unnecessary trailing slash 2024-04-09 15:04:38 +08:00
ning
b82cbd06fa merge main 2024-04-09 14:23:10 +08:00
ning
16210892da docs: change dockerfile 2024-04-09 14:10:52 +08:00
Ulric Qin
a452d63a56 Merge branch 'main' of github.com:ccfos/nightingale 2024-04-09 13:53:14 +08:00
Ulric Qin
51c7abedd3 Delete Dockerfile 2024-04-09 13:52:59 +08:00
ning
6d0a2420a8 Merge branch 'main' of github.com:ccfos/nightingale 2024-04-09 10:25:26 +08:00
ning
9cf687b73d fix: ldap user login info sync 2024-04-09 10:25:10 +08:00
ulricqin
49c9e41df5 Update host_table_view_demo.json 2024-04-08 16:01:31 +08:00
ning
2ec2e64213 refactor: Remove redundant DB2FE function from models 2024-04-08 15:29:19 +08:00
ning
867a61c8dc docs: change docker compose config 2024-04-07 19:36:36 +08:00
HongKuang
12263d1453 chore: fix function name in comment (#1905)
Signed-off-by: hongkuang <liurenhong@outlook.com>
2024-04-07 18:50:06 +08:00
Yening Qin
c0cacb2e64 refactor: change docker compose (#1906)
* update init sql

* change compose config
2024-04-07 18:49:32 +08:00
ning
0637b343b1 refactor: update ibex version 2024-04-06 23:21:10 +08:00
ning
2473e144ef refactor: update ibex version 2024-04-06 23:19:38 +08:00
Yening Qin
00a37d6de7 feat: Integration ibex (#1904)
* Ibex integrate (#1876)

---------

Co-authored-by: Deke Wang <94156972+wdkcc@users.noreply.github.com>
2024-04-06 22:02:07 +08:00
ning
50c664e6bf code refactor 2024-04-03 15:37:31 +08:00
Yening Qin
22b7d20455 refactor: sync user info to duty (#1903) 2024-04-02 21:49:32 +08:00
ning
141262e5a5 refactor: datasource update 2024-03-31 15:16:40 +08:00
ning
4717abfa77 update built-in rule 2024-03-26 15:43:18 +08:00
ning
1bf1a01c32 Merge branch 'main' of github.com:ccfos/nightingale 2024-03-21 16:38:35 +08:00
ning
05b714de38 fix: cas user login 2024-03-21 16:38:22 +08:00
xtan
11377d4e5f docs: docker remove ops.yaml (#1894) 2024-03-20 10:43:08 +08:00
ning
46ea46fdfe docs: update edge.toml 2024-03-19 17:46:32 +08:00
Yening Qin
d4f0483238 feat: ldap support sycn user and oidc suport logout (#1893)
* ldap user sync (#1858)

---------

Co-authored-by: Deke Wang <94156972+wdkcc@users.noreply.github.com>
2024-03-19 17:19:08 +08:00
ning
a79610f5ea fix: go mod deps 2024-03-19 15:29:27 +08:00
dependabot[bot]
d9fb71b9a0 build(deps): bump google.golang.org/protobuf from 1.31.0 to 1.33.0 (#1885)
Bumps google.golang.org/protobuf from 1.31.0 to 1.33.0.

---
updated-dependencies:
- dependency-name: google.golang.org/protobuf
  dependency-type: indirect
...

Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
2024-03-19 15:25:35 +08:00
dependabot[bot]
37057fa0cf build(deps): bump github.com/jackc/pgproto3/v2 from 2.3.1 to 2.3.3 (#1886)
Bumps [github.com/jackc/pgproto3/v2](https://github.com/jackc/pgproto3) from 2.3.1 to 2.3.3.
- [Commits](https://github.com/jackc/pgproto3/compare/v2.3.1...v2.3.3)

---
updated-dependencies:
- dependency-name: github.com/jackc/pgproto3/v2
  dependency-type: indirect
...

Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
2024-03-19 15:25:26 +08:00
yingjun
b234128a45 fix: typo error (#1891) 2024-03-19 15:25:06 +08:00
Yening Qin
67a2d57966 refactor: optimize alert mute and some config (#1892)
* Refactor alert mute and recording rule retrieval to handle cases where group IDs are empty in the current session

* Update `tags` column default value to '[]' in `alert_mute` table and `AlertMute` struct

* Update gids variable to handle empty query parameter in alertMuteGetsByGids and recordingRuleGetsByGids functions

* Refactor alertMuteGet function to include database to frontend transformation

* change mute datasource

* Refactor AlertSubscribeGetsByBGIds function to handle empty bgids parameter gracefully

* Refactor taskTplTotal and taskTplGets functions to allow filtering by groupIds when provided, enhancing flexibility and optimization

* Refactor taskGetsByGids to handle empty gids and non-admin user cases more efficiently
2024-03-19 15:24:24 +08:00
Ulric Qin
3a1516877e code refactor 2024-03-19 09:55:11 +08:00
Ulric Qin
53f31d175f code refactor 2024-03-19 09:54:24 +08:00
kongfei605
25323e9ce2 update dashboards for springboot (#1867)
* update dashboards for springboot

* update dashboard for springboot
2024-03-13 23:58:23 +08:00
shardingHe
3136596add docs: update aliyun dashboards (#1884)
* remove invalid dashboard for aliyun

* update dashboard for aliyun

* update dashboard for aliyun

---------

Co-authored-by: shardingHe <wangzihe@flashcat.cloud>
2024-03-13 23:57:45 +08:00
ning
e7200b0b23 refactor: compatible mute rule time period 00:00-23:59 2024-03-12 14:35:01 +08:00
shardingHe
dfb19c1dde docs: remove invalid dashboard for aliyun (#1881)
Co-authored-by: shardingHe <wangzihe@flashcat.cloud>
2024-03-08 10:23:36 +08:00
ning
2363b35263 docs: remove rsa config 2024-03-08 10:18:25 +08:00
shardingHe
99367aaf88 fix: sync smtp config (#1879)
Co-authored-by: shardingHe <wangzihe@flashcat.cloud>
2024-03-07 10:26:07 +08:00
xtan
ad17ef328f docs: fix pg docker config (#1874) 2024-03-05 21:16:53 +08:00
ning
5f149f6a38 Merge branch 'main' of github.com:ccfos/nightingale 2024-03-03 21:50:27 +08:00
ning
73ed57301b add iamleader 2024-03-03 21:49:45 +08:00
Ulric Qin
138b929db4 Merge branch 'main' of github.com:ccfos/nightingale 2024-03-01 18:27:10 +08:00
Ulric Qin
4585e94cd1 set DatasourceIdsJson to []int64{} if it is nil 2024-03-01 18:26:56 +08:00
ning
69ad6344f5 refactor: add ldap login log 2024-02-29 18:13:05 +08:00
Ulric Qin
a55665bd14 fix dash 2024-02-28 12:16:24 +08:00
Ulric Qin
b5e2053b0c Merge branch 'main' of github.com:ccfos/nightingale 2024-02-28 10:34:15 +08:00
Ulric Qin
94265eab9f add rsa configurations 2024-02-28 10:33:46 +08:00
ning
eb79d473b0 fix sync sso config 2024-02-27 12:00:53 +08:00
ning
c4e0a9962f fix sync sso config 2024-02-27 11:50:53 +08:00
shardingHe
ee613616ca docs: add a new IPMI dashboard for version v0.3.44-pre and subsequent versions. (#1869)
Co-authored-by: shardingHe <wangzihe@flashcat.cloud>
2024-02-23 22:36:19 +08:00
shardingHe
6bbf00c371 docs: update sqlserver config (#1868)
Co-authored-by: shardingHe <wangzihe@flashcat.cloud>
2024-02-23 22:35:26 +08:00
ulricqin
f9f45d315d Update edge.toml 2024-02-22 10:31:57 +08:00
Yening Qin
84f215b7f1 refactor: handle event (#1866) 2024-02-22 09:46:37 +08:00
ning
016220bb2a refactor: change notify debug log 2024-02-21 19:07:53 +08:00
ning
ba1eb73ace refactor: add more notify debug log 2024-02-21 18:44:54 +08:00
Yening Qin
b304091fb3 fix: handle host recovery event (#1865) 2024-02-21 16:14:57 +08:00
Ulric Qin
840eaea667 Merge branch 'main' of github.com:ccfos/nightingale 2024-02-20 15:18:00 +08:00
Ulric Qin
956cc9fd68 update integrations icon 2024-02-20 15:17:46 +08:00
ning
e78e212f83 replace parser pkg 2024-02-20 11:11:44 +08:00
Ulric Qin
cdc2d4c039 update integrations icon 2024-02-18 21:02:19 +08:00
Ulric Qin
cd4b0c4f94 update integrations icon 2024-02-18 20:43:16 +08:00
Deke Wang
53ada6cc40 feat: board batch clone (#1861)
* feat: sync user in ldap to mysql

* feat:board batch clone

* Revert "feat: sync user in ldap to mysql"

This reverts commit 6063c34f0e.

* fix: use transactions to store board and payload

* fix: the busigroup is incorrectly specified

* chore: adjust import order of pkg

* refactor: batch clone the board codes

* fix: set value='' in reterr if err==nil

* refactor: move AtomicAdd to board.go

* chore: adjust import order of pkg

* chore: adjust import order of pkg
2024-02-18 10:03:08 +08:00
ning
2e6cb0f21d update windows_by_categraf dashboards 2024-02-07 16:41:23 +08:00
laiwei
4287591a6b update n9e readme 2024-02-06 18:57:13 +08:00
laiwei
2fe0c21e36 update n9e readme 2024-02-06 18:55:46 +08:00
ning
bfa043aeba refactor: optimize trigger values 2024-02-01 23:43:03 +08:00
ning
f4336ca5e9 refactor alert process 2024-02-01 22:47:48 +08:00
ning
8125cb7090 refactor alert process 2024-02-01 22:25:53 +08:00
ning
0ae1e7fbc4 refactor: auto remove ops.yaml 2024-01-31 18:57:42 +08:00
Yening Qin
88f8111a56 refactor: alert eval (#1855)
* refactor
2024-01-30 18:08:37 +08:00
ning
dbfaa519ba fix: edge query db panic 2024-01-30 11:13:12 +08:00
ning
402e803146 Merge branch 'main' of github.com:ccfos/nightingale 2024-01-30 11:09:57 +08:00
Ulric Qin
5eae14a3c9 add default settings: ForceUseServerTS = true 2024-01-30 09:54:25 +08:00
Ulric Qin
e0bfc45f5a add default configurations: ForceUseServerTS 2024-01-30 09:53:19 +08:00
Ulric Qin
7d8fb7aab7 use fasttime instead of time.Now.Unix 2024-01-30 09:50:24 +08:00
Deke Wang
846ef00aed fix: Compose host network metric log (#1852)
* fix: The status of the log is all info in compose-host-network-metric-log

* fix: set KAFKA_CFG_MESSAGE_MAX_BYTES to avoid null value error

* chore: keep the same span

---------
Co-authored-by: wdk <wdk_cc@163.com>
2024-01-29 19:01:55 +08:00
ulricqin
f2f730e88c Update linux_by_categraf.json 2024-01-29 12:09:36 +08:00
ulricqin
311a9405e4 Update bug_report.yml 2024-01-29 07:33:01 +08:00
Yening Qin
6c53981883 refactor: optimize sync user to duty (#1850)
* fix: usergroup del

* refactor user to duty

* code refactor
2024-01-26 17:01:41 +08:00
ning
f23f960368 fix: usergroup del 2024-01-26 12:59:43 +08:00
ulricqin
f593c6d310 fix migrate sql: task_records 2024-01-25 09:17:42 +08:00
ning
3fb5ea96bc chage IdentDropThreshold 2024-01-24 19:28:01 +08:00
ning
30c697a3df refactor: flashtudy sycn team 2024-01-24 14:59:26 +08:00
ning
1d50d05329 fix: sync user to flashtudy 2024-01-19 20:35:36 +08:00
Yening Qin
840221d9ec feat: auto drop metrics by ident threshold (#1845)
* auto drop data by ident

* refactor drop ident
2024-01-19 19:01:13 +08:00
Deke Wang
e52a76921f feat: sync user and user_group to flashduty (#1842)
* fix build event

* fix:  append labels

* Add the function of batch subscription alert rules (#1825)

* add: docker-compose files for logs processing

* update: set restart:always

* fix: compose-host-network-metric-log

* update: regularize

* add: batch subscription

* add: sql columns for rule_ids and rule_names

* add: add migrate of AlertSubscribe

* update: Remove redundant codes

* fix: The question of 1821

* fix: Optimized for getting rule_ids and rule_names

* fix: error handle

* fix: add rule_ids for update api

* fix: Clear the rule_id to zero when updating

* refactor: Compatible with old rule_id

* refactor: rename

* fix: set rule_id=0 when updating subscription rules

---------

Co-authored-by: wdk <wdk_cc@163.com>

* feat: sync user and team to flashduty

* fix: sync to flashduty

* fix: failed to update team change to flashduty

* fix: sync default user when create team

* chore: delete the generated binary file

* refactor: user_group refact

* fix: func AddUsers(fdConf *cconf.FlashDuty, appKey string, users []User) error {

* fix: remove sync for user in router

* fix: user_grroup no change in n9e when put user_group

* chore: set default api_url=https://api.flashcat.cloud

* chore: refactor user_group

* chore: refact codes

* chore: set api=https://jira.flashcat.cloud/api for test

* chore: set api=https://api.flashcat.cloud

* chore: adjust the import order

* chore: remove excess code

* chore: refact codes

* chore: remove excess codes

* chore: adjust import order

* chore: adjust import order

* chore: adjust import order

* chore: refact code

* chore: optimized codes

* code refactor

* chore: remove excess code

---------

Co-authored-by: ning <710leo@gmail.com>
Co-authored-by: wdk <wdk_cc@163.com>
2024-01-19 18:20:00 +08:00
ning
80fdb37129 code refactor 2024-01-18 18:52:52 +08:00
ning
bbef4aa8d9 refactor datasource api 2024-01-18 12:01:34 +08:00
ning
35eba3b1e1 refactor: datasource api 2024-01-17 17:20:11 +08:00
Yening Qin
28a1230d26 refactor:host alert in edge (#1838)
* refactor update ident

* target add engine name

* refactor: heartbeat hash ring

* target update_ts to redis

* add log

* refactor GetHostUpdateTime

* update heartbeat

* add targets-of-alert-rule

* fix recovery
2024-01-17 16:04:37 +08:00
Yening Qin
86dd6a9608 feat: support subscribe multi alert rules (#1834)
* add: batch subscription

* add: sql columns for rule_ids and rule_names

* add: add migrate of AlertSubscribe

* update: Remove redundant codes

* fix: The question of 1821

* fix: Optimized for getting rule_ids and rule_names

* fix: error handle

* fix: add rule_ids for update api

* fix: Clear the rule_id to zero when updating

* refactor: Compatible with old rule_id

* refactor: rename

* fix: set rule_id=0 when updating subscription rules

---------

Co-authored-by: wdk <wdk_cc@163.com>

* refactor: remove prod and cate in alert mute (#1828)

* add: remove monitor type in alter mute

* chore: remove prod in alter mute

* chore: remove cate in alter mute

---------

Co-authored-by: wdk <wdk_cc@163.com>

---------

Co-authored-by: Deke Wang <94156972+wdkcc@users.noreply.github.com>
Co-authored-by: wdk <wdk_cc@163.com>
2024-01-12 20:01:57 +08:00
真小明同学
f7a40b7324 fix: remove stats code that could cause panic (#1831)
Co-authored-by: xem <xemxx@qq.com>
2024-01-11 10:49:49 +08:00
laiwei
e2e8eb837d make readme zh as default 2024-01-10 11:37:20 +08:00
Yening Qin
020f7ae07e fix: extractIdentFromTimeSeries append labels (#1824)
* fix:  append labels
2024-01-08 19:56:44 +08:00
Yening Qin
8311667930 refactor: share board (#1822)
* support board-share
2024-01-08 13:07:08 +08:00
Deke Wang
741ab94150 docs: add docker-compose files for logs processing (#1819)
* add: docker-compose files for logs processing

* update: set restart:always

* fix: compose-host-network-metric-log

* update: regularize

* fix: categraf will panic when it starts

* fix: add WAIT_HOSTS of kafka for categraf and set es index=n9e-y.m.d

---------

Co-authored-by: wdk <wdk_cc@163.com>
2024-01-04 19:23:05 +08:00
Yening Qin
5d6ca183be fix build event (#1817) 2024-01-04 16:49:07 +08:00
ning
0f937ad6d0 refactor: event trigger 2023-12-28 17:48:33 +08:00
ning
ab38f220f7 feat:add my alert rule callbacks api 2023-12-27 17:45:29 +08:00
Yening Qin
a8c0b3bfd5 refactor: optimize oidc get user info (#1811)
* update oidc
2023-12-27 16:28:27 +08:00
shardingHe
5d1629bf0b docs: add oracle alert rule template (#1806)
Co-authored-by: shardingHe <wangzihe@flashcat.cloud>
2023-12-27 16:13:21 +08:00
ning
da7fa40c70 refactor: extract ident 2023-12-27 10:59:49 +08:00
ning
f1f0ee193f refactor: update var config api 2023-12-26 19:34:52 +08:00
ning
deccccead0 refactor: add target GetTagsMap() 2023-12-25 11:59:58 +08:00
ning
47b4464ad8 update migrate.sql 2023-12-22 17:31:05 +08:00
liooooo
3cf4a2edc1 fix:error variable incorrect invocation,it needs decryptErr but got err,it will cause error message loss and while err == nil & decryptErr != nil the program will panic (#1802) 2023-12-21 23:17:12 +08:00
Yening Qin
350f3a66dd feat: host support extra meta (#1804) 2023-12-21 23:14:30 +08:00
dependabot[bot]
f8edcabb05 build(deps): bump golang.org/x/crypto from 0.14.0 to 0.17.0 (#1798)
Bumps [golang.org/x/crypto](https://github.com/golang/crypto) from 0.14.0 to 0.17.0.
- [Commits](https://github.com/golang/crypto/compare/v0.14.0...v0.17.0)

---
updated-dependencies:
- dependency-name: golang.org/x/crypto
  dependency-type: indirect
...

Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
2023-12-21 14:18:58 +08:00
ning
00cafc613d add user service api 2023-12-20 21:01:38 +08:00
Yening Qin
8c614dc8a1 add /targets/bind perm & fix panic (#1801)
* fix panic

* add /targets/bind
2023-12-19 20:50:30 +08:00
ning
216c9d8852 feat: add single alert rule service api 2023-12-14 20:15:38 +08:00
ning
741e3eb89b fix: build 2023-12-14 15:18:04 +08:00
dependabot[bot]
bc06684694 build(deps): bump github.com/mojocn/base64Captcha from 1.3.5 to 1.3.6 (#1793)
Bumps [github.com/mojocn/base64Captcha](https://github.com/mojocn/base64Captcha) from 1.3.5 to 1.3.6.
- [Release notes](https://github.com/mojocn/base64Captcha/releases)
- [Commits](https://github.com/mojocn/base64Captcha/compare/v1.3.5...v1.3.6)

---
updated-dependencies:
- dependency-name: github.com/mojocn/base64Captcha
  dependency-type: direct:production
...

Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
2023-12-14 12:53:06 +08:00
shardingHe
2539cb9c1a update canal dashboards (#1794)
Co-authored-by: shardingHe <wangzihe@flashcat.cloud>
2023-12-14 12:52:36 +08:00
ning
32dd3d5098 refactor: add event log 2023-12-08 16:21:58 +08:00
Yening Qin
b6cf382b86 feat: optimize tplx (#1789)
* optimize-tplx
2023-12-08 14:40:48 +08:00
ning
03d19a797c fix refresh event group name 2023-12-08 11:30:40 +08:00
ning
98cbc14039 code refactor 2023-12-08 11:21:48 +08:00
ning
248bb50b3e Merge branch 'main' of github.com:ccfos/nightingale 2023-12-08 11:13:25 +08:00
ning
01f1dcf93e optimize show annotations 2023-12-08 11:13:10 +08:00
Yening Qin
fdac82b8dc refactor: add more self metrics (#1788)
*  add stats metrics
2023-12-08 00:32:18 +08:00
shardingHe
0f926cb218 feat: Dashboards for SQL Server (#1781)
* remove defaultValue for datasource.

* add sqlserver dashboards

---------

Co-authored-by: shardingHe <wangzihe@flashcat.cloud>
2023-12-08 00:11:21 +08:00
ning
de35b61b52 refactor: aop log 2023-12-05 20:32:50 +08:00
ning
341aa3f070 update migrate 2023-12-05 11:08:11 +08:00
ning
f47254e72d fix: notify tpl put 2023-12-05 10:59:29 +08:00
Yening Qin
0b75d4d2ed feat: support drop timeseries by labels (#1784)
* feat: drop sample
2023-12-02 11:44:52 +08:00
Yening Qin
d204aa0cd4 refactor build noitfy tpl (#1783)
* refactor build tpl

* change notify api
2023-12-02 11:19:34 +08:00
Yening Qin
4f6584a41d refactor: extract Ident from timeseries (#1782) 2023-12-01 16:37:03 +08:00
ning
8f8f24ccfe refactor: change sql-template api 2023-11-30 16:57:42 +08:00
ning
0f2257b8bb Merge branch 'main' of github.com:ccfos/nightingale 2023-11-30 12:06:59 +08:00
ning
8bd99f13c1 docs: change i18n 2023-11-30 12:06:41 +08:00
shardingHe
f8deb89592 feat: add gorm logger (#1768)
* add gorm logger

* add gorm logger implement, slow log save to warning-level file

* optimize gorm logger

* optimize code

---------

Co-authored-by: shardingHe <wangzihe@flashcat.cloud>
2023-11-30 12:02:23 +08:00
Yening Qin
701407581b feat: support multi resources get (#1780)
* get alert rules by gids

* add perm check

* code refactor

* add tasks api

* code refactor

* add site-info api

* update targets api

* add /site-settings
2023-11-28 15:37:40 +08:00
ning
ba2ee05bc0 docs: rename clone board name 2023-11-23 20:38:12 +08:00
ning
c6e649129e refactor: role ops api 2023-11-22 14:49:10 +08:00
Yening Qin
329249ea99 refactor: ops i18n (#1778)
* ops i18n

* update gomod

* code refactor
2023-11-22 13:57:30 +08:00
Yening Qin
65d8a30396 refactor: datasource api (#1777)
* ds refactor
2023-11-20 23:28:15 +08:00
ning
e29a45c4a3 refactor: update role_operation 2023-11-17 17:13:44 +08:00
Yening Qin
438078cdc5 target add agent version (#1776)
* add agent-version
2023-11-17 16:57:56 +08:00
ning
ae07ba7523 docs: update default cas config 2023-11-15 12:14:06 +08:00
ning
f201b12dd8 Merge branch 'main' of github.com:ccfos/nightingale 2023-11-15 11:58:36 +08:00
ning
ee5322f406 change alert_rule callbacks length 2023-11-15 11:58:21 +08:00
Yening Qin
60a2e0c963 feat: writer support mult addrs (#1775)
* support mult write
2023-11-15 10:56:13 +08:00
Ulric Qin
2b55ed9b46 code refactor 2023-11-15 10:33:38 +08:00
ning
68eb7cb57e docs: update migrate sql 2023-11-13 17:54:33 +08:00
ning
6387b601b1 docs: add auto migrate sql 2023-11-13 17:52:10 +08:00
ning
af58fa8802 code refactor 2023-11-13 17:40:31 +08:00
ning
80daea5744 Merge branch 'main' of github.com:ccfos/nightingale 2023-11-13 17:33:38 +08:00
ning
bf9a471484 docs: add auto migrate sql 2023-11-13 17:31:56 +08:00
shardingHe
195ed9761c docs: built-in dashboards remove defaultValue for datasource (#1773)
Co-authored-by: shardingHe <wangzihe@flashcat.cloud>
2023-11-13 14:31:13 +08:00
Ulric Qin
fdc0123681 update logo 2023-11-11 22:21:15 +08:00
ning
6fd75ae552 Merge branch 'main' of github.com:ccfos/nightingale 2023-11-10 14:42:31 +08:00
ning
10c462a477 fix: insert ibex-settings perm point 2023-11-10 14:42:18 +08:00
Ulric Qin
694c43292a add api: /api/n9e/user/busi-groups 2023-11-09 20:07:30 +08:00
ning
cfa78dc9e2 feat: alert_subscribe support note and disabled 2023-11-09 00:15:36 +08:00
ning
cc80f5b685 fix: add ibex-settings perm point 2023-11-08 19:08:58 +08:00
ning
58f4a11669 refactor: datasource update 2023-11-08 18:17:22 +08:00
Yening Qin
4f57624a67 refactor: timeteries handle hook (#1772)
* refactor:  timeteries handle hook
2023-11-08 15:11:39 +08:00
Yening Qin
9558520dcd feat: host filter support * (#1769) 2023-11-07 11:19:18 +08:00
dependabot[bot]
8ded3623a4 build(deps): bump golang.org/x/image from 0.5.0 to 0.10.0 (#1767)
Bumps [golang.org/x/image](https://github.com/golang/image) from 0.5.0 to 0.10.0.
- [Commits](https://github.com/golang/image/compare/v0.5.0...v0.10.0)

---
updated-dependencies:
- dependency-name: golang.org/x/image
  dependency-type: indirect
...

Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
2023-11-07 11:12:54 +08:00
ning
12fcca2faf docs: change integrations 2023-11-03 15:32:27 +08:00
xtan
9dc20fc674 fix: fix windows dashboard (#1762) 2023-10-27 19:32:23 +08:00
Lars Lehtonen
16430550d1 models: fix clobbered error (#1764) 2023-10-27 19:32:07 +08:00
ning
b34b66785d change notify api perm 2023-10-27 19:22:13 +08:00
ning
2cf38b6027 docs: update built-in linux dashboard 2023-10-27 19:08:46 +08:00
ning
e1b4edaa68 refactor: user-variable-configs api 2023-10-27 18:58:22 +08:00
ning
97f3f70d57 refactor perm api 2023-10-27 18:13:04 +08:00
ning
cee0ce6620 code refactor 2023-10-27 17:40:11 +08:00
ning
89b659695f add ops 2023-10-27 17:35:00 +08:00
Yening Qin
d52848ab1b feat: http support print body log (#1757)
* add log
2023-10-24 11:37:34 +08:00
shardingHe
314a8d71ef Fix:use text template to replace data (#1756)
* add text/template func, avoid escape issues caused by special characters

* rename the function of template

* change use of template. ReplaceTemplateUseHtml replaced to ReplaceTemplateUseText

* change use of template. ReplaceTemplateUseHtml replaced to ReplaceTemplateUseText

---------

Co-authored-by: shardingHe <wangzihe@flashcat.cloud>
2023-10-23 17:22:34 +08:00
shardingHe
bfa85cd8f1 fix: add func for tplx, avoid escape issues caused by special characters (#1755)
* add text/template func, avoid escape issues caused by special characters

* rename the function of template

---------

Co-authored-by: shardingHe <wangzihe@flashcat.cloud>
2023-10-23 16:35:08 +08:00
shardingHe
2254cb1f87 fix: move dashboards to the right place (#1753)
* move dashboards to the right place

* move dashboards to the right place

---------

Co-authored-by: shardingHe <wangzihe@flashcat.cloud>
2023-10-21 11:49:48 +08:00
ning
17cbfb8453 change alert_rule extra_config type 2023-10-20 11:26:30 +08:00
ning
6b89b7b4a5 change configs cval type 2023-10-20 10:20:08 +08:00
shardingHe
4fe5828d8d feat: macro variable in Configs (#1725)
* configs form user crud

* configs form user crud

* code refactor

* code refactor

* configs for user variable update & add InitRSAConfig for alert.go

* configs for user variable update

* remove InitRSAConfig for alert.go

* add config of Center.Encryption

* migrate for config and set default value

* add annotation for Center.Encryption

* code refactor

* code refactor

* code refactor again

* code refactor

* remove userVariableCheck bool return

* remove userVariableCheck bool return

* optimize InitRSAConfig

* optimize InitRSAConfig

* optimize InitRSAConfig

* macro variable

* optimize config

* remove test function user-variable-ras

* code refactor config_cache

* code refactor again

* code refactor again

* ReplaceMacroVariables return string value & optimize code

* add user variable check on ckey, it is not recommended to use the period "."

* change configs ckey+external=uniqueness

* configs add add external value

* configs remove isInternal check(ckey is no longer unique field)

* update userVariableCheck

* migrate drop unique filed limit

* refactor rsa generate logic. read key pairs from config.toml(HTTP.RSA) if file is not exist generate rsa key paris and saving to database(configs).

* Improve the RSA key generation logic. In the first step, attempt to read key pairs from the database or the 'config.toml' file. If they don't exist, generate a new set of key pairs and store them in the database.

* optimize code

* ckey use C-style naming convention and optimize smtp validations

* change email logger. print host & port

* update init rsa config logically

* update migrate about DropUniqueFiled

* update migrate about DropUniqueFiled

* make migrating at the first step

* move config of rsa. generate in db.

---------

Co-authored-by: shardingHe <wangzihe@flashcat.cloud>
Co-authored-by: Yening Qin <710leo@gmail.com>
2023-10-19 15:02:59 +08:00
Yening Qin
98422d696e refactor: heartbeat api (#1747) 2023-10-17 17:19:37 +08:00
shardingHe
e3103faeae refactor: Integrations sync from categraf (#1739)
* sync fist step

* integrations sync 14

* integrations sync 20

* integrations sync

* integrations sync 1 & add aliyun alerts

* update v2 dashboard to v3, and move dashboard to cloudwatch, cAdvisor, nginx_upstream_check. make sure all dashboards's name is unique.

* update readme

* delete nsq readme file

* rename switch_legacy,delete sockstat

* Rename the directory to match the lowercase plugin name

* add AMD_ROCm_SMI

* add ipvs

---------

Co-authored-by: shardingHe <wangzihe@flashcat.cloud>
2023-10-17 11:05:52 +08:00
837 changed files with 230506 additions and 85903 deletions

View File

@@ -1,67 +0,0 @@
name: Bug Report
description: Report a bug encountered while running Nightingale
labels: ["kind/bug"]
body:
- type: markdown
attributes:
value: |
Thanks for taking time to fill out this bug report!
The more detailed the form is filled in, the easier the problem will be solved.
- type: textarea
id: config
attributes:
label: Relevant server.conf | webapi.conf
description: Place config in the toml code section. This will be automatically formatted into toml, so no need for backticks.
render: toml
validations:
required: true
- type: textarea
id: logs
attributes:
label: Relevant logs
description: categraf | telegraf | server | webapi | prometheus | chrome request/response ...
render: text
validations:
required: true
- type: input
id: system-info
attributes:
label: System info
description: Include nightingale version, operating system, and other relevant details
placeholder: ex. n9e 5.9.2, n9e-fe 5.5.0, categraf 0.1.0, Ubuntu 20.04, Docker 20.10.8
validations:
required: true
- type: textarea
id: reproduce
attributes:
label: Steps to reproduce
description: Describe the steps to reproduce the bug.
value: |
1.
2.
3.
...
validations:
required: true
- type: textarea
id: expected-behavior
attributes:
label: Expected behavior
description: Describe what you expected to happen when you performed the above steps.
validations:
required: true
- type: textarea
id: actual-behavior
attributes:
label: Actual behavior
description: Describe what actually happened when you performed the above steps.
validations:
required: true
- type: textarea
id: additional-info
attributes:
label: Additional info
description: Include gist of relevant config, logs, etc.
validations:
required: false

33
.github/ISSUE_TEMPLATE/question.yml vendored Normal file
View File

@@ -0,0 +1,33 @@
name: Bug Report & Usage Question
description: Reporting a bug or asking a question about how to use Nightingale
labels: []
body:
- type: markdown
attributes:
value: |
The more detailed the form is filled in, the easier the problem will be solved.
提供的信息越详细,问题解决的可能性就越大。另外, 提问之前请先搜索历史 issue (包括 close 的), 以免重复提问。
- type: textarea
id: question
attributes:
label: Question and Steps to reproduce
description: Describe your question and steps to reproduce the bug. 描述问题以及复现步骤
validations:
required: true
- type: textarea
id: logs
attributes:
label: Relevant logs and configurations
description: Relevant logs and configurations. 报错日志([查看方法](https://flashcat.cloud/docs/content/flashcat-monitor/nightingale-v6/faq/how-to-check-logs/))以及各个相关组件的配置信息
render: text
validations:
required: true
- type: textarea
id: system-info
attributes:
label: Version
description: Include nightingale version, operating system, and other relevant details. 请告知夜莺的版本、操作系统的版本、CPU架构等信息
validations:
required: true

22
.github/workflows/issue-translator.yml vendored Normal file
View File

@@ -0,0 +1,22 @@
name: 'Issue Translator'
on:
issues:
types: [opened]
jobs:
translate:
runs-on: ubuntu-latest
permissions:
issues: write
contents: read
steps:
- name: Translate Issues
uses: usthe/issues-translate-action@v2.7
with:
# 是否翻译 issue 标题
IS_MODIFY_TITLE: true
# GitHub Token
BOT_GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
# 自定义翻译标注(可选)
# CUSTOM_BOT_NOTE: "Translation by bot"

View File

@@ -5,7 +5,7 @@ on:
tags:
- 'v*'
env:
GO_VERSION: 1.18
GO_VERSION: 1.23
jobs:
goreleaser:
@@ -26,7 +26,8 @@ jobs:
- name: Run GoReleaser
uses: goreleaser/goreleaser-action@v3
with:
version: latest
distribution: goreleaser
version: '~> v1'
args: release --rm-dist
env:
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}

11
.gitignore vendored
View File

@@ -9,6 +9,7 @@
*.o
*.a
*.so
*.db
*.sw[po]
*.tar.gz
*.[568vq]
@@ -46,6 +47,8 @@ _test
/docker/n9e
/docker/compose-bridge/mysqldata
/docker/compose-host-network/mysqldata
/docker/compose-host-network-metric-log/mysqldata
/docker/compose-host-network-metric-log/n9e-logs
/docker/compose-postgres/pgdata
/etc.local*
/front/statik/statik.go
@@ -55,6 +58,10 @@ _test
.idea
.index
.vscode
.issue
.issue/*
.cursor
.claude
.DS_Store
.cache-loader
.payload
@@ -62,3 +69,7 @@ queries.active
/n9e-*
n9e.sql
!/datasource
.env.json

41
.typos.toml Normal file
View File

@@ -0,0 +1,41 @@
# Configuration for typos tool
[files]
extend-exclude = [
# Ignore auto-generated easyjson files
"*_easyjson.go",
# Ignore binary files
"*.gz",
"*.tar",
"n9e",
"n9e-*"
]
[default.extend-identifiers]
# Didi is a company name (DiDi), not a typo
Didi = "Didi"
# datas is intentionally used as plural of data (slice variable)
datas = "datas"
# pendings is intentionally used as plural
pendings = "pendings"
pendingsUseByRecover = "pendingsUseByRecover"
pendingsUseByRecoverMap = "pendingsUseByRecoverMap"
# typs is intentionally used as shorthand for types (parameter name)
typs = "typs"
[default.extend-words]
# Some false positives
ba = "ba"
# Specific corrections for ambiguous typos
contigious = "contiguous"
onw = "own"
componet = "component"
Patten = "Pattern"
Requets = "Requests"
Mis = "Miss"
exporer = "exporter"
soruce = "source"
verison = "version"
Configations = "Configurations"
emmited = "emitted"
Utlization = "Utilization"
serie = "series"

634
LICENSE
View File

@@ -1,433 +1,201 @@
Apache License
Version 2.0, January 2004
http://www.apache.org/licenses/
TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
1. Definitions.
"License" shall mean the terms and conditions for use, reproduction,
and distribution as defined by Sections 1 through 9 of this document.
"Licensor" shall mean the copyright owner or entity authorized by
the copyright owner that is granting the License.
"Legal Entity" shall mean the union of the acting entity and all
other entities that control, are controlled by, or are under common
control with that entity. For the purposes of this definition,
"control" means (i) the power, direct or indirect, to cause the
direction or management of such entity, whether by contract or
otherwise, or (ii) ownership of fifty percent (50%) or more of the
outstanding shares, or (iii) beneficial ownership of such entity.
"You" (or "Your") shall mean an individual or Legal Entity
exercising permissions granted by this License.
"Source" form shall mean the preferred form for making modifications,
including but not limited to software source code, documentation
source, and configuration files.
"Object" form shall mean any form resulting from mechanical
transformation or translation of a Source form, including but
not limited to compiled object code, generated documentation,
and conversions to other media types.
"Work" shall mean the work of authorship, whether in Source or
Object form, made available under the License, as indicated by a
copyright notice that is included in or attached to the work
(an example is provided in the Appendix below).
"Derivative Works" shall mean any work, whether in Source or Object
form, that is based on (or derived from) the Work and for which the
editorial revisions, annotations, elaborations, or other modifications
represent, as a whole, an original work of authorship. For the purposes
of this License, Derivative Works shall not include works that remain
separable from, or merely link (or bind by name) to the interfaces of,
the Work and Derivative Works thereof.
"Contribution" shall mean any work of authorship, including
the original version of the Work and any modifications or additions
to that Work or Derivative Works thereof, that is intentionally
submitted to Licensor for inclusion in the Work by the copyright owner
or by an individual or Legal Entity authorized to submit on behalf of
the copyright owner. For the purposes of this definition, "submitted"
means any form of electronic, verbal, or written communication sent
to the Licensor or its representatives, including but not limited to
communication on electronic mailing lists, source code control systems,
and issue tracking systems that are managed by, or on behalf of, the
Licensor for the purpose of discussing and improving the Work, but
excluding communication that is conspicuously marked or otherwise
designated in writing by the copyright owner as "Not a Contribution."
"Contributor" shall mean Licensor and any individual or Legal Entity
on behalf of whom a Contribution has been received by Licensor and
subsequently incorporated within the Work.
2. Grant of Copyright License. Subject to the terms and conditions of
this License, each Contributor hereby grants to You a perpetual,
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
copyright license to reproduce, prepare Derivative Works of,
publicly display, publicly perform, sublicense, and distribute the
Work and such Derivative Works in Source or Object form.
3. Grant of Patent License. Subject to the terms and conditions of
this License, each Contributor hereby grants to You a perpetual,
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
(except as stated in this section) patent license to make, have made,
use, offer to sell, sell, import, and otherwise transfer the Work,
where such license applies only to those patent claims licensable
by such Contributor that are necessarily infringed by their
Contribution(s) alone or by combination of their Contribution(s)
with the Work to which such Contribution(s) was submitted. If You
institute patent litigation against any entity (including a
cross-claim or counterclaim in a lawsuit) alleging that the Work
or a Contribution incorporated within the Work constitutes direct
or contributory patent infringement, then any patent licenses
granted to You under this License for that Work shall terminate
as of the date such litigation is filed.
4. Redistribution. You may reproduce and distribute copies of the
Work or Derivative Works thereof in any medium, with or without
modifications, and in Source or Object form, provided that You
meet the following conditions:
(a) You must give any other recipients of the Work or
Derivative Works a copy of this License; and
(b) You must cause any modified files to carry prominent notices
stating that You changed the files; and
(c) You must retain, in the Source form of any Derivative Works
that You distribute, all copyright, patent, trademark, and
attribution notices from the Source form of the Work,
excluding those notices that do not pertain to any part of
the Derivative Works; and
(d) If the Work includes a "NOTICE" text file as part of its
distribution, then any Derivative Works that You distribute must
include a readable copy of the attribution notices contained
within such NOTICE file, excluding those notices that do not
pertain to any part of the Derivative Works, in at least one
of the following places: within a NOTICE text file distributed
as part of the Derivative Works; within the Source form or
documentation, if provided along with the Derivative Works; or,
within a display generated by the Derivative Works, if and
wherever such third-party notices normally appear. The contents
of the NOTICE file are for informational purposes only and
do not modify the License. You may add Your own attribution
notices within Derivative Works that You distribute, alongside
or as an addendum to the NOTICE text from the Work, provided
that such additional attribution notices cannot be construed
as modifying the License.
You may add Your own copyright statement to Your modifications and
may provide additional or different license terms and conditions
for use, reproduction, or distribution of Your modifications, or
for any such Derivative Works as a whole, provided Your use,
reproduction, and distribution of the Work otherwise complies with
the conditions stated in this License.
5. Submission of Contributions. Unless You explicitly state otherwise,
any Contribution intentionally submitted for inclusion in the Work
by You to the Licensor shall be under the terms and conditions of
this License, without any additional terms or conditions.
Notwithstanding the above, nothing herein shall supersede or modify
the terms of any separate license agreement you may have executed
with Licensor regarding such Contributions.
6. Trademarks. This License does not grant permission to use the trade
names, trademarks, service marks, or product names of the Licensor,
except as required for reasonable and customary use in describing the
origin of the Work and reproducing the content of the NOTICE file.
7. Disclaimer of Warranty. Unless required by applicable law or
agreed to in writing, Licensor provides the Work (and each
Contributor provides its Contributions) on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
implied, including, without limitation, any warranties or conditions
of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
PARTICULAR PURPOSE. You are solely responsible for determining the
appropriateness of using or redistributing the Work and assume any
risks associated with Your exercise of permissions under this License.
8. Limitation of Liability. In no event and under no legal theory,
whether in tort (including negligence), contract, or otherwise,
unless required by applicable law (such as deliberate and grossly
negligent acts) or agreed to in writing, shall any Contributor be
liable to You for damages, including any direct, indirect, special,
incidental, or consequential damages of any character arising as a
result of this License or out of the use or inability to use the
Work (including but not limited to damages for loss of goodwill,
work stoppage, computer failure or malfunction, or any and all
other commercial damages or losses), even if such Contributor
has been advised of the possibility of such damages.
9. Accepting Warranty or Additional Liability. While redistributing
the Work or Derivative Works thereof, You may choose to offer,
and charge a fee for, acceptance of support, warranty, indemnity,
or other liability obligations and/or rights consistent with this
License. However, in accepting such obligations, You may act only
on Your own behalf and on Your sole responsibility, not on behalf
of any other Contributor, and only if You agree to indemnify,
defend, and hold each Contributor harmless for any liability
incurred by, or claims asserted against, such Contributor by reason
of your accepting any such warranty or additional liability.
END OF TERMS AND CONDITIONS
APPENDIX: How to apply the Apache License to your work.
To apply the Apache License to your work, attach the following
boilerplate notice, with the fields enclosed by brackets "{}"
replaced with your own identifying information. (Don't include
the brackets!) The text should be enclosed in the appropriate
comment syntax for the file format. We also recommend that a
file or class name and description of purpose be included on the
same "printed page" as the copyright notice for easier
identification within third-party archives.
Copyright (C) 2017 Beijing Didi Infinity Technology and Development Co.,Ltd. All rights reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
Apache License
Version 2.0, January 2004
http://www.apache.org/licenses/
TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
1. Definitions.
"License" shall mean the terms and conditions for use, reproduction,
and distribution as defined by Sections 1 through 9 of this document.
"Licensor" shall mean the copyright owner or entity authorized by
the copyright owner that is granting the License.
"Legal Entity" shall mean the union of the acting entity and all
other entities that control, are controlled by, or are under common
control with that entity. For the purposes of this definition,
"control" means (i) the power, direct or indirect, to cause the
direction or management of such entity, whether by contract or
otherwise, or (ii) ownership of fifty percent (50%) or more of the
outstanding shares, or (iii) beneficial ownership of such entity.
"You" (or "Your") shall mean an individual or Legal Entity
exercising permissions granted by this License.
"Source" form shall mean the preferred form for making modifications,
including but not limited to software source code, documentation
source, and configuration files.
"Object" form shall mean any form resulting from mechanical
transformation or translation of a Source form, including but
not limited to compiled object code, generated documentation,
and conversions to other media types.
"Work" shall mean the work of authorship, whether in Source or
Object form, made available under the License, as indicated by a
copyright notice that is included in or attached to the work
(an example is provided in the Appendix below).
"Derivative Works" shall mean any work, whether in Source or Object
form, that is based on (or derived from) the Work and for which the
editorial revisions, annotations, elaborations, or other modifications
represent, as a whole, an original work of authorship. For the purposes
of this License, Derivative Works shall not include works that remain
separable from, or merely link (or bind by name) to the interfaces of,
the Work and Derivative Works thereof.
"Contribution" shall mean any work of authorship, including
the original version of the Work and any modifications or additions
to that Work or Derivative Works thereof, that is intentionally
submitted to Licensor for inclusion in the Work by the copyright owner
or by an individual or Legal Entity authorized to submit on behalf of
the copyright owner. For the purposes of this definition, "submitted"
means any form of electronic, verbal, or written communication sent
to the Licensor or its representatives, including but not limited to
communication on electronic mailing lists, source code control systems,
and issue tracking systems that are managed by, or on behalf of, the
Licensor for the purpose of discussing and improving the Work, but
excluding communication that is conspicuously marked or otherwise
designated in writing by the copyright owner as "Not a Contribution."
"Contributor" shall mean Licensor and any individual or Legal Entity
on behalf of whom a Contribution has been received by Licensor and
subsequently incorporated within the Work.
2. Grant of Copyright License. Subject to the terms and conditions of
this License, each Contributor hereby grants to You a perpetual,
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
copyright license to reproduce, prepare Derivative Works of,
publicly display, publicly perform, sublicense, and distribute the
Work and such Derivative Works in Source or Object form.
3. Grant of Patent License. Subject to the terms and conditions of
this License, each Contributor hereby grants to You a perpetual,
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
(except as stated in this section) patent license to make, have made,
use, offer to sell, sell, import, and otherwise transfer the Work,
where such license applies only to those patent claims licensable
by such Contributor that are necessarily infringed by their
Contribution(s) alone or by combination of their Contribution(s)
with the Work to which such Contribution(s) was submitted. If You
institute patent litigation against any entity (including a
cross-claim or counterclaim in a lawsuit) alleging that the Work
or a Contribution incorporated within the Work constitutes direct
or contributory patent infringement, then any patent licenses
granted to You under this License for that Work shall terminate
as of the date such litigation is filed.
4. Redistribution. You may reproduce and distribute copies of the
Work or Derivative Works thereof in any medium, with or without
modifications, and in Source or Object form, provided that You
meet the following conditions:
(a) You must give any other recipients of the Work or
Derivative Works a copy of this License; and
(b) You must cause any modified files to carry prominent notices
stating that You changed the files; and
(c) You must retain, in the Source form of any Derivative Works
that You distribute, all copyright, patent, trademark, and
attribution notices from the Source form of the Work,
excluding those notices that do not pertain to any part of
the Derivative Works; and
(d) If the Work includes a "NOTICE" text file as part of its
distribution, then any Derivative Works that You distribute must
include a readable copy of the attribution notices contained
within such NOTICE file, excluding those notices that do not
pertain to any part of the Derivative Works, in at least one
of the following places: within a NOTICE text file distributed
as part of the Derivative Works; within the Source form or
documentation, if provided along with the Derivative Works; or,
within a display generated by the Derivative Works, if and
wherever such third-party notices normally appear. The contents
of the NOTICE file are for informational purposes only and
do not modify the License. You may add Your own attribution
notices within Derivative Works that You distribute, alongside
or as an addendum to the NOTICE text from the Work, provided
that such additional attribution notices cannot be construed
as modifying the License.
You may add Your own copyright statement to Your modifications and
may provide additional or different license terms and conditions
for use, reproduction, or distribution of Your modifications, or
for any such Derivative Works as a whole, provided Your use,
reproduction, and distribution of the Work otherwise complies with
the conditions stated in this License.
5. Submission of Contributions. Unless You explicitly state otherwise,
any Contribution intentionally submitted for inclusion in the Work
by You to the Licensor shall be under the terms and conditions of
this License, without any additional terms or conditions.
Notwithstanding the above, nothing herein shall supersede or modify
the terms of any separate license agreement you may have executed
with Licensor regarding such Contributions.
6. Trademarks. This License does not grant permission to use the trade
names, trademarks, service marks, or product names of the Licensor,
except as required for reasonable and customary use in describing the
origin of the Work and reproducing the content of the NOTICE file.
7. Disclaimer of Warranty. Unless required by applicable law or
agreed to in writing, Licensor provides the Work (and each
Contributor provides its Contributions) on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
implied, including, without limitation, any warranties or conditions
of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
PARTICULAR PURPOSE. You are solely responsible for determining the
appropriateness of using or redistributing the Work and assume any
risks associated with Your exercise of permissions under this License.
8. Limitation of Liability. In no event and under no legal theory,
whether in tort (including negligence), contract, or otherwise,
unless required by applicable law (such as deliberate and grossly
negligent acts) or agreed to in writing, shall any Contributor be
liable to You for damages, including any direct, indirect, special,
incidental, or consequential damages of any character arising as a
result of this License or out of the use or inability to use the
Work (including but not limited to damages for loss of goodwill,
work stoppage, computer failure or malfunction, or any and all
other commercial damages or losses), even if such Contributor
has been advised of the possibility of such damages.
9. Accepting Warranty or Additional Liability. While redistributing
the Work or Derivative Works thereof, You may choose to offer,
and charge a fee for, acceptance of support, warranty, indemnity,
or other liability obligations and/or rights consistent with this
License. However, in accepting such obligations, You may act only
on Your own behalf and on Your sole responsibility, not on behalf
of any other Contributor, and only if You agree to indemnify,
defend, and hold each Contributor harmless for any liability
incurred by, or claims asserted against, such Contributor by reason
of your accepting any such warranty or additional liability.
END OF TERMS AND CONDITIONS
APPENDIX: How to apply the Apache License to your work.
To apply the Apache License to your work, attach the following
boilerplate notice, with the fields enclosed by brackets "[]"
replaced with your own identifying information. (Don't include
the brackets!) The text should be enclosed in the appropriate
comment syntax for the file format. We also recommend that a
file or class name and description of purpose be included on the
same "printed page" as the copyright notice for easier
identification within third-party archives.
Copyright CCF ODC.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.

131
README.md
View File

@@ -1,104 +1,115 @@
<p align="center">
<a href="https://github.com/ccfos/nightingale">
<img src="doc/img/nightingale_logo_h.png" alt="nightingale - cloud native monitoring" width="240" /></a>
<img src="doc/img/Nightingale_L_V.png" alt="nightingale - cloud native monitoring" width="100" /></a>
</p>
<p align="center">
<b>Open-Source Alerting Expert</b>
</p>
<p align="center">
<img alt="GitHub latest release" src="https://img.shields.io/github/v/release/ccfos/nightingale"/>
<a href="https://n9e.github.io">
<a href="https://flashcat.cloud/docs/">
<img alt="Docs" src="https://img.shields.io/badge/docs-get%20started-brightgreen"/></a>
<a href="https://hub.docker.com/u/flashcatcloud">
<img alt="Docker pulls" src="https://img.shields.io/docker/pulls/flashcatcloud/nightingale"/></a>
<img alt="GitHub Repo stars" src="https://img.shields.io/github/stars/ccfos/nightingale">
<img alt="GitHub Repo issues" src="https://img.shields.io/github/issues/ccfos/nightingale">
<img alt="GitHub Repo issues closed" src="https://img.shields.io/github/issues-closed/ccfos/nightingale">
<img alt="GitHub forks" src="https://img.shields.io/github/forks/ccfos/nightingale">
<a href="https://github.com/ccfos/nightingale/graphs/contributors">
<img alt="GitHub contributors" src="https://img.shields.io/github/contributors-anon/ccfos/nightingale"/></a>
<img alt="GitHub Repo stars" src="https://img.shields.io/github/stars/ccfos/nightingale">
<img alt="GitHub forks" src="https://img.shields.io/github/forks/ccfos/nightingale">
<br/><img alt="GitHub Repo issues" src="https://img.shields.io/github/issues/ccfos/nightingale">
<img alt="GitHub Repo issues closed" src="https://img.shields.io/github/issues-closed/ccfos/nightingale">
<img alt="GitHub latest release" src="https://img.shields.io/github/v/release/ccfos/nightingale"/>
<img alt="License" src="https://img.shields.io/badge/license-Apache--2.0-blue"/>
<a href="https://n9e-talk.slack.com/">
<img alt="GitHub contributors" src="https://img.shields.io/badge/join%20slack-%23n9e-brightgreen.svg"/></a>
<img alt="License" src="https://img.shields.io/badge/license-Apache--2.0-blue"/>
</p>
<p align="center">
An open-source cloud-native monitoring system that is <b>all-in-one</b> <br/>
<b>Out-of-the-box</b>, it integrates data collection, visualization, and monitoring alert <br/>
We recommend upgrading your <b>Prometheus + AlertManager + Grafana</b> combination to Nightingale!
</p>
[English](./README.md) | [中文](./README_zh.md)
## 🎯 What is Nightingale
## Highlighted Features
Nightingale is an open-source monitoring project that focuses on alerting. Similar to Grafana, Nightingale also connects with various existing data sources. However, while Grafana emphasizes visualization, Nightingale places greater emphasis on the alerting engine, as well as the processing and distribution of alarms.
- **Out-of-the-box**
- Supports multiple deployment methods such as **Docker, Helm Chart, and cloud services**, integrates data collection, monitoring, and alerting into one system, and comes with various monitoring dashboards, quick views, and alert rule templates. **It greatly reduces the construction cost, learning cost, and usage cost of cloud-native monitoring systems**.
- **Professional Alerting**
- Provides visual alert configuration and management, supports various alert rules, offers the ability to configure silence and subscription rules, supports multiple alert delivery channels, and has features such as alert self-healing and event management.
- **Cloud-Native**
- Quickly builds an enterprise-level cloud-native monitoring system through a turnkey approach, supports multiple collectors such as [Categraf](https://github.com/flashcatcloud/categraf), Telegraf, and Grafana-agent, supports multiple data sources such as Prometheus, VictoriaMetrics, M3DB, ElasticSearch, and Jaeger, and is compatible with importing Grafana dashboards. **It seamlessly integrates with the cloud-native ecosystem**.
- **High Performance and High Availability**
- Due to the multi-data-source management engine of Nightingale and its excellent architecture design, and utilizing a high-performance time-series database, it can handle data collection, storage, and alert analysis scenarios with billions of time-series data, saving a lot of costs.
- Nightingale components can be horizontally scaled with no single point of failure. It has been deployed in thousands of enterprises and tested in harsh production practices. Many leading Internet companies have used Nightingale for cluster machines with hundreds of nodes, processing billions of time-series data.
- **Flexible Extension and Centralized Management**
- Nightingale can be deployed on a 1-core 1G cloud host, deployed in a cluster of hundreds of machines, or run in Kubernetes. Time-series databases, alert engines, and other components can also be decentralized to various data centers and regions, balancing edge deployment with centralized management. **It solves the problem of data fragmentation and lack of unified views**.
> The Nightingale project was initially developed and open-sourced by DiDi.inc. On May 11, 2022, it was donated to the Open Source Development Committee of the China Computer Federation (CCF ODC).
![](https://n9e.github.io/img/global/arch-bg.png)
#### If you are using Prometheus and have one or more of the following requirement scenarios, it is recommended that you upgrade to Nightingale:
## 💡 How Nightingale Works
- Multiple systems such as Prometheus, Alertmanager, Grafana, etc. are fragmented and lack a unified view and cannot be used out of the box;
- The way to manage Prometheus and Alertmanager by modifying configuration files has a big learning curve and is difficult to collaborate;
- Too much data to scale-up your Prometheus cluster;
- Multiple Prometheus clusters running in production environments, which faced high management and usage costs;
Many users have already collected metrics and log data. In this case, you can connect your storage repositories (such as VictoriaMetrics, ElasticSearch, etc.) as data sources in Nightingale. This allows you to configure alerting rules and notification rules within Nightingale, enabling the generation and distribution of alarms.
#### If you are using Zabbix and have the following scenarios, it is recommended that you upgrade to Nightingale:
![Nightingale Product Architecture](doc/img/readme/20240221152601.png)
- Monitoring too much data and wanting a better scalable solution;
- A high learning curve and a desire for better efficiency of collaborative use in a multi-person, multi-team model;
- Microservice and cloud-native architectures with variable monitoring data lifecycles and high monitoring data dimension bases, which are not easily adaptable to the Zabbix data model;
Nightingale itself does not provide monitoring data collection capabilities. We recommend using [Categraf](https://github.com/flashcatcloud/categraf) as the collector, which integrates seamlessly with Nightingale.
[Categraf](https://github.com/flashcatcloud/categraf) can collect monitoring data from operating systems, network devices, various middleware, and databases. It pushes this data to Nightingale via the `Prometheus Remote Write` protocol. Nightingale then stores the monitoring data in a time-series database (such as Prometheus, VictoriaMetrics, etc.) and provides alerting and visualization capabilities.
#### If you are using [open-falcon](https://github.com/open-falcon/falcon-plus), we recommend you to upgrade to Nightingale
- For more information about open-falcon and Nightingale, please refer to read [Ten features and trends of cloud-native monitoring](https://mp.weixin.qq.com/s?__biz=MzkzNjI5OTM5Nw==&mid=2247483738&idx=1&sn=e8bdbb974a2cd003c1abcc2b5405dd18&chksm=c2a19fb0f5d616a63185cd79277a79a6b80118ef2185890d0683d2bb20451bd9303c78d083c5#rd)。
For certain edge data centers with poor network connectivity to the central Nightingale server, we offer a distributed deployment mode for the alerting engine. In this mode, even if the network is disconnected, the alerting functionality remains unaffected.
## Getting Started
![Edge Deployment Mode](doc/img/readme/multi-region-arch.png)
[https://n9e.github.io/](https://n9e.github.io/)
> In the above diagram, Data Center A has a good network with the central data center, so it uses the Nightingale process in the central data center as the alerting engine. Data Center B has a poor network with the central data center, so it deploys `n9e-edge` as the alerting engine to handle alerting for its own data sources.
## Screenshots
## 🔕 Alert Noise Reduction, Escalation, and Collaboration
https://user-images.githubusercontent.com/792850/216888712-2565fcea-9df5-47bd-a49e-d60af9bd76e8.mp4
Nightingale focuses on being an alerting engine, responsible for generating alarms and flexibly distributing them based on rules. It supports 20 built-in notification medias (such as phone calls, SMS, email, DingTalk, Slack, etc.).
## Architecture
If you have more advanced requirements, such as:
- Want to consolidate events from multiple monitoring systems into one platform for unified noise reduction, response handling, and data analysis.
- Want to support personnel scheduling, practice on-call culture, and support alert escalation (to avoid missing alerts) and collaborative handling.
<img src="doc/img/arch-product.png" width="600">
Then Nightingale is not suitable. It is recommended that you choose on-call products such as PagerDuty and FlashDuty. These products are simple and easy to use.
Nightingale monitoring can receive monitoring data reported by various collectors (such as [Categraf](https://github.com/flashcatcloud/categraf) , telegraf, grafana-agent, Prometheus, etc.) and write them to various popular time-series databases (such as Prometheus, M3DB, VictoriaMetrics, Thanos, TDEngine, etc.). It provides configuration capabilities for alert rules, silence rules, and subscription rules, as well as the ability to view monitoring data. It also provides automatic alarm self-healing mechanisms (such as automatically calling back to a webhook address or executing a script after an alarm is triggered), and the ability to store and manage historical alarm events and view them in groups.
## 🗨️ Communication Channels
If the performance of a standalone time-series database (such as Prometheus) has bottlenecks or poor disaster recovery, we recommend using [VictoriaMetrics](https://github.com/VictoriaMetrics/VictoriaMetrics). The VictoriaMetrics architecture is relatively simple, has excellent performance, and is easy to deploy and maintain. The architecture diagram is as shown above. For more detailed documentation on VictoriaMetrics, please refer to its [official website](https://victoriametrics.com/).
- **Report Bugs:** It is highly recommended to submit issues via the [Nightingale GitHub Issue tracker](https://github.com/ccfos/nightingale/issues/new?assignees=&labels=kind%2Fbug&projects=&template=bug_report.yml).
- **Documentation:** For more information, we recommend thoroughly browsing the [Nightingale Documentation Site](https://n9e.github.io/).
**We welcome you to participate in the Nightingale open-source project and community in various ways, including but not limited to**
- Adding and improving documentation => [n9e.github.io](https://n9e.github.io/)
- Sharing your best practices and experience in using Nightingale monitoring => [Article sharing]((https://n9e.github.io/docs/prologue/share/))
- Submitting product suggestions => [github issue](https://github.com/ccfos/nightingale/issues/new?assignees=&labels=kind%2Ffeature&template=enhancement.md)
- Submitting code to make Nightingale monitoring faster, more stable, and easier to use => [github pull request](https://github.com/didi/nightingale/pulls)
## 🔑 Key Features
![Nightingale Alerting rules](doc/img/readme/alerting-rules-en.png)
**Respecting, recognizing, and recording the work of every contributor** is the first guiding principle of the Nightingale open-source community. We advocate effective questioning, which not only respects the developer's time but also contributes to the accumulation of knowledge in the entire community
- Before asking a question, please first refer to the [FAQ](https://www.gitlink.org.cn/ccfos/nightingale/wiki/faq)
- We use [GitHub Discussions](https://github.com/ccfos/nightingale/discussions) as the communication forum. You can search and ask questions here.
- We also recommend that you join ours [Slack channel](https://n9e-talk.slack.com/) to exchange experiences with other Nightingale users.
- Nightingale supports alerting rules, mute rules, subscription rules, and notification rules. It natively supports 20 types of notification media and allows customization of message templates.
- It supports event pipelines for Pipeline processing of alarms, facilitating automated integration with in-house systems. For example, it can append metadata to alarms or perform relabeling on events.
- It introduces the concept of business groups and a permission system to manage various rules in a categorized manner.
- Many databases and middleware come with built-in alert rules that can be directly imported and used. It also supports direct import of Prometheus alerting rules.
- It supports alerting self-healing, which automatically triggers a script to execute predefined logic after an alarm is generated—such as cleaning up disk space or capturing the current system state.
![Nightingale Alarm Dashboard](doc/img/readme/active-events-en.png)
## Who is using Nightingale
You can register your usage and share your experience by posting on **[Who is Using Nightingale](https://github.com/ccfos/nightingale/issues/897)**.
- Nightingale archives historical alarms and supports multi-dimensional query and statistics.
- It supports flexible aggregation grouping, allowing a clear view of the distribution of alarms across the company.
## Stargazers over time
[![Stargazers over time](https://starchart.cc/ccfos/nightingale.svg)](https://starchart.cc/ccfos/nightingale)
![Nightingale Integration Center](doc/img/readme/integration-components-en.png)
## Contributors
- Nightingale has built-in metric descriptions, dashboards, and alerting rules for common operating systems, middleware, and databases, which are contributed by the community with varying quality.
- It directly receives data via multiple protocols such as Remote Write, OpenTSDB, Datadog, and Falcon, integrates with various Agents.
- It supports data sources like Prometheus, ElasticSearch, Loki, ClickHouse, MySQL, Postgres, allowing alerting based on data from these sources.
- Nightingale can be easily embedded into internal enterprise systems (e.g. Grafana, CMDB), and even supports configuring menu visibility for these embedded systems.
![Nightingale dashboards](doc/img/readme/dashboard-en.png)
- Nightingale supports dashboard functionality, including common chart types, and comes with pre-built dashboards. The image above is a screenshot of one of these dashboards.
- If you are already accustomed to Grafana, it is recommended to continue using Grafana for visualization, as Grafana has deeper expertise in this area.
- For machine-related monitoring data collected by Categraf, it is advisable to use Nightingale's built-in dashboards for viewing. This is because Categraf's metric naming follows Telegraf's convention, which differs from that of Node Exporter.
- Due to Nightingale's concept of business groups (where machines can belong to different groups), there may be scenarios where you only want to view machines within the current business group on the dashboard. Thus, Nightingale's dashboards can be linked with business groups for interactive filtering.
## 🌟 Stargazers over time
[![Stargazers over time](https://api.star-history.com/svg?repos=ccfos/nightingale&type=Date)](https://star-history.com/#ccfos/nightingale&Date)
## 🔥 Users
![User Logos](doc/img/readme/logos.png)
## 🤝 Community Co-Building
- ❇️ Please read the [Nightingale Open Source Project and Community Governance Draft](./doc/community-governance.md). We sincerely welcome every user, developer, company, and organization to use Nightingale, actively report bugs, submit feature requests, share best practices, and help build a professional and active open-source community.
- ❤️ Nightingale Contributors
<a href="https://github.com/ccfos/nightingale/graphs/contributors">
<img src="https://contrib.rocks/image?repo=ccfos/nightingale" />
</a>
## License
[Apache License V2.0](https://github.com/didi/nightingale/blob/main/LICENSE)
## 📜 License
- [Apache License V2.0](https://github.com/ccfos/nightingale/blob/main/LICENSE)

View File

@@ -1,6 +1,9 @@
<p align="center">
<a href="https://github.com/ccfos/nightingale">
<img src="doc/img/nightingale_logo_h.png" alt="nightingale - cloud native monitoring" width="240" /></a>
<img src="doc/img/Nightingale_L_V.png" alt="nightingale - cloud native monitoring" width="100" /></a>
</p>
<p align="center">
<b>开源告警管理专家</b>
</p>
<p align="center">
@@ -11,64 +14,109 @@
<a href="https://github.com/ccfos/nightingale/graphs/contributors">
<img alt="GitHub contributors" src="https://img.shields.io/github/contributors-anon/ccfos/nightingale"/></a>
<img alt="GitHub Repo stars" src="https://img.shields.io/github/stars/ccfos/nightingale">
<img alt="GitHub forks" src="https://img.shields.io/github/forks/ccfos/nightingale">
<br/><img alt="GitHub Repo issues" src="https://img.shields.io/github/issues/ccfos/nightingale">
<img alt="GitHub Repo issues closed" src="https://img.shields.io/github/issues-closed/ccfos/nightingale">
<img alt="GitHub forks" src="https://img.shields.io/github/forks/ccfos/nightingale">
<img alt="GitHub latest release" src="https://img.shields.io/github/v/release/ccfos/nightingale"/>
<img alt="License" src="https://img.shields.io/badge/license-Apache--2.0-blue"/>
<a href="https://n9e-talk.slack.com/">
<img alt="GitHub contributors" src="https://img.shields.io/badge/join%20slack-%23n9e-brightgreen.svg"/></a>
</p>
<p align="center">
告警管理专家,一体化的开源可观测平台
</p>
[English](./README.md) | [中文](./README_zh.md)
夜莺Nightingale是中国计算机学会托管的开源云原生可观测工具最早由滴滴于 2020 年孵化并开源,并于 2022 年正式捐赠予中国计算机学会。夜莺采用 All-in-One 的设计理念,集数据采集、可视化、监控告警、数据分析于一体,与云原生生态紧密集成,融入了顶级互联网公司可观测性最佳实践,沉淀了众多社区专家经验,开箱即用。
## 夜莺是什么
## 资料
夜莺 Nightingale 是一款开源云原生监控告警工具,是中国计算机学会接受捐赠并托管的第一个开源项目,在 GitHub 上有超过 12000 颗星,广受关注和使用。夜莺的统一告警引擎,可以对接 Prometheus、Elasticsearch、ClickHouse、Loki、MySQL 等多种数据源,提供全面的告警判定、丰富的事件处理和灵活的告警分发及通知能力。
- 文档:[flashcat.cloud/docs](https://flashcat.cloud/docs/)
- 提问:[answer.flashcat.cloud](https://answer.flashcat.cloud/)
- 报Bug[github.com/ccfos/nightingale/issues](https://github.com/ccfos/nightingale/issues/new?assignees=&labels=kind%2Fbug&projects=&template=bug_report.yml)
夜莺侧重于监控告警,类似于 Grafana 的数据源集成方式,夜莺也是对接多种既有的数据源,不过 Grafana 侧重于可视化,夜莺则是侧重于告警引擎、告警事件的处理和分发。
> 夜莺监控项目,最初由滴滴开发和开源,并于 2022 年 5 月 11 日捐赠予中国计算机学会开源发展技术委员会CCF ODTC为 CCF ODTC 成立后接受捐赠的第一个开源项目。
![](https://n9e.github.io/img/global/arch-bg.png)
## 夜莺的工作逻辑
很多用户已经自行采集了指标、日志数据此时就把存储库VictoriaMetrics、ElasticSearch等作为数据源接入夜莺即可在夜莺里配置告警规则、通知规则完成告警事件的生成和派发。
![夜莺产品架构](doc/img/readme/20240221152601.png)
夜莺项目本身不提供监控数据采集能力。推荐您使用 [Categraf](https://github.com/flashcatcloud/categraf) 作为采集器,可以和夜莺丝滑对接。
[Categraf](https://github.com/flashcatcloud/categraf) 可以采集操作系统、网络设备、各类中间件、数据库的监控数据,通过 Remote Write 协议推送给夜莺,夜莺把监控数据转存到时序库(如 Prometheus、VictoriaMetrics 等),并提供告警和可视化能力。
对于个别边缘机房,如果和中心夜莺服务端网络链路不好,希望提升告警可用性,夜莺也提供边缘机房告警引擎下沉部署模式,这个模式下,即便边缘和中心端网络割裂,告警功能也不受影响。
![边缘部署模式](doc/img/readme/20240222102119.png)
> 上图中机房A和中心机房的网络链路很好所以直接由中心端的夜莺进程做告警引擎机房B和中心机房的网络链路不好所以在机房B部署了 `n9e-edge` 做告警引擎对机房B的数据源做告警判定。
## 告警降噪、升级、协同
夜莺的侧重点是做告警引擎,即负责产生告警事件,并根据规则做灵活派发,内置支持 20 种通知媒介电话、短信、邮件、钉钉、飞书、企微、Slack 等)。
如果您有更高级的需求,比如:
- 想要把公司的多套监控系统产生的事件聚拢到一个平台,统一做收敛降噪、响应处理、数据分析
- 想要支持人员的排班,践行 On-call 文化,想要支持告警认领、升级(避免遗漏)、协同处理
那夜莺是不合适的,推荐您选用 [FlashDuty](https://flashcat.cloud/product/flashcat-duty/) 这样的 On-call 产品,产品简单易用,也有免费套餐。
## 功能和特点
- 统一接入各种时序库:支持对接 Prometheus、VictoriaMetrics、Thanos、Mimir、M3DB 等多种时序库,实现统一告警管理
- 专业告警能力:内置支持多种告警规则,可以扩展支持所有通知媒介,支持告警屏蔽、告警抑制、告警自愈、告警事件管理
- 高性能可视化引擎支持多种图表样式内置众多Dashboard模版也可导入Grafana模版开箱即用开源协议商业友好
- 无缝搭配 [Flashduty](https://flashcat.cloud/product/flashcat-duty/)实现告警聚合收敛、认领、升级、排班、IM集成确保告警处理不遗漏减少打扰更好协同
- 支持所有常见采集器:支持 [Categraf](https://flashcat.cloud/product/categraf)、telegraf、grafana-agent、datadog-agent、各种 exporter 作为采集器,没有什么数据是不能监控的
- 一体化观测平台:从 v6 版本开始,支持接入 ElasticSearch、Jaeger 数据源,实现日志、链路、指标多维度的统一可观测
## 相关资料 & 交流渠道
- 📚 [夜莺介绍PPT](https://mp.weixin.qq.com/s/Mkwx_46xrltSq8NLqAIYow) 对您了解夜莺各项关键特性会有帮助PPT链接在文末
- 👉 [文档中心](https://flashcat.cloud/docs/) 为了更快的访问速度,站点托管在 [FlashcatCloud](https://flashcat.cloud)
- ❤️ [报告 Bug](https://github.com/ccfos/nightingale/issues/new?assignees=&labels=&projects=&template=question.yml) 写清楚问题描述、复现步骤、截图等信息,更容易得到答案
- 💡 前后端代码分离,前端代码仓库:[https://github.com/n9e/fe](https://github.com/n9e/fe)
- 🎯 关注[这个公众号](https://gitlink.org.cn/UlricQin)了解更多夜莺动态和知识
- 🌟 加我微信:`picobyte`(我已关闭好友验证)拉入微信群,备注:`夜莺互助群`,如果已经把夜莺上到生产环境,可联系我拉入资深监控用户群
## 产品演示
## 关键特性简介
![演示](doc/img/n9e-screenshot-gif-v6.gif)
![夜莺告警规则](doc/img/readme/2025-05-23_18-43-37.png)
## 部署架构
- 夜莺支持告警规则、屏蔽规则、订阅规则、通知规则,内置支持 20 种通知媒介,支持消息模板自定义
- 支持事件管道,对告警事件做 Pipeline 处理,方便和自有系统做自动化整合,比如给告警事件附加一些元信息,对事件做 relabel
- 支持业务组概念,引入权限体系,分门别类管理各类规则
- 很多数据库、中间件内置了告警规则,可以直接导入使用,也可以直接导入 Prometheus 的告警规则
- 支持告警自愈,即告警之后自动触发一个脚本执行一些预定义的逻辑,比如清理一下磁盘、抓一下现场等
![架构](doc/img/n9e-arch-latest.png)
![夜莺事件大盘](doc/img/readme/2025-05-30_08-49-28.png)
## 加入交流群
- 夜莺存档了历史告警事件,支持多维度的查询和统计
- 支持灵活的聚合分组,一目了然看到公司的告警事件分布情况
欢迎加入 QQ 交流群群号479290895QQ 群适合群友互助,夜莺研发人员通常不在群里。如果要报 bug 请到[这里](https://github.com/ccfos/nightingale/issues/new?assignees=&labels=kind%2Fbug&projects=&template=bug_report.yml),提问到[这里](https://answer.flashcat.cloud/)
![夜莺集成中心](doc/img/readme/2025-05-23_18-46-06.png)
## Stargazers over time
- 夜莺内置常用操作系统、中间件、数据库的的指标说明、仪表盘、告警规则,不过都是社区贡献的,整体也是参差不齐
- 夜莺直接接收 Remote Write、OpenTSDB、Datadog、Falcon 等多种协议的数据,故而可以和各类 Agent 对接
- 夜莺支持 Prometheus、ElasticSearch、Loki、TDEngine 等多种数据源,可以对其中的数据做告警
- 夜莺可以很方便内嵌企业内部系统,比如 Grafana、CMDB 等,甚至可以配置这些内嵌系统的菜单可见性
![夜莺仪表盘](doc/img/readme/2025-05-23_18-49-02.png)
- 夜莺支持仪表盘功能,支持常见的图表类型,也内置了一些仪表盘,上图是其中一个仪表盘的截图。
- 如果你已经习惯了 Grafana建议仍然使用 Grafana 看图。Grafana 在看图方面道行更深。
- 机器相关的监控数据,如果是 Categraf 采集的,建议使用夜莺自带的仪表盘查看,因为 Categraf 的指标命名 Follow 的是 Telegraf 的命名方式,和 Node Exporter 不同
- 因为夜莺有个业务组的概念,机器可以归属不同的业务组,有时在仪表盘里只想查看当前所属业务组的机器,所以夜莺的仪表盘可以和业务组联动
## 广受关注
[![Stargazers over time](https://api.star-history.com/svg?repos=ccfos/nightingale&type=Date)](https://star-history.com/#ccfos/nightingale&Date)
## 感谢众多企业的信赖
## Contributors
![夜莺客户](doc/img/readme/logos.png)
## 社区共建
- ❇️ 请阅读浏览[夜莺开源项目和社区治理架构草案](./doc/community-governance.md),真诚欢迎每一位用户、开发者、公司以及组织,使用夜莺监控、积极反馈 Bug、提交功能需求、分享最佳实践共建专业、活跃的夜莺开源社区。
- ❤️ 夜莺贡献者
<a href="https://github.com/ccfos/nightingale/graphs/contributors">
<img src="https://contrib.rocks/image?repo=ccfos/nightingale" />
</a>
## 社区治理
[夜莺开源项目和社区治理架构(草案)](./doc/community-governance.md)
## License
[Apache License V2.0](https://github.com/didi/nightingale/blob/main/LICENSE)
- [Apache License V2.0](https://github.com/ccfos/nightingale/blob/main/LICENSE)

View File

@@ -32,6 +32,7 @@ type Alerting struct {
Timeout int64
TemplatesDir string
NotifyConcurrency int
WebhookBatchSend bool
}
type CallPlugin struct {
@@ -46,13 +47,6 @@ type RedisPub struct {
ChannelKey string
}
type Ibex struct {
Address string
BasicAuthUser string
BasicAuthPass string
Timeout int64
}
func (a *Alert) PreCheck(configDir string) {
if a.Alerting.TemplatesDir == "" {
a.Alerting.TemplatesDir = path.Join(configDir, "template")
@@ -66,10 +60,6 @@ func (a *Alert) PreCheck(configDir string) {
a.Heartbeat.Interval = 1000
}
if a.Heartbeat.EngineName == "" {
a.Heartbeat.EngineName = "default"
}
if a.EngineDelay == 0 {
a.EngineDelay = 30
}

View File

@@ -4,6 +4,8 @@ import (
"context"
"fmt"
"github.com/ccfos/nightingale/v6/dscache"
"github.com/ccfos/nightingale/v6/alert/aconf"
"github.com/ccfos/nightingale/v6/alert/astats"
"github.com/ccfos/nightingale/v6/alert/dispatch"
@@ -21,10 +23,12 @@ import (
"github.com/ccfos/nightingale/v6/pkg/ctx"
"github.com/ccfos/nightingale/v6/pkg/httpx"
"github.com/ccfos/nightingale/v6/pkg/logx"
"github.com/ccfos/nightingale/v6/pkg/macros"
"github.com/ccfos/nightingale/v6/prom"
"github.com/ccfos/nightingale/v6/pushgw/pconf"
"github.com/ccfos/nightingale/v6/pushgw/writer"
"github.com/ccfos/nightingale/v6/tdengine"
"github.com/ccfos/nightingale/v6/storage"
"github.com/flashcatcloud/ibex/src/cmd/ibex"
)
func Initialize(configDir string, cryptoKey string) (func(), error) {
@@ -40,27 +44,47 @@ func Initialize(configDir string, cryptoKey string) (func(), error) {
ctx := ctx.NewContext(context.Background(), nil, false, config.CenterApi)
var redis storage.Redis
redis, err = storage.NewRedis(config.Redis)
if err != nil {
return nil, err
}
syncStats := memsto.NewSyncStats()
alertStats := astats.NewSyncStats()
targetCache := memsto.NewTargetCache(ctx, syncStats, nil)
configCache := memsto.NewConfigCache(ctx, syncStats, nil, "")
targetCache := memsto.NewTargetCache(ctx, syncStats, redis)
busiGroupCache := memsto.NewBusiGroupCache(ctx, syncStats)
alertMuteCache := memsto.NewAlertMuteCache(ctx, syncStats)
alertRuleCache := memsto.NewAlertRuleCache(ctx, syncStats)
notifyConfigCache := memsto.NewNotifyConfigCache(ctx)
notifyConfigCache := memsto.NewNotifyConfigCache(ctx, configCache)
dsCache := memsto.NewDatasourceCache(ctx, syncStats)
userCache := memsto.NewUserCache(ctx, syncStats)
userGroupCache := memsto.NewUserGroupCache(ctx, syncStats)
taskTplsCache := memsto.NewTaskTplCache(ctx)
configCvalCache := memsto.NewCvalCache(ctx, syncStats)
notifyRuleCache := memsto.NewNotifyRuleCache(ctx, syncStats)
notifyChannelCache := memsto.NewNotifyChannelCache(ctx, syncStats)
messageTemplateCache := memsto.NewMessageTemplateCache(ctx, syncStats)
promClients := prom.NewPromClient(ctx, config.Alert.Heartbeat)
tdengineClients := tdengine.NewTdengineClient(ctx, config.Alert.Heartbeat)
promClients := prom.NewPromClient(ctx)
dispatch.InitRegisterQueryFunc(promClients)
externalProcessors := process.NewExternalProcessors()
Start(config.Alert, config.Pushgw, syncStats, alertStats, externalProcessors, targetCache, busiGroupCache, alertMuteCache, alertRuleCache, notifyConfigCache, dsCache, ctx, promClients, tdengineClients, userCache, userGroupCache)
macros.RegisterMacro(macros.MacroInVain)
dscache.Init(ctx, false)
Start(config.Alert, config.Pushgw, syncStats, alertStats, externalProcessors, targetCache, busiGroupCache, alertMuteCache, alertRuleCache, notifyConfigCache, taskTplsCache, dsCache, ctx, promClients, userCache, userGroupCache, notifyRuleCache, notifyChannelCache, messageTemplateCache, configCvalCache)
r := httpx.GinEngine(config.Global.RunMode, config.HTTP)
r := httpx.GinEngine(config.Global.RunMode, config.HTTP,
configCvalCache.PrintBodyPaths, configCvalCache.PrintAccessLog)
rt := router.New(config.HTTP, config.Alert, alertMuteCache, targetCache, busiGroupCache, alertStats, ctx, externalProcessors)
if config.Ibex.Enable {
ibex.ServerStart(false, nil, redis, config.HTTP.APIForService.BasicAuth, config.Alert.Heartbeat, &config.CenterApi, r, nil, config.Ibex, config.HTTP.Port)
}
rt.Config(r)
dumper.ConfigRouter(r)
@@ -73,26 +97,36 @@ func Initialize(configDir string, cryptoKey string) (func(), error) {
}
func Start(alertc aconf.Alert, pushgwc pconf.Pushgw, syncStats *memsto.Stats, alertStats *astats.Stats, externalProcessors *process.ExternalProcessorsType, targetCache *memsto.TargetCacheType, busiGroupCache *memsto.BusiGroupCacheType,
alertMuteCache *memsto.AlertMuteCacheType, alertRuleCache *memsto.AlertRuleCacheType, notifyConfigCache *memsto.NotifyConfigCacheType, datasourceCache *memsto.DatasourceCacheType, ctx *ctx.Context,
promClients *prom.PromClientMap, tdendgineClients *tdengine.TdengineClientMap, userCache *memsto.UserCacheType, userGroupCache *memsto.UserGroupCacheType) {
alertMuteCache *memsto.AlertMuteCacheType, alertRuleCache *memsto.AlertRuleCacheType, notifyConfigCache *memsto.NotifyConfigCacheType, taskTplsCache *memsto.TaskTplCache, datasourceCache *memsto.DatasourceCacheType, ctx *ctx.Context,
promClients *prom.PromClientMap, userCache *memsto.UserCacheType, userGroupCache *memsto.UserGroupCacheType, notifyRuleCache *memsto.NotifyRuleCacheType, notifyChannelCache *memsto.NotifyChannelCacheType, messageTemplateCache *memsto.MessageTemplateCacheType, configCvalCache *memsto.CvalCache) {
alertSubscribeCache := memsto.NewAlertSubscribeCache(ctx, syncStats)
recordingRuleCache := memsto.NewRecordingRuleCache(ctx, syncStats)
targetsOfAlertRulesCache := memsto.NewTargetOfAlertRuleCache(ctx, alertc.Heartbeat.EngineName, syncStats)
go models.InitNotifyConfig(ctx, alertc.Alerting.TemplatesDir)
go models.InitNotifyChannel(ctx)
go models.InitMessageTemplate(ctx)
naming := naming.NewNaming(ctx, alertc.Heartbeat)
naming := naming.NewNaming(ctx, alertc.Heartbeat, alertStats)
writers := writer.NewWriters(pushgwc)
record.NewScheduler(alertc, recordingRuleCache, promClients, writers, alertStats)
record.NewScheduler(alertc, recordingRuleCache, promClients, writers, alertStats, datasourceCache)
eval.NewScheduler(alertc, externalProcessors, alertRuleCache, targetCache, busiGroupCache, alertMuteCache, datasourceCache, promClients, tdendgineClients, naming, ctx, alertStats)
eval.NewScheduler(alertc, externalProcessors, alertRuleCache, targetCache, targetsOfAlertRulesCache,
busiGroupCache, alertMuteCache, datasourceCache, promClients, naming, ctx, alertStats)
dp := dispatch.NewDispatch(alertRuleCache, userCache, userGroupCache, alertSubscribeCache, targetCache, notifyConfigCache, alertc.Alerting, ctx, alertStats)
consumer := dispatch.NewConsumer(alertc.Alerting, ctx, dp)
eventProcessorCache := memsto.NewEventProcessorCache(ctx, syncStats)
dp := dispatch.NewDispatch(alertRuleCache, userCache, userGroupCache, alertSubscribeCache, targetCache, notifyConfigCache, taskTplsCache, notifyRuleCache, notifyChannelCache, messageTemplateCache, eventProcessorCache, configCvalCache, alertc.Alerting, ctx, alertStats)
consumer := dispatch.NewConsumer(alertc.Alerting, ctx, dp, promClients, alertMuteCache)
notifyRecordConsumer := sender.NewNotifyRecordConsumer(ctx)
go dp.ReloadTpls()
go consumer.LoopConsume()
go notifyRecordConsumer.LoopConsume()
go queue.ReportQueueSize(alertStats)
go sender.InitEmailSender(notifyConfigCache.GetSMTP())
go sender.ReportNotifyRecordQueueSize(alertStats)
go sender.InitEmailSender(ctx, notifyConfigCache)
}

View File

@@ -16,9 +16,17 @@ type Stats struct {
GaugeAlertQueueSize prometheus.Gauge
CounterRuleEval *prometheus.CounterVec
CounterQueryDataErrorTotal *prometheus.CounterVec
CounterQueryDataTotal *prometheus.CounterVec
CounterVarFillingQuery *prometheus.CounterVec
CounterRecordEval *prometheus.CounterVec
CounterRecordEvalErrorTotal *prometheus.CounterVec
CounterMuteTotal *prometheus.CounterVec
CounterRuleEvalErrorTotal *prometheus.CounterVec
CounterHeartbeatErrorTotal *prometheus.CounterVec
CounterSubEventTotal *prometheus.CounterVec
GaugeQuerySeriesCount *prometheus.GaugeVec
GaugeRuleEvalDuration *prometheus.GaugeVec
GaugeNotifyRecordQueueSize prometheus.Gauge
}
func NewSyncStats() *Stats {
@@ -29,19 +37,40 @@ func NewSyncStats() *Stats {
Help: "Number of rule eval.",
}, []string{})
CounterRuleEvalErrorTotal := prometheus.NewCounterVec(prometheus.CounterOpts{
Namespace: namespace,
Subsystem: subsystem,
Name: "rule_eval_error_total",
Help: "Number of rule eval error.",
}, []string{"datasource", "stage", "busi_group", "rule_id"})
CounterQueryDataErrorTotal := prometheus.NewCounterVec(prometheus.CounterOpts{
Namespace: namespace,
Subsystem: subsystem,
Name: "query_data_error_total",
Help: "Number of rule eval query data error.",
}, []string{"datasource"})
CounterQueryDataTotal := prometheus.NewCounterVec(prometheus.CounterOpts{
Namespace: namespace,
Subsystem: subsystem,
Name: "query_data_total",
Help: "Number of rule eval query data.",
}, []string{"datasource", "rule_id"})
CounterRecordEval := prometheus.NewCounterVec(prometheus.CounterOpts{
Namespace: namespace,
Subsystem: subsystem,
Name: "record_eval_total",
Help: "Number of record eval.",
}, []string{})
}, []string{"datasource"})
CounterRecordEvalErrorTotal := prometheus.NewCounterVec(prometheus.CounterOpts{
Namespace: namespace,
Subsystem: subsystem,
Name: "record_eval_error_total",
Help: "Number of record eval error.",
}, []string{})
}, []string{"datasource"})
AlertNotifyTotal := prometheus.NewCounterVec(prometheus.CounterOpts{
Namespace: namespace,
@@ -73,30 +102,73 @@ func NewSyncStats() *Stats {
Help: "The size of alert queue.",
})
CounterQueryDataErrorTotal := prometheus.NewCounterVec(prometheus.CounterOpts{
Namespace: namespace,
Subsystem: subsystem,
Name: "query_data_error_total",
Help: "Number of query data error.",
}, []string{"datasource"})
CounterMuteTotal := prometheus.NewCounterVec(prometheus.CounterOpts{
Namespace: namespace,
Subsystem: subsystem,
Name: "mute_total",
Help: "Number of mute.",
}, []string{"group", "rule_id", "mute_rule_id", "datasource_id"})
CounterSubEventTotal := prometheus.NewCounterVec(prometheus.CounterOpts{
Namespace: namespace,
Subsystem: subsystem,
Name: "sub_event_total",
Help: "Number of sub event.",
}, []string{"group"})
CounterHeartbeatErrorTotal := prometheus.NewCounterVec(prometheus.CounterOpts{
Namespace: namespace,
Subsystem: subsystem,
Name: "heartbeat_error_count",
Help: "Number of heartbeat error.",
}, []string{})
GaugeQuerySeriesCount := prometheus.NewGaugeVec(prometheus.GaugeOpts{
Namespace: namespace,
Subsystem: subsystem,
Name: "eval_query_series_count",
Help: "Number of series retrieved from data source after query.",
}, []string{"rule_id", "datasource_id", "ref"})
// 通知记录队列的长度
GaugeNotifyRecordQueueSize := prometheus.NewGauge(prometheus.GaugeOpts{
Namespace: namespace,
Subsystem: subsystem,
Name: "notify_record_queue_size",
Help: "The size of notify record queue.",
})
GaugeRuleEvalDuration := prometheus.NewGaugeVec(prometheus.GaugeOpts{
Namespace: namespace,
Subsystem: subsystem,
Name: "rule_eval_duration_ms",
Help: "Duration of rule eval in milliseconds.",
}, []string{"rule_id", "datasource_id"})
CounterVarFillingQuery := prometheus.NewCounterVec(prometheus.CounterOpts{
Namespace: namespace,
Subsystem: subsystem,
Name: "var_filling_query_total",
Help: "Number of var filling query.",
}, []string{"rule_id", "datasource_id", "ref", "typ"})
prometheus.MustRegister(
CounterAlertsTotal,
GaugeAlertQueueSize,
AlertNotifyTotal,
AlertNotifyErrorTotal,
CounterRuleEval,
CounterQueryDataTotal,
CounterQueryDataErrorTotal,
CounterRecordEval,
CounterRecordEvalErrorTotal,
CounterMuteTotal,
CounterRuleEvalErrorTotal,
CounterHeartbeatErrorTotal,
CounterSubEventTotal,
GaugeQuerySeriesCount,
GaugeRuleEvalDuration,
GaugeNotifyRecordQueueSize,
CounterVarFillingQuery,
)
return &Stats{
@@ -105,9 +177,17 @@ func NewSyncStats() *Stats {
AlertNotifyTotal: AlertNotifyTotal,
AlertNotifyErrorTotal: AlertNotifyErrorTotal,
CounterRuleEval: CounterRuleEval,
CounterQueryDataTotal: CounterQueryDataTotal,
CounterQueryDataErrorTotal: CounterQueryDataErrorTotal,
CounterRecordEval: CounterRecordEval,
CounterRecordEvalErrorTotal: CounterRecordEvalErrorTotal,
CounterMuteTotal: CounterMuteTotal,
CounterRuleEvalErrorTotal: CounterRuleEvalErrorTotal,
CounterHeartbeatErrorTotal: CounterHeartbeatErrorTotal,
CounterSubEventTotal: CounterSubEventTotal,
GaugeQuerySeriesCount: GaugeQuerySeriesCount,
GaugeRuleEvalDuration: GaugeRuleEvalDuration,
GaugeNotifyRecordQueueSize: GaugeNotifyRecordQueueSize,
CounterVarFillingQuery: CounterVarFillingQuery,
}
}

View File

@@ -1,7 +1,9 @@
package common
import (
"encoding/json"
"fmt"
"strings"
"github.com/ccfos/nightingale/v6/models"
)
@@ -12,6 +14,20 @@ func RuleKey(datasourceId, id int64) string {
func MatchTags(eventTagsMap map[string]string, itags []models.TagFilter) bool {
for _, filter := range itags {
// target_group in和not in优先特殊处理匹配通过则继续下一个 filter匹配失败则整组不匹配
if filter.Key == "target_group" {
// target 字段从 event.JsonTagsAndValue() 中获取的
v, ok := eventTagsMap["target"]
if !ok {
return false
}
if !targetGroupMatch(v, filter) {
return false
}
continue
}
// 普通标签按原逻辑处理
value, has := eventTagsMap[filter.Key]
if !has {
return false
@@ -34,9 +50,9 @@ func MatchGroupsName(groupName string, groupFilter []models.TagFilter) bool {
func matchTag(value string, filter models.TagFilter) bool {
switch filter.Func {
case "==":
return filter.Value == value
return strings.TrimSpace(fmt.Sprintf("%v", filter.Value)) == strings.TrimSpace(value)
case "!=":
return filter.Value != value
return strings.TrimSpace(fmt.Sprintf("%v", filter.Value)) != strings.TrimSpace(value)
case "in":
_, has := filter.Vset[value]
return has
@@ -48,6 +64,65 @@ func matchTag(value string, filter models.TagFilter) bool {
case "!~":
return !filter.Regexp.MatchString(value)
}
// unexpect func
// unexpected func
return false
}
// targetGroupMatch 处理 target_group 的特殊匹配逻辑
func targetGroupMatch(value string, filter models.TagFilter) bool {
var valueMap map[string]interface{}
if err := json.Unmarshal([]byte(value), &valueMap); err != nil {
return false
}
switch filter.Func {
case "in", "not in":
// float64 类型的 id 切片
filterValueIds, ok := filter.Value.([]interface{})
if !ok {
return false
}
filterValueIdsMap := make(map[float64]struct{})
for _, id := range filterValueIds {
filterValueIdsMap[id.(float64)] = struct{}{}
}
// float64 类型的 groupIds 切片
groupIds, ok := valueMap["group_ids"].([]interface{})
if !ok {
return false
}
// in 只要 groupIds 中有一个在 filterGroupIds 中出现,就返回 true
// not in 则相反
found := false
for _, gid := range groupIds {
if _, found = filterValueIdsMap[gid.(float64)]; found {
break
}
}
if filter.Func == "in" {
return found
}
// filter.Func == "not in"
return !found
case "=~", "!~":
// 正则满足一个就认为 matched
groupNames, ok := valueMap["group_names"].([]interface{})
if !ok {
return false
}
matched := false
for _, gname := range groupNames {
if filter.Regexp.MatchString(fmt.Sprintf("%v", gname)) {
matched = true
break
}
}
if filter.Func == "=~" {
return matched
}
// "!~": 只要有一个匹配就返回 false否则返回 true
return !matched
default:
return false
}
}

View File

@@ -1,15 +1,24 @@
package dispatch
import (
"context"
"encoding/json"
"fmt"
"strings"
"time"
"github.com/ccfos/nightingale/v6/alert/aconf"
"github.com/ccfos/nightingale/v6/alert/common"
"github.com/ccfos/nightingale/v6/alert/queue"
"github.com/ccfos/nightingale/v6/memsto"
"github.com/ccfos/nightingale/v6/models"
"github.com/ccfos/nightingale/v6/pkg/ctx"
"github.com/ccfos/nightingale/v6/pkg/poster"
promsdk "github.com/ccfos/nightingale/v6/pkg/prom"
"github.com/ccfos/nightingale/v6/pkg/tplx"
"github.com/ccfos/nightingale/v6/prom"
"github.com/prometheus/common/model"
"github.com/toolkits/pkg/concurrent/semaphore"
"github.com/toolkits/pkg/logger"
)
@@ -18,15 +27,36 @@ type Consumer struct {
alerting aconf.Alerting
ctx *ctx.Context
dispatch *Dispatch
dispatch *Dispatch
promClients *prom.PromClientMap
alertMuteCache *memsto.AlertMuteCacheType
}
type EventMuteHookFunc func(event *models.AlertCurEvent) bool
var EventMuteHook EventMuteHookFunc = func(event *models.AlertCurEvent) bool { return false }
func InitRegisterQueryFunc(promClients *prom.PromClientMap) {
tplx.RegisterQueryFunc(func(datasourceID int64, promql string) model.Value {
if promClients.IsNil(datasourceID) {
return nil
}
readerClient := promClients.GetCli(datasourceID)
value, _, _ := readerClient.Query(context.Background(), promql, time.Now())
return value
})
}
// 创建一个 Consumer 实例
func NewConsumer(alerting aconf.Alerting, ctx *ctx.Context, dispatch *Dispatch) *Consumer {
func NewConsumer(alerting aconf.Alerting, ctx *ctx.Context, dispatch *Dispatch, promClients *prom.PromClientMap, alertMuteCache *memsto.AlertMuteCacheType) *Consumer {
return &Consumer{
alerting: alerting,
ctx: ctx,
dispatch: dispatch,
alerting: alerting,
ctx: ctx,
dispatch: dispatch,
promClients: promClients,
alertMuteCache: alertMuteCache,
}
}
@@ -66,26 +96,28 @@ func (e *Consumer) consumeOne(event *models.AlertCurEvent) {
eventType = "recovery"
}
e.dispatch.astats.CounterAlertsTotal.WithLabelValues(event.Cluster, eventType, event.GroupName).Inc()
e.dispatch.Astats.CounterAlertsTotal.WithLabelValues(event.Cluster, eventType, event.GroupName).Inc()
if err := event.ParseRule("rule_name"); err != nil {
logger.Warningf("ruleid:%d failed to parse rule name: %v", event.RuleId, err)
event.RuleName = fmt.Sprintf("failed to parse rule name: %v", err)
}
if err := event.ParseRule("annotations"); err != nil {
logger.Warningf("ruleid:%d failed to parse annotations: %v", event.RuleId, err)
event.Annotations = fmt.Sprintf("failed to parse annotations: %v", err)
event.AnnotationsJSON["error"] = event.Annotations
}
e.queryRecoveryVal(event)
if err := event.ParseRule("rule_note"); err != nil {
logger.Warningf("ruleid:%d failed to parse rule note: %v", event.RuleId, err)
event.RuleNote = fmt.Sprintf("failed to parse rule note: %v", err)
}
if err := event.ParseRule("annotations"); err != nil {
event.Annotations = fmt.Sprintf("failed to parse rule note: %v", err)
}
e.persist(event)
if event.IsRecovered && event.NotifyRecovered == 0 {
return
}
e.dispatch.HandleEventNotify(event, false)
}
@@ -100,6 +132,7 @@ func (e *Consumer) persist(event *models.AlertCurEvent) {
event.Id, err = poster.PostByUrlsWithResp[int64](e.ctx, "/v1/n9e/event-persist", event)
if err != nil {
logger.Errorf("event:%+v persist err:%v", event, err)
e.dispatch.Astats.CounterRuleEvalErrorTotal.WithLabelValues(fmt.Sprintf("%v", event.DatasourceId), "persist_event", event.GroupName, fmt.Sprintf("%v", event.RuleId)).Inc()
}
return
}
@@ -107,5 +140,71 @@ func (e *Consumer) persist(event *models.AlertCurEvent) {
err := models.EventPersist(e.ctx, event)
if err != nil {
logger.Errorf("event%+v persist err:%v", event, err)
e.dispatch.Astats.CounterRuleEvalErrorTotal.WithLabelValues(fmt.Sprintf("%v", event.DatasourceId), "persist_event", event.GroupName, fmt.Sprintf("%v", event.RuleId)).Inc()
}
}
func (e *Consumer) queryRecoveryVal(event *models.AlertCurEvent) {
if !event.IsRecovered {
return
}
// If the event is a recovery event, execute the recovery_promql query
promql, ok := event.AnnotationsJSON["recovery_promql"]
if !ok {
return
}
promql = strings.TrimSpace(promql)
if promql == "" {
logger.Warningf("rule_eval:%s promql is blank", getKey(event))
return
}
if e.promClients.IsNil(event.DatasourceId) {
logger.Warningf("rule_eval:%s error reader client is nil", getKey(event))
return
}
readerClient := e.promClients.GetCli(event.DatasourceId)
var warnings promsdk.Warnings
value, warnings, err := readerClient.Query(e.ctx.Ctx, promql, time.Now())
if err != nil {
logger.Errorf("rule_eval:%s promql:%s, error:%v", getKey(event), promql, err)
event.AnnotationsJSON["recovery_promql_error"] = fmt.Sprintf("promql:%s error:%v", promql, err)
b, err := json.Marshal(event.AnnotationsJSON)
if err != nil {
event.AnnotationsJSON = make(map[string]string)
event.AnnotationsJSON["error"] = fmt.Sprintf("failed to parse annotations: %v", err)
} else {
event.Annotations = string(b)
}
return
}
if len(warnings) > 0 {
logger.Errorf("rule_eval:%s promql:%s, warnings:%v", getKey(event), promql, warnings)
}
anomalyPoints := models.ConvertAnomalyPoints(value)
if len(anomalyPoints) == 0 {
logger.Warningf("rule_eval:%s promql:%s, result is empty", getKey(event), promql)
event.AnnotationsJSON["recovery_promql_error"] = fmt.Sprintf("promql:%s error:%s", promql, "result is empty")
} else {
event.AnnotationsJSON["recovery_value"] = fmt.Sprintf("%v", anomalyPoints[0].Value)
}
b, err := json.Marshal(event.AnnotationsJSON)
if err != nil {
event.AnnotationsJSON = make(map[string]string)
event.AnnotationsJSON["error"] = fmt.Sprintf("failed to parse annotations: %v", err)
} else {
event.Annotations = string(b)
}
}
func getKey(event *models.AlertCurEvent) string {
return common.RuleKey(event.DatasourceId, event.RuleId)
}

View File

@@ -3,14 +3,20 @@ package dispatch
import (
"bytes"
"encoding/json"
"errors"
"fmt"
"html/template"
"net/url"
"strconv"
"strings"
"sync"
"time"
"github.com/ccfos/nightingale/v6/alert/aconf"
"github.com/ccfos/nightingale/v6/alert/astats"
"github.com/ccfos/nightingale/v6/alert/common"
"github.com/ccfos/nightingale/v6/alert/pipeline"
"github.com/ccfos/nightingale/v6/alert/pipeline/engine"
"github.com/ccfos/nightingale/v6/alert/sender"
"github.com/ccfos/nightingale/v6/memsto"
"github.com/ccfos/nightingale/v6/models"
@@ -19,6 +25,17 @@ import (
"github.com/toolkits/pkg/logger"
)
var ShouldSkipNotify func(*ctx.Context, *models.AlertCurEvent, int64) bool
var SendByNotifyRule func(*ctx.Context, *memsto.UserCacheType, *memsto.UserGroupCacheType, *memsto.NotifyChannelCacheType, *memsto.CvalCache,
[]*models.AlertCurEvent, int64, *models.NotifyConfig, *models.NotifyChannelConfig, *models.MessageTemplate)
var EventProcessorCache *memsto.EventProcessorCacheType
func init() {
ShouldSkipNotify = shouldSkipNotify
SendByNotifyRule = SendNotifyRuleMessage
}
type Dispatch struct {
alertRuleCache *memsto.AlertRuleCacheType
userCache *memsto.UserCacheType
@@ -26,16 +43,23 @@ type Dispatch struct {
alertSubscribeCache *memsto.AlertSubscribeCacheType
targetCache *memsto.TargetCacheType
notifyConfigCache *memsto.NotifyConfigCacheType
taskTplsCache *memsto.TaskTplCache
configCvalCache *memsto.CvalCache
notifyRuleCache *memsto.NotifyRuleCacheType
notifyChannelCache *memsto.NotifyChannelCacheType
messageTemplateCache *memsto.MessageTemplateCacheType
eventProcessorCache *memsto.EventProcessorCacheType
alerting aconf.Alerting
Senders map[string]sender.Sender
CallBacks map[string]sender.CallBacker
tpls map[string]*template.Template
ExtraSenders map[string]sender.Sender
BeforeSenderHook func(*models.AlertCurEvent) bool
ctx *ctx.Context
astats *astats.Stats
ctx *ctx.Context
Astats *astats.Stats
RwLock sync.RWMutex
}
@@ -43,14 +67,21 @@ type Dispatch struct {
// 创建一个 Notify 实例
func NewDispatch(alertRuleCache *memsto.AlertRuleCacheType, userCache *memsto.UserCacheType, userGroupCache *memsto.UserGroupCacheType,
alertSubscribeCache *memsto.AlertSubscribeCacheType, targetCache *memsto.TargetCacheType, notifyConfigCache *memsto.NotifyConfigCacheType,
alerting aconf.Alerting, ctx *ctx.Context, astats *astats.Stats) *Dispatch {
taskTplsCache *memsto.TaskTplCache, notifyRuleCache *memsto.NotifyRuleCacheType, notifyChannelCache *memsto.NotifyChannelCacheType,
messageTemplateCache *memsto.MessageTemplateCacheType, eventProcessorCache *memsto.EventProcessorCacheType, configCvalCache *memsto.CvalCache, alerting aconf.Alerting, c *ctx.Context, astats *astats.Stats) *Dispatch {
notify := &Dispatch{
alertRuleCache: alertRuleCache,
userCache: userCache,
userGroupCache: userGroupCache,
alertSubscribeCache: alertSubscribeCache,
targetCache: targetCache,
notifyConfigCache: notifyConfigCache,
alertRuleCache: alertRuleCache,
userCache: userCache,
userGroupCache: userGroupCache,
alertSubscribeCache: alertSubscribeCache,
targetCache: targetCache,
notifyConfigCache: notifyConfigCache,
taskTplsCache: taskTplsCache,
notifyRuleCache: notifyRuleCache,
notifyChannelCache: notifyChannelCache,
messageTemplateCache: messageTemplateCache,
eventProcessorCache: eventProcessorCache,
configCvalCache: configCvalCache,
alerting: alerting,
@@ -59,14 +90,21 @@ func NewDispatch(alertRuleCache *memsto.AlertRuleCacheType, userCache *memsto.Us
ExtraSenders: make(map[string]sender.Sender),
BeforeSenderHook: func(*models.AlertCurEvent) bool { return true },
ctx: ctx,
astats: astats,
ctx: c,
Astats: astats,
}
pipeline.Init()
EventProcessorCache = eventProcessorCache
// 设置通知记录回调函数
notifyChannelCache.SetNotifyRecordFunc(sender.NotifyRecord)
return notify
}
func (e *Dispatch) ReloadTpls() error {
err := e.relaodTpls()
err := e.reloadTpls()
if err != nil {
logger.Errorf("failed to reload tpls: %v", err)
}
@@ -74,13 +112,13 @@ func (e *Dispatch) ReloadTpls() error {
duration := time.Duration(9000) * time.Millisecond
for {
time.Sleep(duration)
if err := e.relaodTpls(); err != nil {
if err := e.reloadTpls(); err != nil {
logger.Warning("failed to reload tpls:", err)
}
}
}
func (e *Dispatch) relaodTpls() error {
func (e *Dispatch) reloadTpls() error {
tmpTpls, err := models.ListTpls(e.ctx)
if err != nil {
return err
@@ -95,6 +133,21 @@ func (e *Dispatch) relaodTpls() error {
models.Mm: sender.NewSender(models.Mm, tmpTpls),
models.Telegram: sender.NewSender(models.Telegram, tmpTpls),
models.FeishuCard: sender.NewSender(models.FeishuCard, tmpTpls),
models.Lark: sender.NewSender(models.Lark, tmpTpls),
models.LarkCard: sender.NewSender(models.LarkCard, tmpTpls),
}
// domain -> Callback()
callbacks := map[string]sender.CallBacker{
models.DingtalkDomain: sender.NewCallBacker(models.DingtalkDomain, e.targetCache, e.userCache, e.taskTplsCache, tmpTpls),
models.WecomDomain: sender.NewCallBacker(models.WecomDomain, e.targetCache, e.userCache, e.taskTplsCache, tmpTpls),
models.FeishuDomain: sender.NewCallBacker(models.FeishuDomain, e.targetCache, e.userCache, e.taskTplsCache, tmpTpls),
models.TelegramDomain: sender.NewCallBacker(models.TelegramDomain, e.targetCache, e.userCache, e.taskTplsCache, tmpTpls),
models.FeishuCardDomain: sender.NewCallBacker(models.FeishuCardDomain, e.targetCache, e.userCache, e.taskTplsCache, tmpTpls),
models.IbexDomain: sender.NewCallBacker(models.IbexDomain, e.targetCache, e.userCache, e.taskTplsCache, tmpTpls),
models.LarkDomain: sender.NewCallBacker(models.LarkDomain, e.targetCache, e.userCache, e.taskTplsCache, tmpTpls),
models.DefaultDomain: sender.NewCallBacker(models.DefaultDomain, e.targetCache, e.userCache, e.taskTplsCache, tmpTpls),
models.LarkCardDomain: sender.NewCallBacker(models.LarkCardDomain, e.targetCache, e.userCache, e.taskTplsCache, tmpTpls),
}
e.RwLock.RLock()
@@ -106,18 +159,460 @@ func (e *Dispatch) relaodTpls() error {
e.RwLock.Lock()
e.tpls = tmpTpls
e.Senders = senders
e.CallBacks = callbacks
e.RwLock.Unlock()
return nil
}
func (e *Dispatch) HandleEventWithNotifyRule(eventOrigin *models.AlertCurEvent) {
if len(eventOrigin.NotifyRuleIds) > 0 {
for _, notifyRuleId := range eventOrigin.NotifyRuleIds {
// 深拷贝新的 event避免并发修改 event 冲突
eventCopy := eventOrigin.DeepCopy()
logger.Infof("notify rule ids: %v, event: %+v", notifyRuleId, eventCopy)
notifyRule := e.notifyRuleCache.Get(notifyRuleId)
if notifyRule == nil {
continue
}
if !notifyRule.Enable {
continue
}
eventCopy.NotifyRuleId = notifyRuleId
eventCopy.NotifyRuleName = notifyRule.Name
eventCopy = HandleEventPipeline(notifyRule.PipelineConfigs, eventOrigin, eventCopy, e.eventProcessorCache, e.ctx, notifyRuleId, "notify_rule")
if ShouldSkipNotify(e.ctx, eventCopy, notifyRuleId) {
logger.Infof("notify_id: %d, event:%+v, should skip notify", notifyRuleId, eventCopy)
continue
}
// notify
for i := range notifyRule.NotifyConfigs {
err := NotifyRuleMatchCheck(&notifyRule.NotifyConfigs[i], eventCopy)
if err != nil {
logger.Errorf("notify_id: %d, event:%+v, channel_id:%d, template_id: %d, notify_config:%+v, err:%v", notifyRuleId, eventCopy, notifyRule.NotifyConfigs[i].ChannelID, notifyRule.NotifyConfigs[i].TemplateID, notifyRule.NotifyConfigs[i], err)
continue
}
notifyChannel := e.notifyChannelCache.Get(notifyRule.NotifyConfigs[i].ChannelID)
messageTemplate := e.messageTemplateCache.Get(notifyRule.NotifyConfigs[i].TemplateID)
if notifyChannel == nil {
sender.NotifyRecord(e.ctx, []*models.AlertCurEvent{eventCopy}, notifyRuleId, fmt.Sprintf("notify_channel_id:%d", notifyRule.NotifyConfigs[i].ChannelID), "", "", errors.New("notify_channel not found"))
logger.Warningf("notify_id: %d, event:%+v, channel_id:%d, template_id: %d, notify_channel not found", notifyRuleId, eventCopy, notifyRule.NotifyConfigs[i].ChannelID, notifyRule.NotifyConfigs[i].TemplateID)
continue
}
if notifyChannel.RequestType != "flashduty" && notifyChannel.RequestType != "pagerduty" && messageTemplate == nil {
logger.Warningf("notify_id: %d, channel_name: %v, event:%+v, template_id: %d, message_template not found", notifyRuleId, notifyChannel.Ident, eventCopy, notifyRule.NotifyConfigs[i].TemplateID)
sender.NotifyRecord(e.ctx, []*models.AlertCurEvent{eventCopy}, notifyRuleId, notifyChannel.Name, "", "", errors.New("message_template not found"))
continue
}
go SendByNotifyRule(e.ctx, e.userCache, e.userGroupCache, e.notifyChannelCache, e.configCvalCache, []*models.AlertCurEvent{eventCopy}, notifyRuleId, &notifyRule.NotifyConfigs[i], notifyChannel, messageTemplate)
}
}
}
}
func shouldSkipNotify(ctx *ctx.Context, event *models.AlertCurEvent, notifyRuleId int64) bool {
if event == nil {
// 如果 eventCopy 为 nil说明 eventCopy 被 processor drop 掉了, 不再发送通知
return true
}
if event.IsRecovered && event.NotifyRecovered == 0 {
// 如果 eventCopy 是恢复事件,且 NotifyRecovered 为 0则不发送通知
return true
}
return false
}
func HandleEventPipeline(pipelineConfigs []models.PipelineConfig, eventOrigin, event *models.AlertCurEvent, eventProcessorCache *memsto.EventProcessorCacheType, ctx *ctx.Context, id int64, from string) *models.AlertCurEvent {
workflowEngine := engine.NewWorkflowEngine(ctx)
for _, pipelineConfig := range pipelineConfigs {
if !pipelineConfig.Enable {
continue
}
eventPipeline := eventProcessorCache.Get(pipelineConfig.PipelineId)
if eventPipeline == nil {
logger.Warningf("processor_by_%s_id:%d pipeline_id:%d, event pipeline not found, event: %+v", from, id, pipelineConfig.PipelineId, event)
continue
}
if !PipelineApplicable(eventPipeline, event) {
logger.Debugf("processor_by_%s_id:%d pipeline_id:%d, event pipeline not applicable, event: %+v", from, id, pipelineConfig.PipelineId, event)
continue
}
// 统一使用工作流引擎执行(兼容线性模式和工作流模式)
triggerCtx := &models.WorkflowTriggerContext{
Mode: models.TriggerModeEvent,
TriggerBy: from,
}
resultEvent, result, err := workflowEngine.Execute(eventPipeline, event, triggerCtx)
if err != nil {
logger.Errorf("processor_by_%s_id:%d pipeline_id:%d, pipeline execute error: %v", from, id, pipelineConfig.PipelineId, err)
continue
}
if resultEvent == nil {
logger.Infof("processor_by_%s_id:%d pipeline_id:%d, event dropped, event: %+v", from, id, pipelineConfig.PipelineId, eventOrigin)
if from == "notify_rule" {
sender.NotifyRecord(ctx, []*models.AlertCurEvent{eventOrigin}, id, "", "", result.Message, fmt.Errorf("processor_by_%s_id:%d pipeline_id:%d, drop by pipeline", from, id, pipelineConfig.PipelineId))
}
return nil
}
event = resultEvent
logger.Infof("processor_by_%s_id:%d pipeline_id:%d, pipeline executed, status:%s, message:%s", from, id, pipelineConfig.PipelineId, result.Status, result.Message)
}
event.FE2DB()
event.FillTagsMap()
return event
}
func PipelineApplicable(pipeline *models.EventPipeline, event *models.AlertCurEvent) bool {
if pipeline == nil {
return true
}
if !pipeline.FilterEnable {
return true
}
tagMatch := true
if len(pipeline.LabelFilters) > 0 {
// Deep copy to avoid concurrent map writes on cached objects
labelFiltersCopy := make([]models.TagFilter, len(pipeline.LabelFilters))
copy(labelFiltersCopy, pipeline.LabelFilters)
for i := range labelFiltersCopy {
if labelFiltersCopy[i].Func == "" {
labelFiltersCopy[i].Func = labelFiltersCopy[i].Op
}
}
tagFilters, err := models.ParseTagFilter(labelFiltersCopy)
if err != nil {
logger.Errorf("pipeline applicable failed to parse tag filter: %v event:%+v pipeline:%+v", err, event, pipeline)
return false
}
tagMatch = common.MatchTags(event.TagsMap, tagFilters)
}
attributesMatch := true
if len(pipeline.AttrFilters) > 0 {
// Deep copy to avoid concurrent map writes on cached objects
attrFiltersCopy := make([]models.TagFilter, len(pipeline.AttrFilters))
copy(attrFiltersCopy, pipeline.AttrFilters)
tagFilters, err := models.ParseTagFilter(attrFiltersCopy)
if err != nil {
logger.Errorf("pipeline applicable failed to parse tag filter: %v event:%+v pipeline:%+v err:%v", tagFilters, event, pipeline, err)
return false
}
attributesMatch = common.MatchTags(event.JsonTagsAndValue(), tagFilters)
}
return tagMatch && attributesMatch
}
func NotifyRuleMatchCheck(notifyConfig *models.NotifyConfig, event *models.AlertCurEvent) error {
tm := time.Unix(event.TriggerTime, 0)
triggerTime := tm.Format("15:04")
triggerWeek := int(tm.Weekday())
timeMatch := false
if len(notifyConfig.TimeRanges) == 0 {
timeMatch = true
}
for j := range notifyConfig.TimeRanges {
if timeMatch {
break
}
enableStime := notifyConfig.TimeRanges[j].Start
enableEtime := notifyConfig.TimeRanges[j].End
enableDaysOfWeek := notifyConfig.TimeRanges[j].Week
length := len(enableDaysOfWeek)
// enableStime,enableEtime,enableDaysOfWeek三者长度肯定相同这里循环一个即可
for i := 0; i < length; i++ {
if enableDaysOfWeek[i] != triggerWeek {
continue
}
if enableStime < enableEtime {
if enableEtime == "23:59" {
// 02:00-23:59这种情况做个特殊处理相当于左闭右闭区间了
if triggerTime < enableStime {
// mute, 即没生效
continue
}
} else {
// 02:00-04:00 或者 02:00-24:00
if triggerTime < enableStime || triggerTime >= enableEtime {
// mute, 即没生效
continue
}
}
} else if enableStime > enableEtime {
// 21:00-09:00
if triggerTime < enableStime && triggerTime >= enableEtime {
// mute, 即没生效
continue
}
}
// 到这里说明当前时刻在告警规则的某组生效时间范围内,即没有 mute直接返回 false
timeMatch = true
break
}
}
if !timeMatch {
return fmt.Errorf("event time not match time filter")
}
severityMatch := false
for i := range notifyConfig.Severities {
if notifyConfig.Severities[i] == event.Severity {
severityMatch = true
}
}
if !severityMatch {
return fmt.Errorf("event severity not match severity filter")
}
tagMatch := true
if len(notifyConfig.LabelKeys) > 0 {
// Deep copy to avoid concurrent map writes on cached objects
labelKeysCopy := make([]models.TagFilter, len(notifyConfig.LabelKeys))
copy(labelKeysCopy, notifyConfig.LabelKeys)
for i := range labelKeysCopy {
if labelKeysCopy[i].Func == "" {
labelKeysCopy[i].Func = labelKeysCopy[i].Op
}
}
tagFilters, err := models.ParseTagFilter(labelKeysCopy)
if err != nil {
logger.Errorf("notify send failed to parse tag filter: %v event:%+v notify_config:%+v", err, event, notifyConfig)
return fmt.Errorf("failed to parse tag filter: %v", err)
}
tagMatch = common.MatchTags(event.TagsMap, tagFilters)
}
if !tagMatch {
return fmt.Errorf("event tag not match tag filter")
}
attributesMatch := true
if len(notifyConfig.Attributes) > 0 {
// Deep copy to avoid concurrent map writes on cached objects
attributesCopy := make([]models.TagFilter, len(notifyConfig.Attributes))
copy(attributesCopy, notifyConfig.Attributes)
tagFilters, err := models.ParseTagFilter(attributesCopy)
if err != nil {
logger.Errorf("notify send failed to parse tag filter: %v event:%+v notify_config:%+v err:%v", tagFilters, event, notifyConfig, err)
return fmt.Errorf("failed to parse tag filter: %v", err)
}
attributesMatch = common.MatchTags(event.JsonTagsAndValue(), tagFilters)
}
if !attributesMatch {
return fmt.Errorf("event attributes not match attributes filter")
}
logger.Infof("notify send timeMatch:%v severityMatch:%v tagMatch:%v attributesMatch:%v event:%+v notify_config:%+v", timeMatch, severityMatch, tagMatch, attributesMatch, event, notifyConfig)
return nil
}
func GetNotifyConfigParams(notifyConfig *models.NotifyConfig, contactKey string, userCache *memsto.UserCacheType, userGroupCache *memsto.UserGroupCacheType) ([]string, []int64, []string, map[string]string) {
customParams := make(map[string]string)
var flashDutyChannelIDs []int64
var pagerDutyRoutingKeys []string
var userInfoParams models.CustomParams
for key, value := range notifyConfig.Params {
switch key {
case "user_ids", "user_group_ids", "ids":
if data, err := json.Marshal(value); err == nil {
var ids []int64
if json.Unmarshal(data, &ids) == nil {
if key == "user_ids" {
userInfoParams.UserIDs = ids
} else if key == "user_group_ids" {
userInfoParams.UserGroupIDs = ids
} else if key == "ids" {
flashDutyChannelIDs = ids
}
}
}
case "pagerduty_integration_keys", "pagerduty_integration_ids":
if key == "pagerduty_integration_ids" {
// 不处理ids直接跳过这个字段只给前端标记用
continue
}
if data, err := json.Marshal(value); err == nil {
var keys []string
if json.Unmarshal(data, &keys) == nil {
pagerDutyRoutingKeys = keys
break
}
}
default:
// 避免直接 value.(string) 导致 panic支持多种类型并统一为字符串
customParams[key] = value.(string)
}
}
if len(userInfoParams.UserIDs) == 0 && len(userInfoParams.UserGroupIDs) == 0 {
return []string{}, flashDutyChannelIDs, pagerDutyRoutingKeys, customParams
}
userIds := make([]int64, 0)
userIds = append(userIds, userInfoParams.UserIDs...)
if len(userInfoParams.UserGroupIDs) > 0 {
userGroups := userGroupCache.GetByUserGroupIds(userInfoParams.UserGroupIDs)
for _, userGroup := range userGroups {
userIds = append(userIds, userGroup.UserIds...)
}
}
users := userCache.GetByUserIds(userIds)
visited := make(map[int64]bool)
sendtos := make([]string, 0)
for _, user := range users {
if visited[user.Id] {
continue
}
var sendto string
if contactKey == "phone" {
sendto = user.Phone
} else if contactKey == "email" {
sendto = user.Email
} else {
sendto, _ = user.ExtractToken(contactKey)
}
if sendto == "" {
continue
}
sendtos = append(sendtos, sendto)
visited[user.Id] = true
}
return sendtos, flashDutyChannelIDs, pagerDutyRoutingKeys, customParams
}
func SendNotifyRuleMessage(ctx *ctx.Context, userCache *memsto.UserCacheType, userGroupCache *memsto.UserGroupCacheType, notifyChannelCache *memsto.NotifyChannelCacheType, configCvalCache *memsto.CvalCache,
events []*models.AlertCurEvent, notifyRuleId int64, notifyConfig *models.NotifyConfig, notifyChannel *models.NotifyChannelConfig, messageTemplate *models.MessageTemplate) {
if len(events) == 0 {
logger.Errorf("notify_id: %d events is empty", notifyRuleId)
return
}
siteInfo := configCvalCache.GetSiteInfo()
tplContent := make(map[string]interface{})
if notifyChannel.RequestType != "flashduty" {
tplContent = messageTemplate.RenderEvent(events, siteInfo.SiteUrl)
}
var contactKey string
if notifyChannel.ParamConfig != nil && notifyChannel.ParamConfig.UserInfo != nil {
contactKey = notifyChannel.ParamConfig.UserInfo.ContactKey
}
sendtos, flashDutyChannelIDs, pagerdutyRoutingKeys, customParams := GetNotifyConfigParams(notifyConfig, contactKey, userCache, userGroupCache)
switch notifyChannel.RequestType {
case "flashduty":
if len(flashDutyChannelIDs) == 0 {
flashDutyChannelIDs = []int64{0} // 如果 flashduty 通道没有配置,则使用 0, 给 SendFlashDuty 判断使用, 不给 flashduty 传 channel_id 参数
}
for i := range flashDutyChannelIDs {
start := time.Now()
respBody, err := notifyChannel.SendFlashDuty(events, flashDutyChannelIDs[i], notifyChannelCache.GetHttpClient(notifyChannel.ID))
respBody = fmt.Sprintf("send_time: %s duration: %d ms %s", time.Now().Format("2006-01-02 15:04:05"), time.Since(start).Milliseconds(), respBody)
logger.Infof("duty_sender notify_id: %d, channel_name: %v, event:%+v, IntegrationUrl: %v dutychannel_id: %v, respBody: %v, err: %v", notifyRuleId, notifyChannel.Name, events[0], notifyChannel.RequestConfig.FlashDutyRequestConfig.IntegrationUrl, flashDutyChannelIDs[i], respBody, err)
sender.NotifyRecord(ctx, events, notifyRuleId, notifyChannel.Name, strconv.FormatInt(flashDutyChannelIDs[i], 10), respBody, err)
}
case "pagerduty":
for _, routingKey := range pagerdutyRoutingKeys {
start := time.Now()
respBody, err := notifyChannel.SendPagerDuty(events, routingKey, siteInfo.SiteUrl, notifyChannelCache.GetHttpClient(notifyChannel.ID))
respBody = fmt.Sprintf("send_time: %s duration: %d ms %s", time.Now().Format("2006-01-02 15:04:05"), time.Since(start).Milliseconds(), respBody)
logger.Infof("pagerduty_sender notify_id: %d, channel_name: %v, event:%+v, respBody: %v, err: %v", notifyRuleId, notifyChannel.Name, events[0], respBody, err)
sender.NotifyRecord(ctx, events, notifyRuleId, notifyChannel.Name, "", respBody, err)
}
case "http":
// 使用队列模式处理 http 通知
// 创建通知任务
task := &memsto.NotifyTask{
Events: events,
NotifyRuleId: notifyRuleId,
NotifyChannel: notifyChannel,
TplContent: tplContent,
CustomParams: customParams,
Sendtos: sendtos,
}
// 将任务加入队列
success := notifyChannelCache.EnqueueNotifyTask(task)
if !success {
logger.Errorf("failed to enqueue notify task for channel %d, notify_id: %d", notifyChannel.ID, notifyRuleId)
// 如果入队失败,记录错误通知
sender.NotifyRecord(ctx, events, notifyRuleId, notifyChannel.Name, getSendTarget(customParams, sendtos), "", errors.New("failed to enqueue notify task, queue is full"))
}
case "smtp":
notifyChannel.SendEmail(notifyRuleId, events, tplContent, sendtos, notifyChannelCache.GetSmtpClient(notifyChannel.ID))
case "script":
start := time.Now()
target, res, err := notifyChannel.SendScript(events, tplContent, customParams, sendtos)
res = fmt.Sprintf("send_time: %s duration: %d ms %s", time.Now().Format("2006-01-02 15:04:05"), time.Since(start).Milliseconds(), res)
logger.Infof("script_sender notify_id: %d, channel_name: %v, event:%+v, tplContent:%s, customParams:%v, target:%s, res:%s, err:%v", notifyRuleId, notifyChannel.Name, events[0], tplContent, customParams, target, res, err)
sender.NotifyRecord(ctx, events, notifyRuleId, notifyChannel.Name, target, res, err)
default:
logger.Warningf("notify_id: %d, channel_name: %v, event:%+v send type not found", notifyRuleId, notifyChannel.Name, events[0])
}
}
func NeedBatchContacts(requestConfig *models.HTTPRequestConfig) bool {
b, _ := json.Marshal(requestConfig)
return strings.Contains(string(b), "$sendtos")
}
// HandleEventNotify 处理event事件的主逻辑
// event: 告警/恢复事件
// isSubscribe: 告警事件是否由subscribe的配置产生
func (e *Dispatch) HandleEventNotify(event *models.AlertCurEvent, isSubscribe bool) {
go e.HandleEventWithNotifyRule(event)
if event.IsRecovered && event.NotifyRecovered == 0 {
return
}
rule := e.alertRuleCache.Get(event.RuleId)
if rule == nil {
return
}
fillUsers(event, e.userCache, e.userGroupCache)
var (
@@ -145,8 +640,7 @@ func (e *Dispatch) HandleEventNotify(event *models.AlertCurEvent, isSubscribe bo
notifyTarget.AndMerge(handler(rule, event, notifyTarget, e))
}
// 处理事件发送,这里用一个goroutine处理一个event的所有发送事件
go e.Send(rule, event, notifyTarget)
go e.Send(rule, event, notifyTarget, isSubscribe)
// 如果是不是订阅规则出现的event, 则需要处理订阅规则的event
if !isSubscribe {
@@ -185,6 +679,10 @@ func (e *Dispatch) handleSub(sub *models.AlertSubscribe, event models.AlertCurEv
return
}
if !sub.MatchCate(event.Cate) {
return
}
if !common.MatchTags(event.TagsMap, sub.ITags) {
return
}
@@ -209,18 +707,20 @@ func (e *Dispatch) handleSub(sub *models.AlertSubscribe, event models.AlertCurEv
}
}
e.Astats.CounterSubEventTotal.WithLabelValues(event.GroupName).Inc()
sub.ModifyEvent(&event)
LogEvent(&event, "subscribe")
event.SubRuleId = sub.Id
LogEvent(&event, "subscribe")
e.HandleEventNotify(&event, true)
}
func (e *Dispatch) Send(rule *models.AlertRule, event *models.AlertCurEvent, notifyTarget *NotifyTarget) {
func (e *Dispatch) Send(rule *models.AlertRule, event *models.AlertCurEvent, notifyTarget *NotifyTarget, isSubscribe bool) {
needSend := e.BeforeSenderHook(event)
if needSend {
for channel, uids := range notifyTarget.ToChannelUserMap() {
msgCtx := sender.BuildMessageContext(rule, []*models.AlertCurEvent{event}, uids, e.userCache, e.astats)
msgCtx := sender.BuildMessageContext(e.ctx, rule, []*models.AlertCurEvent{event},
uids, e.userCache, e.Astats)
e.RwLock.RLock()
s := e.Senders[channel]
e.RwLock.RUnlock()
@@ -228,18 +728,119 @@ func (e *Dispatch) Send(rule *models.AlertRule, event *models.AlertCurEvent, not
logger.Debugf("no sender for channel: %s", channel)
continue
}
var event *models.AlertCurEvent
if len(msgCtx.Events) > 0 {
event = msgCtx.Events[0]
}
logger.Debugf("send to channel:%s event:%+v users:%+v", channel, event, msgCtx.Users)
s.Send(msgCtx)
}
}
// handle event callbacks
sender.SendCallbacks(e.ctx, notifyTarget.ToCallbackList(), event, e.targetCache, e.userCache, e.notifyConfigCache.GetIbex(), e.astats)
e.SendCallbacks(rule, notifyTarget, event)
// handle global webhooks
sender.SendWebhooks(notifyTarget.ToWebhookList(), event, e.astats)
if !event.OverrideGlobalWebhook() {
if e.alerting.WebhookBatchSend {
sender.BatchSendWebhooks(e.ctx, notifyTarget.ToWebhookMap(), event, e.Astats)
} else {
sender.SingleSendWebhooks(e.ctx, notifyTarget.ToWebhookMap(), event, e.Astats)
}
}
// handle plugin call
go sender.MayPluginNotify(e.genNoticeBytes(event), e.notifyConfigCache.GetNotifyScript(), e.astats)
go sender.MayPluginNotify(e.ctx, e.genNoticeBytes(event), e.notifyConfigCache.
GetNotifyScript(), e.Astats, event)
if !isSubscribe {
// handle ibex callbacks
e.HandleIbex(rule, event)
}
}
func (e *Dispatch) SendCallbacks(rule *models.AlertRule, notifyTarget *NotifyTarget, event *models.AlertCurEvent) {
uids := notifyTarget.ToUidList()
urls := notifyTarget.ToCallbackList()
whMap := notifyTarget.ToWebhookMap()
ogw := event.OverrideGlobalWebhook()
for _, urlStr := range urls {
if len(urlStr) == 0 {
continue
}
cbCtx := sender.BuildCallBackContext(e.ctx, urlStr, rule, []*models.AlertCurEvent{event}, uids, e.userCache, e.alerting.WebhookBatchSend, e.Astats)
if wh, ok := whMap[cbCtx.CallBackURL]; !ogw && ok && wh.Enable {
logger.Debugf("SendCallbacks: webhook[%s] is in global conf.", cbCtx.CallBackURL)
continue
}
if strings.HasPrefix(urlStr, "${ibex}") {
e.CallBacks[models.IbexDomain].CallBack(cbCtx)
continue
}
if !(strings.HasPrefix(urlStr, "http://") || strings.HasPrefix(urlStr, "https://")) {
cbCtx.CallBackURL = "http://" + urlStr
}
parsedURL, err := url.Parse(urlStr)
if err != nil {
logger.Errorf("SendCallbacks: failed to url.Parse(urlStr=%s): %v", urlStr, err)
continue
}
// process feishu card
if parsedURL.Host == models.FeishuDomain && parsedURL.Query().Get("card") == "1" {
e.CallBacks[models.FeishuCardDomain].CallBack(cbCtx)
continue
}
// process lark card
if parsedURL.Host == models.LarkDomain && parsedURL.Query().Get("card") == "1" {
e.CallBacks[models.LarkCardDomain].CallBack(cbCtx)
continue
}
callBacker, ok := e.CallBacks[parsedURL.Host]
if ok {
callBacker.CallBack(cbCtx)
} else {
e.CallBacks[models.DefaultDomain].CallBack(cbCtx)
}
}
}
func (e *Dispatch) HandleIbex(rule *models.AlertRule, event *models.AlertCurEvent) {
// 解析 RuleConfig 字段
var ruleConfig struct {
TaskTpls []*models.Tpl `json:"task_tpls"`
}
json.Unmarshal([]byte(rule.RuleConfig), &ruleConfig)
if event.IsRecovered {
// 恢复事件不需要走故障自愈的逻辑
return
}
for _, t := range ruleConfig.TaskTpls {
if t.TplId == 0 {
continue
}
if len(t.Host) == 0 {
sender.CallIbex(e.ctx, t.TplId, event.TargetIdent,
e.taskTplsCache, e.targetCache, e.userCache, event, "")
continue
}
for _, host := range t.Host {
sender.CallIbex(e.ctx, t.TplId, host,
e.taskTplsCache, e.targetCache, e.userCache, event, "")
}
}
}
type Notice struct {
@@ -303,3 +904,22 @@ func mapKeys(m map[int64]struct{}) []int64 {
}
return lst
}
func getSendTarget(customParams map[string]string, sendtos []string) string {
if len(customParams) == 0 {
return strings.Join(sendtos, ",")
}
values := make([]string, 0)
for _, value := range customParams {
runes := []rune(value)
if len(runes) <= 4 {
values = append(values, value)
} else {
maskedValue := string(runes[:len(runes)-4]) + "****"
values = append(values, maskedValue)
}
}
return strings.Join(values, ",")
}

View File

@@ -18,15 +18,18 @@ func LogEvent(event *models.AlertCurEvent, location string, err ...error) {
}
logger.Infof(
"event(%s %s) %s: rule_id=%d cluster:%s %v%s@%d %s",
"event(%s %s) %s: rule_id=%d sub_id:%d notify_rule_ids:%v cluster:%s %v%s@%d last_eval_time:%d %s",
event.Hash,
status,
location,
event.RuleId,
event.SubRuleId,
event.NotifyRuleIds,
event.Cluster,
event.TagsJSON,
event.TriggerValue,
event.TriggerTime,
event.LastEvalTime,
message,
)
}

View File

@@ -76,12 +76,16 @@ func (s *NotifyTarget) ToCallbackList() []string {
return callbacks
}
func (s *NotifyTarget) ToWebhookList() []*models.Webhook {
webhooks := make([]*models.Webhook, 0, len(s.webhooks))
for _, wh := range s.webhooks {
webhooks = append(webhooks, wh)
func (s *NotifyTarget) ToWebhookMap() map[string]*models.Webhook {
return s.webhooks
}
func (s *NotifyTarget) ToUidList() []int64 {
uids := make([]int64, 0, len(s.userMap))
for uid, _ := range s.userMap {
uids = append(uids, uid)
}
return webhooks
return uids
}
// Dispatch 抽象由告警事件到信息接收者的路由策略

View File

@@ -3,17 +3,17 @@ package eval
import (
"context"
"fmt"
"strconv"
"time"
"github.com/ccfos/nightingale/v6/alert/aconf"
"github.com/ccfos/nightingale/v6/alert/astats"
"github.com/ccfos/nightingale/v6/alert/naming"
"github.com/ccfos/nightingale/v6/alert/process"
"github.com/ccfos/nightingale/v6/datasource/commons/eslike"
"github.com/ccfos/nightingale/v6/memsto"
"github.com/ccfos/nightingale/v6/pkg/ctx"
"github.com/ccfos/nightingale/v6/prom"
"github.com/ccfos/nightingale/v6/tdengine"
"github.com/toolkits/pkg/logger"
)
@@ -25,14 +25,14 @@ type Scheduler struct {
aconf aconf.Alert
alertRuleCache *memsto.AlertRuleCacheType
targetCache *memsto.TargetCacheType
busiGroupCache *memsto.BusiGroupCacheType
alertMuteCache *memsto.AlertMuteCacheType
datasourceCache *memsto.DatasourceCacheType
alertRuleCache *memsto.AlertRuleCacheType
targetCache *memsto.TargetCacheType
targetsOfAlertRuleCache *memsto.TargetsOfAlertRuleCacheType
busiGroupCache *memsto.BusiGroupCacheType
alertMuteCache *memsto.AlertMuteCacheType
datasourceCache *memsto.DatasourceCacheType
promClients *prom.PromClientMap
tdengineClients *tdengine.TdengineClientMap
promClients *prom.PromClientMap
naming *naming.Naming
@@ -40,28 +40,30 @@ type Scheduler struct {
stats *astats.Stats
}
func NewScheduler(aconf aconf.Alert, externalProcessors *process.ExternalProcessorsType, arc *memsto.AlertRuleCacheType, targetCache *memsto.TargetCacheType,
func NewScheduler(aconf aconf.Alert, externalProcessors *process.ExternalProcessorsType, arc *memsto.AlertRuleCacheType,
targetCache *memsto.TargetCacheType, toarc *memsto.TargetsOfAlertRuleCacheType,
busiGroupCache *memsto.BusiGroupCacheType, alertMuteCache *memsto.AlertMuteCacheType, datasourceCache *memsto.DatasourceCacheType,
promClients *prom.PromClientMap, tdengineClients *tdengine.TdengineClientMap, naming *naming.Naming, ctx *ctx.Context, stats *astats.Stats) *Scheduler {
promClients *prom.PromClientMap, naming *naming.Naming, ctx *ctx.Context, stats *astats.Stats) *Scheduler {
scheduler := &Scheduler{
aconf: aconf,
alertRules: make(map[string]*AlertRuleWorker),
ExternalProcessors: externalProcessors,
alertRuleCache: arc,
targetCache: targetCache,
busiGroupCache: busiGroupCache,
alertMuteCache: alertMuteCache,
datasourceCache: datasourceCache,
alertRuleCache: arc,
targetCache: targetCache,
targetsOfAlertRuleCache: toarc,
busiGroupCache: busiGroupCache,
alertMuteCache: alertMuteCache,
datasourceCache: datasourceCache,
promClients: promClients,
tdengineClients: tdengineClients,
naming: naming,
promClients: promClients,
naming: naming,
ctx: ctx,
stats: stats,
}
eslike.SetEsIndexPatternCacheType(memsto.NewEsIndexPatternCacheType(ctx))
go scheduler.LoopSyncRules(context.Background())
return scheduler
@@ -91,11 +93,10 @@ func (s *Scheduler) syncAlertRules() {
}
ruleType := rule.GetRuleType()
if rule.IsPrometheusRule() || rule.IsLokiRule() || rule.IsTdengineRule() {
datasourceIds := s.promClients.Hit(rule.DatasourceIdsJson)
datasourceIds = append(datasourceIds, s.tdengineClients.Hit(rule.DatasourceIdsJson)...)
if rule.IsPrometheusRule() || rule.IsInnerRule() {
datasourceIds := s.datasourceCache.GetIDsByDsCateAndQueries(rule.Cate, rule.DatasourceQueries)
for _, dsId := range datasourceIds {
if !naming.DatasourceHashRing.IsHit(dsId, fmt.Sprintf("%d", rule.Id), s.aconf.Heartbeat.Endpoint) {
if !naming.DatasourceHashRing.IsHit(strconv.FormatInt(dsId, 10), fmt.Sprintf("%d", rule.Id), s.aconf.Heartbeat.Endpoint) {
continue
}
ds := s.datasourceCache.GetById(dsId)
@@ -113,23 +114,24 @@ func (s *Scheduler) syncAlertRules() {
logger.Debugf("datasource %d status is %s", dsId, ds.Status)
continue
}
processor := process.NewProcessor(rule, dsId, s.alertRuleCache, s.targetCache, s.busiGroupCache, s.alertMuteCache, s.datasourceCache, s.ctx, s.stats)
processor := process.NewProcessor(s.aconf.Heartbeat.EngineName, rule, dsId, s.alertRuleCache, s.targetCache, s.targetsOfAlertRuleCache, s.busiGroupCache, s.alertMuteCache, s.datasourceCache, s.ctx, s.stats)
alertRule := NewAlertRuleWorker(rule, dsId, processor, s.promClients, s.tdengineClients, s.ctx)
alertRule := NewAlertRuleWorker(rule, dsId, processor, s.promClients, s.ctx)
alertRuleWorkers[alertRule.Hash()] = alertRule
}
} else if rule.IsHostRule() && s.ctx.IsCenter {
} else if rule.IsHostRule() {
// all host rule will be processed by center instance
if !naming.DatasourceHashRing.IsHit(naming.HostDatasource, fmt.Sprintf("%d", rule.Id), s.aconf.Heartbeat.Endpoint) {
if !naming.DatasourceHashRing.IsHit(s.aconf.Heartbeat.EngineName, strconv.FormatInt(rule.Id, 10), s.aconf.Heartbeat.Endpoint) {
continue
}
processor := process.NewProcessor(rule, 0, s.alertRuleCache, s.targetCache, s.busiGroupCache, s.alertMuteCache, s.datasourceCache, s.ctx, s.stats)
alertRule := NewAlertRuleWorker(rule, 0, processor, s.promClients, s.tdengineClients, s.ctx)
processor := process.NewProcessor(s.aconf.Heartbeat.EngineName, rule, 0, s.alertRuleCache, s.targetCache, s.targetsOfAlertRuleCache, s.busiGroupCache, s.alertMuteCache, s.datasourceCache, s.ctx, s.stats)
alertRule := NewAlertRuleWorker(rule, 0, processor, s.promClients, s.ctx)
alertRuleWorkers[alertRule.Hash()] = alertRule
} else {
// 如果 rule 不是通过 prometheus engine 来告警的,则创建为 externalRule
// if rule is not processed by prometheus engine, create it as externalRule
for _, dsId := range rule.DatasourceIdsJson {
dsIds := s.datasourceCache.GetIDsByDsCateAndQueries(rule.Cate, rule.DatasourceQueries)
for _, dsId := range dsIds {
ds := s.datasourceCache.GetById(dsId)
if ds == nil {
logger.Debugf("datasource %d not found", dsId)
@@ -140,7 +142,7 @@ func (s *Scheduler) syncAlertRules() {
logger.Debugf("datasource %d status is %s", dsId, ds.Status)
continue
}
processor := process.NewProcessor(rule, dsId, s.alertRuleCache, s.targetCache, s.busiGroupCache, s.alertMuteCache, s.datasourceCache, s.ctx, s.stats)
processor := process.NewProcessor(s.aconf.Heartbeat.EngineName, rule, dsId, s.alertRuleCache, s.targetCache, s.targetsOfAlertRuleCache, s.busiGroupCache, s.alertMuteCache, s.datasourceCache, s.ctx, s.stats)
externalRuleWorkers[processor.Key()] = processor
}
}
@@ -149,6 +151,7 @@ func (s *Scheduler) syncAlertRules() {
for hash, rule := range alertRuleWorkers {
if _, has := s.alertRules[hash]; !has {
rule.Prepare()
time.Sleep(time.Duration(20) * time.Millisecond)
rule.Start()
s.alertRules[hash] = rule
}

File diff suppressed because it is too large Load Diff

458
alert/eval/eval_test.go Normal file
View File

@@ -0,0 +1,458 @@
package eval
import (
"reflect"
"testing"
"golang.org/x/exp/slices"
)
var (
reHashTagIndex1 = map[uint64][][]uint64{
1: {
{1, 2}, {3, 4},
},
2: {
{5, 6}, {7, 8},
},
}
reHashTagIndex2 = map[uint64][][]uint64{
1: {
{9, 10}, {11, 12},
},
3: {
{13, 14}, {15, 16},
},
}
seriesTagIndex1 = map[uint64][]uint64{
1: {1, 2, 3, 4},
2: {5, 6, 7, 8},
}
seriesTagIndex2 = map[uint64][]uint64{
1: {9, 10, 11, 12},
3: {13, 14, 15, 16},
}
)
func Test_originalJoin(t *testing.T) {
type args struct {
seriesTagIndex1 map[uint64][]uint64
seriesTagIndex2 map[uint64][]uint64
}
tests := []struct {
name string
args args
want map[uint64][]uint64
}{
{
name: "original join",
args: args{
seriesTagIndex1: map[uint64][]uint64{
1: {1, 2, 3, 4},
2: {5, 6, 7, 8},
},
seriesTagIndex2: map[uint64][]uint64{
1: {9, 10, 11, 12},
3: {13, 14, 15, 16},
},
},
want: map[uint64][]uint64{
1: {1, 2, 3, 4, 9, 10, 11, 12},
2: {5, 6, 7, 8},
3: {13, 14, 15, 16},
},
},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
if got := originalJoin(tt.args.seriesTagIndex1, tt.args.seriesTagIndex2); !reflect.DeepEqual(got, tt.want) {
t.Errorf("originalJoin() = %v, want %v", got, tt.want)
}
})
}
}
func Test_exclude(t *testing.T) {
type args struct {
reHashTagIndex1 map[uint64][][]uint64
reHashTagIndex2 map[uint64][][]uint64
}
tests := []struct {
name string
args args
want map[uint64][]uint64
}{
{
name: "left exclude",
args: args{
reHashTagIndex1: reHashTagIndex1,
reHashTagIndex2: reHashTagIndex2,
},
want: map[uint64][]uint64{
0: {5, 6},
1: {7, 8},
},
},
{
name: "right exclude",
args: args{
reHashTagIndex1: reHashTagIndex2,
reHashTagIndex2: reHashTagIndex1,
},
want: map[uint64][]uint64{
3: {13, 14},
4: {15, 16},
},
},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
if got := exclude(tt.args.reHashTagIndex1, tt.args.reHashTagIndex2); !allValueDeepEqual(flatten(got), tt.want) {
t.Errorf("exclude() = %v, want %v", got, tt.want)
}
})
}
}
func Test_noneJoin(t *testing.T) {
type args struct {
seriesTagIndex1 map[uint64][]uint64
seriesTagIndex2 map[uint64][]uint64
}
tests := []struct {
name string
args args
want map[uint64][]uint64
}{
{
name: "none join, direct splicing",
args: args{
seriesTagIndex1: seriesTagIndex1,
seriesTagIndex2: seriesTagIndex2,
},
want: map[uint64][]uint64{
0: {1, 2, 3, 4},
1: {5, 6, 7, 8},
2: {9, 10, 11, 12},
3: {13, 14, 15, 16},
},
},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
if got := noneJoin(tt.args.seriesTagIndex1, tt.args.seriesTagIndex2); !allValueDeepEqual(got, tt.want) {
t.Errorf("noneJoin() = %v, want %v", got, tt.want)
}
})
}
}
func Test_cartesianJoin(t *testing.T) {
type args struct {
seriesTagIndex1 map[uint64][]uint64
seriesTagIndex2 map[uint64][]uint64
}
tests := []struct {
name string
args args
want map[uint64][]uint64
}{
{
name: "cartesian join",
args: args{
seriesTagIndex1: seriesTagIndex1,
seriesTagIndex2: seriesTagIndex2,
},
want: map[uint64][]uint64{
0: {1, 2, 3, 4, 9, 10, 11, 12},
1: {5, 6, 7, 8, 9, 10, 11, 12},
2: {5, 6, 7, 8, 13, 14, 15, 16},
3: {1, 2, 3, 4, 13, 14, 15, 16},
},
},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
if got := cartesianJoin(tt.args.seriesTagIndex1, tt.args.seriesTagIndex2); !allValueDeepEqual(got, tt.want) {
t.Errorf("cartesianJoin() = %v, want %v", got, tt.want)
}
})
}
}
func Test_onJoin(t *testing.T) {
type args struct {
reHashTagIndex1 map[uint64][][]uint64
reHashTagIndex2 map[uint64][][]uint64
joinType JoinType
}
tests := []struct {
name string
args args
want map[uint64][]uint64
}{
{
name: "left join",
args: args{
reHashTagIndex1: reHashTagIndex1,
reHashTagIndex2: reHashTagIndex2,
joinType: Left,
},
want: map[uint64][]uint64{
1: {1, 2, 9, 10},
2: {3, 4, 9, 10},
3: {1, 2, 11, 12},
4: {3, 4, 11, 12},
5: {5, 6},
6: {7, 8},
},
},
{
name: "right join",
args: args{
reHashTagIndex1: reHashTagIndex2,
reHashTagIndex2: reHashTagIndex1,
joinType: Right,
},
want: map[uint64][]uint64{
1: {1, 2, 9, 10},
2: {3, 4, 9, 10},
3: {1, 2, 11, 12},
4: {3, 4, 11, 12},
5: {13, 14},
6: {15, 16},
},
},
{
name: "inner join",
args: args{
reHashTagIndex1: reHashTagIndex1,
reHashTagIndex2: reHashTagIndex2,
joinType: Inner,
},
want: map[uint64][]uint64{
1: {1, 2, 9, 10},
2: {3, 4, 9, 10},
3: {1, 2, 11, 12},
4: {3, 4, 11, 12},
},
},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
if got := onJoin(tt.args.reHashTagIndex1, tt.args.reHashTagIndex2, tt.args.joinType); !allValueDeepEqual(flatten(got), tt.want) {
t.Errorf("onJoin() = %v, want %v", got, tt.want)
}
})
}
}
// allValueDeepEqual 判断 map 的 value 是否相同,不考虑 key
func allValueDeepEqual(got, want map[uint64][]uint64) bool {
if len(got) != len(want) {
return false
}
for _, v1 := range got {
curEqual := false
slices.Sort(v1)
for _, v2 := range want {
slices.Sort(v2)
if reflect.DeepEqual(v1, v2) {
curEqual = true
break
}
}
if !curEqual {
return false
}
}
return true
}
// allValueDeepEqualOmitOrder 判断两个字符串切片是否相等,不考虑顺序
func allValueDeepEqualOmitOrder(got, want []string) bool {
if len(got) != len(want) {
return false
}
slices.Sort(got)
slices.Sort(want)
for i := range got {
if got[i] != want[i] {
return false
}
}
return true
}
func Test_removeVal(t *testing.T) {
type args struct {
promql string
}
tests := []struct {
name string
args args
want string
}{
// TODO: Add test cases.
{
name: "removeVal1",
args: args{
promql: "mem{test1=\"$test1\",test2=\"$test2\",test3=\"$test3\"} > $val",
},
want: "mem{} > $val",
},
{
name: "removeVal2",
args: args{
promql: "mem{test1=\"test1\",test2=\"$test2\",test3=\"$test3\"} > $val",
},
want: "mem{test1=\"test1\"} > $val",
},
{
name: "removeVal3",
args: args{
promql: "mem{test1=\"$test1\",test2=\"test2\",test3=\"$test3\"} > $val",
},
want: "mem{test2=\"test2\"} > $val",
},
{
name: "removeVal4",
args: args{
promql: "mem{test1=\"$test1\",test2=\"$test2\",test3=\"test3\"} > $val",
},
want: "mem{test3=\"test3\"} > $val",
},
{
name: "removeVal5",
args: args{
promql: "mem{test1=\"$test1\",test2=\"test2\",test3=\"test3\"} > $val",
},
want: "mem{test2=\"test2\",test3=\"test3\"} > $val",
},
{
name: "removeVal6",
args: args{
promql: "mem{test1=\"test1\",test2=\"$test2\",test3=\"test3\"} > $val",
},
want: "mem{test1=\"test1\",test3=\"test3\"} > $val",
},
{
name: "removeVal7",
args: args{
promql: "mem{test1=\"test1\",test2=\"test2\",test3='$test3'} > $val",
},
want: "mem{test1=\"test1\",test2=\"test2\"} > $val",
},
{
name: "removeVal8",
args: args{
promql: "mem{test1=\"test1\",test2=\"test2\",test3=\"test3\"} > $val",
},
want: "mem{test1=\"test1\",test2=\"test2\",test3=\"test3\"} > $val",
},
{
name: "removeVal9",
args: args{
promql: "mem{test1=\"$test1\",test2=\"test2\"} > $val1 and mem{test3=\"test3\",test4=\"test4\"} > $val2",
},
want: "mem{test2=\"test2\"} > $val1 and mem{test3=\"test3\",test4=\"test4\"} > $val2",
},
{
name: "removeVal10",
args: args{
promql: "mem{test1=\"test1\",test2='$test2'} > $val1 and mem{test3=\"test3\",test4=\"test4\"} > $val2",
},
want: "mem{test1=\"test1\"} > $val1 and mem{test3=\"test3\",test4=\"test4\"} > $val2",
},
{
name: "removeVal11",
args: args{
promql: "mem{test1='test1',test2=\"test2\"} > $val1 and mem{test3=\"$test3\",test4=\"test4\"} > $val2",
},
want: "mem{test1='test1',test2=\"test2\"} > $val1 and mem{test4=\"test4\"} > $val2",
},
{
name: "removeVal12",
args: args{
promql: "mem{test1=\"test1\",test2=\"test2\"} > $val1 and mem{test3=\"test3\",test4=\"$test4\"} > $val2",
},
want: "mem{test1=\"test1\",test2=\"test2\"} > $val1 and mem{test3=\"test3\"} > $val2",
},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
if got := removeVal(tt.args.promql); got != tt.want {
t.Errorf("removeVal() = %v, want %v", got, tt.want)
}
})
}
}
func TestExtractVarMapping(t *testing.T) {
tests := []struct {
name string
promql string
want map[string]string
}{
{
name: "单个花括号单个变量",
promql: `mem_used_percent{host="$my_host"} > $val`,
want: map[string]string{"my_host": "host"},
},
{
name: "单个花括号多个变量",
promql: `mem_used_percent{host="$my_host",region="$region",env="prod"} > $val`,
want: map[string]string{"my_host": "host", "region": "region"},
},
{
name: "多个花括号多个变量",
promql: `sum(rate(mem_used_percent{host="$my_host"})) by (instance) + avg(node_load1{region="$region"}) > $val`,
want: map[string]string{"my_host": "host", "region": "region"},
},
{
name: "相同变量出现多次",
promql: `sum(rate(mem_used_percent{host="$my_host"})) + avg(node_load1{host="$my_host"}) > $val`,
want: map[string]string{"my_host": "host"},
},
{
name: "没有变量",
promql: `mem_used_percent{host="localhost",region="cn"} > 80`,
want: map[string]string{},
},
{
name: "没有花括号",
promql: `80 > $val`,
want: map[string]string{},
},
{
name: "格式不规范的标签",
promql: `mem_used_percent{host=$my_host,region = $region} > $val`,
want: map[string]string{"my_host": "host", "region": "region"},
},
{
name: "空花括号",
promql: `mem_used_percent{} > $val`,
want: map[string]string{},
},
{
name: "不完整的花括号",
promql: `mem_used_percent{host="$my_host"`,
want: map[string]string{},
},
{
name: "复杂表达式",
promql: `sum(rate(http_requests_total{handler="$handler",code="$code"}[5m])) by (handler) / sum(rate(http_requests_total{handler="$handler"}[5m])) by (handler) * 100 > $threshold`,
want: map[string]string{"handler": "handler", "code": "code"},
},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
got := ExtractVarMapping(tt.promql)
if !reflect.DeepEqual(got, tt.want) {
t.Errorf("ExtractVarMapping() = %v, want %v", got, tt.want)
}
})
}
}

View File

@@ -1,6 +1,7 @@
package mute
import (
"slices"
"strconv"
"strings"
"time"
@@ -9,31 +10,33 @@ import (
"github.com/ccfos/nightingale/v6/memsto"
"github.com/ccfos/nightingale/v6/models"
"github.com/pkg/errors"
"github.com/toolkits/pkg/logger"
)
func IsMuted(rule *models.AlertRule, event *models.AlertCurEvent, targetCache *memsto.TargetCacheType, alertMuteCache *memsto.AlertMuteCacheType) bool {
func IsMuted(rule *models.AlertRule, event *models.AlertCurEvent, targetCache *memsto.TargetCacheType, alertMuteCache *memsto.AlertMuteCacheType) (bool, string, int64) {
if rule.Disabled == 1 {
return true
return true, "rule disabled", 0
}
if TimeSpanMuteStrategy(rule, event) {
return true
return true, "rule is not effective for period of time", 0
}
if IdentNotExistsMuteStrategy(rule, event, targetCache) {
return true
return true, "ident not exists mute", 0
}
if BgNotMatchMuteStrategy(rule, event, targetCache) {
return true
return true, "bg not match mute", 0
}
if EventMuteStrategy(event, alertMuteCache) {
return true
hit, muteId := EventMuteStrategy(event, alertMuteCache)
if hit {
return true, "match mute rule", muteId
}
return false
return false, "", 0
}
// TimeSpanMuteStrategy 根据规则配置的告警生效时间段过滤,如果产生的告警不在规则配置的告警生效时间段内,则不告警,即被mute
@@ -43,6 +46,12 @@ func TimeSpanMuteStrategy(rule *models.AlertRule, event *models.AlertCurEvent) b
triggerTime := tm.Format("15:04")
triggerWeek := strconv.Itoa(int(tm.Weekday()))
if rule.EnableDaysOfWeek == "" {
// 如果规则没有配置生效时间,则默认全天生效
return false
}
enableStime := strings.Fields(rule.EnableStime)
enableEtime := strings.Fields(rule.EnableEtime)
enableDaysOfWeek := strings.Split(rule.EnableDaysOfWeek, ";")
@@ -114,83 +123,58 @@ func BgNotMatchMuteStrategy(rule *models.AlertRule, event *models.AlertCurEvent,
target, exists := targetCache.Get(ident)
// 对于包含ident的告警事件check一下ident所属bg和rule所属bg是否相同
// 如果告警规则选择了只在本BG生效那其他BG的机器就不能因此规则产生告警
if exists && target.GroupId != rule.GroupId {
if exists && !target.MatchGroupId(rule.GroupId) {
logger.Debugf("[%s] mute: rule_eval:%d cluster:%s", "BgNotMatchMuteStrategy", rule.Id, event.Cluster)
return true
}
return false
}
func EventMuteStrategy(event *models.AlertCurEvent, alertMuteCache *memsto.AlertMuteCacheType) bool {
func EventMuteStrategy(event *models.AlertCurEvent, alertMuteCache *memsto.AlertMuteCacheType) (bool, int64) {
mutes, has := alertMuteCache.Gets(event.GroupId)
if !has || len(mutes) == 0 {
return false
return false, 0
}
for i := 0; i < len(mutes); i++ {
if matchMute(event, mutes[i]) {
return true
matched, _ := MatchMute(event, mutes[i])
if matched {
return true, mutes[i].Id
}
}
return false
return false, 0
}
// matchMute 如果传入了clock这个可选参数就表示使用这个clock表示的时间否则就从event的字段中取TriggerTime
func matchMute(event *models.AlertCurEvent, mute *models.AlertMute, clock ...int64) bool {
// MatchMute 如果传入了clock这个可选参数就表示使用这个clock表示的时间否则就从event的字段中取TriggerTime
func MatchMute(event *models.AlertCurEvent, mute *models.AlertMute, clock ...int64) (bool, error) {
if mute.Disabled == 1 {
return false
}
ts := event.TriggerTime
if len(clock) > 0 {
ts = clock[0]
return false, errors.New("mute is disabled")
}
// 如果不是全局的,判断 匹配的 datasource id
if !(len(mute.DatasourceIdsJson) != 0 && mute.DatasourceIdsJson[0] == 0) && event.DatasourceId != 0 {
idm := make(map[int64]struct{}, len(mute.DatasourceIdsJson))
for i := 0; i < len(mute.DatasourceIdsJson); i++ {
idm[mute.DatasourceIdsJson[i]] = struct{}{}
}
// 判断 event.datasourceId 是否包含在 idm 中
if _, has := idm[event.DatasourceId]; !has {
return false
if len(mute.DatasourceIdsJson) != 0 && mute.DatasourceIdsJson[0] != 0 && event.DatasourceId != 0 {
if !slices.Contains(mute.DatasourceIdsJson, event.DatasourceId) {
return false, errors.New("datasource id not match")
}
}
var matchTime bool
if mute.MuteTimeType == models.TimeRange {
if ts < mute.Btime || ts > mute.Etime {
return false
if !mute.IsWithinTimeRange(event.TriggerTime) {
return false, errors.New("event trigger time not within mute time range")
}
matchTime = true
} else if mute.MuteTimeType == models.Periodic {
tm := time.Unix(event.TriggerTime, 0)
triggerTime := tm.Format("15:04")
triggerWeek := strconv.Itoa(int(tm.Weekday()))
for i := 0; i < len(mute.PeriodicMutesJson); i++ {
if strings.Contains(mute.PeriodicMutesJson[i].EnableDaysOfWeek, triggerWeek) {
if mute.PeriodicMutesJson[i].EnableStime == mute.PeriodicMutesJson[i].EnableEtime {
matchTime = true
break
} else if mute.PeriodicMutesJson[i].EnableStime < mute.PeriodicMutesJson[i].EnableEtime {
if triggerTime >= mute.PeriodicMutesJson[i].EnableStime && triggerTime < mute.PeriodicMutesJson[i].EnableEtime {
matchTime = true
break
}
} else {
if triggerTime >= mute.PeriodicMutesJson[i].EnableStime || triggerTime < mute.PeriodicMutesJson[i].EnableEtime {
matchTime = true
break
}
}
}
ts := event.TriggerTime
if len(clock) > 0 {
ts = clock[0]
}
}
if !matchTime {
return false
if !mute.IsWithinPeriodicMute(ts) {
return false, errors.New("event trigger time not within periodic mute range")
}
} else {
logger.Warningf("mute time type invalid, %d", mute.MuteTimeType)
return false, errors.New("mute time type invalid")
}
var matchSeverity bool
@@ -206,8 +190,14 @@ func matchMute(event *models.AlertCurEvent, mute *models.AlertMute, clock ...int
}
if !matchSeverity {
return false
return false, errors.New("event severity not match mute severity")
}
return common.MatchTags(event.TagsMap, mute.ITags)
if len(mute.ITags) == 0 {
return true, nil
}
if !common.MatchTags(event.TagsMap, mute.ITags) {
return false, errors.New("event tags not match mute tags")
}
return true, nil
}

View File

@@ -12,12 +12,12 @@ const NodeReplicas = 500
type DatasourceHashRingType struct {
sync.RWMutex
Rings map[int64]*consistent.Consistent
Rings map[string]*consistent.Consistent
}
// for alert_rule sharding
var HostDatasource int64 = 99999999
var DatasourceHashRing = DatasourceHashRingType{Rings: make(map[int64]*consistent.Consistent)}
var DatasourceHashRing = DatasourceHashRingType{Rings: make(map[string]*consistent.Consistent)}
func NewConsistentHashRing(replicas int32, nodes []string) *consistent.Consistent {
ret := consistent.New()
@@ -28,7 +28,7 @@ func NewConsistentHashRing(replicas int32, nodes []string) *consistent.Consisten
return ret
}
func RebuildConsistentHashRing(datasourceId int64, nodes []string) {
func RebuildConsistentHashRing(datasourceId string, nodes []string) {
r := consistent.New()
r.NumberOfReplicas = NodeReplicas
for i := 0; i < len(nodes); i++ {
@@ -36,10 +36,10 @@ func RebuildConsistentHashRing(datasourceId int64, nodes []string) {
}
DatasourceHashRing.Set(datasourceId, r)
logger.Infof("hash ring %d rebuild %+v", datasourceId, r.Members())
logger.Infof("hash ring %s rebuild %+v", datasourceId, r.Members())
}
func (chr *DatasourceHashRingType) GetNode(datasourceId int64, pk string) (string, error) {
func (chr *DatasourceHashRingType) GetNode(datasourceId string, pk string) (string, error) {
chr.Lock()
defer chr.Unlock()
_, exists := chr.Rings[datasourceId]
@@ -50,28 +50,34 @@ func (chr *DatasourceHashRingType) GetNode(datasourceId int64, pk string) (strin
return chr.Rings[datasourceId].Get(pk)
}
func (chr *DatasourceHashRingType) IsHit(datasourceId int64, pk string, currentNode string) bool {
func (chr *DatasourceHashRingType) IsHit(datasourceId string, pk string, currentNode string) bool {
node, err := chr.GetNode(datasourceId, pk)
if err != nil {
if !errors.Is(err, consistent.ErrEmptyCircle) {
logger.Errorf("rule id:%s is not work, datasource id:%d failed to get node from hashring:%v", pk, datasourceId, err)
logger.Errorf("rule id:%s is not work, datasource id:%s failed to get node from hashring:%v", pk, datasourceId, err)
}
return false
}
return node == currentNode
}
func (chr *DatasourceHashRingType) Set(datasourceId int64, r *consistent.Consistent) {
func (chr *DatasourceHashRingType) Set(datasourceId string, r *consistent.Consistent) {
chr.Lock()
defer chr.Unlock()
chr.Rings[datasourceId] = r
}
func (chr *DatasourceHashRingType) Clear() {
func (chr *DatasourceHashRingType) Del(datasourceId string) {
chr.Lock()
defer chr.Unlock()
delete(chr.Rings, datasourceId)
}
func (chr *DatasourceHashRingType) Clear(engineName string) {
chr.Lock()
defer chr.Unlock()
for id := range chr.Rings {
if id == HostDatasource {
if id == engineName {
continue
}
delete(chr.Rings, id)

View File

@@ -7,6 +7,7 @@ import (
"time"
"github.com/ccfos/nightingale/v6/alert/aconf"
"github.com/ccfos/nightingale/v6/alert/astats"
"github.com/ccfos/nightingale/v6/models"
"github.com/ccfos/nightingale/v6/pkg/ctx"
"github.com/ccfos/nightingale/v6/pkg/poster"
@@ -17,12 +18,14 @@ import (
type Naming struct {
ctx *ctx.Context
heartbeatConfig aconf.HeartbeatConfig
astats *astats.Stats
}
func NewNaming(ctx *ctx.Context, heartbeat aconf.HeartbeatConfig) *Naming {
func NewNaming(ctx *ctx.Context, heartbeat aconf.HeartbeatConfig, alertStats *astats.Stats) *Naming {
naming := &Naming{
ctx: ctx,
heartbeatConfig: heartbeat,
astats: alertStats,
}
naming.Heartbeats()
return naming
@@ -30,9 +33,11 @@ func NewNaming(ctx *ctx.Context, heartbeat aconf.HeartbeatConfig) *Naming {
// local servers
var localss map[int64]string
var localHostServers map[string]string
func (n *Naming) Heartbeats() error {
localss = make(map[int64]string)
localHostServers = make(map[string]string)
if err := n.heartbeat(); err != nil {
fmt.Println("failed to heartbeat:", err)
return err
@@ -86,30 +91,32 @@ func (n *Naming) heartbeat() error {
err := models.AlertingEngineHeartbeatWithCluster(n.ctx, n.heartbeatConfig.Endpoint, n.heartbeatConfig.EngineName, 0)
if err != nil {
logger.Warningf("heartbeat with cluster %s err:%v", "", err)
n.astats.CounterHeartbeatErrorTotal.WithLabelValues().Inc()
}
} else {
for i := 0; i < len(datasourceIds); i++ {
err := models.AlertingEngineHeartbeatWithCluster(n.ctx, n.heartbeatConfig.Endpoint, n.heartbeatConfig.EngineName, datasourceIds[i])
if err != nil {
logger.Warningf("heartbeat with cluster %d err:%v", datasourceIds[i], err)
n.astats.CounterHeartbeatErrorTotal.WithLabelValues().Inc()
}
}
}
if len(datasourceIds) == 0 {
DatasourceHashRing.Clear()
DatasourceHashRing.Clear(n.heartbeatConfig.EngineName)
for dsId := range localss {
if dsId == HostDatasource {
continue
}
delete(localss, dsId)
}
}
newDatasource := make(map[int64]struct{})
for i := 0; i < len(datasourceIds); i++ {
newDatasource[datasourceIds[i]] = struct{}{}
servers, err := n.ActiveServers(datasourceIds[i])
if err != nil {
logger.Warningf("hearbeat %d get active server err:%v", datasourceIds[i], err)
logger.Warningf("heartbeat %d get active server err:%v", datasourceIds[i], err)
n.astats.CounterHeartbeatErrorTotal.WithLabelValues().Inc()
continue
}
@@ -121,36 +128,42 @@ func (n *Naming) heartbeat() error {
continue
}
RebuildConsistentHashRing(datasourceIds[i], servers)
RebuildConsistentHashRing(fmt.Sprintf("%d", datasourceIds[i]), servers)
localss[datasourceIds[i]] = newss
}
if n.ctx.IsCenter {
// 如果是中心节点,还需要处理 host 类型的告警规则host 类型告警规则,和数据源无关,想复用下数据源的 hash ring想用一个虚假的数据源 id 来处理
// if is center node, we need to handle host type alerting rules, host type alerting rules are not related to datasource, we want to reuse the hash ring of datasource, we want to use a fake datasource id to handle it
err := models.AlertingEngineHeartbeatWithCluster(n.ctx, n.heartbeatConfig.Endpoint, n.heartbeatConfig.EngineName, HostDatasource)
if err != nil {
logger.Warningf("heartbeat with cluster %s err:%v", "", err)
for dsId := range localss {
if _, exists := newDatasource[dsId]; !exists {
delete(localss, dsId)
DatasourceHashRing.Del(fmt.Sprintf("%d", dsId))
}
servers, err := n.ActiveServers(HostDatasource)
if err != nil {
logger.Warningf("hearbeat %d get active server err:%v", HostDatasource, err)
return nil
}
sort.Strings(servers)
newss := strings.Join(servers, " ")
oldss, exists := localss[HostDatasource]
if exists && oldss == newss {
return nil
}
RebuildConsistentHashRing(HostDatasource, servers)
localss[HostDatasource] = newss
}
// host 告警使用的是 hash ring
err = models.AlertingEngineHeartbeatWithCluster(n.ctx, n.heartbeatConfig.Endpoint, n.heartbeatConfig.EngineName, HostDatasource)
if err != nil {
logger.Warningf("heartbeat with cluster %s err:%v", "", err)
n.astats.CounterHeartbeatErrorTotal.WithLabelValues().Inc()
}
servers, err := n.ActiveServersByEngineName()
if err != nil {
logger.Warningf("heartbeat %d get active server err:%v", HostDatasource, err)
n.astats.CounterHeartbeatErrorTotal.WithLabelValues().Inc()
return nil
}
sort.Strings(servers)
newss := strings.Join(servers, " ")
oldss, exists := localHostServers[n.heartbeatConfig.EngineName]
if exists && oldss == newss {
return nil
}
RebuildConsistentHashRing(n.heartbeatConfig.EngineName, servers)
localHostServers[n.heartbeatConfig.EngineName] = newss
return nil
}

28
alert/naming/leader.go Normal file
View File

@@ -0,0 +1,28 @@
package naming
import (
"sort"
"github.com/toolkits/pkg/logger"
)
func (n *Naming) IamLeader() bool {
if !n.ctx.IsCenter {
return false
}
servers, err := n.ActiveServersByEngineName()
if err != nil {
logger.Errorf("failed to get active servers: %v", err)
return false
}
if len(servers) == 0 {
logger.Errorf("active servers empty")
return false
}
sort.Strings(servers)
return n.heartbeatConfig.Endpoint == servers[0]
}

View File

@@ -0,0 +1,383 @@
package engine
import (
"fmt"
"time"
"github.com/ccfos/nightingale/v6/models"
"github.com/ccfos/nightingale/v6/pkg/ctx"
"github.com/google/uuid"
"github.com/toolkits/pkg/logger"
)
type WorkflowEngine struct {
ctx *ctx.Context
}
func NewWorkflowEngine(c *ctx.Context) *WorkflowEngine {
return &WorkflowEngine{ctx: c}
}
func (e *WorkflowEngine) Execute(pipeline *models.EventPipeline, event *models.AlertCurEvent, triggerCtx *models.WorkflowTriggerContext) (*models.AlertCurEvent, *models.WorkflowResult, error) {
startTime := time.Now()
wfCtx := e.initWorkflowContext(pipeline, event, triggerCtx)
nodes := pipeline.GetWorkflowNodes()
connections := pipeline.GetWorkflowConnections()
if len(nodes) == 0 {
return event, &models.WorkflowResult{
Event: event,
Status: models.ExecutionStatusSuccess,
Message: "no nodes to execute",
}, nil
}
nodeMap := make(map[string]*models.WorkflowNode)
for i := range nodes {
if nodes[i].RetryInterval == 0 {
nodes[i].RetryInterval = 1
}
if nodes[i].MaxRetries == 0 {
nodes[i].MaxRetries = 1
}
nodeMap[nodes[i].ID] = &nodes[i]
}
result := e.executeDAG(nodeMap, connections, wfCtx)
result.Event = wfCtx.Event
duration := time.Since(startTime).Milliseconds()
if triggerCtx != nil && triggerCtx.Mode != "" {
e.saveExecutionRecord(pipeline, wfCtx, result, triggerCtx, startTime.Unix(), duration)
}
return wfCtx.Event, result, nil
}
func (e *WorkflowEngine) initWorkflowContext(pipeline *models.EventPipeline, event *models.AlertCurEvent, triggerCtx *models.WorkflowTriggerContext) *models.WorkflowContext {
// 合并环境变量
env := pipeline.GetEnvMap()
if triggerCtx != nil && triggerCtx.EnvOverrides != nil {
for k, v := range triggerCtx.EnvOverrides {
env[k] = v
}
}
metadata := map[string]string{
"start_time": fmt.Sprintf("%d", time.Now().Unix()),
"pipeline_id": fmt.Sprintf("%d", pipeline.ID),
}
// 是否启用流式输出
stream := false
if triggerCtx != nil {
metadata["request_id"] = triggerCtx.RequestID
metadata["trigger_mode"] = triggerCtx.Mode
metadata["trigger_by"] = triggerCtx.TriggerBy
stream = triggerCtx.Stream
}
return &models.WorkflowContext{
Event: event,
Env: env,
Vars: make(map[string]interface{}), // 初始化空的 Vars供节点间传递数据
Metadata: metadata,
Stream: stream,
}
}
// executeDAG 使用 Kahn 算法执行 DAG
func (e *WorkflowEngine) executeDAG(nodeMap map[string]*models.WorkflowNode, connections models.Connections, wfCtx *models.WorkflowContext) *models.WorkflowResult {
result := &models.WorkflowResult{
Status: models.ExecutionStatusSuccess,
NodeResults: make([]*models.NodeExecutionResult, 0),
Stream: wfCtx.Stream, // 从上下文继承流式输出设置
}
// 计算每个节点的入度
inDegree := make(map[string]int)
for nodeID := range nodeMap {
inDegree[nodeID] = 0
}
// 遍历连接,计算入度
for _, nodeConns := range connections {
for _, targets := range nodeConns.Main {
for _, target := range targets {
inDegree[target.Node]++
}
}
}
// 找到所有入度为 0 的节点(起始节点)
queue := make([]string, 0)
for nodeID, degree := range inDegree {
if degree == 0 {
queue = append(queue, nodeID)
}
}
// 如果没有起始节点,说明存在循环依赖
if len(queue) == 0 && len(nodeMap) > 0 {
result.Status = models.ExecutionStatusFailed
result.Message = "workflow has circular dependency"
return result
}
// 记录已执行的节点
executed := make(map[string]bool)
// 记录节点的分支选择结果
branchResults := make(map[string]*int)
for len(queue) > 0 {
// 取出队首节点
nodeID := queue[0]
queue = queue[1:]
// 检查是否已执行
if executed[nodeID] {
continue
}
node, exists := nodeMap[nodeID]
if !exists {
continue
}
// 执行节点
nodeResult, nodeOutput := e.executeNode(node, wfCtx)
result.NodeResults = append(result.NodeResults, nodeResult)
if nodeOutput != nil && nodeOutput.Stream && nodeOutput.StreamChan != nil {
// 流式输出节点通常是最后一个节点
// 直接传递 StreamChan 给 WorkflowResult不阻塞等待
result.Stream = true
result.StreamChan = nodeOutput.StreamChan
result.Event = wfCtx.Event
result.Status = "streaming"
result.Message = fmt.Sprintf("streaming output from node: %s", node.Name)
// 更新节点状态为 streaming
nodeResult.Status = "streaming"
nodeResult.Message = "streaming in progress"
// 立即返回,让 API 层处理流式响应
return result
}
executed[nodeID] = true
// 保存分支结果
if nodeResult.BranchIndex != nil {
branchResults[nodeID] = nodeResult.BranchIndex
}
// 检查执行状态
if nodeResult.Status == "failed" {
if !node.ContinueOnFail {
result.Status = models.ExecutionStatusFailed
result.ErrorNode = nodeID
result.Message = fmt.Sprintf("node %s failed: %s", node.Name, nodeResult.Error)
return result
}
}
// 检查是否终止
if nodeResult.Status == "terminated" {
result.Message = fmt.Sprintf("workflow terminated at node %s", node.Name)
return result
}
// 更新后继节点的入度
if nodeConns, ok := connections[nodeID]; ok {
for outputIndex, targets := range nodeConns.Main {
// 检查是否应该走这个分支
if !e.shouldFollowBranch(nodeID, outputIndex, branchResults) {
continue
}
for _, target := range targets {
inDegree[target.Node]--
if inDegree[target.Node] == 0 {
queue = append(queue, target.Node)
}
}
}
}
}
return result
}
// executeNode 执行单个节点
// 返回:节点执行结果、节点输出(用于流式输出检测)
func (e *WorkflowEngine) executeNode(node *models.WorkflowNode, wfCtx *models.WorkflowContext) (*models.NodeExecutionResult, *models.NodeOutput) {
startTime := time.Now()
nodeResult := &models.NodeExecutionResult{
NodeID: node.ID,
NodeName: node.Name,
NodeType: node.Type,
StartedAt: startTime.Unix(),
}
var nodeOutput *models.NodeOutput
// 跳过禁用的节点
if node.Disabled {
nodeResult.Status = "skipped"
nodeResult.Message = "node is disabled"
nodeResult.FinishedAt = time.Now().Unix()
nodeResult.DurationMs = time.Since(startTime).Milliseconds()
return nodeResult, nil
}
// 获取处理器
processor, err := models.GetProcessorByType(node.Type, node.Config)
if err != nil {
nodeResult.Status = "failed"
nodeResult.Error = fmt.Sprintf("failed to get processor: %v", err)
nodeResult.FinishedAt = time.Now().Unix()
nodeResult.DurationMs = time.Since(startTime).Milliseconds()
return nodeResult, nil
}
// 执行处理器(带重试)
var retries int
maxRetries := node.MaxRetries
if !node.RetryOnFail {
maxRetries = 0
}
for retries <= maxRetries {
// 检查是否为分支处理器
if branchProcessor, ok := processor.(models.BranchProcessor); ok {
output, err := branchProcessor.ProcessWithBranch(e.ctx, wfCtx)
if err != nil {
if retries < maxRetries {
retries++
time.Sleep(time.Duration(node.RetryInterval) * time.Second)
continue
}
nodeResult.Status = "failed"
nodeResult.Error = err.Error()
} else {
nodeResult.Status = "success"
if output != nil {
nodeOutput = output
if output.WfCtx != nil {
wfCtx = output.WfCtx
}
nodeResult.Message = output.Message
nodeResult.BranchIndex = output.BranchIndex
if output.Terminate {
nodeResult.Status = "terminated"
}
}
}
break
}
// 普通处理器
newWfCtx, msg, err := processor.Process(e.ctx, wfCtx)
if err != nil {
if retries < maxRetries {
retries++
time.Sleep(time.Duration(node.RetryInterval) * time.Second)
continue
}
nodeResult.Status = "failed"
nodeResult.Error = err.Error()
} else {
nodeResult.Status = "success"
nodeResult.Message = msg
if newWfCtx != nil {
wfCtx = newWfCtx
// 检测流式输出标记
if newWfCtx.Stream && newWfCtx.StreamChan != nil {
nodeOutput = &models.NodeOutput{
WfCtx: newWfCtx,
Message: msg,
Stream: true,
StreamChan: newWfCtx.StreamChan,
}
}
}
// 如果事件被 drop返回 nil 或 Event 为 nil标记为终止
if newWfCtx == nil || newWfCtx.Event == nil {
nodeResult.Status = "terminated"
nodeResult.Message = msg
}
}
break
}
nodeResult.FinishedAt = time.Now().Unix()
nodeResult.DurationMs = time.Since(startTime).Milliseconds()
logger.Infof("workflow: executed node %s (type=%s) status=%s msg=%s duration=%dms",
node.Name, node.Type, nodeResult.Status, nodeResult.Message, nodeResult.DurationMs)
return nodeResult, nodeOutput
}
// shouldFollowBranch 判断是否应该走某个分支
func (e *WorkflowEngine) shouldFollowBranch(nodeID string, outputIndex int, branchResults map[string]*int) bool {
branchIndex, hasBranch := branchResults[nodeID]
if !hasBranch {
// 没有分支结果,说明不是分支节点,只走第一个输出
return outputIndex == 0
}
if branchIndex == nil {
// branchIndex 为 nil走默认分支通常是最后一个
return true
}
// 只走选中的分支
return outputIndex == *branchIndex
}
func (e *WorkflowEngine) saveExecutionRecord(pipeline *models.EventPipeline, wfCtx *models.WorkflowContext, result *models.WorkflowResult, triggerCtx *models.WorkflowTriggerContext, startTime int64, duration int64) {
executionID := triggerCtx.RequestID
if executionID == "" {
executionID = uuid.New().String()
}
execution := &models.EventPipelineExecution{
ID: executionID,
PipelineID: pipeline.ID,
PipelineName: pipeline.Name,
Mode: triggerCtx.Mode,
Status: result.Status,
ErrorMessage: result.Message,
ErrorNode: result.ErrorNode,
CreatedAt: startTime,
FinishedAt: time.Now().Unix(),
DurationMs: duration,
TriggerBy: triggerCtx.TriggerBy,
}
if wfCtx.Event != nil {
execution.EventID = wfCtx.Event.Id
}
if err := execution.SetNodeResults(result.NodeResults); err != nil {
logger.Errorf("workflow: failed to set node results: pipeline_id=%d, error=%v", pipeline.ID, err)
}
secretKeys := pipeline.GetSecretKeys()
sanitizedEnv := wfCtx.SanitizedEnv(secretKeys)
if err := execution.SetEnvSnapshot(sanitizedEnv); err != nil {
logger.Errorf("workflow: failed to set env snapshot: pipeline_id=%d, error=%v", pipeline.ID, err)
}
if err := models.CreateEventPipelineExecution(e.ctx, execution); err != nil {
logger.Errorf("workflow: failed to save execution record: pipeline_id=%d, error=%v", pipeline.ID, err)
}
}

View File

@@ -0,0 +1,13 @@
package pipeline
import (
_ "github.com/ccfos/nightingale/v6/alert/pipeline/processor/aisummary"
_ "github.com/ccfos/nightingale/v6/alert/pipeline/processor/callback"
_ "github.com/ccfos/nightingale/v6/alert/pipeline/processor/eventdrop"
_ "github.com/ccfos/nightingale/v6/alert/pipeline/processor/eventupdate"
_ "github.com/ccfos/nightingale/v6/alert/pipeline/processor/logic"
_ "github.com/ccfos/nightingale/v6/alert/pipeline/processor/relabel"
)
func Init() {
}

View File

@@ -0,0 +1,246 @@
package aisummary
import (
"bytes"
"crypto/tls"
"encoding/json"
"fmt"
"io"
"net/http"
"net/url"
"strconv"
"strings"
"text/template"
"time"
"github.com/ccfos/nightingale/v6/alert/pipeline/processor/callback"
"github.com/ccfos/nightingale/v6/alert/pipeline/processor/common"
"github.com/ccfos/nightingale/v6/models"
"github.com/ccfos/nightingale/v6/pkg/ctx"
"github.com/ccfos/nightingale/v6/pkg/tplx"
)
const (
HTTP_STATUS_SUCCESS_MAX = 299
)
// AISummaryConfig 配置结构体
type AISummaryConfig struct {
callback.HTTPConfig
ModelName string `json:"model_name"`
APIKey string `json:"api_key"`
PromptTemplate string `json:"prompt_template"`
CustomParams map[string]interface{} `json:"custom_params"`
}
type Message struct {
Role string `json:"role"`
Content string `json:"content"`
}
type ChatCompletionResponse struct {
Choices []struct {
Message struct {
Content string `json:"content"`
} `json:"message"`
} `json:"choices"`
}
func init() {
models.RegisterProcessor("ai_summary", &AISummaryConfig{})
}
func (c *AISummaryConfig) Init(settings interface{}) (models.Processor, error) {
result, err := common.InitProcessor[*AISummaryConfig](settings)
return result, err
}
func (c *AISummaryConfig) Process(ctx *ctx.Context, wfCtx *models.WorkflowContext) (*models.WorkflowContext, string, error) {
event := wfCtx.Event
if c.Client == nil {
if err := c.initHTTPClient(); err != nil {
return wfCtx, "", fmt.Errorf("failed to initialize HTTP client: %v processor: %v", err, c)
}
}
// 准备告警事件信息
eventInfo, err := c.prepareEventInfo(wfCtx)
if err != nil {
return wfCtx, "", fmt.Errorf("failed to prepare event info: %v processor: %v", err, c)
}
// 调用AI模型生成总结
summary, err := c.generateAISummary(eventInfo)
if err != nil {
return wfCtx, "", fmt.Errorf("failed to generate AI summary: %v processor: %v", err, c)
}
// 将总结添加到annotations字段
if event.AnnotationsJSON == nil {
event.AnnotationsJSON = make(map[string]string)
}
event.AnnotationsJSON["ai_summary"] = summary
// 更新Annotations字段
b, err := json.Marshal(event.AnnotationsJSON)
if err != nil {
return wfCtx, "", fmt.Errorf("failed to marshal annotations: %v processor: %v", err, c)
}
event.Annotations = string(b)
return wfCtx, "", nil
}
func (c *AISummaryConfig) initHTTPClient() error {
transport := &http.Transport{
TLSClientConfig: &tls.Config{InsecureSkipVerify: c.SkipSSLVerify},
}
if c.Proxy != "" {
proxyURL, err := url.Parse(c.Proxy)
if err != nil {
return fmt.Errorf("failed to parse proxy url: %v", err)
}
transport.Proxy = http.ProxyURL(proxyURL)
}
c.Client = &http.Client{
Timeout: time.Duration(c.Timeout) * time.Millisecond,
Transport: transport,
}
return nil
}
func (c *AISummaryConfig) prepareEventInfo(wfCtx *models.WorkflowContext) (string, error) {
var defs = []string{
"{{$event := .Event}}",
"{{$env := .Env}}",
}
text := strings.Join(append(defs, c.PromptTemplate), "")
t, err := template.New("prompt").Funcs(template.FuncMap(tplx.TemplateFuncMap)).Parse(text)
if err != nil {
return "", fmt.Errorf("failed to parse prompt template: %v", err)
}
var body bytes.Buffer
err = t.Execute(&body, wfCtx)
if err != nil {
return "", fmt.Errorf("failed to execute prompt template: %v", err)
}
return body.String(), nil
}
func (c *AISummaryConfig) generateAISummary(eventInfo string) (string, error) {
// 构建基础请求参数
reqParams := map[string]interface{}{
"model": c.ModelName,
"messages": []Message{
{
Role: "user",
Content: eventInfo,
},
},
}
// 合并自定义参数
for k, v := range c.CustomParams {
converted, err := convertCustomParam(v)
if err != nil {
return "", fmt.Errorf("failed to convert custom param %s: %v", k, err)
}
reqParams[k] = converted
}
// 序列化请求体
jsonData, err := json.Marshal(reqParams)
if err != nil {
return "", fmt.Errorf("failed to marshal request body: %v", err)
}
// 创建HTTP请求
req, err := http.NewRequest("POST", c.URL, bytes.NewBuffer(jsonData))
if err != nil {
return "", fmt.Errorf("failed to create request: %v", err)
}
// 设置请求头
req.Header.Set("Authorization", "Bearer "+c.APIKey)
req.Header.Set("Content-Type", "application/json")
for k, v := range c.Headers {
req.Header.Set(k, v)
}
// 发送请求
resp, err := c.Client.Do(req)
if err != nil {
return "", fmt.Errorf("failed to send request: %v", err)
}
defer resp.Body.Close()
// 检查响应状态码
if resp.StatusCode > HTTP_STATUS_SUCCESS_MAX {
body, _ := io.ReadAll(resp.Body)
return "", fmt.Errorf("unexpected status code: %d, body: %s", resp.StatusCode, string(body))
}
// 读取响应
body, err := io.ReadAll(resp.Body)
if err != nil {
return "", fmt.Errorf("failed to read response body: %v", err)
}
// 解析响应
var chatResp ChatCompletionResponse
if err := json.Unmarshal(body, &chatResp); err != nil {
return "", fmt.Errorf("failed to unmarshal response: %v", err)
}
if len(chatResp.Choices) == 0 {
return "", fmt.Errorf("no response from AI model")
}
return chatResp.Choices[0].Message.Content, nil
}
// convertCustomParam 将前端传入的参数转换为正确的类型
func convertCustomParam(value interface{}) (interface{}, error) {
if value == nil {
return nil, nil
}
// 如果是字符串,尝试转换为其他类型
if str, ok := value.(string); ok {
// 尝试转换为数字
if f, err := strconv.ParseFloat(str, 64); err == nil {
// 检查是否为整数
if f == float64(int64(f)) {
return int64(f), nil
}
return f, nil
}
// 尝试转换为布尔值
if b, err := strconv.ParseBool(str); err == nil {
return b, nil
}
// 尝试解析为JSON数组
if strings.HasPrefix(strings.TrimSpace(str), "[") {
var arr []interface{}
if err := json.Unmarshal([]byte(str), &arr); err == nil {
return arr, nil
}
}
// 尝试解析为JSON对象
if strings.HasPrefix(strings.TrimSpace(str), "{") {
var obj map[string]interface{}
if err := json.Unmarshal([]byte(str), &obj); err == nil {
return obj, nil
}
}
}
return value, nil
}

View File

@@ -0,0 +1,145 @@
package aisummary
import (
"testing"
"github.com/ccfos/nightingale/v6/alert/pipeline/processor/callback"
"github.com/ccfos/nightingale/v6/models"
"github.com/ccfos/nightingale/v6/pkg/ctx"
"github.com/stretchr/testify/assert"
)
func TestAISummaryConfig_Process(t *testing.T) {
// 创建测试配置
config := &AISummaryConfig{
HTTPConfig: callback.HTTPConfig{
URL: "https://generativelanguage.googleapis.com/v1beta/openai/chat/completions",
Timeout: 30000,
SkipSSLVerify: true,
Headers: map[string]string{
"Content-Type": "application/json",
},
},
ModelName: "gemini-2.0-flash",
APIKey: "*",
PromptTemplate: "告警规则:{{$event.RuleName}}\n严重程度{{$event.Severity}}",
CustomParams: map[string]interface{}{
"temperature": 0.7,
"max_tokens": 2000,
"top_p": 0.9,
},
}
// 创建测试事件
event := &models.AlertCurEvent{
RuleName: "Test Rule",
Severity: 1,
TagsMap: map[string]string{
"host": "test-host",
},
AnnotationsJSON: map[string]string{
"description": "Test alert",
},
}
// 创建 WorkflowContext
wfCtx := &models.WorkflowContext{
Event: event,
Env: map[string]string{},
}
// 测试模板处理
eventInfo, err := config.prepareEventInfo(wfCtx)
assert.NoError(t, err)
assert.Contains(t, eventInfo, "Test Rule")
assert.Contains(t, eventInfo, "1")
// 测试配置初始化
processor, err := config.Init(config)
assert.NoError(t, err)
assert.NotNil(t, processor)
// 测试处理函数
result, _, err := processor.Process(&ctx.Context{}, wfCtx)
assert.NoError(t, err)
assert.NotNil(t, result)
assert.NotEmpty(t, result.Event.AnnotationsJSON["ai_summary"])
// 展示处理结果
t.Log("\n=== 处理结果 ===")
t.Logf("告警规则: %s", result.Event.RuleName)
t.Logf("严重程度: %d", result.Event.Severity)
t.Logf("标签: %v", result.Event.TagsMap)
t.Logf("原始注释: %v", result.Event.AnnotationsJSON["description"])
t.Logf("AI总结: %s", result.Event.AnnotationsJSON["ai_summary"])
}
func TestConvertCustomParam(t *testing.T) {
tests := []struct {
name string
input interface{}
expected interface{}
hasError bool
}{
{
name: "nil value",
input: nil,
expected: nil,
hasError: false,
},
{
name: "string number to int64",
input: "123",
expected: int64(123),
hasError: false,
},
{
name: "string float to float64",
input: "123.45",
expected: 123.45,
hasError: false,
},
{
name: "string boolean to bool",
input: "true",
expected: true,
hasError: false,
},
{
name: "string false to bool",
input: "false",
expected: false,
hasError: false,
},
{
name: "JSON array string to slice",
input: `["a", "b", "c"]`,
expected: []interface{}{"a", "b", "c"},
hasError: false,
},
{
name: "JSON object string to map",
input: `{"key": "value", "num": 123}`,
expected: map[string]interface{}{"key": "value", "num": float64(123)},
hasError: false,
},
{
name: "plain string remains string",
input: "hello world",
expected: "hello world",
hasError: false,
},
}
for _, test := range tests {
t.Run(test.name, func(t *testing.T) {
converted, err := convertCustomParam(test.input)
if test.hasError {
assert.Error(t, err)
return
}
assert.NoError(t, err)
assert.Equal(t, test.expected, converted)
})
}
}

View File

@@ -0,0 +1,104 @@
package callback
import (
"crypto/tls"
"encoding/json"
"fmt"
"io"
"net/http"
"net/url"
"strings"
"time"
"github.com/ccfos/nightingale/v6/alert/pipeline/processor/common"
"github.com/ccfos/nightingale/v6/models"
"github.com/ccfos/nightingale/v6/pkg/ctx"
"github.com/toolkits/pkg/logger"
)
type HTTPConfig struct {
URL string `json:"url"`
Method string `json:"method,omitempty"`
Body string `json:"body,omitempty"`
Headers map[string]string `json:"header"`
AuthUsername string `json:"auth_username"`
AuthPassword string `json:"auth_password"`
Timeout int `json:"timeout"` // 单位:ms
SkipSSLVerify bool `json:"skip_ssl_verify"`
Proxy string `json:"proxy"`
Client *http.Client `json:"-"`
}
// RelabelConfig
type CallbackConfig struct {
HTTPConfig
}
func init() {
models.RegisterProcessor("callback", &CallbackConfig{})
}
func (c *CallbackConfig) Init(settings interface{}) (models.Processor, error) {
result, err := common.InitProcessor[*CallbackConfig](settings)
return result, err
}
func (c *CallbackConfig) Process(ctx *ctx.Context, wfCtx *models.WorkflowContext) (*models.WorkflowContext, string, error) {
event := wfCtx.Event
if c.Client == nil {
transport := &http.Transport{
TLSClientConfig: &tls.Config{InsecureSkipVerify: c.SkipSSLVerify},
}
if c.Proxy != "" {
proxyURL, err := url.Parse(c.Proxy)
if err != nil {
return wfCtx, "", fmt.Errorf("failed to parse proxy url: %v processor: %v", err, c)
} else {
transport.Proxy = http.ProxyURL(proxyURL)
}
}
c.Client = &http.Client{
Timeout: time.Duration(c.Timeout) * time.Millisecond,
Transport: transport,
}
}
headers := make(map[string]string)
headers["Content-Type"] = "application/json"
for k, v := range c.Headers {
headers[k] = v
}
body, err := json.Marshal(event)
if err != nil {
return wfCtx, "", fmt.Errorf("failed to marshal event: %v processor: %v", err, c)
}
req, err := http.NewRequest("POST", c.URL, strings.NewReader(string(body)))
if err != nil {
return wfCtx, "", fmt.Errorf("failed to create request: %v processor: %v", err, c)
}
for k, v := range headers {
req.Header.Set(k, v)
}
if c.AuthUsername != "" && c.AuthPassword != "" {
req.SetBasicAuth(c.AuthUsername, c.AuthPassword)
}
resp, err := c.Client.Do(req)
if err != nil {
return wfCtx, "", fmt.Errorf("failed to send request: %v processor: %v", err, c)
}
b, err := io.ReadAll(resp.Body)
if err != nil {
return wfCtx, "", fmt.Errorf("failed to read response body: %v processor: %v", err, c)
}
logger.Debugf("callback processor response body: %s", string(b))
return wfCtx, "callback success", nil
}

View File

@@ -0,0 +1,24 @@
package common
import (
"encoding/json"
)
// InitProcessor 是一个通用的初始化处理器的方法
// 使用泛型简化处理器初始化逻辑
// T 必须是 models.Processor 接口的实现
func InitProcessor[T any](settings interface{}) (T, error) {
var zero T
b, err := json.Marshal(settings)
if err != nil {
return zero, err
}
var result T
err = json.Unmarshal(b, &result)
if err != nil {
return zero, err
}
return result, nil
}

View File

@@ -0,0 +1,62 @@
package eventdrop
import (
"bytes"
"fmt"
"strings"
texttemplate "text/template"
"github.com/ccfos/nightingale/v6/alert/pipeline/processor/common"
"github.com/ccfos/nightingale/v6/models"
"github.com/ccfos/nightingale/v6/pkg/ctx"
"github.com/ccfos/nightingale/v6/pkg/tplx"
"github.com/toolkits/pkg/logger"
)
type EventDropConfig struct {
Content string `json:"content"`
}
func init() {
models.RegisterProcessor("event_drop", &EventDropConfig{})
}
func (c *EventDropConfig) Init(settings interface{}) (models.Processor, error) {
result, err := common.InitProcessor[*EventDropConfig](settings)
return result, err
}
func (c *EventDropConfig) Process(ctx *ctx.Context, wfCtx *models.WorkflowContext) (*models.WorkflowContext, string, error) {
// 使用背景是可以根据此处理器,实现对事件进行更加灵活的过滤的逻辑
// 在标签过滤和属性过滤都不满足需求时可以使用
// 如果模板执行结果为 true则删除该事件
event := wfCtx.Event
var defs = []string{
"{{ $event := .Event }}",
"{{ $labels := .Event.TagsMap }}",
"{{ $value := .Event.TriggerValue }}",
"{{ $env := .Env }}",
}
text := strings.Join(append(defs, c.Content), "")
tpl, err := texttemplate.New("eventdrop").Funcs(tplx.TemplateFuncMap).Parse(text)
if err != nil {
return wfCtx, "", fmt.Errorf("processor failed to parse template: %v processor: %v", err, c)
}
var body bytes.Buffer
if err = tpl.Execute(&body, wfCtx); err != nil {
return wfCtx, "", fmt.Errorf("processor failed to execute template: %v processor: %v", err, c)
}
result := strings.TrimSpace(body.String())
logger.Infof("processor eventdrop result: %v", result)
if result == "true" {
logger.Infof("processor eventdrop drop event: %v", event)
return nil, "drop event success", nil
}
return wfCtx, "drop event failed", nil
}

View File

@@ -0,0 +1,97 @@
package eventupdate
import (
"crypto/tls"
"encoding/json"
"fmt"
"io"
"net/http"
"net/url"
"strings"
"time"
"github.com/ccfos/nightingale/v6/alert/pipeline/processor/callback"
"github.com/ccfos/nightingale/v6/alert/pipeline/processor/common"
"github.com/ccfos/nightingale/v6/models"
"github.com/ccfos/nightingale/v6/pkg/ctx"
"github.com/toolkits/pkg/logger"
)
// RelabelConfig
type EventUpdateConfig struct {
callback.HTTPConfig
}
func init() {
models.RegisterProcessor("event_update", &EventUpdateConfig{})
}
func (c *EventUpdateConfig) Init(settings interface{}) (models.Processor, error) {
result, err := common.InitProcessor[*EventUpdateConfig](settings)
return result, err
}
func (c *EventUpdateConfig) Process(ctx *ctx.Context, wfCtx *models.WorkflowContext) (*models.WorkflowContext, string, error) {
event := wfCtx.Event
if c.Client == nil {
transport := &http.Transport{
TLSClientConfig: &tls.Config{InsecureSkipVerify: c.SkipSSLVerify},
}
if c.Proxy != "" {
proxyURL, err := url.Parse(c.Proxy)
if err != nil {
return wfCtx, "", fmt.Errorf("failed to parse proxy url: %v processor: %v", err, c)
} else {
transport.Proxy = http.ProxyURL(proxyURL)
}
}
c.Client = &http.Client{
Timeout: time.Duration(c.Timeout) * time.Millisecond,
Transport: transport,
}
}
headers := make(map[string]string)
headers["Content-Type"] = "application/json"
for k, v := range c.Headers {
headers[k] = v
}
body, err := json.Marshal(event)
if err != nil {
return wfCtx, "", fmt.Errorf("failed to marshal event: %v processor: %v", err, c)
}
req, err := http.NewRequest("POST", c.URL, strings.NewReader(string(body)))
if err != nil {
return wfCtx, "", fmt.Errorf("failed to create request: %v processor: %v", err, c)
}
for k, v := range headers {
req.Header.Set(k, v)
}
if c.AuthUsername != "" && c.AuthPassword != "" {
req.SetBasicAuth(c.AuthUsername, c.AuthPassword)
}
resp, err := c.Client.Do(req)
if err != nil {
return wfCtx, "", fmt.Errorf("failed to send request: %v processor: %v", err, c)
}
b, err := io.ReadAll(resp.Body)
if err != nil {
return nil, "", fmt.Errorf("failed to read response body: %v processor: %v", err, c)
}
logger.Debugf("event update processor response body: %s", string(b))
err = json.Unmarshal(b, &event)
if err != nil {
return wfCtx, "", fmt.Errorf("failed to unmarshal response body: %v processor: %v", err, c)
}
return wfCtx, "", nil
}

View File

@@ -0,0 +1,197 @@
package logic
import (
"bytes"
"fmt"
"strings"
"text/template"
alertCommon "github.com/ccfos/nightingale/v6/alert/common"
"github.com/ccfos/nightingale/v6/alert/pipeline/processor/common"
"github.com/ccfos/nightingale/v6/models"
"github.com/ccfos/nightingale/v6/pkg/ctx"
"github.com/ccfos/nightingale/v6/pkg/tplx"
)
// 判断模式常量
const (
ConditionModeExpression = "expression" // 表达式模式(默认)
ConditionModeTags = "tags" // 标签/属性模式
)
// IfConfig If 条件处理器配置
type IfConfig struct {
// 判断模式expression表达式或 tags标签/属性)
Mode string `json:"mode,omitempty"`
// 表达式模式配置
// 条件表达式(支持 Go 模板语法)
// 例如:{{ if eq .Severity 1 }}true{{ end }}
Condition string `json:"condition,omitempty"`
// 标签/属性模式配置
LabelKeys []models.TagFilter `json:"label_keys,omitempty"` // 适用标签
Attributes []models.TagFilter `json:"attributes,omitempty"` // 适用属性
// 内部使用,解析后的过滤器
parsedLabelKeys []models.TagFilter `json:"-"`
parsedAttributes []models.TagFilter `json:"-"`
}
func init() {
models.RegisterProcessor("logic.if", &IfConfig{})
}
func (c *IfConfig) Init(settings interface{}) (models.Processor, error) {
result, err := common.InitProcessor[*IfConfig](settings)
if err != nil {
return nil, err
}
// 解析标签过滤器
if len(result.LabelKeys) > 0 {
// Deep copy to avoid concurrent map writes on cached objects
labelKeysCopy := make([]models.TagFilter, len(result.LabelKeys))
copy(labelKeysCopy, result.LabelKeys)
for i := range labelKeysCopy {
if labelKeysCopy[i].Func == "" {
labelKeysCopy[i].Func = labelKeysCopy[i].Op
}
}
result.parsedLabelKeys, err = models.ParseTagFilter(labelKeysCopy)
if err != nil {
return nil, fmt.Errorf("failed to parse label_keys: %v", err)
}
}
// 解析属性过滤器
if len(result.Attributes) > 0 {
// Deep copy to avoid concurrent map writes on cached objects
attributesCopy := make([]models.TagFilter, len(result.Attributes))
copy(attributesCopy, result.Attributes)
for i := range attributesCopy {
if attributesCopy[i].Func == "" {
attributesCopy[i].Func = attributesCopy[i].Op
}
}
result.parsedAttributes, err = models.ParseTagFilter(attributesCopy)
if err != nil {
return nil, fmt.Errorf("failed to parse attributes: %v", err)
}
}
return result, nil
}
// Process 实现 Processor 接口(兼容旧模式)
func (c *IfConfig) Process(ctx *ctx.Context, wfCtx *models.WorkflowContext) (*models.WorkflowContext, string, error) {
result, err := c.evaluateCondition(wfCtx)
if err != nil {
return wfCtx, "", fmt.Errorf("if processor: failed to evaluate condition: %v", err)
}
if result {
return wfCtx, "condition matched (true branch)", nil
}
return wfCtx, "condition not matched (false branch)", nil
}
// ProcessWithBranch 实现 BranchProcessor 接口
func (c *IfConfig) ProcessWithBranch(ctx *ctx.Context, wfCtx *models.WorkflowContext) (*models.NodeOutput, error) {
result, err := c.evaluateCondition(wfCtx)
if err != nil {
return nil, fmt.Errorf("if processor: failed to evaluate condition: %v", err)
}
output := &models.NodeOutput{
WfCtx: wfCtx,
}
if result {
// 条件为 true走输出 0true 分支)
branchIndex := 0
output.BranchIndex = &branchIndex
output.Message = "condition matched (true branch)"
} else {
// 条件为 false走输出 1false 分支)
branchIndex := 1
output.BranchIndex = &branchIndex
output.Message = "condition not matched (false branch)"
}
return output, nil
}
// evaluateCondition 评估条件
func (c *IfConfig) evaluateCondition(wfCtx *models.WorkflowContext) (bool, error) {
mode := c.Mode
if mode == "" {
mode = ConditionModeExpression // 默认表达式模式
}
switch mode {
case ConditionModeTags:
return c.evaluateTagsCondition(wfCtx.Event)
default:
return c.evaluateExpressionCondition(wfCtx)
}
}
// evaluateExpressionCondition 评估表达式条件
func (c *IfConfig) evaluateExpressionCondition(wfCtx *models.WorkflowContext) (bool, error) {
if c.Condition == "" {
return true, nil
}
// 构建模板数据
var defs = []string{
"{{ $event := .Event }}",
"{{ $labels := .Event.TagsMap }}",
"{{ $value := .Event.TriggerValue }}",
"{{ $env := .Env }}",
}
text := strings.Join(append(defs, c.Condition), "")
tpl, err := template.New("if_condition").Funcs(tplx.TemplateFuncMap).Parse(text)
if err != nil {
return false, err
}
var buf bytes.Buffer
if err = tpl.Execute(&buf, wfCtx); err != nil {
return false, err
}
result := strings.TrimSpace(strings.ToLower(buf.String()))
return result == "true" || result == "1", nil
}
// evaluateTagsCondition 评估标签/属性条件
func (c *IfConfig) evaluateTagsCondition(event *models.AlertCurEvent) (bool, error) {
// 如果没有配置任何过滤条件,默认返回 true
if len(c.parsedLabelKeys) == 0 && len(c.parsedAttributes) == 0 {
return true, nil
}
// 匹配标签 (TagsMap)
if len(c.parsedLabelKeys) > 0 {
tagsMap := event.TagsMap
if tagsMap == nil {
tagsMap = make(map[string]string)
}
if !alertCommon.MatchTags(tagsMap, c.parsedLabelKeys) {
return false, nil
}
}
// 匹配属性 (JsonTagsAndValue - 所有 JSON 字段)
if len(c.parsedAttributes) > 0 {
attributesMap := event.JsonTagsAndValue()
if !alertCommon.MatchTags(attributesMap, c.parsedAttributes) {
return false, nil
}
}
return true, nil
}

View File

@@ -0,0 +1,224 @@
package logic
import (
"bytes"
"fmt"
"strings"
"text/template"
alertCommon "github.com/ccfos/nightingale/v6/alert/common"
"github.com/ccfos/nightingale/v6/alert/pipeline/processor/common"
"github.com/ccfos/nightingale/v6/models"
"github.com/ccfos/nightingale/v6/pkg/ctx"
"github.com/ccfos/nightingale/v6/pkg/tplx"
)
// SwitchCase Switch 分支定义
type SwitchCase struct {
// 判断模式expression表达式或 tags标签/属性)
Mode string `json:"mode,omitempty"`
// 表达式模式配置
// 条件表达式(支持 Go 模板语法)
Condition string `json:"condition,omitempty"`
// 标签/属性模式配置
LabelKeys []models.TagFilter `json:"label_keys,omitempty"` // 适用标签
Attributes []models.TagFilter `json:"attributes,omitempty"` // 适用属性
// 分支名称(可选,用于日志)
Name string `json:"name,omitempty"`
// 内部使用,解析后的过滤器
parsedLabelKeys []models.TagFilter `json:"-"`
parsedAttributes []models.TagFilter `json:"-"`
}
// SwitchConfig Switch 多分支处理器配置
type SwitchConfig struct {
// 分支条件列表
// 按顺序匹配,第一个为 true 的分支将被选中
Cases []SwitchCase `json:"cases"`
// 是否允许多个分支同时匹配(默认 false只走第一个匹配的
AllowMultiple bool `json:"allow_multiple,omitempty"`
}
func init() {
models.RegisterProcessor("logic.switch", &SwitchConfig{})
}
func (c *SwitchConfig) Init(settings interface{}) (models.Processor, error) {
result, err := common.InitProcessor[*SwitchConfig](settings)
if err != nil {
return nil, err
}
// 解析每个 case 的标签和属性过滤器
for i := range result.Cases {
if len(result.Cases[i].LabelKeys) > 0 {
// Deep copy to avoid concurrent map writes on cached objects
labelKeysCopy := make([]models.TagFilter, len(result.Cases[i].LabelKeys))
copy(labelKeysCopy, result.Cases[i].LabelKeys)
for j := range labelKeysCopy {
if labelKeysCopy[j].Func == "" {
labelKeysCopy[j].Func = labelKeysCopy[j].Op
}
}
result.Cases[i].parsedLabelKeys, err = models.ParseTagFilter(labelKeysCopy)
if err != nil {
return nil, fmt.Errorf("failed to parse label_keys for case[%d]: %v", i, err)
}
}
if len(result.Cases[i].Attributes) > 0 {
// Deep copy to avoid concurrent map writes on cached objects
attributesCopy := make([]models.TagFilter, len(result.Cases[i].Attributes))
copy(attributesCopy, result.Cases[i].Attributes)
for j := range attributesCopy {
if attributesCopy[j].Func == "" {
attributesCopy[j].Func = attributesCopy[j].Op
}
}
result.Cases[i].parsedAttributes, err = models.ParseTagFilter(attributesCopy)
if err != nil {
return nil, fmt.Errorf("failed to parse attributes for case[%d]: %v", i, err)
}
}
}
return result, nil
}
// Process 实现 Processor 接口(兼容旧模式)
func (c *SwitchConfig) Process(ctx *ctx.Context, wfCtx *models.WorkflowContext) (*models.WorkflowContext, string, error) {
index, caseName, err := c.evaluateCases(wfCtx)
if err != nil {
return wfCtx, "", fmt.Errorf("switch processor: failed to evaluate cases: %v", err)
}
if index >= 0 {
if caseName != "" {
return wfCtx, fmt.Sprintf("matched case[%d]: %s", index, caseName), nil
}
return wfCtx, fmt.Sprintf("matched case[%d]", index), nil
}
// 走默认分支(最后一个输出)
return wfCtx, "no case matched, using default branch", nil
}
// ProcessWithBranch 实现 BranchProcessor 接口
func (c *SwitchConfig) ProcessWithBranch(ctx *ctx.Context, wfCtx *models.WorkflowContext) (*models.NodeOutput, error) {
index, caseName, err := c.evaluateCases(wfCtx)
if err != nil {
return nil, fmt.Errorf("switch processor: failed to evaluate cases: %v", err)
}
output := &models.NodeOutput{
WfCtx: wfCtx,
}
if index >= 0 {
output.BranchIndex = &index
if caseName != "" {
output.Message = fmt.Sprintf("matched case[%d]: %s", index, caseName)
} else {
output.Message = fmt.Sprintf("matched case[%d]", index)
}
} else {
// 默认分支的索引是 cases 数量(即最后一个输出端口)
defaultIndex := len(c.Cases)
output.BranchIndex = &defaultIndex
output.Message = "no case matched, using default branch"
}
return output, nil
}
// evaluateCases 评估所有分支条件
// 返回匹配的分支索引和分支名称,如果没有匹配返回 -1
func (c *SwitchConfig) evaluateCases(wfCtx *models.WorkflowContext) (int, string, error) {
for i := range c.Cases {
matched, err := c.evaluateCaseCondition(&c.Cases[i], wfCtx)
if err != nil {
return -1, "", fmt.Errorf("case[%d] evaluation error: %v", i, err)
}
if matched {
return i, c.Cases[i].Name, nil
}
}
return -1, "", nil
}
// evaluateCaseCondition 评估单个分支条件
func (c *SwitchConfig) evaluateCaseCondition(caseItem *SwitchCase, wfCtx *models.WorkflowContext) (bool, error) {
mode := caseItem.Mode
if mode == "" {
mode = ConditionModeExpression // 默认表达式模式
}
switch mode {
case ConditionModeTags:
return c.evaluateTagsCondition(caseItem, wfCtx.Event)
default:
return c.evaluateExpressionCondition(caseItem.Condition, wfCtx)
}
}
// evaluateExpressionCondition 评估表达式条件
func (c *SwitchConfig) evaluateExpressionCondition(condition string, wfCtx *models.WorkflowContext) (bool, error) {
if condition == "" {
return false, nil
}
var defs = []string{
"{{ $event := .Event }}",
"{{ $labels := .Event.TagsMap }}",
"{{ $value := .Event.TriggerValue }}",
"{{ $env := .Env }}",
}
text := strings.Join(append(defs, condition), "")
tpl, err := template.New("switch_condition").Funcs(tplx.TemplateFuncMap).Parse(text)
if err != nil {
return false, err
}
var buf bytes.Buffer
if err = tpl.Execute(&buf, wfCtx); err != nil {
return false, err
}
result := strings.TrimSpace(strings.ToLower(buf.String()))
return result == "true" || result == "1", nil
}
// evaluateTagsCondition 评估标签/属性条件
func (c *SwitchConfig) evaluateTagsCondition(caseItem *SwitchCase, event *models.AlertCurEvent) (bool, error) {
// 如果没有配置任何过滤条件,默认返回 false不匹配
if len(caseItem.parsedLabelKeys) == 0 && len(caseItem.parsedAttributes) == 0 {
return false, nil
}
// 匹配标签 (TagsMap)
if len(caseItem.parsedLabelKeys) > 0 {
tagsMap := event.TagsMap
if tagsMap == nil {
tagsMap = make(map[string]string)
}
if !alertCommon.MatchTags(tagsMap, caseItem.parsedLabelKeys) {
return false, nil
}
}
// 匹配属性 (JsonTagsAndValue - 所有 JSON 字段)
if len(caseItem.parsedAttributes) > 0 {
attributesMap := event.JsonTagsAndValue()
if !alertCommon.MatchTags(attributesMap, caseItem.parsedAttributes) {
return false, nil
}
}
return true, nil
}

View File

@@ -0,0 +1,107 @@
package relabel
import (
"fmt"
"regexp"
"strings"
"github.com/ccfos/nightingale/v6/alert/pipeline/processor/common"
"github.com/ccfos/nightingale/v6/models"
"github.com/ccfos/nightingale/v6/pkg/ctx"
"github.com/ccfos/nightingale/v6/pushgw/pconf"
"github.com/ccfos/nightingale/v6/pushgw/writer"
"github.com/prometheus/common/model"
"github.com/prometheus/prometheus/prompb"
)
const (
REPLACE_DOT = "___"
)
// RelabelConfig
type RelabelConfig struct {
SourceLabels []string `json:"source_labels"`
Separator string `json:"separator"`
Regex string `json:"regex"`
RegexCompiled *regexp.Regexp
If string `json:"if"`
IfRegex *regexp.Regexp
Modulus uint64 `json:"modulus"`
TargetLabel string `json:"target_label"`
Replacement string `json:"replacement"`
Action string `json:"action"`
}
func init() {
models.RegisterProcessor("relabel", &RelabelConfig{})
}
func (r *RelabelConfig) Init(settings interface{}) (models.Processor, error) {
result, err := common.InitProcessor[*RelabelConfig](settings)
return result, err
}
func (r *RelabelConfig) Process(ctx *ctx.Context, wfCtx *models.WorkflowContext) (*models.WorkflowContext, string, error) {
sourceLabels := make([]model.LabelName, len(r.SourceLabels))
for i := range r.SourceLabels {
sourceLabels[i] = model.LabelName(strings.ReplaceAll(r.SourceLabels[i], ".", REPLACE_DOT))
}
relabelConfigs := []*pconf.RelabelConfig{
{
SourceLabels: sourceLabels,
Separator: r.Separator,
Regex: r.Regex,
RegexCompiled: r.RegexCompiled,
If: r.If,
IfRegex: r.IfRegex,
Modulus: r.Modulus,
TargetLabel: r.TargetLabel,
Replacement: r.Replacement,
Action: r.Action,
},
}
EventRelabel(wfCtx.Event, relabelConfigs)
return wfCtx, "", nil
}
func EventRelabel(event *models.AlertCurEvent, relabelConfigs []*pconf.RelabelConfig) {
labels := make([]prompb.Label, len(event.TagsJSON))
event.OriginalTagsJSON = make([]string, len(event.TagsJSON))
for i, tag := range event.TagsJSON {
label := strings.SplitN(tag, "=", 2)
if len(label) != 2 {
continue
}
event.OriginalTagsJSON[i] = tag
label[0] = strings.ReplaceAll(string(label[0]), ".", REPLACE_DOT)
labels[i] = prompb.Label{Name: label[0], Value: label[1]}
}
for i := 0; i < len(relabelConfigs); i++ {
if relabelConfigs[i].Replacement == "" {
relabelConfigs[i].Replacement = "$1"
}
if relabelConfigs[i].Separator == "" {
relabelConfigs[i].Separator = ";"
}
if relabelConfigs[i].Regex == "" {
relabelConfigs[i].Regex = "(.*)"
}
}
gotLabels := writer.Process(labels, relabelConfigs...)
event.TagsJSON = make([]string, len(gotLabels))
event.TagsMap = make(map[string]string, len(gotLabels))
for i, label := range gotLabels {
label.Name = strings.ReplaceAll(string(label.Name), REPLACE_DOT, ".")
event.TagsJSON[i] = fmt.Sprintf("%s=%s", label.Name, label.Value)
event.TagsMap[label.Name] = label.Value
}
event.Tags = strings.Join(event.TagsJSON, ",,")
}

View File

@@ -2,6 +2,7 @@ package process
import (
"bytes"
"encoding/json"
"fmt"
"html/template"
"sort"
@@ -13,18 +14,18 @@ import (
"github.com/ccfos/nightingale/v6/alert/common"
"github.com/ccfos/nightingale/v6/alert/dispatch"
"github.com/ccfos/nightingale/v6/alert/mute"
"github.com/ccfos/nightingale/v6/alert/pipeline/processor/relabel"
"github.com/ccfos/nightingale/v6/alert/queue"
"github.com/ccfos/nightingale/v6/memsto"
"github.com/ccfos/nightingale/v6/models"
"github.com/ccfos/nightingale/v6/pkg/ctx"
"github.com/ccfos/nightingale/v6/pkg/tplx"
"github.com/ccfos/nightingale/v6/prom"
"github.com/robfig/cron/v3"
"github.com/toolkits/pkg/logger"
"github.com/toolkits/pkg/str"
)
type EventMuteHookFunc func(event *models.AlertCurEvent) bool
type ExternalProcessorsType struct {
ExternalLock sync.RWMutex
Processors map[string]*Processor
@@ -49,31 +50,33 @@ type HandleEventFunc func(event *models.AlertCurEvent)
type Processor struct {
datasourceId int64
EngineName string
rule *models.AlertRule
fires *AlertCurEventMap
pendings *AlertCurEventMap
inhibit bool
rule *models.AlertRule
fires *AlertCurEventMap
pendings *AlertCurEventMap
pendingsUseByRecover *AlertCurEventMap
inhibit bool
tagsMap map[string]string
tagsArr []string
target string
targetNote string
groupName string
tagsMap map[string]string
tagsArr []string
groupName string
atertRuleCache *memsto.AlertRuleCacheType
TargetCache *memsto.TargetCacheType
BusiGroupCache *memsto.BusiGroupCacheType
alertMuteCache *memsto.AlertMuteCacheType
datasourceCache *memsto.DatasourceCacheType
alertRuleCache *memsto.AlertRuleCacheType
TargetCache *memsto.TargetCacheType
TargetsOfAlertRuleCache *memsto.TargetsOfAlertRuleCacheType
BusiGroupCache *memsto.BusiGroupCacheType
alertMuteCache *memsto.AlertMuteCacheType
datasourceCache *memsto.DatasourceCacheType
promClients *prom.PromClientMap
ctx *ctx.Context
Stats *astats.Stats
ctx *ctx.Context
Stats *astats.Stats
HandleFireEventHook HandleEventFunc
HandleRecoverEventHook HandleEventFunc
EventMuteHook EventMuteHookFunc
ScheduleEntry cron.Entry
PromEvalInterval int
}
func (p *Processor) Key() string {
@@ -85,50 +88,57 @@ func (p *Processor) DatasourceId() int64 {
}
func (p *Processor) Hash() string {
return str.MD5(fmt.Sprintf("%d_%d_%s_%d",
return str.MD5(fmt.Sprintf("%d_%s_%s_%d",
p.rule.Id,
p.rule.PromEvalInterval,
p.rule.CronPattern,
p.rule.RuleConfig,
p.datasourceId,
))
}
func NewProcessor(rule *models.AlertRule, datasourceId int64, atertRuleCache *memsto.AlertRuleCacheType, targetCache *memsto.TargetCacheType,
func NewProcessor(engineName string, rule *models.AlertRule, datasourceId int64, alertRuleCache *memsto.AlertRuleCacheType,
targetCache *memsto.TargetCacheType, targetsOfAlertRuleCache *memsto.TargetsOfAlertRuleCacheType,
busiGroupCache *memsto.BusiGroupCacheType, alertMuteCache *memsto.AlertMuteCacheType, datasourceCache *memsto.DatasourceCacheType, ctx *ctx.Context,
stats *astats.Stats) *Processor {
p := &Processor{
EngineName: engineName,
datasourceId: datasourceId,
rule: rule,
TargetCache: targetCache,
BusiGroupCache: busiGroupCache,
alertMuteCache: alertMuteCache,
atertRuleCache: atertRuleCache,
datasourceCache: datasourceCache,
TargetCache: targetCache,
TargetsOfAlertRuleCache: targetsOfAlertRuleCache,
BusiGroupCache: busiGroupCache,
alertMuteCache: alertMuteCache,
alertRuleCache: alertRuleCache,
datasourceCache: datasourceCache,
ctx: ctx,
Stats: stats,
HandleFireEventHook: func(event *models.AlertCurEvent) {},
HandleRecoverEventHook: func(event *models.AlertCurEvent) {},
EventMuteHook: func(event *models.AlertCurEvent) bool { return false },
}
p.mayHandleGroup()
return p
}
func (p *Processor) Handle(anomalyPoints []common.AnomalyPoint, from string, inhibit bool) {
func (p *Processor) Handle(anomalyPoints []models.AnomalyPoint, from string, inhibit bool) {
// 有可能rule的一些配置已经发生变化比如告警接收人、callbacks等
// 这些信息的修改是不会引起worker restart的但是确实会影响告警处理逻辑
// 所以这里直接从memsto.AlertRuleCache中获取并覆盖
p.inhibit = inhibit
cachedRule := p.atertRuleCache.Get(p.rule.Id)
cachedRule := p.alertRuleCache.Get(p.rule.Id)
if cachedRule == nil {
logger.Errorf("rule not found %+v", anomalyPoints)
logger.Warningf("process handle error: rule not found %+v rule_id:%d maybe rule has been deleted", anomalyPoints, p.rule.Id)
p.Stats.CounterRuleEvalErrorTotal.WithLabelValues(fmt.Sprintf("%v", p.DatasourceId()), "handle_event", p.BusiGroupCache.GetNameByBusiGroupId(p.rule.GroupId), fmt.Sprintf("%v", p.rule.Id)).Inc()
return
}
// 在 rule 变化之前取到 ruleHash
ruleHash := p.rule.Hash()
p.rule = cachedRule
now := time.Now().Unix()
alertingKeys := map[string]struct{}{}
@@ -136,17 +146,41 @@ func (p *Processor) Handle(anomalyPoints []common.AnomalyPoint, from string, inh
// 根据 event 的 tag 将 events 分组,处理告警抑制的情况
eventsMap := make(map[string][]*models.AlertCurEvent)
for _, anomalyPoint := range anomalyPoints {
event := p.BuildEvent(anomalyPoint, from, now)
event := p.BuildEvent(anomalyPoint, from, now, ruleHash)
event.NotifyRuleIds = cachedRule.NotifyRuleIds
// 如果 event 被 mute 了,本质也是 fire 的状态,这里无论如何都添加到 alertingKeys 中,防止 fire 的事件自动恢复了
hash := event.Hash
alertingKeys[hash] = struct{}{}
if mute.IsMuted(cachedRule, event, p.TargetCache, p.alertMuteCache) {
p.Stats.CounterMuteTotal.WithLabelValues(event.GroupName).Inc()
logger.Debugf("rule_eval:%s event:%v is muted", p.Key(), event)
// event processor
eventCopy := event.DeepCopy()
event = dispatch.HandleEventPipeline(cachedRule.PipelineConfigs, eventCopy, event, dispatch.EventProcessorCache, p.ctx, cachedRule.Id, "alert_rule")
if event == nil {
logger.Infof("rule_eval:%s is muted drop by pipeline event:%v", p.Key(), eventCopy)
continue
}
if p.EventMuteHook(event) {
// event mute
isMuted, detail, muteId := mute.IsMuted(cachedRule, event, p.TargetCache, p.alertMuteCache)
if isMuted {
logger.Infof("rule_eval:%s is muted, detail:%s event:%v", p.Key(), detail, event)
p.Stats.CounterMuteTotal.WithLabelValues(
fmt.Sprintf("%v", event.GroupName),
fmt.Sprintf("%v", p.rule.Id),
fmt.Sprintf("%v", muteId),
fmt.Sprintf("%v", p.datasourceId),
).Inc()
continue
}
if dispatch.EventMuteHook(event) {
logger.Infof("rule_eval:%s is muted by hook event:%v", p.Key(), event)
p.Stats.CounterMuteTotal.WithLabelValues(
fmt.Sprintf("%v", event.GroupName),
fmt.Sprintf("%v", p.rule.Id),
fmt.Sprintf("%v", 0),
fmt.Sprintf("%v", p.datasourceId),
).Inc()
continue
}
@@ -158,12 +192,14 @@ func (p *Processor) Handle(anomalyPoints []common.AnomalyPoint, from string, inh
p.handleEvent(events)
}
p.HandleRecover(alertingKeys, now)
if from == "inner" {
p.HandleRecover(alertingKeys, now, inhibit)
}
}
func (p *Processor) BuildEvent(anomalyPoint common.AnomalyPoint, from string, now int64) *models.AlertCurEvent {
func (p *Processor) BuildEvent(anomalyPoint models.AnomalyPoint, from string, now int64, ruleHash string) *models.AlertCurEvent {
p.fillTags(anomalyPoint)
p.mayHandleIdent()
hash := Hash(p.rule.Id, p.datasourceId, anomalyPoint)
ds := p.datasourceCache.GetById(p.datasourceId)
var dsName string
@@ -172,37 +208,94 @@ func (p *Processor) BuildEvent(anomalyPoint common.AnomalyPoint, from string, no
}
event := p.rule.GenerateNewEvent(p.ctx)
bg := p.BusiGroupCache.GetByBusiGroupId(p.rule.GroupId)
if bg != nil {
event.GroupName = bg.Name
}
event.TriggerTime = anomalyPoint.Timestamp
event.TagsMap = p.tagsMap
event.DatasourceId = p.datasourceId
event.Cluster = dsName
event.Hash = hash
event.TargetIdent = p.target
event.TargetNote = p.targetNote
event.TriggerValue = anomalyPoint.ReadableValue()
event.TriggerValues = anomalyPoint.Values
event.TriggerValuesJson = models.EventTriggerValues{ValuesWithUnit: anomalyPoint.ValuesUnit}
event.TagsJSON = p.tagsArr
event.GroupName = p.groupName
event.Tags = strings.Join(p.tagsArr, ",,")
event.IsRecovered = false
event.Callbacks = p.rule.Callbacks
event.CallbacksJSON = p.rule.CallbacksJSON
event.Annotations = p.rule.Annotations
event.AnnotationsJSON = make(map[string]string)
event.RuleConfig = p.rule.RuleConfig
event.RuleConfigJson = p.rule.RuleConfigJson
event.Severity = anomalyPoint.Severity
event.ExtraConfig = p.rule.ExtraConfigJSON
event.PromQl = anomalyPoint.Query
event.RecoverConfig = anomalyPoint.RecoverConfig
event.RuleHash = ruleHash
if anomalyPoint.TriggerType == models.TriggerTypeNodata {
event.TriggerValue = "nodata"
ruleConfig := models.RuleQuery{}
json.Unmarshal([]byte(p.rule.RuleConfig), &ruleConfig)
ruleConfig.TriggerType = anomalyPoint.TriggerType
b, _ := json.Marshal(ruleConfig)
event.RuleConfig = string(b)
}
if err := json.Unmarshal([]byte(p.rule.Annotations), &event.AnnotationsJSON); err != nil {
event.AnnotationsJSON = make(map[string]string) // 解析失败时使用空 map
logger.Warningf("unmarshal annotations json failed: %v, rule: %d", err, p.rule.Id)
}
if event.TriggerValues != "" && strings.Count(event.TriggerValues, "$") > 1 {
// TriggerValues 有多个变量,将多个变量都放到 TriggerValue 中
event.TriggerValue = event.TriggerValues
}
if from == "inner" {
event.LastEvalTime = now
} else {
event.LastEvalTime = event.TriggerTime
}
// 生成事件之后,立马进程 relabel 处理
Relabel(p.rule, event)
// 放到 Relabel(p.rule, event) 下面,为了处理 relabel 之后,标签里才出现 ident 的情况
p.mayHandleIdent(event)
if event.TargetIdent != "" {
if pt, exist := p.TargetCache.Get(event.TargetIdent); exist {
pt.GroupNames = p.BusiGroupCache.GetNamesByBusiGroupIds(pt.GroupIds)
event.Target = pt
} else {
logger.Infof("fill event target error, ident: %s doesn't exist in cache.", event.TargetIdent)
}
}
return event
}
func (p *Processor) HandleRecover(alertingKeys map[string]struct{}, now int64) {
func Relabel(rule *models.AlertRule, event *models.AlertCurEvent) {
if rule == nil {
return
}
// need to keep the original label
event.OriginalTags = event.Tags
event.OriginalTagsJSON = event.TagsJSON
if len(rule.EventRelabelConfig) == 0 {
return
}
relabel.EventRelabel(event, rule.EventRelabelConfig)
}
func (p *Processor) HandleRecover(alertingKeys map[string]struct{}, now int64, inhibit bool) {
for _, hash := range p.pendings.Keys() {
if _, has := alertingKeys[hash]; has {
continue
@@ -210,36 +303,102 @@ func (p *Processor) HandleRecover(alertingKeys map[string]struct{}, now int64) {
p.pendings.Delete(hash)
}
for hash := range p.fires.GetAll() {
hashArr := make([]string, 0, len(alertingKeys))
for hash, _ := range p.fires.GetAll() {
if _, has := alertingKeys[hash]; has {
continue
}
p.RecoverSingle(hash, now, nil)
hashArr = append(hashArr, hash)
}
p.HandleRecoverEvent(hashArr, now, inhibit)
}
func (p *Processor) RecoverSingle(hash string, now int64, value *string) {
func (p *Processor) HandleRecoverEvent(hashArr []string, now int64, inhibit bool) {
cachedRule := p.rule
if cachedRule == nil {
return
}
if !inhibit {
for _, hash := range hashArr {
p.RecoverSingle(false, hash, now, nil)
}
return
}
eventMap := make(map[string]models.AlertCurEvent)
for _, hash := range hashArr {
event, has := p.fires.Get(hash)
if !has {
continue
}
e, exists := eventMap[event.Tags]
if !exists {
eventMap[event.Tags] = *event
continue
}
if e.Severity > event.Severity {
// hash 对应的恢复事件的被抑制了,把之前的事件删除
p.fires.Delete(e.Hash)
p.pendings.Delete(e.Hash)
models.AlertCurEventDelByHash(p.ctx, e.Hash)
eventMap[event.Tags] = *event
}
}
for _, event := range eventMap {
p.RecoverSingle(false, event.Hash, now, nil)
}
}
func (p *Processor) RecoverSingle(byRecover bool, hash string, now int64, value *string, values ...string) {
cachedRule := p.rule
if cachedRule == nil {
return
}
event, has := p.fires.Get(hash)
if !has {
return
}
// 如果配置了留观时长,就不能立马恢复了
if cachedRule.RecoverDuration > 0 && now-event.LastEvalTime < cachedRule.RecoverDuration {
if cachedRule.RecoverDuration > 0 {
lastPendingEvent, has := p.pendingsUseByRecover.Get(hash)
if !has {
// 说明没有产生过异常点,就不需要恢复了
logger.Debugf("rule_eval:%s event:%v do not has pending event, not recover", p.Key(), event)
return
}
if now-lastPendingEvent.LastEvalTime < cachedRule.RecoverDuration {
logger.Debugf("rule_eval:%s event:%v not recover", p.Key(), event)
return
}
}
// 如果设置了恢复条件,则不能在此处恢复,必须依靠 recoverPoint 来恢复
if event.RecoverConfig.JudgeType != models.Origin && !byRecover {
logger.Debugf("rule_eval:%s event:%v not recover", p.Key(), event)
return
}
if value != nil {
event.TriggerValue = *value
if len(values) > 0 {
event.TriggerValues = values[0]
}
}
// 没查到触发阈值的vector姑且就认为这个vector的值恢复了
// 我确实无法分辨是prom中有值但是未满足阈值所以没返回还是prom中确实丢了一些点导致没有数据可以返回尴尬
p.fires.Delete(hash)
p.pendings.Delete(hash)
p.pendingsUseByRecover.Delete(hash)
// 可能是因为调整了promql才恢复的所以事件里边要体现最新的promql否则用户会比较困惑
// 当然其实rule的各个字段都可能发生变化了都更新一下吧
@@ -253,12 +412,20 @@ func (p *Processor) RecoverSingle(hash string, now int64, value *string) {
func (p *Processor) handleEvent(events []*models.AlertCurEvent) {
var fireEvents []*models.AlertCurEvent
// severity 初始为 4, 一定为遇到比自己优先级高的事件
severity := 4
// severity 初始为最低优先级, 一定为遇到比自己优先级高的事件
severity := models.SeverityLowest
for _, event := range events {
if event == nil {
continue
}
if _, has := p.pendingsUseByRecover.Get(event.Hash); has {
p.pendingsUseByRecover.UpdateLastEvalTime(event.Hash, event.LastEvalTime)
} else {
p.pendingsUseByRecover.Set(event.Hash, event)
}
event.PromEvalInterval = p.PromEvalInterval
if p.rule.PromForDuration == 0 {
fireEvents = append(fireEvents, event)
if severity > event.Severity {
@@ -267,17 +434,18 @@ func (p *Processor) handleEvent(events []*models.AlertCurEvent) {
continue
}
var preTriggerTime int64
var preEvalTime int64 // 第一个 pending event 的检测时间
preEvent, has := p.pendings.Get(event.Hash)
if has {
p.pendings.UpdateLastEvalTime(event.Hash, event.LastEvalTime)
preTriggerTime = preEvent.TriggerTime
preEvalTime = preEvent.FirstEvalTime
} else {
event.FirstEvalTime = event.LastEvalTime
p.pendings.Set(event.Hash, event)
preTriggerTime = event.TriggerTime
preEvalTime = event.FirstEvalTime
}
if event.LastEvalTime-preTriggerTime+int64(event.PromEvalInterval) >= int64(p.rule.PromForDuration) {
if event.LastEvalTime-preEvalTime+int64(event.PromEvalInterval) >= int64(p.rule.PromForDuration) {
fireEvents = append(fireEvents, event)
if severity > event.Severity {
severity = event.Severity
@@ -306,16 +474,18 @@ func (p *Processor) fireEvent(event *models.AlertCurEvent) {
return
}
logger.Debugf("rule_eval:%s event:%+v fire", p.Key(), event)
message := "unknown"
defer func() {
logger.Infof("rule_eval:%s event-hash-%s %s", p.Key(), event.Hash, message)
}()
if fired, has := p.fires.Get(event.Hash); has {
p.fires.UpdateLastEvalTime(event.Hash, event.LastEvalTime)
event.FirstTriggerTime = fired.FirstTriggerTime
p.HandleFireEventHook(event)
if cachedRule.NotifyRepeatStep == 0 {
logger.Debugf("rule_eval:%s event:%+v repeat is zero nothing to do", p.Key(), event)
// 说明不想重复通知那就直接返回了nothing to do
// do not need to send alert again
message = "stalled, rule.notify_repeat_step is 0, no need to repeat notify"
return
}
@@ -324,21 +494,26 @@ func (p *Processor) fireEvent(event *models.AlertCurEvent) {
if cachedRule.NotifyMaxNumber == 0 {
// 最大可以发送次数如果是0表示不想限制最大发送次数一直发即可
event.NotifyCurNumber = fired.NotifyCurNumber + 1
message = fmt.Sprintf("fired, notify_repeat_step_matched(%d >= %d + %d * 60) notify_max_number_ignore(#%d / %d)", event.LastEvalTime, fired.LastSentTime, cachedRule.NotifyRepeatStep, event.NotifyCurNumber, cachedRule.NotifyMaxNumber)
p.pushEventToQueue(event)
} else {
// 有最大发送次数的限制,就要看已经发了几次了,是否达到了最大发送次数
if fired.NotifyCurNumber >= cachedRule.NotifyMaxNumber {
logger.Debugf("rule_eval:%s event:%+v reach max number", p.Key(), event)
message = fmt.Sprintf("stalled, notify_repeat_step_matched(%d >= %d + %d * 60) notify_max_number_not_matched(#%d / %d)", event.LastEvalTime, fired.LastSentTime, cachedRule.NotifyRepeatStep, fired.NotifyCurNumber, cachedRule.NotifyMaxNumber)
return
} else {
event.NotifyCurNumber = fired.NotifyCurNumber + 1
message = fmt.Sprintf("fired, notify_repeat_step_matched(%d >= %d + %d * 60) notify_max_number_matched(#%d / %d)", event.LastEvalTime, fired.LastSentTime, cachedRule.NotifyRepeatStep, event.NotifyCurNumber, cachedRule.NotifyMaxNumber)
p.pushEventToQueue(event)
}
}
} else {
message = fmt.Sprintf("stalled, notify_repeat_step_not_matched(%d < %d + %d * 60)", event.LastEvalTime, fired.LastSentTime, cachedRule.NotifyRepeatStep)
}
} else {
event.NotifyCurNumber = 1
event.FirstTriggerTime = event.TriggerTime
message = fmt.Sprintf("fired, first_trigger_time: %d", event.FirstTriggerTime)
p.HandleFireEventHook(event)
p.pushEventToQueue(event)
}
@@ -353,29 +528,58 @@ func (p *Processor) pushEventToQueue(e *models.AlertCurEvent) {
dispatch.LogEvent(e, "push_queue")
if !queue.EventQueue.PushFront(e) {
logger.Warningf("event_push_queue: queue is full, event:%+v", e)
p.Stats.CounterRuleEvalErrorTotal.WithLabelValues(fmt.Sprintf("%v", p.DatasourceId()), "push_event_queue", p.BusiGroupCache.GetNameByBusiGroupId(p.rule.GroupId), fmt.Sprintf("%v", p.rule.Id)).Inc()
}
}
func (p *Processor) RecoverAlertCurEventFromDb() {
p.pendings = NewAlertCurEventMap(nil)
p.pendingsUseByRecover = NewAlertCurEventMap(nil)
curEvents, err := models.AlertCurEventGetByRuleIdAndDsId(p.ctx, p.rule.Id, p.datasourceId)
if err != nil {
logger.Errorf("recover event from db for rule:%s failed, err:%s", p.Key(), err)
p.Stats.CounterRuleEvalErrorTotal.WithLabelValues(fmt.Sprintf("%v", p.DatasourceId()), "get_recover_event", p.BusiGroupCache.GetNameByBusiGroupId(p.rule.GroupId), fmt.Sprintf("%v", p.rule.Id)).Inc()
p.fires = NewAlertCurEventMap(nil)
return
}
fireMap := make(map[string]*models.AlertCurEvent)
pendingsUseByRecoverMap := make(map[string]*models.AlertCurEvent)
for _, event := range curEvents {
alertRule := p.alertRuleCache.Get(event.RuleId)
if alertRule == nil {
continue
}
event.NotifyRuleIds = alertRule.NotifyRuleIds
if event.Cate == models.HOST {
target, exists := p.TargetCache.Get(event.TargetIdent)
if exists && target.EngineName != p.EngineName && !(p.ctx.IsCenter && target.EngineName == "") {
// 如果是 host rule且 target 的 engineName 不是当前的 engineName 或者是中心机房 target EngineName 为空,就跳过
continue
}
}
event.DB2Mem()
target, exists := p.TargetCache.Get(event.TargetIdent)
if exists {
target.GroupNames = p.BusiGroupCache.GetNamesByBusiGroupIds(target.GroupIds)
event.Target = target
}
fireMap[event.Hash] = event
e := *event
pendingsUseByRecoverMap[event.Hash] = &e
}
p.fires = NewAlertCurEventMap(fireMap)
// 修改告警规则,或者进程重启之后,需要重新加载 pendingsUseByRecover
p.pendingsUseByRecover = NewAlertCurEventMap(pendingsUseByRecoverMap)
}
func (p *Processor) fillTags(anomalyPoint common.AnomalyPoint) {
func (p *Processor) fillTags(anomalyPoint models.AnomalyPoint) {
// handle series tags
tagsMap := make(map[string]string)
for label, value := range anomalyPoint.Labels {
@@ -387,7 +591,9 @@ func (p *Processor) fillTags(anomalyPoint common.AnomalyPoint) {
}
// handle rule tags
for _, tag := range p.rule.AppendTagsJSON {
tags := p.rule.AppendTagsJSON
tags = append(tags, "rulename="+p.rule.Name)
for _, tag := range tags {
arr := strings.SplitN(tag, "=", 2)
var defs = []string{
@@ -413,27 +619,25 @@ func (p *Processor) fillTags(anomalyPoint common.AnomalyPoint) {
tagsMap[arr[0]] = body.String()
}
tagsMap["rulename"] = p.rule.Name
p.tagsMap = tagsMap
// handle tagsArr
p.tagsArr = labelMapToArr(tagsMap)
}
func (p *Processor) mayHandleIdent() {
func (p *Processor) mayHandleIdent(event *models.AlertCurEvent) {
// handle ident
if ident, has := p.tagsMap["ident"]; has {
if ident, has := event.TagsMap["ident"]; has {
if target, exists := p.TargetCache.Get(ident); exists {
p.target = target.Ident
p.targetNote = target.Note
event.TargetIdent = target.Ident
event.TargetNote = target.Note
} else {
p.target = ident
p.targetNote = ""
event.TargetIdent = ident
event.TargetNote = ""
}
} else {
p.target = ""
p.targetNote = ""
event.TargetIdent = ""
event.TargetNote = ""
}
}
@@ -445,6 +649,12 @@ func (p *Processor) mayHandleGroup() {
}
}
func (p *Processor) DeleteProcessEvent(hash string) {
p.fires.Delete(hash)
p.pendings.Delete(hash)
p.pendingsUseByRecover.Delete(hash)
}
func labelMapToArr(m map[string]string) []string {
numLabels := len(m)
@@ -459,10 +669,10 @@ func labelMapToArr(m map[string]string) []string {
return labelStrings
}
func Hash(ruleId, datasourceId int64, vector common.AnomalyPoint) string {
func Hash(ruleId, datasourceId int64, vector models.AnomalyPoint) string {
return str.MD5(fmt.Sprintf("%d_%s_%d_%d_%s", ruleId, vector.Labels.String(), datasourceId, vector.Severity, vector.Query))
}
func TagHash(vector common.AnomalyPoint) string {
func TagHash(vector models.AnomalyPoint) string {
return str.MD5(vector.Labels.String())
}

View File

@@ -10,6 +10,7 @@ import (
"github.com/ccfos/nightingale/v6/models"
"github.com/ccfos/nightingale/v6/prom"
"github.com/ccfos/nightingale/v6/pushgw/writer"
"github.com/robfig/cron/v3"
"github.com/toolkits/pkg/logger"
"github.com/toolkits/pkg/str"
@@ -19,19 +20,35 @@ type RecordRuleContext struct {
datasourceId int64
quit chan struct{}
scheduler *cron.Cron
rule *models.RecordingRule
promClients *prom.PromClientMap
stats *astats.Stats
}
func NewRecordRuleContext(rule *models.RecordingRule, datasourceId int64, promClients *prom.PromClientMap, writers *writer.WritersType, stats *astats.Stats) *RecordRuleContext {
return &RecordRuleContext{
rrc := &RecordRuleContext{
datasourceId: datasourceId,
quit: make(chan struct{}),
rule: rule,
promClients: promClients,
stats: stats,
}
if rule.CronPattern == "" && rule.PromEvalInterval != 0 {
rule.CronPattern = fmt.Sprintf("@every %ds", rule.PromEvalInterval)
}
rrc.scheduler = cron.New(cron.WithSeconds(), cron.WithChain(cron.SkipIfStillRunning(cron.DefaultLogger)))
_, err := rrc.scheduler.AddFunc(rule.CronPattern, func() {
rrc.Eval()
})
if err != nil {
logger.Errorf("add cron pattern error: %v", err)
}
return rrc
}
func (rrc *RecordRuleContext) Key() string {
@@ -39,11 +56,13 @@ func (rrc *RecordRuleContext) Key() string {
}
func (rrc *RecordRuleContext) Hash() string {
return str.MD5(fmt.Sprintf("%d_%d_%s_%d",
return str.MD5(fmt.Sprintf("%d_%s_%s_%d_%s_%s",
rrc.rule.Id,
rrc.rule.PromEvalInterval,
rrc.rule.CronPattern,
rrc.rule.PromQl,
rrc.datasourceId,
rrc.rule.AppendTags,
rrc.rule.Name,
))
}
@@ -51,27 +70,11 @@ func (rrc *RecordRuleContext) Prepare() {}
func (rrc *RecordRuleContext) Start() {
logger.Infof("eval:%s started", rrc.Key())
interval := rrc.rule.PromEvalInterval
if interval <= 0 {
interval = 10
}
ticker := time.NewTicker(time.Duration(interval) * time.Second)
go func() {
defer ticker.Stop()
for {
select {
case <-rrc.quit:
return
case <-ticker.C:
rrc.Eval()
}
}
}()
rrc.scheduler.Start()
}
func (rrc *RecordRuleContext) Eval() {
rrc.stats.CounterRecordEval.WithLabelValues().Inc()
rrc.stats.CounterRecordEval.WithLabelValues(fmt.Sprintf("%d", rrc.datasourceId)).Inc()
promql := strings.TrimSpace(rrc.rule.PromQl)
if promql == "" {
logger.Errorf("eval:%s promql is blank", rrc.Key())
@@ -80,30 +83,37 @@ func (rrc *RecordRuleContext) Eval() {
if rrc.promClients.IsNil(rrc.datasourceId) {
logger.Errorf("eval:%s reader client is nil", rrc.Key())
rrc.stats.CounterRecordEvalErrorTotal.WithLabelValues().Inc()
rrc.stats.CounterRecordEvalErrorTotal.WithLabelValues(fmt.Sprintf("%d", rrc.datasourceId)).Inc()
return
}
value, warnings, err := rrc.promClients.GetCli(rrc.datasourceId).Query(context.Background(), promql, time.Now())
if err != nil {
logger.Errorf("eval:%s promql:%s, error:%v", rrc.Key(), promql, err)
rrc.stats.CounterRecordEvalErrorTotal.WithLabelValues().Inc()
rrc.stats.CounterRecordEvalErrorTotal.WithLabelValues(fmt.Sprintf("%d", rrc.datasourceId)).Inc()
return
}
if len(warnings) > 0 {
logger.Errorf("eval:%s promql:%s, warnings:%v", rrc.Key(), promql, warnings)
rrc.stats.CounterRecordEvalErrorTotal.WithLabelValues().Inc()
rrc.stats.CounterRecordEvalErrorTotal.WithLabelValues(fmt.Sprintf("%d", rrc.datasourceId)).Inc()
return
}
ts := ConvertToTimeSeries(value, rrc.rule)
if len(ts) != 0 {
rrc.promClients.GetWriterCli(rrc.datasourceId).Write(ts)
err := rrc.promClients.GetWriterCli(rrc.datasourceId).Write(ts)
if err != nil {
logger.Errorf("eval:%s promql:%s, error:%v", rrc.Key(), promql, err)
rrc.stats.CounterRecordEvalErrorTotal.WithLabelValues(fmt.Sprintf("%d", rrc.datasourceId)).Inc()
}
}
}
func (rrc *RecordRuleContext) Stop() {
logger.Infof("%s stopped", rrc.Key())
c := rrc.scheduler.Stop()
<-c.Done()
close(rrc.quit)
}

View File

@@ -3,6 +3,7 @@ package record
import (
"context"
"fmt"
"strconv"
"time"
"github.com/ccfos/nightingale/v6/alert/aconf"
@@ -25,9 +26,11 @@ type Scheduler struct {
writers *writer.WritersType
stats *astats.Stats
datasourceCache *memsto.DatasourceCacheType
}
func NewScheduler(aconf aconf.Alert, rrc *memsto.RecordingRuleCacheType, promClients *prom.PromClientMap, writers *writer.WritersType, stats *astats.Stats) *Scheduler {
func NewScheduler(aconf aconf.Alert, rrc *memsto.RecordingRuleCacheType, promClients *prom.PromClientMap, writers *writer.WritersType, stats *astats.Stats, datasourceCache *memsto.DatasourceCacheType) *Scheduler {
scheduler := &Scheduler{
aconf: aconf,
recordRules: make(map[string]*RecordRuleContext),
@@ -38,6 +41,8 @@ func NewScheduler(aconf aconf.Alert, rrc *memsto.RecordingRuleCacheType, promCli
writers: writers,
stats: stats,
datasourceCache: datasourceCache,
}
go scheduler.LoopSyncRules(context.Background())
@@ -66,9 +71,9 @@ func (s *Scheduler) syncRecordRules() {
continue
}
datasourceIds := s.promClients.Hit(rule.DatasourceIdsJson)
datasourceIds := s.datasourceCache.GetIDsByDsCateAndQueries("prometheus", rule.DatasourceQueries)
for _, dsId := range datasourceIds {
if !naming.DatasourceHashRing.IsHit(dsId, fmt.Sprintf("%d", rule.Id), s.aconf.Heartbeat.Endpoint) {
if !naming.DatasourceHashRing.IsHit(strconv.FormatInt(dsId, 10), fmt.Sprintf("%d", rule.Id), s.aconf.Heartbeat.Endpoint) {
continue
}

View File

@@ -2,10 +2,10 @@ package router
import (
"fmt"
"strconv"
"strings"
"time"
"github.com/ccfos/nightingale/v6/alert/common"
"github.com/ccfos/nightingale/v6/alert/dispatch"
"github.com/ccfos/nightingale/v6/alert/mute"
"github.com/ccfos/nightingale/v6/alert/naming"
@@ -25,6 +25,7 @@ func (rt *Router) pushEventToQueue(c *gin.Context) {
if event.RuleId == 0 {
ginx.Bomb(200, "event is illegal")
}
event.FE2DB()
event.TagsMap = make(map[string]string)
for i := 0; i < len(event.TagsJSON); i++ {
@@ -33,15 +34,15 @@ func (rt *Router) pushEventToQueue(c *gin.Context) {
continue
}
arr := strings.Split(pair, "=")
arr := strings.SplitN(pair, "=", 2)
if len(arr) != 2 {
continue
}
event.TagsMap[arr[0]] = arr[1]
}
if mute.EventMuteStrategy(event, rt.AlertMuteCache) {
hit, _ := mute.EventMuteStrategy(event, rt.AlertMuteCache)
if hit {
logger.Infof("event_muted: rule_id=%d %s", event.RuleId, event.Hash)
ginx.NewRender(c).Message(nil)
return
@@ -91,7 +92,7 @@ func (rt *Router) eventPersist(c *gin.Context) {
type eventForm struct {
Alert bool `json:"alert"`
AnomalyPoints []common.AnomalyPoint `json:"vectors"`
AnomalyPoints []models.AnomalyPoint `json:"vectors"`
RuleId int64 `json:"rule_id"`
DatasourceId int64 `json:"datasource_id"`
Inhibit bool `json:"inhibit"`
@@ -102,7 +103,7 @@ func (rt *Router) makeEvent(c *gin.Context) {
ginx.BindJSON(c, &events)
//now := time.Now().Unix()
for i := 0; i < len(events); i++ {
node, err := naming.DatasourceHashRing.GetNode(events[i].DatasourceId, fmt.Sprintf("%d", events[i].RuleId))
node, err := naming.DatasourceHashRing.GetNode(strconv.FormatInt(events[i].DatasourceId, 10), fmt.Sprintf("%d", events[i].RuleId))
if err != nil {
logger.Warningf("event:%+v get node err:%v", events[i], err)
ginx.Bomb(200, "event node not exists")
@@ -128,7 +129,7 @@ func (rt *Router) makeEvent(c *gin.Context) {
} else {
for _, vector := range events[i].AnomalyPoints {
readableString := vector.ReadableValue()
go ruleWorker.RecoverSingle(process.Hash(events[i].RuleId, events[i].DatasourceId, vector), vector.Timestamp, &readableString)
go ruleWorker.RecoverSingle(false, process.Hash(events[i].RuleId, events[i].DatasourceId, vector), vector.Timestamp, &readableString)
}
}
}

View File

@@ -1,64 +1,183 @@
package sender
import (
"encoding/json"
"strconv"
"fmt"
"html/template"
"net/url"
"strings"
"time"
"github.com/ccfos/nightingale/v6/alert/aconf"
"github.com/ccfos/nightingale/v6/alert/astats"
"github.com/ccfos/nightingale/v6/memsto"
"github.com/ccfos/nightingale/v6/models"
"github.com/ccfos/nightingale/v6/pkg/ctx"
"github.com/ccfos/nightingale/v6/pkg/ibex"
"github.com/ccfos/nightingale/v6/pkg/poster"
"github.com/toolkits/pkg/logger"
)
func SendCallbacks(ctx *ctx.Context, urls []string, event *models.AlertCurEvent, targetCache *memsto.TargetCacheType, userCache *memsto.UserCacheType,
ibexConf aconf.Ibex, stats *astats.Stats) {
for _, url := range urls {
if url == "" {
continue
}
type (
// CallBacker 进行回调的接口
CallBacker interface {
CallBack(ctx CallBackContext)
}
if strings.HasPrefix(url, "${ibex}") {
if !event.IsRecovered {
handleIbex(ctx, url, event, targetCache, userCache, ibexConf)
}
continue
}
// CallBackContext 回调时所需的上下文
CallBackContext struct {
Ctx *ctx.Context
CallBackURL string
Users []*models.User
Rule *models.AlertRule
Events []*models.AlertCurEvent
Stats *astats.Stats
BatchSend bool
}
if !(strings.HasPrefix(url, "http://") || strings.HasPrefix(url, "https://")) {
url = "http://" + url
}
DefaultCallBacker struct{}
)
stats.AlertNotifyTotal.WithLabelValues("rule_callback").Inc()
resp, code, err := poster.PostJSON(url, 5*time.Second, event, 3)
if err != nil {
logger.Errorf("event_callback_fail(rule_id=%d url=%s), resp: %s, err: %v, code: %d", event.RuleId, url, string(resp), err, code)
stats.AlertNotifyErrorTotal.WithLabelValues("rule_callback").Inc()
} else {
logger.Infof("event_callback_succ(rule_id=%d url=%s), resp: %s, code: %d", event.RuleId, url, string(resp), code)
}
func BuildCallBackContext(ctx *ctx.Context, callBackURL string, rule *models.AlertRule, events []*models.AlertCurEvent,
uids []int64, userCache *memsto.UserCacheType, batchSend bool, stats *astats.Stats) CallBackContext {
users := userCache.GetByUserIds(uids)
newCallBackUrl, _ := events[0].ParseURL(callBackURL)
return CallBackContext{
Ctx: ctx,
CallBackURL: newCallBackUrl,
Rule: rule,
Events: events,
Users: users,
BatchSend: batchSend,
Stats: stats,
}
}
type TaskForm struct {
Title string `json:"title"`
Account string `json:"account"`
Batch int `json:"batch"`
Tolerance int `json:"tolerance"`
Timeout int `json:"timeout"`
Pause string `json:"pause"`
Script string `json:"script"`
Args string `json:"args"`
Stdin string `json:"stdin"`
Action string `json:"action"`
Creator string `json:"creator"`
Hosts []string `json:"hosts"`
func ExtractAtsParams(rawURL string) []string {
ans := make([]string, 0, 1)
parsedURL, err := url.Parse(rawURL)
if err != nil {
logger.Errorf("ExtractAtsParams(url=%s), err: %v", rawURL, err)
return ans
}
queryParams := parsedURL.Query()
atParam := queryParams.Get("ats")
if atParam == "" {
return ans
}
// Split the atParam by comma and return the result as a slice
return strings.Split(atParam, ",")
}
func NewCallBacker(
key string,
targetCache *memsto.TargetCacheType,
userCache *memsto.UserCacheType,
taskTplCache *memsto.TaskTplCache,
tpls map[string]*template.Template,
) CallBacker {
switch key {
case models.IbexDomain: // Distribute to Ibex
return &IbexCallBacker{
targetCache: targetCache,
userCache: userCache,
taskTplCache: taskTplCache,
}
case models.DefaultDomain: // default callback
return &DefaultCallBacker{}
case models.DingtalkDomain:
return &DingtalkSender{tpl: tpls[models.Dingtalk]}
case models.WecomDomain:
return &WecomSender{tpl: tpls[models.Wecom]}
case models.FeishuDomain:
return &FeishuSender{tpl: tpls[models.Feishu]}
case models.FeishuCardDomain:
return &FeishuCardSender{tpl: tpls[models.FeishuCard]}
//case models.Mm:
// return &MmSender{tpl: tpls[models.Mm]}
case models.TelegramDomain:
return &TelegramSender{tpl: tpls[models.Telegram]}
case models.LarkDomain:
return &LarkSender{tpl: tpls[models.Lark]}
case models.LarkCardDomain:
return &LarkCardSender{tpl: tpls[models.LarkCard]}
}
return nil
}
func (c *DefaultCallBacker) CallBack(ctx CallBackContext) {
if len(ctx.CallBackURL) == 0 || len(ctx.Events) == 0 {
return
}
event := ctx.Events[0]
if ctx.BatchSend {
webhookConf := &models.Webhook{
Type: models.RuleCallback,
Enable: true,
Url: ctx.CallBackURL,
Timeout: 5,
RetryCount: 3,
RetryInterval: 10,
Batch: 1000,
}
PushCallbackEvent(ctx.Ctx, webhookConf, event, ctx.Stats)
return
}
doSendAndRecord(ctx.Ctx, ctx.CallBackURL, ctx.CallBackURL, event, "callback", ctx.Stats, ctx.Events)
}
func doSendAndRecord(ctx *ctx.Context, url, token string, body interface{}, channel string,
stats *astats.Stats, events []*models.AlertCurEvent) {
res, err := doSend(url, body, channel, stats)
NotifyRecord(ctx, events, 0, channel, token, res, err)
}
func NotifyRecord(ctx *ctx.Context, evts []*models.AlertCurEvent, notifyRuleID int64, channel, target, res string, err error) {
// 一个通知可能对应多个 event都需要记录
notis := make([]*models.NotificationRecord, 0, len(evts))
for _, evt := range evts {
noti := models.NewNotificationRecord(evt, notifyRuleID, channel, target)
if err != nil {
noti.SetStatus(models.NotiStatusFailure)
noti.SetDetails(err.Error())
} else if res != "" {
noti.SetDetails(string(res))
}
notis = append(notis, noti)
}
if !ctx.IsCenter {
err := poster.PostByUrls(ctx, "/v1/n9e/notify-record", notis)
if err != nil {
logger.Errorf("add notis:%v failed, err: %v", notis, err)
}
return
}
PushNotifyRecords(notis)
}
func doSend(url string, body interface{}, channel string, stats *astats.Stats) (string, error) {
stats.AlertNotifyTotal.WithLabelValues(channel).Inc()
start := time.Now()
res, code, err := poster.PostJSON(url, time.Second*5, body, 3)
res = []byte(fmt.Sprintf("duration: %d ms status_code:%d, response:%s", time.Since(start).Milliseconds(), code, string(res)))
if err != nil {
logger.Errorf("%s_sender: result=fail url=%s code=%d error=%v req:%v response=%s", channel, url, code, err, body, string(res))
stats.AlertNotifyErrorTotal.WithLabelValues(channel).Inc()
return string(res), err
}
logger.Infof("%s_sender: result=succ url=%s code=%d req:%v response=%s", channel, url, code, body, string(res))
return string(res), nil
}
type TaskCreateReply struct {
@@ -66,157 +185,26 @@ type TaskCreateReply struct {
Dat int64 `json:"dat"` // task.id
}
func handleIbex(ctx *ctx.Context, url string, event *models.AlertCurEvent, targetCache *memsto.TargetCacheType, userCache *memsto.UserCacheType, ibexConf aconf.Ibex) {
arr := strings.Split(url, "/")
func PushCallbackEvent(ctx *ctx.Context, webhook *models.Webhook, event *models.AlertCurEvent, stats *astats.Stats) {
CallbackEventQueueLock.RLock()
queue := CallbackEventQueue[webhook.Url]
CallbackEventQueueLock.RUnlock()
var idstr string
var host string
if len(arr) > 1 {
idstr = arr[1]
}
if len(arr) > 2 {
host = arr[2]
}
id, err := strconv.ParseInt(idstr, 10, 64)
if err != nil {
logger.Errorf("event_callback_ibex: failed to parse url: %s", url)
return
}
if host == "" {
// 用户在callback url中没有传入host就从event中解析
host = event.TargetIdent
}
if host == "" {
logger.Error("event_callback_ibex: failed to get host")
return
}
tpl, err := models.TaskTplGetById(ctx, id)
if err != nil {
logger.Errorf("event_callback_ibex: failed to get tpl: %v", err)
return
}
if tpl == nil {
logger.Errorf("event_callback_ibex: no such tpl(%d)", id)
return
}
// check perm
// tpl.GroupId - host - account 三元组校验权限
can, err := canDoIbex(ctx, tpl.UpdateBy, tpl, host, targetCache, userCache)
if err != nil {
logger.Errorf("event_callback_ibex: check perm fail: %v", err)
return
}
if !can {
logger.Errorf("event_callback_ibex: user(%s) no permission", tpl.UpdateBy)
return
}
tagsMap := make(map[string]string)
for i := 0; i < len(event.TagsJSON); i++ {
pair := strings.TrimSpace(event.TagsJSON[i])
if pair == "" {
continue
if queue == nil {
queue = &WebhookQueue{
eventQueue: NewSafeEventQueue(QueueMaxSize),
closeCh: make(chan struct{}),
}
arr := strings.Split(pair, "=")
if len(arr) != 2 {
continue
}
CallbackEventQueueLock.Lock()
CallbackEventQueue[webhook.Url] = queue
CallbackEventQueueLock.Unlock()
tagsMap[arr[0]] = arr[1]
}
// 附加告警级别 告警触发值标签
tagsMap["alert_severity"] = strconv.Itoa(event.Severity)
tagsMap["alert_trigger_value"] = event.TriggerValue
tags, err := json.Marshal(tagsMap)
if err != nil {
logger.Errorf("event_callback_ibex: failed to marshal tags to json: %v", tagsMap)
return
StartConsumer(ctx, queue, webhook.Batch, webhook, stats)
}
// call ibex
in := TaskForm{
Title: tpl.Title + " FH: " + host,
Account: tpl.Account,
Batch: tpl.Batch,
Tolerance: tpl.Tolerance,
Timeout: tpl.Timeout,
Pause: tpl.Pause,
Script: tpl.Script,
Args: tpl.Args,
Stdin: string(tags),
Action: "start",
Creator: tpl.UpdateBy,
Hosts: []string{host},
}
var res TaskCreateReply
err = ibex.New(
ibexConf.Address,
ibexConf.BasicAuthUser,
ibexConf.BasicAuthPass,
ibexConf.Timeout,
).
Path("/ibex/v1/tasks").
In(in).
Out(&res).
POST()
if err != nil {
logger.Errorf("event_callback_ibex: call ibex fail: %v", err)
return
}
if res.Err != "" {
logger.Errorf("event_callback_ibex: call ibex response error: %v", res.Err)
return
}
// write db
record := models.TaskRecord{
Id: res.Dat,
EventId: event.Id,
GroupId: tpl.GroupId,
IbexAddress: ibexConf.Address,
IbexAuthUser: ibexConf.BasicAuthUser,
IbexAuthPass: ibexConf.BasicAuthPass,
Title: in.Title,
Account: in.Account,
Batch: in.Batch,
Tolerance: in.Tolerance,
Timeout: in.Timeout,
Pause: in.Pause,
Script: in.Script,
Args: in.Args,
CreateAt: time.Now().Unix(),
CreateBy: in.Creator,
}
if err = record.Add(ctx); err != nil {
logger.Errorf("event_callback_ibex: persist task_record fail: %v", err)
succ := queue.eventQueue.Push(event)
if !succ {
logger.Warningf("Write channel(%s) full, current channel size: %d event:%v", webhook.Url, queue.eventQueue.Len(), event)
}
}
func canDoIbex(ctx *ctx.Context, username string, tpl *models.TaskTpl, host string, targetCache *memsto.TargetCacheType, userCache *memsto.UserCacheType) (bool, error) {
user := userCache.GetByUsername(username)
if user != nil && user.IsAdmin() {
return true, nil
}
target, has := targetCache.Get(host)
if !has {
return false, nil
}
return target.GroupId == tpl.GroupId, nil
}

View File

@@ -3,13 +3,8 @@ package sender
import (
"html/template"
"strings"
"time"
"github.com/ccfos/nightingale/v6/alert/astats"
"github.com/ccfos/nightingale/v6/models"
"github.com/ccfos/nightingale/v6/pkg/poster"
"github.com/toolkits/pkg/logger"
)
type dingtalkMarkdown struct {
@@ -28,6 +23,10 @@ type dingtalk struct {
At dingtalkAt `json:"at"`
}
var (
_ CallBacker = (*DingtalkSender)(nil)
)
type DingtalkSender struct {
tpl *template.Template
}
@@ -37,13 +36,13 @@ func (ds *DingtalkSender) Send(ctx MessageContext) {
return
}
urls, ats := ds.extract(ctx.Users)
urls, ats, tokens := ds.extract(ctx.Users)
if len(urls) == 0 {
return
}
message := BuildTplMessage(ds.tpl, ctx.Events)
message := BuildTplMessage(models.Dingtalk, ds.tpl, ctx.Events)
for _, url := range urls {
for i, url := range urls {
var body dingtalk
// NoAt in url
if strings.Contains(url, "noat=1") {
@@ -68,14 +67,44 @@ func (ds *DingtalkSender) Send(ctx MessageContext) {
}
}
doSend(url, body, models.Dingtalk, ctx.Stats)
doSendAndRecord(ctx.Ctx, url, tokens[i], body, models.Dingtalk, ctx.Stats, ctx.Events)
}
}
func (ds *DingtalkSender) CallBack(ctx CallBackContext) {
if len(ctx.Events) == 0 || len(ctx.CallBackURL) == 0 {
return
}
body := dingtalk{
Msgtype: "markdown",
Markdown: dingtalkMarkdown{
Title: ctx.Events[0].RuleName,
},
}
ats := ExtractAtsParams(ctx.CallBackURL)
message := BuildTplMessage(models.Dingtalk, ds.tpl, ctx.Events)
if len(ats) > 0 {
body.Markdown.Text = message + "\n@" + strings.Join(ats, "@")
body.At = dingtalkAt{
AtMobiles: ats,
IsAtAll: false,
}
} else {
// NoAt in url
body.Markdown.Text = message
}
doSendAndRecord(ctx.Ctx, ctx.CallBackURL, ctx.CallBackURL, body, "callback", ctx.Stats, ctx.Events)
}
// extract urls and ats from Users
func (ds *DingtalkSender) extract(users []*models.User) ([]string, []string) {
func (ds *DingtalkSender) extract(users []*models.User) ([]string, []string, []string) {
urls := make([]string, 0, len(users))
ats := make([]string, 0, len(users))
tokens := make([]string, 0, len(users))
for _, user := range users {
if user.Phone != "" {
@@ -87,19 +116,8 @@ func (ds *DingtalkSender) extract(users []*models.User) ([]string, []string) {
url = "https://oapi.dingtalk.com/robot/send?access_token=" + token
}
urls = append(urls, url)
tokens = append(tokens, token)
}
}
return urls, ats
}
func doSend(url string, body interface{}, channel string, stats *astats.Stats) {
stats.AlertNotifyTotal.WithLabelValues(channel).Inc()
res, code, err := poster.PostJSON(url, time.Second*5, body, 3)
if err != nil {
logger.Errorf("%s_sender: result=fail url=%s code=%d error=%v response=%s", channel, url, code, err, string(res))
stats.AlertNotifyErrorTotal.WithLabelValues(channel).Inc()
} else {
logger.Infof("%s_sender: result=succ url=%s code=%d response=%s", channel, url, code, string(res))
}
return urls, ats, tokens
}

View File

@@ -7,14 +7,16 @@ import (
"time"
"github.com/ccfos/nightingale/v6/alert/aconf"
"github.com/ccfos/nightingale/v6/memsto"
"github.com/ccfos/nightingale/v6/models"
"github.com/ccfos/nightingale/v6/pkg/ctx"
"github.com/toolkits/pkg/logger"
"gopkg.in/gomail.v2"
)
var mailch chan *gomail.Message
var mailch chan *EmailContext
type EmailSender struct {
subjectTpl *template.Template
@@ -22,6 +24,11 @@ type EmailSender struct {
smtp aconf.SMTPConfig
}
type EmailContext struct {
events []*models.AlertCurEvent
mail *gomail.Message
}
func (es *EmailSender) Send(ctx MessageContext) {
if len(ctx.Users) == 0 || len(ctx.Events) == 0 {
return
@@ -30,12 +37,12 @@ func (es *EmailSender) Send(ctx MessageContext) {
var subject string
if es.subjectTpl != nil {
subject = BuildTplMessage(es.subjectTpl, []*models.AlertCurEvent{ctx.Events[0]})
subject = BuildTplMessage(models.Email, es.subjectTpl, []*models.AlertCurEvent{ctx.Events[0]})
} else {
subject = ctx.Events[0].RuleName
}
content := BuildTplMessage(es.contentTpl, ctx.Events)
es.WriteEmail(subject, content, tos)
content := BuildTplMessage(models.Email, es.contentTpl, ctx.Events)
es.WriteEmail(subject, content, tos, ctx.Events)
ctx.Stats.AlertNotifyTotal.WithLabelValues(models.Email).Add(float64(len(tos)))
}
@@ -72,7 +79,7 @@ func SendEmail(subject, content string, tos []string, stmp aconf.SMTPConfig) err
return nil
}
func (es *EmailSender) WriteEmail(subject, content string, tos []string) {
func (es *EmailSender) WriteEmail(subject, content string, tos []string, events []*models.AlertCurEvent) {
m := gomail.NewMessage()
m.SetHeader("From", es.smtp.From)
@@ -80,41 +87,65 @@ func (es *EmailSender) WriteEmail(subject, content string, tos []string) {
m.SetHeader("Subject", subject)
m.SetBody("text/html", content)
mailch <- m
mailch <- &EmailContext{events, m}
}
func dialSmtp(d *gomail.Dialer) gomail.SendCloser {
for {
if s, err := d.Dial(); err != nil {
logger.Errorf("email_sender: failed to dial smtp: %s", err)
select {
case <-mailQuit:
// Note that Sendcloser is not obtained below,
// and the outgoing signal (with configuration changes) exits the current dial
return nil
default:
if s, err := d.Dial(); err != nil {
logger.Errorf("email_sender: failed to dial smtp: %s", err)
} else {
return s
}
time.Sleep(time.Second)
continue
} else {
return s
}
}
}
var mailQuit = make(chan struct{})
func RestartEmailSender(smtp aconf.SMTPConfig) {
close(mailQuit)
mailQuit = make(chan struct{})
startEmailSender(smtp)
func RestartEmailSender(ctx *ctx.Context, smtp aconf.SMTPConfig) {
// Notify internal start exit
mailQuit <- struct{}{}
startEmailSender(ctx, smtp)
}
func InitEmailSender(smtp aconf.SMTPConfig) {
mailch = make(chan *gomail.Message, 100000)
startEmailSender(smtp)
var smtpConfig aconf.SMTPConfig
func InitEmailSender(ctx *ctx.Context, ncc *memsto.NotifyConfigCacheType) {
mailch = make(chan *EmailContext, 100000)
go updateSmtp(ctx, ncc)
smtpConfig = ncc.GetSMTP()
go startEmailSender(ctx, smtpConfig)
}
func startEmailSender(smtp aconf.SMTPConfig) {
func updateSmtp(ctx *ctx.Context, ncc *memsto.NotifyConfigCacheType) {
for {
time.Sleep(1 * time.Minute)
smtp := ncc.GetSMTP()
if smtpConfig.Host != smtp.Host || smtpConfig.Batch != smtp.Batch || smtpConfig.From != smtp.From ||
smtpConfig.Pass != smtp.Pass || smtpConfig.User != smtp.User || smtpConfig.Port != smtp.Port ||
smtpConfig.InsecureSkipVerify != smtp.InsecureSkipVerify { //diff
smtpConfig = smtp
RestartEmailSender(ctx, smtp)
}
}
}
func startEmailSender(ctx *ctx.Context, smtp aconf.SMTPConfig) {
conf := smtp
if conf.Host == "" || conf.Port == 0 {
logger.Warning("SMTP configurations invalid")
logger.Debug("SMTP configurations invalid")
<-mailQuit
return
}
logger.Infof("start email sender... %+v", conf)
logger.Infof("start email sender... conf.Host:%+v,conf.Port:%+v", conf.Host, conf.Port)
d := gomail.NewDialer(conf.Host, conf.Port, conf.User, conf.Pass)
if conf.InsecureSkipVerify {
@@ -135,9 +166,16 @@ func startEmailSender(smtp aconf.SMTPConfig) {
if !open {
s = dialSmtp(d)
if s == nil {
// Indicates that the dialing failed and exited the current goroutine directly,
// but put the Message back in the mailch
mailch <- m
return
}
open = true
}
if err := gomail.Send(s, m); err != nil {
var err error
if err = gomail.Send(s, m.mail); err != nil {
logger.Errorf("email_sender: failed to send: %s", err)
// close and retry
@@ -146,13 +184,28 @@ func startEmailSender(smtp aconf.SMTPConfig) {
}
s = dialSmtp(d)
if s == nil {
// Indicates that the dialing failed and exited the current goroutine directly,
// but put the Message back in the mailch
mailch <- m
return
}
open = true
if err := gomail.Send(s, m); err != nil {
if err = gomail.Send(s, m.mail); err != nil {
logger.Errorf("email_sender: failed to retry send: %s", err)
}
} else {
logger.Infof("email_sender: result=succ subject=%v to=%v", m.GetHeader("Subject"), m.GetHeader("To"))
logger.Infof("email_sender: result=succ subject=%v to=%v",
m.mail.GetHeader("Subject"), m.mail.GetHeader("To"))
}
for _, to := range m.mail.GetHeader("To") {
msg := ""
if err == nil {
msg = "ok"
}
NotifyRecord(ctx, m.events, 0, models.Email, to, msg, err)
}
size++

View File

@@ -1,6 +1,7 @@
package sender
import (
"fmt"
"html/template"
"strings"
@@ -22,17 +23,47 @@ type feishu struct {
At feishuAt `json:"at"`
}
var (
_ CallBacker = (*FeishuSender)(nil)
)
type FeishuSender struct {
tpl *template.Template
}
func (fs *FeishuSender) CallBack(ctx CallBackContext) {
if len(ctx.Events) == 0 || len(ctx.CallBackURL) == 0 {
return
}
ats := ExtractAtsParams(ctx.CallBackURL)
message := BuildTplMessage(models.Feishu, fs.tpl, ctx.Events)
if len(ats) > 0 {
atTags := ""
for _, at := range ats {
atTags += fmt.Sprintf("<at user_id=\"%s\"></at> ", at)
}
message = atTags + message
}
body := feishu{
Msgtype: "text",
Content: feishuContent{
Text: message,
},
}
doSendAndRecord(ctx.Ctx, ctx.CallBackURL, ctx.CallBackURL, body, "callback", ctx.Stats, ctx.Events)
}
func (fs *FeishuSender) Send(ctx MessageContext) {
if len(ctx.Users) == 0 || len(ctx.Events) == 0 {
return
}
urls, ats := fs.extract(ctx.Users)
message := BuildTplMessage(fs.tpl, ctx.Events)
for _, url := range urls {
urls, ats, tokens := fs.extract(ctx.Users)
message := BuildTplMessage(models.Feishu, fs.tpl, ctx.Events)
for i, url := range urls {
body := feishu{
Msgtype: "text",
Content: feishuContent{
@@ -45,13 +76,14 @@ func (fs *FeishuSender) Send(ctx MessageContext) {
IsAtAll: false,
}
}
doSend(url, body, models.Feishu, ctx.Stats)
doSendAndRecord(ctx.Ctx, url, tokens[i], body, models.Feishu, ctx.Stats, ctx.Events)
}
}
func (fs *FeishuSender) extract(users []*models.User) ([]string, []string) {
func (fs *FeishuSender) extract(users []*models.User) ([]string, []string, []string) {
urls := make([]string, 0, len(users))
ats := make([]string, 0, len(users))
tokens := make([]string, 0, len(users))
for _, user := range users {
if user.Phone != "" {
@@ -63,7 +95,8 @@ func (fs *FeishuSender) extract(users []*models.User) ([]string, []string) {
url = "https://open.feishu.cn/open-apis/bot/v2/hook/" + token
}
urls = append(urls, url)
tokens = append(tokens, token)
}
}
return urls, ats
return urls, ats, tokens
}

View File

@@ -3,6 +3,7 @@ package sender
import (
"fmt"
"html/template"
"net/url"
"strings"
"github.com/ccfos/nightingale/v6/models"
@@ -55,8 +56,8 @@ const (
Triggered = "triggered"
)
var (
body = feishuCard{
func createFeishuCardBody() feishuCard {
return feishuCard{
feishu: feishu{Msgtype: "interactive"},
Card: Cards{
Config: Conf{
@@ -89,14 +90,28 @@ var (
},
},
}
)
}
func (fs *FeishuCardSender) Send(ctx MessageContext) {
if len(ctx.Users) == 0 || len(ctx.Events) == 0 {
func (fs *FeishuCardSender) CallBack(ctx CallBackContext) {
if len(ctx.Events) == 0 || len(ctx.CallBackURL) == 0 {
return
}
urls, _ := fs.extract(ctx.Users)
message := BuildTplMessage(fs.tpl, ctx.Events)
ats := ExtractAtsParams(ctx.CallBackURL)
message := BuildTplMessage(models.FeishuCard, fs.tpl, ctx.Events)
if len(ats) > 0 {
atTags := ""
for _, at := range ats {
if strings.Contains(at, "@") {
atTags += fmt.Sprintf("<at email=\"%s\" ></at>", at)
} else {
atTags += fmt.Sprintf("<at id=\"%s\" ></at>", at)
}
}
message = atTags + message
}
color := "red"
lowerUnicode := strings.ToLower(message)
if strings.Count(lowerUnicode, Recovered) > 0 && strings.Count(lowerUnicode, Triggered) > 0 {
@@ -106,18 +121,51 @@ func (fs *FeishuCardSender) Send(ctx MessageContext) {
}
SendTitle := fmt.Sprintf("🔔 %s", ctx.Events[0].RuleName)
body := createFeishuCardBody()
body.Card.Header.Title.Content = SendTitle
body.Card.Header.Template = color
body.Card.Elements[0].Text.Content = message
body.Card.Elements[2].Elements[0].Content = SendTitle
for _, url := range urls {
doSend(url, body, models.FeishuCard, ctx.Stats)
// This is to be compatible with the feishucard interface, if with query string parameters, the request will fail
// Remove query parameters from the URL,
parsedURL, err := url.Parse(ctx.CallBackURL)
if err != nil {
return
}
parsedURL.RawQuery = ""
doSendAndRecord(ctx.Ctx, parsedURL.String(), parsedURL.String(), body, "callback", ctx.Stats, ctx.Events)
}
func (fs *FeishuCardSender) Send(ctx MessageContext) {
if len(ctx.Users) == 0 || len(ctx.Events) == 0 {
return
}
urls, tokens := fs.extract(ctx.Users)
message := BuildTplMessage(models.FeishuCard, fs.tpl, ctx.Events)
color := "red"
lowerUnicode := strings.ToLower(message)
if strings.Count(lowerUnicode, Recovered) > 0 && strings.Count(lowerUnicode, Triggered) > 0 {
color = "orange"
} else if strings.Count(lowerUnicode, Recovered) > 0 {
color = "green"
}
SendTitle := fmt.Sprintf("🔔 %s", ctx.Events[0].RuleName)
body := createFeishuCardBody()
body.Card.Header.Title.Content = SendTitle
body.Card.Header.Template = color
body.Card.Elements[0].Text.Content = message
body.Card.Elements[2].Elements[0].Content = SendTitle
for i, url := range urls {
doSendAndRecord(ctx.Ctx, url, tokens[i], body, models.FeishuCard, ctx.Stats, ctx.Events)
}
}
func (fs *FeishuCardSender) extract(users []*models.User) ([]string, []string) {
urls := make([]string, 0, len(users))
ats := make([]string, 0)
tokens := make([]string, 0, len(users))
for i := range users {
if token, has := users[i].ExtractToken(models.FeishuCard); has {
url := token
@@ -125,7 +173,8 @@ func (fs *FeishuCardSender) extract(users []*models.User) ([]string, []string) {
url = "https://open.feishu.cn/open-apis/bot/v2/hook/" + strings.TrimSpace(token)
}
urls = append(urls, url)
tokens = append(tokens, token)
}
}
return urls, ats
return urls, tokens
}

299
alert/sender/ibex.go Normal file
View File

@@ -0,0 +1,299 @@
// @Author: Ciusyan 6/5/24
package sender
import (
"encoding/json"
"fmt"
"strconv"
"strings"
"time"
"github.com/ccfos/nightingale/v6/memsto"
"github.com/ccfos/nightingale/v6/models"
"github.com/ccfos/nightingale/v6/pkg/ctx"
imodels "github.com/flashcatcloud/ibex/src/models"
"github.com/flashcatcloud/ibex/src/storage"
"github.com/toolkits/pkg/logger"
)
var (
_ CallBacker = (*IbexCallBacker)(nil)
)
type IbexCallBacker struct {
targetCache *memsto.TargetCacheType
userCache *memsto.UserCacheType
taskTplCache *memsto.TaskTplCache
}
func (c *IbexCallBacker) CallBack(ctx CallBackContext) {
if len(ctx.CallBackURL) == 0 || len(ctx.Events) == 0 {
logger.Warningf("event_callback_ibex: url or events is empty, url: %s, events: %+v", ctx.CallBackURL, ctx.Events)
return
}
event := ctx.Events[0]
if event.IsRecovered {
logger.Infof("event_callback_ibex: event is recovered, event: %+v", event)
return
}
c.handleIbex(ctx.Ctx, ctx.CallBackURL, event)
}
func (c *IbexCallBacker) handleIbex(ctx *ctx.Context, url string, event *models.AlertCurEvent) {
logger.Infof("event_callback_ibex: url: %s, event: %+v", url, event)
if imodels.DB() == nil && ctx.IsCenter {
logger.Warningf("event_callback_ibex: db is nil, event: %+v", event)
return
}
arr := strings.Split(url, "/")
var idstr string
var host string
if len(arr) > 1 {
idstr = arr[1]
}
if len(arr) > 2 {
host = arr[2]
}
id, err := strconv.ParseInt(idstr, 10, 64)
if err != nil {
logger.Errorf("event_callback_ibex: failed to parse url: %s event: %+v", url, event)
return
}
if host == "" {
// 用户在callback url中没有传入host就从event中解析
host = event.TargetIdent
if host == "" {
if ident, has := event.TagsMap["ident"]; has {
host = ident
}
}
}
if host == "" {
logger.Errorf("event_callback_ibex: failed to get host, id: %d, event: %+v", id, event)
return
}
CallIbex(ctx, id, host, c.taskTplCache, c.targetCache, c.userCache, event, "")
}
func CallIbex(ctx *ctx.Context, id int64, host string,
taskTplCache *memsto.TaskTplCache, targetCache *memsto.TargetCacheType,
userCache *memsto.UserCacheType, event *models.AlertCurEvent, args string) (int64, error) {
logger.Infof("event_callback_ibex: id: %d, host: %s, args: %s, event: %+v", id, host, args, event)
tpl := taskTplCache.Get(id)
if tpl == nil {
err := fmt.Errorf("event_callback_ibex: no such tpl(%d), event: %+v", id, event)
logger.Errorf("%s", err)
return 0, err
}
// check perm
// tpl.GroupId - host - account 三元组校验权限
can, err := CanDoIbex(tpl.UpdateBy, tpl, host, targetCache, userCache)
if err != nil {
err = fmt.Errorf("event_callback_ibex: check perm fail: %v, event: %+v", err, event)
logger.Errorf("%s", err)
return 0, err
}
if !can {
err = fmt.Errorf("event_callback_ibex: user(%s) no permission, event: %+v", tpl.UpdateBy, event)
logger.Errorf("%s", err)
return 0, err
}
tagsMap := make(map[string]string)
for i := 0; i < len(event.TagsJSON); i++ {
pair := strings.TrimSpace(event.TagsJSON[i])
if pair == "" {
continue
}
arr := strings.SplitN(pair, "=", 2)
if len(arr) != 2 {
continue
}
tagsMap[arr[0]] = arr[1]
}
// 附加告警级别 告警触发值标签
tagsMap["alert_severity"] = strconv.Itoa(event.Severity)
tagsMap["alert_trigger_value"] = event.TriggerValue
tagsMap["is_recovered"] = strconv.FormatBool(event.IsRecovered)
tags, err := json.Marshal(tagsMap)
if err != nil {
err = fmt.Errorf("event_callback_ibex: failed to marshal tags to json: %v, event: %+v", tagsMap, event)
logger.Errorf("%s", err)
return 0, err
}
// call ibex
taskArgs := tpl.Args
if args != "" {
taskArgs = args
}
in := models.TaskForm{
Title: tpl.Title + " FH: " + host,
Account: tpl.Account,
Batch: tpl.Batch,
Tolerance: tpl.Tolerance,
Timeout: tpl.Timeout,
Pause: tpl.Pause,
Script: tpl.Script,
Args: taskArgs,
Stdin: string(tags),
Action: "start",
Creator: tpl.UpdateBy,
Hosts: []string{host},
AlertTriggered: true,
}
id, err = TaskAdd(in, tpl.UpdateBy, ctx.IsCenter)
if err != nil {
err = fmt.Errorf("event_callback_ibex: call ibex fail: %v, event: %+v", err, event)
logger.Errorf("%s", err)
return 0, err
}
// write db
record := models.TaskRecord{
Id: id,
EventId: event.Id,
GroupId: tpl.GroupId,
Title: in.Title,
Account: in.Account,
Batch: in.Batch,
Tolerance: in.Tolerance,
Timeout: in.Timeout,
Pause: in.Pause,
Script: in.Script,
Args: in.Args,
CreateAt: time.Now().Unix(),
CreateBy: in.Creator,
}
if err = record.Add(ctx); err != nil {
err = fmt.Errorf("event_callback_ibex: persist task_record fail: %v, event: %+v", err, event)
logger.Errorf("%s", err)
return id, err
}
return id, nil
}
func CanDoIbex(username string, tpl *models.TaskTpl, host string, targetCache *memsto.TargetCacheType, userCache *memsto.UserCacheType) (bool, error) {
user := userCache.GetByUsername(username)
if user != nil && user.IsAdmin() {
return true, nil
}
target, has := targetCache.Get(host)
if !has {
return false, nil
}
return target.MatchGroupId(tpl.GroupId), nil
}
func TaskAdd(f models.TaskForm, authUser string, isCenter bool) (int64, error) {
if storage.Cache == nil {
logger.Warningf("event_callback_ibex: redis cache is nil, task: %+v", f)
return 0, fmt.Errorf("redis cache is nil")
}
hosts := cleanHosts(f.Hosts)
if len(hosts) == 0 {
return 0, fmt.Errorf("arg(hosts) empty")
}
taskMeta := &imodels.TaskMeta{
Title: f.Title,
Account: f.Account,
Batch: f.Batch,
Tolerance: f.Tolerance,
Timeout: f.Timeout,
Pause: f.Pause,
Script: f.Script,
Args: f.Args,
Stdin: f.Stdin,
Creator: f.Creator,
}
err := taskMeta.CleanFields()
if err != nil {
return 0, err
}
taskMeta.HandleFH(hosts[0])
// 任务类型分为"告警规则触发"和"n9e center用户下发"两种;
// 边缘机房"告警规则触发"的任务不需要规划并且它可能是失联的无法使用db资源所以放入redis缓存中直接下发给agentd执行
if !isCenter && f.AlertTriggered {
if err := taskMeta.Create(); err != nil {
// 当网络不连通时生成唯一的id防止边缘机房中不同任务的id相同
// 方法是redis自增id去防止同一个机房的不同n9e edge生成的id相同
// 但没法防止不同边缘机房生成同样的id所以生成id的数据不会上报存入数据库只用于闭环执行。
taskMeta.Id, err = storage.IdGet()
if err != nil {
return 0, err
}
}
taskHost := imodels.TaskHost{
Id: taskMeta.Id,
Host: hosts[0],
Status: "running",
}
if err = taskHost.Create(); err != nil {
logger.Warningf("task_add_fail: authUser=%s title=%s err=%s", authUser, taskMeta.Title, err.Error())
}
// 缓存任务元信息和待下发的任务
err = taskMeta.Cache(hosts[0])
if err != nil {
return 0, err
}
} else {
// 如果是中心机房,还是保持之前的逻辑
err = taskMeta.Save(hosts, f.Action)
if err != nil {
return 0, err
}
}
logger.Infof("task_add_succ: authUser=%s title=%s", authUser, taskMeta.Title)
return taskMeta.Id, nil
}
func cleanHosts(formHosts []string) []string {
cnt := len(formHosts)
arr := make([]string, 0, cnt)
for i := 0; i < cnt; i++ {
item := strings.TrimSpace(formHosts[i])
if item == "" {
continue
}
if strings.HasPrefix(item, "#") {
continue
}
arr = append(arr, item)
}
return arr
}

65
alert/sender/lark.go Normal file
View File

@@ -0,0 +1,65 @@
package sender
import (
"html/template"
"strings"
"github.com/ccfos/nightingale/v6/models"
)
var (
_ CallBacker = (*LarkSender)(nil)
)
type LarkSender struct {
tpl *template.Template
}
func (lk *LarkSender) CallBack(ctx CallBackContext) {
if len(ctx.Events) == 0 || len(ctx.CallBackURL) == 0 {
return
}
body := feishu{
Msgtype: "text",
Content: feishuContent{
Text: BuildTplMessage(models.Lark, lk.tpl, ctx.Events),
},
}
doSendAndRecord(ctx.Ctx, ctx.CallBackURL, ctx.CallBackURL, body, "callback", ctx.Stats, ctx.Events)
}
func (lk *LarkSender) Send(ctx MessageContext) {
if len(ctx.Users) == 0 || len(ctx.Events) == 0 {
return
}
urls, tokens := lk.extract(ctx.Users)
message := BuildTplMessage(models.Lark, lk.tpl, ctx.Events)
for i, url := range urls {
body := feishu{
Msgtype: "text",
Content: feishuContent{
Text: message,
},
}
doSendAndRecord(ctx.Ctx, url, tokens[i], body, models.Lark, ctx.Stats, ctx.Events)
}
}
func (lk *LarkSender) extract(users []*models.User) ([]string, []string) {
urls := make([]string, 0, len(users))
tokens := make([]string, 0, len(users))
for _, user := range users {
if token, has := user.ExtractToken(models.Lark); has {
url := token
if !strings.HasPrefix(token, "https://") && !strings.HasPrefix(token, "http://") {
url = "https://open.larksuite.com/open-apis/bot/v2/hook/" + token
}
urls = append(urls, url)
tokens = append(tokens, token)
}
}
return urls, tokens
}

101
alert/sender/larkcard.go Normal file
View File

@@ -0,0 +1,101 @@
package sender
import (
"fmt"
"html/template"
"net/url"
"strings"
"github.com/ccfos/nightingale/v6/models"
)
type LarkCardSender struct {
tpl *template.Template
}
func (fs *LarkCardSender) CallBack(ctx CallBackContext) {
if len(ctx.Events) == 0 || len(ctx.CallBackURL) == 0 {
return
}
ats := ExtractAtsParams(ctx.CallBackURL)
message := BuildTplMessage(models.LarkCard, fs.tpl, ctx.Events)
if len(ats) > 0 {
atTags := ""
for _, at := range ats {
if strings.Contains(at, "@") {
atTags += fmt.Sprintf("<at email=\"%s\" ></at>", at)
} else {
atTags += fmt.Sprintf("<at id=\"%s\" ></at>", at)
}
}
message = atTags + message
}
color := "red"
lowerUnicode := strings.ToLower(message)
if strings.Count(lowerUnicode, Recovered) > 0 && strings.Count(lowerUnicode, Triggered) > 0 {
color = "orange"
} else if strings.Count(lowerUnicode, Recovered) > 0 {
color = "green"
}
SendTitle := fmt.Sprintf("🔔 %s", ctx.Events[0].RuleName)
body := createFeishuCardBody()
body.Card.Header.Title.Content = SendTitle
body.Card.Header.Template = color
body.Card.Elements[0].Text.Content = message
body.Card.Elements[2].Elements[0].Content = SendTitle
// This is to be compatible with the Larkcard interface, if with query string parameters, the request will fail
// Remove query parameters from the URL,
parsedURL, err := url.Parse(ctx.CallBackURL)
if err != nil {
return
}
parsedURL.RawQuery = ""
doSendAndRecord(ctx.Ctx, ctx.CallBackURL, ctx.CallBackURL, body, "callback", ctx.Stats, ctx.Events)
}
func (fs *LarkCardSender) Send(ctx MessageContext) {
if len(ctx.Users) == 0 || len(ctx.Events) == 0 {
return
}
urls, tokens := fs.extract(ctx.Users)
message := BuildTplMessage(models.LarkCard, fs.tpl, ctx.Events)
color := "red"
lowerUnicode := strings.ToLower(message)
if strings.Count(lowerUnicode, Recovered) > 0 && strings.Count(lowerUnicode, Triggered) > 0 {
color = "orange"
} else if strings.Count(lowerUnicode, Recovered) > 0 {
color = "green"
}
SendTitle := fmt.Sprintf("🔔 %s", ctx.Events[0].RuleName)
body := createFeishuCardBody()
body.Card.Header.Title.Content = SendTitle
body.Card.Header.Template = color
body.Card.Elements[0].Text.Content = message
body.Card.Elements[2].Elements[0].Content = SendTitle
for i, url := range urls {
doSendAndRecord(ctx.Ctx, url, tokens[i], body, models.LarkCard, ctx.Stats, ctx.Events)
}
}
func (fs *LarkCardSender) extract(users []*models.User) ([]string, []string) {
urls := make([]string, 0, len(users))
tokens := make([]string, 0)
for i := range users {
if token, has := users[i].ExtractToken(models.Lark); has {
url := token
if !strings.HasPrefix(token, "https://") && !strings.HasPrefix(token, "http://") {
url = "https://open.larksuite.com/open-apis/bot/v2/hook/" + strings.TrimSpace(token)
}
urls = append(urls, url)
tokens = append(tokens, token)
}
}
return urls, tokens
}

View File

@@ -7,6 +7,7 @@ import (
"github.com/ccfos/nightingale/v6/alert/astats"
"github.com/ccfos/nightingale/v6/models"
"github.com/ccfos/nightingale/v6/pkg/ctx"
"github.com/toolkits/pkg/logger"
)
@@ -36,13 +37,26 @@ func (ms *MmSender) Send(ctx MessageContext) {
if len(urls) == 0 {
return
}
message := BuildTplMessage(ms.tpl, ctx.Events)
message := BuildTplMessage(models.Mm, ms.tpl, ctx.Events)
SendMM(MatterMostMessage{
SendMM(ctx.Ctx, MatterMostMessage{
Text: message,
Tokens: urls,
Stats: ctx.Stats,
})
}, ctx.Events, models.Mm)
}
func (ms *MmSender) CallBack(ctx CallBackContext) {
if len(ctx.Events) == 0 || len(ctx.CallBackURL) == 0 {
return
}
message := BuildTplMessage(models.Mm, ms.tpl, ctx.Events)
SendMM(ctx.Ctx, MatterMostMessage{
Text: message,
Tokens: []string{ctx.CallBackURL},
Stats: ctx.Stats,
}, ctx.Events, "callback")
}
func (ms *MmSender) extract(users []*models.User) []string {
@@ -55,11 +69,12 @@ func (ms *MmSender) extract(users []*models.User) []string {
return tokens
}
func SendMM(message MatterMostMessage) {
func SendMM(ctx *ctx.Context, message MatterMostMessage, events []*models.AlertCurEvent, channel string) {
for i := 0; i < len(message.Tokens); i++ {
u, err := url.Parse(message.Tokens[i])
if err != nil {
logger.Errorf("mm_sender: failed to parse error=%v", err)
NotifyRecord(ctx, events, 0, channel, message.Tokens[i], "", err)
continue
}
@@ -88,7 +103,7 @@ func SendMM(message MatterMostMessage) {
Username: username,
Text: txt + message.Text,
}
doSend(ur, body, models.Mm, message.Stats)
doSendAndRecord(ctx, ur, message.Tokens[i], body, channel, message.Stats, events)
}
}
}

View File

@@ -0,0 +1,75 @@
package sender
import (
"errors"
"time"
"github.com/ccfos/nightingale/v6/alert/astats"
"github.com/ccfos/nightingale/v6/models"
"github.com/ccfos/nightingale/v6/pkg/ctx"
"github.com/toolkits/pkg/container/list"
"github.com/toolkits/pkg/logger"
)
// 通知记录队列,最大长度 1000000
var NotifyRecordQueue = list.NewSafeListLimited(1000000)
// 每秒上报通知记录队列大小
func ReportNotifyRecordQueueSize(stats *astats.Stats) {
for {
time.Sleep(time.Second)
stats.GaugeNotifyRecordQueueSize.Set(float64(NotifyRecordQueue.Len()))
}
}
// 推送通知记录到队列
// 若队列满 则返回 error
func PushNotifyRecords(records []*models.NotificationRecord) error {
for _, record := range records {
if ok := NotifyRecordQueue.PushFront(record); !ok {
logger.Warningf("notify record queue is full, record: %+v", record)
return errors.New("notify record queue is full")
}
}
return nil
}
type NotifyRecordConsumer struct {
ctx *ctx.Context
}
func NewNotifyRecordConsumer(ctx *ctx.Context) *NotifyRecordConsumer {
return &NotifyRecordConsumer{
ctx: ctx,
}
}
// 消费通知记录队列 每 100ms 检测一次队列是否为空
func (c *NotifyRecordConsumer) LoopConsume() {
duration := time.Duration(100) * time.Millisecond
for {
// 无论队列是否为空 都需要等待
time.Sleep(duration)
inotis := NotifyRecordQueue.PopBackBy(100)
if len(inotis) == 0 {
continue
}
// 类型转换,不然 CreateInBatches 会报错
notis := make([]*models.NotificationRecord, 0, len(inotis))
for _, inoti := range inotis {
notis = append(notis, inoti.(*models.NotificationRecord))
}
c.consume(notis)
}
}
func (c *NotifyRecordConsumer) consume(notis []*models.NotificationRecord) {
if err := models.DB(c.ctx).CreateInBatches(notis, 100).Error; err != nil {
logger.Errorf("add notis:%v failed, err: %v", notis, err)
}
}

View File

@@ -2,26 +2,31 @@ package sender
import (
"bytes"
"fmt"
"os"
"os/exec"
"time"
"unicode/utf8"
"github.com/ccfos/nightingale/v6/alert/astats"
"github.com/ccfos/nightingale/v6/models"
"github.com/ccfos/nightingale/v6/pkg/ctx"
"github.com/toolkits/pkg/file"
"github.com/toolkits/pkg/logger"
"github.com/toolkits/pkg/sys"
)
func MayPluginNotify(noticeBytes []byte, notifyScript models.NotifyScript, stats *astats.Stats) {
func MayPluginNotify(ctx *ctx.Context, noticeBytes []byte, notifyScript models.NotifyScript,
stats *astats.Stats, event *models.AlertCurEvent) {
if len(noticeBytes) == 0 {
return
}
alertingCallScript(noticeBytes, notifyScript, stats)
alertingCallScript(ctx, noticeBytes, notifyScript, stats, event)
}
func alertingCallScript(stdinBytes []byte, notifyScript models.NotifyScript, stats *astats.Stats) {
func alertingCallScript(ctx *ctx.Context, stdinBytes []byte, notifyScript models.NotifyScript,
stats *astats.Stats, event *models.AlertCurEvent) {
// not enable or no notify.py? do nothing
config := notifyScript
if !config.Enable || config.Content == "" {
@@ -30,7 +35,7 @@ func alertingCallScript(stdinBytes []byte, notifyScript models.NotifyScript, sta
channel := "script"
stats.AlertNotifyTotal.WithLabelValues(channel).Inc()
fpath := ".notify_scriptt"
fpath := ".notify_script"
if config.Type == 1 {
fpath = config.Content
} else {
@@ -74,6 +79,7 @@ func alertingCallScript(stdinBytes []byte, notifyScript models.NotifyScript, sta
cmd.Stdout = &buf
cmd.Stderr = &buf
start := time.Now()
err := startCmd(cmd)
if err != nil {
logger.Errorf("event_script_notify_fail: run cmd err: %v", err)
@@ -82,6 +88,26 @@ func alertingCallScript(stdinBytes []byte, notifyScript models.NotifyScript, sta
err, isTimeout := sys.WrapTimeout(cmd, time.Duration(config.Timeout)*time.Second)
res := buf.String()
res = fmt.Sprintf("send_time: %s duration: %d ms %s", time.Now().Format("2006-01-02 15:04:05"), time.Since(start).Milliseconds(), res)
// 截断超出长度的输出
if len(res) > 512 {
// 确保在有效的UTF-8字符边界处截断
validLen := 0
for i := 0; i < 512 && i < len(res); {
_, size := utf8.DecodeRuneInString(res[i:])
if i+size > 512 {
break
}
i += size
validLen = i
}
res = res[:validLen] + "..."
}
NotifyRecord(ctx, []*models.AlertCurEvent{event}, 0, channel, cmd.String(), res, buildErr(err, isTimeout))
if isTimeout {
if err == nil {
logger.Errorf("event_script_notify_fail: timeout and killed process %s", fpath)
@@ -95,10 +121,18 @@ func alertingCallScript(stdinBytes []byte, notifyScript models.NotifyScript, sta
}
if err != nil {
logger.Errorf("event_script_notify_fail: exec script %s occur error: %v, output: %s", fpath, err, buf.String())
logger.Errorf("event_script_notify_fail: exec script %s occur error: %v, output: %s", fpath, err, res)
stats.AlertNotifyErrorTotal.WithLabelValues(channel).Inc()
return
}
logger.Infof("event_script_notify_ok: exec %s output: %s", fpath, buf.String())
logger.Infof("event_script_notify_ok: exec %s output: %s", fpath, res)
}
func buildErr(err error, isTimeout bool) error {
if err == nil && !isTimeout {
return nil
} else {
return fmt.Errorf("is_timeout: %v, err: %v", isTimeout, err)
}
}

View File

@@ -8,6 +8,7 @@ import (
"github.com/ccfos/nightingale/v6/alert/astats"
"github.com/ccfos/nightingale/v6/memsto"
"github.com/ccfos/nightingale/v6/models"
"github.com/ccfos/nightingale/v6/pkg/ctx"
)
type (
@@ -22,6 +23,7 @@ type (
Rule *models.AlertRule
Events []*models.AlertCurEvent
Stats *astats.Stats
Ctx *ctx.Context
}
)
@@ -41,25 +43,31 @@ func NewSender(key string, tpls map[string]*template.Template, smtp ...aconf.SMT
return &MmSender{tpl: tpls[models.Mm]}
case models.Telegram:
return &TelegramSender{tpl: tpls[models.Telegram]}
case models.Lark:
return &LarkSender{tpl: tpls[models.Lark]}
case models.LarkCard:
return &LarkCardSender{tpl: tpls[models.LarkCard]}
}
return nil
}
func BuildMessageContext(rule *models.AlertRule, events []*models.AlertCurEvent, uids []int64, userCache *memsto.UserCacheType, stats *astats.Stats) MessageContext {
func BuildMessageContext(ctx *ctx.Context, rule *models.AlertRule, events []*models.AlertCurEvent,
uids []int64, userCache *memsto.UserCacheType, stats *astats.Stats) MessageContext {
users := userCache.GetByUserIds(uids)
return MessageContext{
Rule: rule,
Events: events,
Users: users,
Stats: stats,
Ctx: ctx,
}
}
type BuildTplMessageFunc func(tpl *template.Template, events []*models.AlertCurEvent) string
type BuildTplMessageFunc func(channel string, tpl *template.Template, events []*models.AlertCurEvent) string
var BuildTplMessage BuildTplMessageFunc = buildTplMessage
func buildTplMessage(tpl *template.Template, events []*models.AlertCurEvent) string {
func buildTplMessage(channel string, tpl *template.Template, events []*models.AlertCurEvent) string {
if tpl == nil {
return "tpl for current sender not found, please check configuration"
}

View File

@@ -1,11 +1,13 @@
package sender
import (
"errors"
"html/template"
"strings"
"github.com/ccfos/nightingale/v6/alert/astats"
"github.com/ccfos/nightingale/v6/models"
"github.com/ccfos/nightingale/v6/pkg/ctx"
"github.com/toolkits/pkg/logger"
)
@@ -21,22 +23,39 @@ type telegram struct {
Text string `json:"text"`
}
var (
_ CallBacker = (*TelegramSender)(nil)
)
type TelegramSender struct {
tpl *template.Template
}
func (ts *TelegramSender) CallBack(ctx CallBackContext) {
if len(ctx.Events) == 0 || len(ctx.CallBackURL) == 0 {
return
}
message := BuildTplMessage(models.Telegram, ts.tpl, ctx.Events)
SendTelegram(ctx.Ctx, TelegramMessage{
Text: message,
Tokens: []string{ctx.CallBackURL},
Stats: ctx.Stats,
}, ctx.Events, "callback")
}
func (ts *TelegramSender) Send(ctx MessageContext) {
if len(ctx.Users) == 0 || len(ctx.Events) == 0 {
return
}
tokens := ts.extract(ctx.Users)
message := BuildTplMessage(ts.tpl, ctx.Events)
message := BuildTplMessage(models.Telegram, ts.tpl, ctx.Events)
SendTelegram(TelegramMessage{
SendTelegram(ctx.Ctx, TelegramMessage{
Text: message,
Tokens: tokens,
Stats: ctx.Stats,
})
}, ctx.Events, models.Telegram)
}
func (ts *TelegramSender) extract(users []*models.User) []string {
@@ -49,10 +68,11 @@ func (ts *TelegramSender) extract(users []*models.User) []string {
return tokens
}
func SendTelegram(message TelegramMessage) {
func SendTelegram(ctx *ctx.Context, message TelegramMessage, events []*models.AlertCurEvent, channel string) {
for i := 0; i < len(message.Tokens); i++ {
if !strings.Contains(message.Tokens[i], "/") && !strings.HasPrefix(message.Tokens[i], "https://") {
logger.Errorf("telegram_sender: result=fail invalid token=%s", message.Tokens[i])
NotifyRecord(ctx, events, 0, channel, message.Tokens[i], "", errors.New("invalid token"))
continue
}
var url string
@@ -73,6 +93,6 @@ func SendTelegram(message TelegramMessage) {
Text: message.Text,
}
doSend(url, body, models.Telegram, message.Stats)
doSendAndRecord(ctx, url, message.Tokens[i], body, channel, message.Stats, events)
}
}

View File

@@ -2,70 +2,216 @@ package sender
import (
"bytes"
"crypto/tls"
"encoding/json"
"fmt"
"io"
"net/http"
"sync"
"time"
"github.com/ccfos/nightingale/v6/alert/astats"
"github.com/ccfos/nightingale/v6/models"
"github.com/ccfos/nightingale/v6/pkg/ctx"
"github.com/ccfos/nightingale/v6/pkg/poster"
"github.com/toolkits/pkg/logger"
)
func SendWebhooks(webhooks []*models.Webhook, event *models.AlertCurEvent, stats *astats.Stats) {
for _, conf := range webhooks {
if conf.Url == "" || !conf.Enable {
continue
}
bs, err := json.Marshal(event)
if err != nil {
continue
}
// webhookClientCache 缓存 http.Client避免每次请求都创建新的 Client 导致连接泄露
var webhookClientCache sync.Map // key: clientKey (string), value: *http.Client
bf := bytes.NewBuffer(bs)
// 相同配置的 webhook 会复用同一个 Client
func getWebhookClient(webhook *models.Webhook) *http.Client {
clientKey := webhook.Hash()
req, err := http.NewRequest("POST", conf.Url, bf)
if err != nil {
logger.Warning("alertingWebhook failed to new request", err)
continue
}
if client, ok := webhookClientCache.Load(clientKey); ok {
return client.(*http.Client)
}
req.Header.Set("Content-Type", "application/json")
if conf.BasicAuthUser != "" && conf.BasicAuthPass != "" {
req.SetBasicAuth(conf.BasicAuthUser, conf.BasicAuthPass)
}
// 创建新的 Client
transport := &http.Transport{
TLSClientConfig: &tls.Config{InsecureSkipVerify: webhook.SkipVerify},
MaxIdleConns: 100,
MaxIdleConnsPerHost: 10,
IdleConnTimeout: 90 * time.Second,
}
if len(conf.Headers) > 0 && len(conf.Headers)%2 == 0 {
for i := 0; i < len(conf.Headers); i += 2 {
if conf.Headers[i] == "host" {
req.Host = conf.Headers[i+1]
continue
}
req.Header.Set(conf.Headers[i], conf.Headers[i+1])
if poster.UseProxy(webhook.Url) {
transport.Proxy = http.ProxyFromEnvironment
}
timeout := webhook.Timeout
if timeout <= 0 {
timeout = 10
}
newClient := &http.Client{
Timeout: time.Duration(timeout) * time.Second,
Transport: transport,
}
// 使用 LoadOrStore 确保并发安全,避免重复创建
actual, loaded := webhookClientCache.LoadOrStore(clientKey, newClient)
if loaded {
return actual.(*http.Client)
}
return newClient
}
func sendWebhook(webhook *models.Webhook, event interface{}, stats *astats.Stats) (bool, string, error) {
channel := "webhook"
if webhook.Type == models.RuleCallback {
channel = "callback"
}
conf := webhook
if conf.Url == "" || !conf.Enable {
return false, "", nil
}
bs, err := json.Marshal(event)
if err != nil {
logger.Errorf("%s alertingWebhook failed to marshal event:%+v err:%v", channel, event, err)
return false, "", err
}
bf := bytes.NewBuffer(bs)
req, err := http.NewRequest("POST", conf.Url, bf)
if err != nil {
logger.Warningf("%s alertingWebhook failed to new request event:%s err:%v", channel, string(bs), err)
return true, "", err
}
req.Header.Set("Content-Type", "application/json")
if conf.BasicAuthUser != "" && conf.BasicAuthPass != "" {
req.SetBasicAuth(conf.BasicAuthUser, conf.BasicAuthPass)
}
if len(conf.Headers) > 0 && len(conf.Headers)%2 == 0 {
for i := 0; i < len(conf.Headers); i += 2 {
if conf.Headers[i] == "host" || conf.Headers[i] == "Host" {
req.Host = conf.Headers[i+1]
continue
}
req.Header.Set(conf.Headers[i], conf.Headers[i+1])
}
}
// 使用全局 Client 缓存,避免每次请求都创建新的 Client 导致连接泄露
client := getWebhookClient(conf)
// todo add skip verify
client := http.Client{
Timeout: time.Duration(conf.Timeout) * time.Second,
stats.AlertNotifyTotal.WithLabelValues(channel).Inc()
var resp *http.Response
var body []byte
resp, err = client.Do(req)
if err != nil {
stats.AlertNotifyErrorTotal.WithLabelValues(channel).Inc()
logger.Errorf("event_%s_fail, event:%s, url: [%s], error: [%s]", channel, string(bs), conf.Url, err)
return true, "", err
}
if resp.Body != nil {
defer resp.Body.Close()
body, _ = io.ReadAll(resp.Body)
}
if resp.StatusCode == 429 {
logger.Errorf("event_%s_fail, url: %s, response code: %d, body: %s event:%s", channel, conf.Url, resp.StatusCode, string(body), string(bs))
return true, fmt.Sprintf("status_code:%d, response:%s", resp.StatusCode, string(body)), fmt.Errorf("status code is 429")
}
logger.Debugf("event_%s_succ, url: %s, response code: %d, body: %s event:%s", channel, conf.Url, resp.StatusCode, string(body), string(bs))
return false, fmt.Sprintf("status_code:%d, response:%s", resp.StatusCode, string(body)), nil
}
func SingleSendWebhooks(ctx *ctx.Context, webhooks map[string]*models.Webhook, event *models.AlertCurEvent, stats *astats.Stats) {
for _, conf := range webhooks {
retryCount := 0
for retryCount < 3 {
start := time.Now()
needRetry, res, err := sendWebhook(conf, event, stats)
res = fmt.Sprintf("send_time: %s duration: %d ms %s", time.Now().Format("2006-01-02 15:04:05"), time.Since(start).Milliseconds(), res)
NotifyRecord(ctx, []*models.AlertCurEvent{event}, 0, "webhook", conf.Url, res, err)
if !needRetry {
break
}
retryCount++
time.Sleep(time.Minute * 1 * time.Duration(retryCount))
}
}
}
func BatchSendWebhooks(ctx *ctx.Context, webhooks map[string]*models.Webhook, event *models.AlertCurEvent, stats *astats.Stats) {
for _, conf := range webhooks {
logger.Infof("push event:%+v to queue:%v", event, conf)
PushEvent(ctx, conf, event, stats)
}
}
var EventQueue = make(map[string]*WebhookQueue)
var CallbackEventQueue = make(map[string]*WebhookQueue)
var CallbackEventQueueLock sync.RWMutex
var EventQueueLock sync.RWMutex
const QueueMaxSize = 100000
type WebhookQueue struct {
eventQueue *SafeEventQueue
closeCh chan struct{}
}
func PushEvent(ctx *ctx.Context, webhook *models.Webhook, event *models.AlertCurEvent, stats *astats.Stats) {
EventQueueLock.RLock()
queue := EventQueue[webhook.Url]
EventQueueLock.RUnlock()
if queue == nil {
queue = &WebhookQueue{
eventQueue: NewSafeEventQueue(QueueMaxSize),
closeCh: make(chan struct{}),
}
EventQueueLock.Lock()
EventQueue[webhook.Url] = queue
EventQueueLock.Unlock()
StartConsumer(ctx, queue, webhook.Batch, webhook, stats)
}
succ := queue.eventQueue.Push(event)
if !succ {
stats.AlertNotifyErrorTotal.WithLabelValues("push_event_queue").Inc()
logger.Warningf("Write channel(%s) full, current channel size: %d event:%v", webhook.Url, queue.eventQueue.Len(), event)
}
}
func StartConsumer(ctx *ctx.Context, queue *WebhookQueue, popSize int, webhook *models.Webhook, stats *astats.Stats) {
for {
select {
case <-queue.closeCh:
logger.Infof("event queue:%v closed", queue)
return
default:
events := queue.eventQueue.PopN(popSize)
if len(events) == 0 {
time.Sleep(time.Millisecond * 400)
continue
}
retryCount := 0
for retryCount < webhook.RetryCount {
start := time.Now()
needRetry, res, err := sendWebhook(webhook, events, stats)
res = fmt.Sprintf("send_time: %s duration: %d ms %s", time.Now().Format("2006-01-02 15:04:05"), time.Since(start).Milliseconds(), res)
go NotifyRecord(ctx, events, 0, "webhook", webhook.Url, res, err)
if !needRetry {
break
}
retryCount++
time.Sleep(time.Second * time.Duration(webhook.RetryInterval) * time.Duration(retryCount))
}
}
stats.AlertNotifyTotal.WithLabelValues("webhook").Inc()
var resp *http.Response
resp, err = client.Do(req)
if err != nil {
stats.AlertNotifyErrorTotal.WithLabelValues("webhook").Inc()
logger.Errorf("event_webhook_fail, ruleId: [%d], eventId: [%d], url: [%s], error: [%s]", event.RuleId, event.Id, conf.Url, err)
continue
}
var body []byte
if resp.Body != nil {
defer resp.Body.Close()
body, _ = io.ReadAll(resp.Body)
}
logger.Debugf("event_webhook_succ, url: %s, response code: %d, body: %s", conf.Url, resp.StatusCode, string(body))
}
}

View File

@@ -0,0 +1,109 @@
package sender
import (
"container/list"
"sync"
"github.com/ccfos/nightingale/v6/models"
)
type SafeEventQueue struct {
lock sync.RWMutex
maxSize int
queueHigh *list.List
queueMiddle *list.List
queueLow *list.List
}
const (
High = 1
Middle = 2
Low = 3
)
func NewSafeEventQueue(maxSize int) *SafeEventQueue {
return &SafeEventQueue{
maxSize: maxSize,
lock: sync.RWMutex{},
queueHigh: list.New(),
queueMiddle: list.New(),
queueLow: list.New(),
}
}
func (spq *SafeEventQueue) Len() int {
spq.lock.RLock()
defer spq.lock.RUnlock()
return spq.queueHigh.Len() + spq.queueMiddle.Len() + spq.queueLow.Len()
}
// len 无锁读取长度,不要在本文件外调用
func (spq *SafeEventQueue) len() int {
return spq.queueHigh.Len() + spq.queueMiddle.Len() + spq.queueLow.Len()
}
func (spq *SafeEventQueue) Push(event *models.AlertCurEvent) bool {
spq.lock.Lock()
defer spq.lock.Unlock()
for spq.len() > spq.maxSize {
return false
}
switch event.Severity {
case High:
spq.queueHigh.PushBack(event)
case Middle:
spq.queueMiddle.PushBack(event)
case Low:
spq.queueLow.PushBack(event)
default:
return false
}
return true
}
// pop 无锁弹出事件,不要在本文件外调用
func (spq *SafeEventQueue) pop() *models.AlertCurEvent {
if spq.len() == 0 {
return nil
}
var elem interface{}
if spq.queueHigh.Len() > 0 {
elem = spq.queueHigh.Remove(spq.queueHigh.Front())
} else if spq.queueMiddle.Len() > 0 {
elem = spq.queueMiddle.Remove(spq.queueMiddle.Front())
} else {
elem = spq.queueLow.Remove(spq.queueLow.Front())
}
event, ok := elem.(*models.AlertCurEvent)
if !ok {
return nil
}
return event
}
func (spq *SafeEventQueue) Pop() *models.AlertCurEvent {
spq.lock.Lock()
defer spq.lock.Unlock()
return spq.pop()
}
func (spq *SafeEventQueue) PopN(n int) []*models.AlertCurEvent {
spq.lock.Lock()
defer spq.lock.Unlock()
events := make([]*models.AlertCurEvent, 0, n)
count := 0
for count < n && spq.len() > 0 {
event := spq.pop()
if event != nil {
events = append(events, event)
}
count++
}
return events
}

View File

@@ -0,0 +1,157 @@
package sender
import (
"sync"
"testing"
"time"
"github.com/ccfos/nightingale/v6/models"
"github.com/stretchr/testify/assert"
)
func TestSafePriorityQueue_ConcurrentPushPop(t *testing.T) {
spq := NewSafeEventQueue(100000)
var wg sync.WaitGroup
numGoroutines := 100
numEvents := 1000
// 并发 Push
wg.Add(numGoroutines)
for i := 0; i < numGoroutines; i++ {
go func(goroutineID int) {
defer wg.Done()
for j := 0; j < numEvents; j++ {
event := &models.AlertCurEvent{
Severity: goroutineID%3 + 1,
TriggerTime: time.Now().UnixNano(),
}
spq.Push(event)
}
}(i)
}
wg.Wait()
// 检查队列长度是否正确
expectedLen := numGoroutines * numEvents
assert.Equal(t, expectedLen, spq.Len(), "Queue length mismatch after concurrent pushes")
// 并发 Pop
wg.Add(numGoroutines)
for i := 0; i < numGoroutines; i++ {
go func() {
defer wg.Done()
for {
event := spq.Pop()
if event == nil {
return
}
}
}()
}
wg.Wait()
// 最终队列应该为空
assert.Equal(t, 0, spq.Len(), "Queue should be empty after concurrent pops")
}
func TestSafePriorityQueue_ConcurrentPopMax(t *testing.T) {
spq := NewSafeEventQueue(100000)
// 添加初始数据
for i := 0; i < 1000; i++ {
spq.Push(&models.AlertCurEvent{
Severity: i%3 + 1,
TriggerTime: time.Now().UnixNano(),
})
}
var wg sync.WaitGroup
numGoroutines := 10
popMax := 100
// 并发 PopN
wg.Add(numGoroutines)
for i := 0; i < numGoroutines; i++ {
go func() {
defer wg.Done()
events := spq.PopN(popMax)
assert.LessOrEqual(t, len(events), popMax, "PopN exceeded maximum")
}()
}
wg.Wait()
// 检查队列长度是否正确
expectedRemaining := 1000 - (numGoroutines * popMax)
if expectedRemaining < 0 {
expectedRemaining = 0
}
assert.Equal(t, expectedRemaining, spq.Len(), "Queue length mismatch after concurrent PopN")
}
func TestSafePriorityQueue_ConcurrentPushPopWithDifferentSeverities(t *testing.T) {
spq := NewSafeEventQueue(100000)
var wg sync.WaitGroup
numGoroutines := 50
numEvents := 500
// 并发 Push 不同优先级的事件
wg.Add(numGoroutines)
for i := 0; i < numGoroutines; i++ {
go func(goroutineID int) {
defer wg.Done()
for j := 0; j < numEvents; j++ {
event := &models.AlertCurEvent{
Severity: goroutineID%3 + 1, // 模拟不同的 Severity
TriggerTime: time.Now().UnixNano(),
}
spq.Push(event)
}
}(i)
}
wg.Wait()
// 检查队列长度是否正确
expectedLen := numGoroutines * numEvents
assert.Equal(t, expectedLen, spq.Len(), "Queue length mismatch after concurrent pushes")
// 检查事件的顺序是否按照优先级排列
var lastEvent *models.AlertCurEvent
for spq.Len() > 0 {
event := spq.Pop()
if lastEvent != nil {
assert.LessOrEqual(t, lastEvent.Severity, event.Severity, "Events are not in correct priority order")
}
lastEvent = event
}
}
func TestSafePriorityQueue_ExceedMaxSize(t *testing.T) {
spq := NewSafeEventQueue(5)
// 插入超过最大容量的事件
for i := 0; i < 10; i++ {
spq.Push(&models.AlertCurEvent{
Severity: i % 3,
TriggerTime: int64(i),
})
}
// 验证队列的长度是否不超过 maxSize
assert.LessOrEqual(t, spq.Len(), spq.maxSize)
// 验证队列中剩余事件的内容
expectedEvents := 5
if spq.Len() < 5 {
expectedEvents = spq.Len()
}
// 检查最后存入的事件是否是按优先级排序
for i := 0; i < expectedEvents; i++ {
event := spq.Pop()
if event != nil {
assert.LessOrEqual(t, event.Severity, 2)
}
}
}

View File

@@ -0,0 +1,111 @@
package sender
import (
"container/list"
"sync"
"github.com/ccfos/nightingale/v6/models"
)
type SafeList struct {
sync.RWMutex
L *list.List
}
func NewSafeList() *SafeList {
return &SafeList{L: list.New()}
}
func (sl *SafeList) PushFront(v interface{}) *list.Element {
sl.Lock()
e := sl.L.PushFront(v)
sl.Unlock()
return e
}
func (sl *SafeList) PushFrontBatch(vs []interface{}) {
sl.Lock()
for _, item := range vs {
sl.L.PushFront(item)
}
sl.Unlock()
}
func (sl *SafeList) PopBack(max int) []*models.AlertCurEvent {
sl.Lock()
count := sl.L.Len()
if count == 0 {
sl.Unlock()
return []*models.AlertCurEvent{}
}
if count > max {
count = max
}
items := make([]*models.AlertCurEvent, 0, count)
for i := 0; i < count; i++ {
item := sl.L.Remove(sl.L.Back())
sample, ok := item.(*models.AlertCurEvent)
if ok {
items = append(items, sample)
}
}
sl.Unlock()
return items
}
func (sl *SafeList) RemoveAll() {
sl.Lock()
sl.L.Init()
sl.Unlock()
}
func (sl *SafeList) Len() int {
sl.RLock()
size := sl.L.Len()
sl.RUnlock()
return size
}
// SafeList with Limited Size
type SafeListLimited struct {
maxSize int
SL *SafeList
}
func NewSafeListLimited(maxSize int) *SafeListLimited {
return &SafeListLimited{SL: NewSafeList(), maxSize: maxSize}
}
func (sll *SafeListLimited) PopBack(max int) []*models.AlertCurEvent {
return sll.SL.PopBack(max)
}
func (sll *SafeListLimited) PushFront(v interface{}) bool {
if sll.SL.Len() >= sll.maxSize {
return false
}
sll.SL.PushFront(v)
return true
}
func (sll *SafeListLimited) PushFrontBatch(vs []interface{}) bool {
if sll.SL.Len() >= sll.maxSize {
return false
}
sll.SL.PushFrontBatch(vs)
return true
}
func (sll *SafeListLimited) RemoveAll() {
sll.SL.RemoveAll()
}
func (sll *SafeListLimited) Len() int {
return sll.SL.Len()
}

View File

@@ -16,29 +16,50 @@ type wecom struct {
Markdown wecomMarkdown `json:"markdown"`
}
var (
_ CallBacker = (*WecomSender)(nil)
)
type WecomSender struct {
tpl *template.Template
}
func (ws *WecomSender) CallBack(ctx CallBackContext) {
if len(ctx.Events) == 0 || len(ctx.CallBackURL) == 0 {
return
}
message := BuildTplMessage(models.Wecom, ws.tpl, ctx.Events)
body := wecom{
Msgtype: "markdown",
Markdown: wecomMarkdown{
Content: message,
},
}
doSendAndRecord(ctx.Ctx, ctx.CallBackURL, ctx.CallBackURL, body, "callback", ctx.Stats, ctx.Events)
}
func (ws *WecomSender) Send(ctx MessageContext) {
if len(ctx.Users) == 0 || len(ctx.Events) == 0 {
return
}
urls := ws.extract(ctx.Users)
message := BuildTplMessage(ws.tpl, ctx.Events)
for _, url := range urls {
urls, tokens := ws.extract(ctx.Users)
message := BuildTplMessage(models.Wecom, ws.tpl, ctx.Events)
for i, url := range urls {
body := wecom{
Msgtype: "markdown",
Markdown: wecomMarkdown{
Content: message,
},
}
doSend(url, body, models.Wecom, ctx.Stats)
doSendAndRecord(ctx.Ctx, url, tokens[i], body, models.Wecom, ctx.Stats, ctx.Events)
}
}
func (ws *WecomSender) extract(users []*models.User) []string {
func (ws *WecomSender) extract(users []*models.User) ([]string, []string) {
urls := make([]string, 0, len(users))
tokens := make([]string, 0, len(users))
for _, user := range users {
if token, has := user.ExtractToken(models.Wecom); has {
url := token
@@ -46,7 +67,8 @@ func (ws *WecomSender) extract(users []*models.User) []string {
url = "https://qyapi.weixin.qq.com/cgi-bin/webhook/send?key=" + token
}
urls = append(urls, url)
tokens = append(tokens, token)
}
}
return urls
return urls, tokens
}

View File

@@ -1,5 +1,11 @@
package cconf
import (
"time"
"github.com/ccfos/nightingale/v6/pkg/httpx"
)
type Center struct {
Plugins []Plugin
MetricsYamlFile string
@@ -9,6 +15,11 @@ type Center struct {
MetricDesc MetricDescType
AnonymousAccess AnonymousAccess
UseFileAssets bool
FlashDuty FlashDuty
EventHistoryGroupView bool
CleanNotifyRecordDay int
MigrateBusiGroupLabel bool
RSA httpx.RSAConfig
}
type Plugin struct {
@@ -18,6 +29,12 @@ type Plugin struct {
TypeName string `json:"plugin_type_name"`
}
type FlashDuty struct {
Api string
Headers map[string]string
Timeout time.Duration
}
type AnonymousAccess struct {
PromQuerier bool
AlertDetail bool

View File

@@ -18,20 +18,28 @@ var MetricDesc MetricDescType
// GetMetricDesc , if metric is not registered, empty string will be returned
func GetMetricDesc(lang, metric string) string {
var m map[string]string
if lang == "zh" {
m = MetricDesc.Zh
} else {
switch lang {
case "en":
m = MetricDesc.En
default:
m = MetricDesc.Zh
}
if m != nil {
if desc, has := m[metric]; has {
if desc, ok := m[metric]; ok {
return desc
}
}
return MetricDesc.CommonDesc[metric]
}
if MetricDesc.CommonDesc != nil {
if desc, ok := MetricDesc.CommonDesc[metric]; ok {
return desc
}
}
return ""
}
func LoadMetricsYaml(configDir, metricsYamlFile string) error {
fp := metricsYamlFile
if fp == "" {

View File

@@ -15,9 +15,25 @@ type Operation struct {
}
type Ops struct {
Name string `yaml:"name" json:"name"`
Cname string `yaml:"cname" json:"cname"`
Ops []string `yaml:"ops" json:"ops"`
Name string `yaml:"name" json:"name"`
Cname string `yaml:"cname" json:"cname"`
Ops []SingleOp `yaml:"ops" json:"ops"`
}
// SingleOp Name 为 op 名称Cname 为展示名称,默认英文
type SingleOp struct {
Name string `yaml:"name" json:"name"`
Cname string `yaml:"cname" json:"cname"`
}
func TransformNames(name []string, nameToName map[string]string) []string {
var ret []string
for _, n := range name {
if v, has := nameToName[n]; has {
ret = append(ret, v)
}
}
return ret
}
func LoadOpsYaml(configDir string, opsYamlFile string) error {
@@ -28,11 +44,19 @@ func LoadOpsYaml(configDir string, opsYamlFile string) error {
if !file.IsExist(fp) {
return nil
}
hash, _ := file.MD5(fp)
if hash == "2f91a9ed265cf2024e266dc1d538ee77" {
// ops.yaml 是老的默认文件,删除
file.Remove(fp)
return nil
}
return file.ReadYaml(fp, &Operations)
}
func GetAllOps(ops []Ops) []string {
var ret []string
func GetAllOps(ops []Ops) []SingleOp {
var ret []SingleOp
for _, op := range ops {
ret = append(ret, op.Ops...)
}
@@ -40,7 +64,7 @@ func GetAllOps(ops []Ops) []string {
}
func MergeOperationConf() error {
opsBuiltIn := Operation{}
var opsBuiltIn Operation
err := yaml.Unmarshal([]byte(builtInOps), &opsBuiltIn)
if err != nil {
return fmt.Errorf("cannot parse builtInOps: %s", err.Error())
@@ -61,111 +85,221 @@ func MergeOperationConf() error {
const (
builtInOps = `
ops:
- name: dashboards
cname: 仪表盘
- name: Infrastructure
cname: Infrastructure
ops:
- "/dashboards"
- "/dashboards/add"
- "/dashboards/put"
- "/dashboards/del"
- "/dashboards-built-in"
- name: /targets
cname: Host - View
- name: /targets/put
cname: Host - Modify
- name: /targets/del
cname: Host - Delete
- name: /targets/bind
cname: Host - Bind Uncategorized
- name: alert
cname: 告警规则
- name: Explorer
cname: Explorer
ops:
- "/alert-rules"
- "/alert-rules/add"
- "/alert-rules/put"
- "/alert-rules/del"
- "/alert-rules-built-in"
- name: alert-mutes
cname: 告警静默管理
ops:
- "/alert-mutes"
- "/alert-mutes/add"
- "/alert-mutes/put"
- "/alert-mutes/del"
- name: alert-subscribes
cname: 告警订阅管理
ops:
- "/alert-subscribes"
- "/alert-subscribes/add"
- "/alert-subscribes/put"
- "/alert-subscribes/del"
- name: /metric/explorer
cname: Metrics Explorer
- name: /object/explorer
cname: Quick View
- name: /metrics-built-in
cname: Built-in Metric - View
- name: /builtin-metrics/add
cname: Built-in Metric - Add
- name: /builtin-metrics/put
cname: Built-in Metric - Modify
- name: /builtin-metrics/del
cname: Built-in Metric - Delete
- name: /recording-rules
cname: Recording Rule - View
- name: /recording-rules/add
cname: Recording Rule - Add
- name: /recording-rules/put
cname: Recording Rule - Modify
- name: /recording-rules/del
cname: Recording Rule - Delete
- name: /log/explorer
cname: Logs Explorer
- name: /log/index-patterns # 前端有个管理索引模式的页面,所以需要一个权限点来控制,后面应该改成侧拉板
cname: Index Pattern - View
- name: /log/index-patterns/add
cname: Index Pattern - Add
- name: /log/index-patterns/put
cname: Index Pattern - Modify
- name: /log/index-patterns/del
cname: Index Pattern - Delete
- name: /dashboards
cname: Dashboard - View
- name: /dashboards/add
cname: Dashboard - Add
- name: /dashboards/put
cname: Dashboard - Modify
- name: /dashboards/del
cname: Dashboard - Delete
- name: /public-dashboards
cname: Dashboard - View Public
- name: alert-events
cname: 告警事件管理
- name: alerting
cname: Alerting
ops:
- "/alert-cur-events"
- "/alert-cur-events/del"
- "/alert-his-events"
- name: /alert-rules
cname: Alerting Rule - View
- name: /alert-rules/add
cname: Alerting Rule - Add
- name: /alert-rules/put
cname: Alerting Rule - Modify
- name: /alert-rules/del
cname: Alerting Rule - Delete
- name: /alert-mutes
cname: Mutting Rule - View
- name: /alert-mutes/add
cname: Mutting Rule - Add
- name: /alert-mutes/put
cname: Mutting Rule - Modify
- name: /alert-mutes/del
cname: Mutting Rule - Delete
- name: /alert-subscribes
cname: Subscribing Rule - View
- name: /alert-subscribes/add
cname: Subscribing Rule - Add
- name: /alert-subscribes/put
cname: Subscribing Rule - Modify
- name: /alert-subscribes/del
cname: Subscribing Rule - Delete
- name: /job-tpls
cname: Self-healing-Script - View
- name: /job-tpls/add
cname: Self-healing-Script - Add
- name: /job-tpls/put
cname: Self-healing-Script - Modify
- name: /job-tpls/del
cname: Self-healing-Script - Delete
- name: /job-tasks
cname: Self-healing-Job - View
- name: /job-tasks/add
cname: Self-healing-Job - Add
- name: /job-tasks/put
cname: Self-healing-Job - Modify
- name: /alert-cur-events
cname: Active Event - View
- name: /alert-cur-events/del
cname: Active Event - Delete
- name: /alert-his-events
cname: Historical Event - View
- name: recording-rules
cname: 记录规则管理
- name: Notification
cname: Notification
ops:
- "/recording-rules"
- "/recording-rules/add"
- "/recording-rules/put"
- "/recording-rules/del"
- name: /notification-rules
cname: Notification Rule - View
- name: /notification-rules/add
cname: Notification Rule - Add
- name: /notification-rules/put
cname: Notification Rule - Modify
- name: /notification-rules/del
cname: Notification Rule - Delete
- name: /notification-channels
cname: Media Type - View
- name: /notification-channels/add
cname: Media Type - Add
- name: /notification-channels/put
cname: Media Type - Modify
- name: /notification-channels/del
cname: Media Type - Delete
- name: /notification-templates
cname: Message Template - View
- name: /notification-templates/add
cname: Message Template - Add
- name: /notification-templates/put
cname: Message Template - Modify
- name: /notification-templates/del
cname: Message Template - Delete
- name: /event-pipelines
cname: Event Pipeline - View
- name: /event-pipelines/add
cname: Event Pipeline - Add
- name: /event-pipelines/put
cname: Event Pipeline - Modify
- name: /event-pipelines/del
cname: Event Pipeline - Delete
- name: /help/notification-settings # 用于控制老版本的通知设置菜单是否展示
cname: Notification Settings - View
- name: /help/notification-tpls # 用于控制老版本的通知模板菜单是否展示
cname: Notification Templates - View
- name: metric
cname: 时序指标
- name: Integrations
cname: Integrations
ops:
- "/metric/explorer"
- "/object/explorer"
- name: /datasources # 用于控制能否看到数据源列表页面的菜单。只有 Admin 才能修改、删除数据源
cname: Data Source - View
- name: /components
cname: Component - View
- name: /components/add
cname: Component - Add
- name: /components/put
cname: Component - Modify
- name: /components/del
cname: Component - Delete
- name: /embedded-products
cname: Embedded Product - View
- name: /embedded-product/add
cname: Embedded Product - Add
- name: /embedded-product/put
cname: Embedded Product - Modify
- name: /embedded-product/delete
cname: Embedded Product - Delete
- name: log
cname: 日志分析
- name: Organization
cname: Organization
ops:
- "/log/explorer"
- "/log/index-patterns"
- name: /users
cname: User - View
- name: /users/add
cname: User - Add
- name: /users/put
cname: User - Modify
- name: /users/del
cname: User - Delete
- name: /user-groups
cname: Team - View
- name: /user-groups/add
cname: Team - Add
- name: /user-groups/put
cname: Team - Modify
- name: /user-groups/del
cname: Team - Delete
- name: /busi-groups
cname: Business Group - View
- name: /busi-groups/add
cname: Business Group - Add
- name: /busi-groups/put
cname: Business Group - Modify
- name: /busi-groups/del
cname: Business Group - Delete
- name: /roles
cname: Role - View
- name: /roles/add
cname: Role - Add
- name: /roles/put
cname: Role - Modify
- name: /roles/del
cname: Role - Delete
- name: targets
cname: 基础设施
- name: System Settings
cname: System Settings
ops:
- "/targets"
- "/targets/add"
- "/targets/put"
- "/targets/del"
- name: /system/site-settings # 仅用于控制能否展示菜单,只有 Admin 才能修改、删除
cname: View Site Settings
- name: /system/variable-settings
cname: View Variable Settings
- name: /system/sso-settings
cname: View SSO Settings
- name: /system/alerting-engines
cname: View Alerting Engines
- name: /system/version
cname: View Product Version
- name: job
cname: 任务管理
ops:
- "/job-tpls"
- "/job-tpls/add"
- "/job-tpls/put"
- "/job-tpls/del"
- "/job-tasks"
- "/job-tasks/add"
- "/job-tasks/put"
- name: user
cname: 用户管理
ops:
- "/users"
- "/user-groups"
- "/user-groups/add"
- "/user-groups/put"
- "/user-groups/del"
- name: busi-groups
cname: 业务分组管理
ops:
- "/busi-groups"
- "/busi-groups/add"
- "/busi-groups/put"
- "/busi-groups/del"
- name: system
cname: 系统信息
ops:
- "/help/version"
- "/help/servers"
- "/help/source"
- "/help/sso"
- "/help/notification-tpls"
- "/help/notification-settings"
- "/help/migrate"
`
)

View File

@@ -25,4 +25,40 @@ var Plugins = []Plugin{
Type: "tdengine",
TypeName: "TDengine",
},
{
Id: 5,
Category: "logging",
Type: "ck",
TypeName: "ClickHouse",
},
{
Id: 6,
Category: "timeseries",
Type: "mysql",
TypeName: "MySQL",
},
{
Id: 7,
Category: "timeseries",
Type: "pgsql",
TypeName: "PostgreSQL",
},
{
Id: 8,
Category: "logging",
Type: "doris",
TypeName: "Doris",
},
{
Id: 9,
Category: "logging",
Type: "opensearch",
TypeName: "OpenSearch",
},
{
Id: 10,
Category: "logging",
Type: "victorialogs",
TypeName: "VictoriaLogs",
},
}

View File

@@ -0,0 +1,105 @@
package rsa
import (
"os"
"github.com/ccfos/nightingale/v6/models"
"github.com/ccfos/nightingale/v6/pkg/ctx"
"github.com/ccfos/nightingale/v6/pkg/httpx"
"github.com/ccfos/nightingale/v6/pkg/secu"
"github.com/pkg/errors"
"github.com/toolkits/pkg/file"
"github.com/toolkits/pkg/logger"
)
func InitRSAConfig(ctx *ctx.Context, rsaConfig *httpx.RSAConfig) error {
// 1.Load RSA keys from Database
rsaPassWord, err := models.ConfigsGet(ctx, models.RSA_PASSWORD)
if err != nil {
return errors.WithMessagef(err, "cannot query config(%s)", models.RSA_PASSWORD)
}
privateKeyVal, err := models.ConfigsGet(ctx, models.RSA_PRIVATE_KEY)
if err != nil {
return errors.WithMessagef(err, "cannot query config(%s)", models.RSA_PRIVATE_KEY)
}
publicKeyVal, err := models.ConfigsGet(ctx, models.RSA_PUBLIC_KEY)
if err != nil {
return errors.WithMessagef(err, "cannot query config(%s)", models.RSA_PUBLIC_KEY)
}
if rsaPassWord != "" && privateKeyVal != "" && publicKeyVal != "" {
rsaConfig.RSAPassWord = rsaPassWord
rsaConfig.RSAPrivateKey = []byte(privateKeyVal)
rsaConfig.RSAPublicKey = []byte(publicKeyVal)
return nil
}
// 2.Read RSA configuration from file if exists
if file.IsExist(rsaConfig.RSAPrivateKeyPath) && file.IsExist(rsaConfig.RSAPublicKeyPath) {
//password already read from config
rsaConfig.RSAPrivateKey, rsaConfig.RSAPublicKey, err = readConfigFile(rsaConfig)
if err != nil {
return errors.WithMessage(err, "failed to read rsa config from file")
}
return nil
}
// 3.Generate RSA keys if not exist
rsaConfig.RSAPassWord, rsaConfig.RSAPrivateKey, rsaConfig.RSAPublicKey, err = initRSAKeyPairs(ctx, rsaConfig.RSAPassWord)
if err != nil {
return errors.WithMessage(err, "failed to generate rsa key pair")
}
return nil
}
func initRSAKeyPairs(ctx *ctx.Context, rsaPassWord string) (password string, privateByte, publicByte []byte, err error) {
// Generate RSA keys
// Generate RSA password
if rsaPassWord != "" {
logger.Debug("Using existing RSA password")
password = rsaPassWord
err = models.ConfigsSet(ctx, models.RSA_PASSWORD, password)
if err != nil {
err = errors.WithMessagef(err, "failed to set config(%s)", models.RSA_PASSWORD)
return
}
} else {
password, err = models.InitRSAPassWord(ctx)
if err != nil {
err = errors.WithMessage(err, "failed to generate rsa password")
return
}
}
privateByte, publicByte, err = secu.GenerateRsaKeyPair(password)
if err != nil {
err = errors.WithMessage(err, "failed to generate rsa key pair")
return
}
// Save generated RSA keys
err = models.ConfigsSet(ctx, models.RSA_PRIVATE_KEY, string(privateByte))
if err != nil {
err = errors.WithMessagef(err, "failed to set config(%s)", models.RSA_PRIVATE_KEY)
return
}
err = models.ConfigsSet(ctx, models.RSA_PUBLIC_KEY, string(publicByte))
if err != nil {
err = errors.WithMessagef(err, "failed to set config(%s)", models.RSA_PUBLIC_KEY)
return
}
return
}
func readConfigFile(rsaConfig *httpx.RSAConfig) (privateBuf, publicBuf []byte, err error) {
publicBuf, err = os.ReadFile(rsaConfig.RSAPublicKeyPath)
if err != nil {
err = errors.WithMessagef(err, "could not read RSAPublicKeyPath %q", rsaConfig.RSAPublicKeyPath)
return
}
privateBuf, err = os.ReadFile(rsaConfig.RSAPrivateKeyPath)
if err != nil {
err = errors.WithMessagef(err, "could not read RSAPrivateKeyPath %q", rsaConfig.RSAPrivateKeyPath)
}
return
}

View File

@@ -2,34 +2,43 @@ package center
import (
"context"
"encoding/json"
"fmt"
"github.com/ccfos/nightingale/v6/dscache"
"github.com/toolkits/pkg/logger"
"github.com/ccfos/nightingale/v6/alert"
"github.com/ccfos/nightingale/v6/alert/astats"
"github.com/ccfos/nightingale/v6/alert/dispatch"
"github.com/ccfos/nightingale/v6/alert/process"
alertrt "github.com/ccfos/nightingale/v6/alert/router"
"github.com/ccfos/nightingale/v6/center/cconf"
"github.com/ccfos/nightingale/v6/center/cstats"
"github.com/ccfos/nightingale/v6/center/cconf/rsa"
"github.com/ccfos/nightingale/v6/center/integration"
"github.com/ccfos/nightingale/v6/center/metas"
centerrt "github.com/ccfos/nightingale/v6/center/router"
"github.com/ccfos/nightingale/v6/center/sso"
"github.com/ccfos/nightingale/v6/conf"
"github.com/ccfos/nightingale/v6/cron"
"github.com/ccfos/nightingale/v6/dumper"
"github.com/ccfos/nightingale/v6/memsto"
"github.com/ccfos/nightingale/v6/models"
"github.com/ccfos/nightingale/v6/models/migrate"
"github.com/ccfos/nightingale/v6/pkg/ctx"
"github.com/ccfos/nightingale/v6/pkg/flashduty"
"github.com/ccfos/nightingale/v6/pkg/httpx"
"github.com/ccfos/nightingale/v6/pkg/i18nx"
"github.com/ccfos/nightingale/v6/pkg/logx"
"github.com/ccfos/nightingale/v6/pkg/macros"
"github.com/ccfos/nightingale/v6/pkg/version"
"github.com/ccfos/nightingale/v6/prom"
"github.com/ccfos/nightingale/v6/pushgw/idents"
pushgwrt "github.com/ccfos/nightingale/v6/pushgw/router"
"github.com/ccfos/nightingale/v6/pushgw/writer"
"github.com/ccfos/nightingale/v6/storage"
"github.com/ccfos/nightingale/v6/tdengine"
alertrt "github.com/ccfos/nightingale/v6/alert/router"
centerrt "github.com/ccfos/nightingale/v6/center/router"
pushgwrt "github.com/ccfos/nightingale/v6/pushgw/router"
"github.com/flashcatcloud/ibex/src/cmd/ibex"
)
func Initialize(configDir string, cryptoKey string) (func(), error) {
@@ -43,71 +52,180 @@ func Initialize(configDir string, cryptoKey string) (func(), error) {
cconf.MergeOperationConf()
if config.Alert.Heartbeat.EngineName == "" {
config.Alert.Heartbeat.EngineName = "default"
}
logxClean, err := logx.Init(config.Log)
if err != nil {
return nil, err
}
i18nx.Init(configDir)
cstats.Init()
flashduty.Init(config.Center.FlashDuty)
db, err := storage.New(config.DB)
if err != nil {
return nil, err
}
ctx := ctx.NewContext(context.Background(), db, true)
models.InitRoot(ctx)
migrate.Migrate(db)
httpx.InitRSAConfig(&config.HTTP.RSA)
isRootInit := models.InitRoot(ctx)
redis, err := storage.NewRedis(config.Redis)
config.HTTP.JWTAuth.SigningKey = models.InitJWTSigningKey(ctx)
err = rsa.InitRSAConfig(ctx, &config.HTTP.RSA)
if err != nil {
return nil, err
}
go integration.Init(ctx, config.Center.BuiltinIntegrationsDir)
var redis storage.Redis
redis, err = storage.NewRedis(config.Redis)
if err != nil {
return nil, err
}
metas := metas.New(redis)
idents := idents.New(ctx)
idents := idents.New(ctx, redis, config.Pushgw)
syncStats := memsto.NewSyncStats()
alertStats := astats.NewSyncStats()
sso := sso.Init(config.Center, ctx)
if config.Center.MigrateBusiGroupLabel || models.CanMigrateBg(ctx) {
models.MigrateBg(ctx, config.Pushgw.BusiGroupLabelKey)
}
if models.CanMigrateEP(ctx) {
models.MigrateEP(ctx)
}
// 初始化 siteUrl如果为空则设置默认值
InitSiteUrl(ctx, config.Alert.Heartbeat.IP, config.HTTP.Port)
configCache := memsto.NewConfigCache(ctx, syncStats, config.HTTP.RSA.RSAPrivateKey, config.HTTP.RSA.RSAPassWord)
busiGroupCache := memsto.NewBusiGroupCache(ctx, syncStats)
targetCache := memsto.NewTargetCache(ctx, syncStats, redis)
dsCache := memsto.NewDatasourceCache(ctx, syncStats)
alertMuteCache := memsto.NewAlertMuteCache(ctx, syncStats)
alertRuleCache := memsto.NewAlertRuleCache(ctx, syncStats)
notifyConfigCache := memsto.NewNotifyConfigCache(ctx)
notifyConfigCache := memsto.NewNotifyConfigCache(ctx, configCache)
userCache := memsto.NewUserCache(ctx, syncStats)
userGroupCache := memsto.NewUserGroupCache(ctx, syncStats)
taskTplCache := memsto.NewTaskTplCache(ctx)
configCvalCache := memsto.NewCvalCache(ctx, syncStats)
notifyRuleCache := memsto.NewNotifyRuleCache(ctx, syncStats)
notifyChannelCache := memsto.NewNotifyChannelCache(ctx, syncStats)
messageTemplateCache := memsto.NewMessageTemplateCache(ctx, syncStats)
userTokenCache := memsto.NewUserTokenCache(ctx, syncStats)
promClients := prom.NewPromClient(ctx, config.Alert.Heartbeat)
tdengineClients := tdengine.NewTdengineClient(ctx, config.Alert.Heartbeat)
sso := sso.Init(config.Center, ctx, configCache)
promClients := prom.NewPromClient(ctx)
dispatch.InitRegisterQueryFunc(promClients)
externalProcessors := process.NewExternalProcessors()
alert.Start(config.Alert, config.Pushgw, syncStats, alertStats, externalProcessors, targetCache, busiGroupCache, alertMuteCache, alertRuleCache, notifyConfigCache, dsCache, ctx, promClients, tdengineClients, userCache, userGroupCache)
macros.RegisterMacro(macros.MacroInVain)
dscache.Init(ctx, false)
alert.Start(config.Alert, config.Pushgw, syncStats, alertStats, externalProcessors, targetCache, busiGroupCache, alertMuteCache, alertRuleCache, notifyConfigCache, taskTplCache, dsCache, ctx, promClients, userCache, userGroupCache, notifyRuleCache, notifyChannelCache, messageTemplateCache, configCvalCache)
writers := writer.NewWriters(config.Pushgw)
go version.GetGithubVersion()
alertrtRouter := alertrt.New(config.HTTP, config.Alert, alertMuteCache, targetCache, busiGroupCache, alertStats, ctx, externalProcessors)
centerRouter := centerrt.New(config.HTTP, config.Center, cconf.Operations, dsCache, notifyConfigCache, promClients, tdengineClients,
redis, sso, ctx, metas, idents, targetCache, userCache, userGroupCache)
pushgwRouter := pushgwrt.New(config.HTTP, config.Pushgw, targetCache, busiGroupCache, idents, writers, ctx)
go cron.CleanNotifyRecord(ctx, config.Center.CleanNotifyRecordDay)
r := httpx.GinEngine(config.Global.RunMode, config.HTTP)
alertrtRouter := alertrt.New(config.HTTP, config.Alert, alertMuteCache, targetCache, busiGroupCache, alertStats, ctx, externalProcessors)
centerRouter := centerrt.New(config.HTTP, config.Center, config.Alert, config.Ibex,
cconf.Operations, dsCache, notifyConfigCache, promClients,
redis, sso, ctx, metas, idents, targetCache, userCache, userGroupCache, userTokenCache)
pushgwRouter := pushgwrt.New(config.HTTP, config.Pushgw, config.Alert, targetCache, busiGroupCache, idents, metas, writers, ctx)
r := httpx.GinEngine(config.Global.RunMode, config.HTTP, configCvalCache.PrintBodyPaths, configCvalCache.PrintAccessLog)
centerRouter.Config(r)
alertrtRouter.Config(r)
pushgwRouter.Config(r)
dumper.ConfigRouter(r)
if config.Ibex.Enable {
migrate.MigrateIbexTables(db)
ibex.ServerStart(true, db, redis, config.HTTP.APIForService.BasicAuth, config.Alert.Heartbeat, &config.CenterApi, r, centerRouter, config.Ibex, config.HTTP.Port)
}
httpClean := httpx.Init(config.HTTP, r)
fmt.Printf("please view n9e at http://%v:%v\n", config.Alert.Heartbeat.IP, config.HTTP.Port)
if isRootInit {
fmt.Println("username/password: root/root.2020")
}
return func() {
logxClean()
httpClean()
}, nil
}
// initSiteUrl 初始化 site_info 中的 site_url如果为空则使用服务器IP和端口设置默认值
func InitSiteUrl(ctx *ctx.Context, serverIP string, serverPort int) {
// 构造默认的 SiteUrl
defaultSiteUrl := fmt.Sprintf("http://%s:%d", serverIP, serverPort)
// 获取现有的 site_info 配置
siteInfoStr, err := models.ConfigsGet(ctx, "site_info")
if err != nil {
logger.Errorf("failed to get site_info config: %v", err)
return
}
// 如果 site_info 不存在,创建新的
if siteInfoStr == "" {
newSiteInfo := memsto.SiteInfo{
SiteUrl: defaultSiteUrl,
}
siteInfoBytes, err := json.Marshal(newSiteInfo)
if err != nil {
logger.Errorf("failed to marshal site_info: %v", err)
return
}
err = models.ConfigsSet(ctx, "site_info", string(siteInfoBytes))
if err != nil {
logger.Errorf("failed to set site_info: %v", err)
return
}
logger.Infof("initialized site_url with default value: %s", defaultSiteUrl)
return
}
// 检查现有的 site_info 中的 site_url 字段
var existingSiteInfo memsto.SiteInfo
err = json.Unmarshal([]byte(siteInfoStr), &existingSiteInfo)
if err != nil {
logger.Errorf("failed to unmarshal site_info: %v", err)
return
}
// 如果 site_url 已经有值,则不需要初始化
if existingSiteInfo.SiteUrl != "" {
return
}
// 设置 site_url
existingSiteInfo.SiteUrl = defaultSiteUrl
siteInfoBytes, err := json.Marshal(existingSiteInfo)
if err != nil {
logger.Errorf("failed to marshal updated site_info: %v", err)
return
}
err = models.ConfigsSet(ctx, "site_info", string(siteInfoBytes))
if err != nil {
logger.Errorf("failed to update site_info: %v", err)
return
}
logger.Infof("initialized site_url with default value: %s", defaultSiteUrl)
}

View File

@@ -6,40 +6,49 @@ import (
"github.com/prometheus/client_golang/prometheus"
)
const Service = "n9e-center"
const (
namespace = "n9e"
subsystem = "center"
)
var (
labels = []string{"service", "code", "path", "method"}
uptime = prometheus.NewCounterVec(
uptime = prometheus.NewCounter(
prometheus.CounterOpts{
Name: "uptime",
Help: "HTTP service uptime.",
}, []string{"service"},
)
RequestCounter = prometheus.NewCounterVec(
prometheus.CounterOpts{
Name: "http_request_count_total",
Help: "Total number of HTTP requests made.",
}, labels,
Namespace: namespace,
Subsystem: subsystem,
Name: "uptime",
Help: "HTTP service uptime.",
},
)
RequestDuration = prometheus.NewHistogramVec(
prometheus.HistogramOpts{
Buckets: []float64{.01, .1, 1, 10},
Name: "http_request_duration_seconds",
Help: "HTTP request latencies in seconds.",
}, labels,
Namespace: namespace,
Subsystem: subsystem,
Buckets: prometheus.DefBuckets,
Name: "http_request_duration_seconds",
Help: "HTTP request latencies in seconds.",
}, []string{"code", "path", "method"},
)
RedisOperationLatency = prometheus.NewHistogramVec(
prometheus.HistogramOpts{
Namespace: namespace,
Subsystem: subsystem,
Name: "redis_operation_latency_seconds",
Help: "Histogram of latencies for Redis operations",
Buckets: []float64{.005, .01, .025, .05, .1, .25, .5, 1, 2.5, 5},
},
[]string{"operation", "status"},
)
)
func Init() {
func init() {
// Register the summary and the histogram with Prometheus's default registry.
prometheus.MustRegister(
uptime,
RequestCounter,
RequestDuration,
RedisOperationLatency,
)
go recordUptime()
@@ -48,6 +57,6 @@ func Init() {
// recordUptime increases service uptime per second.
func recordUptime() {
for range time.Tick(time.Second) {
uptime.WithLabelValues(Service).Inc()
uptime.Inc()
}
}

652
center/integration/init.go Normal file
View File

@@ -0,0 +1,652 @@
package integration
import (
"encoding/json"
"path"
"sort"
"strings"
"time"
"github.com/ccfos/nightingale/v6/models"
"github.com/ccfos/nightingale/v6/pkg/ctx"
"github.com/pkg/errors"
"github.com/toolkits/pkg/container/set"
"github.com/toolkits/pkg/file"
"github.com/toolkits/pkg/logger"
"github.com/toolkits/pkg/runner"
)
const SYSTEM = "system"
var BuiltinPayloadInFile *BuiltinPayloadInFileType
type BuiltinPayloadInFileType struct {
Data map[uint64]map[string]map[string][]*models.BuiltinPayload // map[component_id]map[type]map[cate][]*models.BuiltinPayload
IndexData map[int64]*models.BuiltinPayload // map[uuid]payload
BuiltinMetrics map[string]*models.BuiltinMetric
}
func Init(ctx *ctx.Context, builtinIntegrationsDir string) {
BuiltinPayloadInFile = NewBuiltinPayloadInFileType()
err := models.InitBuiltinPayloads(ctx)
if err != nil {
logger.Warning("init old builtinPayloads fail ", err)
return
}
if res, err := models.ConfigsSelectByCkey(ctx, "disable_integration_init"); err != nil {
logger.Error("fail to get value 'disable_integration_init' from configs", err)
return
} else if len(res) != 0 {
logger.Info("disable_integration_init is set, skip integration init")
return
}
fp := builtinIntegrationsDir
if fp == "" {
fp = path.Join(runner.Cwd, "integrations")
}
// var fileList []string
dirList, err := file.DirsUnder(fp)
if err != nil {
logger.Warning("read builtin component dir fail ", err)
return
}
for _, dir := range dirList {
// components icon
componentDir := fp + "/" + dir
component := models.BuiltinComponent{
Ident: dir,
}
// get logo name
// /api/n9e/integrations/icon/AliYun/aliyun.png
files, err := file.FilesUnder(componentDir + "/icon")
if err == nil && len(files) > 0 {
component.Logo = "/api/n9e/integrations/icon/" + component.Ident + "/" + files[0]
} else if err != nil {
logger.Warningf("read builtin component icon dir fail %s %v", component.Ident, err)
}
// get description
files, err = file.FilesUnder(componentDir + "/markdown")
if err == nil && len(files) > 0 {
var readmeFile string
for _, file := range files {
if strings.HasSuffix(strings.ToLower(file), "md") {
readmeFile = componentDir + "/markdown/" + file
break
}
}
if readmeFile != "" {
component.Readme, _ = file.ReadString(readmeFile)
}
} else if err != nil {
logger.Warningf("read builtin component markdown dir fail %s %v", component.Ident, err)
}
exists, _ := models.BuiltinComponentExists(ctx, &component)
if !exists {
err = component.Add(ctx, SYSTEM)
if err != nil {
logger.Warning("add builtin component fail ", component, err)
continue
}
} else {
old, err := models.BuiltinComponentGet(ctx, "ident = ?", component.Ident)
if err != nil {
logger.Warning("get builtin component fail ", component, err)
continue
}
if old == nil {
logger.Warning("get builtin component nil ", component)
continue
}
if old.UpdatedBy == SYSTEM {
now := time.Now().Unix()
old.CreatedAt = now
old.UpdatedAt = now
old.Readme = component.Readme
old.UpdatedBy = SYSTEM
err = models.DB(ctx).Model(old).Select("*").Updates(old).Error
if err != nil {
logger.Warning("update builtin component fail ", old, err)
}
}
component.ID = old.ID
}
// delete uuid is empty
err = models.DB(ctx).Exec("delete from builtin_payloads where uuid = 0 and type != 'collect' and (updated_by = 'system' or updated_by = '')").Error
if err != nil {
logger.Warning("delete builtin payloads fail ", err)
}
// delete builtin metrics uuid is empty
err = models.DB(ctx).Exec("delete from builtin_metrics where uuid = 0 and (updated_by = 'system' or updated_by = '')").Error
if err != nil {
logger.Warning("delete builtin metrics fail ", err)
}
// 删除 uuid%1000 不为 0 uuid > 1000000000000000000 且 type 为 dashboard 的记录
err = models.DB(ctx).Exec("delete from builtin_payloads where uuid%1000 != 0 and uuid > 1000000000000000000 and type = 'dashboard' and updated_by = 'system'").Error
if err != nil {
logger.Warning("delete builtin payloads fail ", err)
}
// alerts
files, err = file.FilesUnder(componentDir + "/alerts")
if err == nil && len(files) > 0 {
for _, f := range files {
fp := componentDir + "/alerts/" + f
bs, err := file.ReadBytes(fp)
if err != nil {
logger.Warning("read builtin component alerts file fail ", f, err)
continue
}
alerts := []models.AlertRule{}
err = json.Unmarshal(bs, &alerts)
if err != nil {
logger.Warning("parse builtin component alerts file fail ", f, err)
continue
}
newAlerts := []models.AlertRule{}
for _, alert := range alerts {
if alert.UUID == 0 {
time.Sleep(time.Microsecond)
alert.UUID = time.Now().UnixMicro()
}
newAlerts = append(newAlerts, alert)
content, err := json.Marshal(alert)
if err != nil {
logger.Warning("marshal builtin alert fail ", alert, err)
continue
}
cate := strings.Replace(f, ".json", "", -1)
builtinAlert := models.BuiltinPayload{
ComponentID: component.ID,
Type: "alert",
Cate: cate,
Name: alert.Name,
Tags: alert.AppendTags,
Content: string(content),
UUID: alert.UUID,
ID: alert.UUID,
CreatedBy: SYSTEM,
UpdatedBy: SYSTEM,
}
BuiltinPayloadInFile.AddBuiltinPayload(&builtinAlert)
}
}
}
// dashboards
files, err = file.FilesUnder(componentDir + "/dashboards")
if err == nil && len(files) > 0 {
for _, f := range files {
fp := componentDir + "/dashboards/" + f
bs, err := file.ReadBytes(fp)
if err != nil {
logger.Warning("read builtin component dashboards file fail ", f, err)
continue
}
dashboard := BuiltinBoard{}
err = json.Unmarshal(bs, &dashboard)
if err != nil {
logger.Warning("parse builtin component dashboards file fail ", f, err)
continue
}
if dashboard.UUID == 0 {
time.Sleep(time.Microsecond)
dashboard.UUID = time.Now().UnixMicro()
// 补全文件中的 uuid
bs, err = json.MarshalIndent(dashboard, "", " ")
if err != nil {
logger.Warning("marshal builtin dashboard fail ", dashboard, err)
continue
}
_, err = file.WriteBytes(fp, bs)
if err != nil {
logger.Warning("write builtin dashboard file fail ", f, err)
}
}
content, err := json.Marshal(dashboard)
if err != nil {
logger.Warning("marshal builtin dashboard fail ", dashboard, err)
continue
}
builtinDashboard := models.BuiltinPayload{
ComponentID: component.ID,
Type: "dashboard",
Cate: "",
Name: dashboard.Name,
Tags: dashboard.Tags,
Note: dashboard.Note,
Content: string(content),
UUID: dashboard.UUID,
ID: dashboard.UUID,
CreatedBy: SYSTEM,
UpdatedBy: SYSTEM,
}
BuiltinPayloadInFile.AddBuiltinPayload(&builtinDashboard)
}
} else if err != nil {
logger.Warningf("read builtin component dash dir fail %s %v", component.Ident, err)
}
// metrics
files, err = file.FilesUnder(componentDir + "/metrics")
if err == nil && len(files) > 0 {
for _, f := range files {
fp := componentDir + "/metrics/" + f
bs, err := file.ReadBytes(fp)
if err != nil {
logger.Warning("read builtin component metrics file fail", f, err)
continue
}
metrics := []models.BuiltinMetric{}
err = json.Unmarshal(bs, &metrics)
if err != nil {
logger.Warning("parse builtin component metrics file fail", f, err)
continue
}
for _, metric := range metrics {
if metric.UUID == 0 {
time.Sleep(time.Microsecond)
metric.UUID = time.Now().UnixMicro()
}
metric.ID = metric.UUID
metric.CreatedBy = SYSTEM
metric.UpdatedBy = SYSTEM
BuiltinPayloadInFile.BuiltinMetrics[metric.Expression] = &metric
}
}
} else if err != nil {
logger.Warningf("read builtin component metrics dir fail %s %v", component.Ident, err)
}
}
}
type BuiltinBoard struct {
Id int64 `json:"id" gorm:"primaryKey"`
GroupId int64 `json:"group_id"`
Name string `json:"name"`
Ident string `json:"ident"`
Tags string `json:"tags"`
Note string `json:"note"`
CreateAt int64 `json:"create_at"`
CreateBy string `json:"create_by"`
UpdateAt int64 `json:"update_at"`
UpdateBy string `json:"update_by"`
Configs interface{} `json:"configs" gorm:"-"`
Public int `json:"public"` // 0: false, 1: true
PublicCate int `json:"public_cate"` // 0: anonymous, 1: login, 2: busi
Bgids []int64 `json:"bgids" gorm:"-"`
BuiltIn int `json:"built_in"` // 0: false, 1: true
Hide int `json:"hide"` // 0: false, 1: true
UUID int64 `json:"uuid"`
}
func NewBuiltinPayloadInFileType() *BuiltinPayloadInFileType {
return &BuiltinPayloadInFileType{
Data: make(map[uint64]map[string]map[string][]*models.BuiltinPayload),
IndexData: make(map[int64]*models.BuiltinPayload),
BuiltinMetrics: make(map[string]*models.BuiltinMetric),
}
}
func (b *BuiltinPayloadInFileType) AddBuiltinPayload(bp *models.BuiltinPayload) {
if _, exists := b.Data[bp.ComponentID]; !exists {
b.Data[bp.ComponentID] = make(map[string]map[string][]*models.BuiltinPayload)
}
bpInType := b.Data[bp.ComponentID]
if _, exists := bpInType[bp.Type]; !exists {
bpInType[bp.Type] = make(map[string][]*models.BuiltinPayload)
}
bpInCate := bpInType[bp.Type]
if _, exists := bpInCate[bp.Cate]; !exists {
bpInCate[bp.Cate] = make([]*models.BuiltinPayload, 0)
}
bpInCate[bp.Cate] = append(bpInCate[bp.Cate], bp)
b.IndexData[bp.UUID] = bp
}
func (b *BuiltinPayloadInFileType) GetComponentIdentByCate(typ, cate string) string {
for _, source := range b.Data {
if source == nil {
continue
}
typeMap, exists := source[typ]
if !exists {
continue
}
payloads, exists := typeMap[cate]
if !exists {
continue
}
if len(payloads) > 0 {
return payloads[0].Component
}
}
return ""
}
func (b *BuiltinPayloadInFileType) GetBuiltinPayload(typ, cate, query string, componentId uint64) ([]*models.BuiltinPayload, error) {
var result []*models.BuiltinPayload
source := b.Data[componentId]
if source == nil {
return nil, nil
}
typeMap, exists := source[typ]
if !exists {
return nil, nil
}
if cate != "" {
payloads, exists := typeMap[cate]
if !exists {
return nil, nil
}
result = append(result, filterByQuery(payloads, query)...)
} else {
for _, payloads := range typeMap {
result = append(result, filterByQuery(payloads, query)...)
}
}
if len(result) > 0 {
sort.Slice(result, func(i, j int) bool {
return result[i].Name < result[j].Name
})
}
return result, nil
}
func (b *BuiltinPayloadInFileType) GetBuiltinPayloadCates(typ string, componentId uint64) ([]string, error) {
var result []string
source := b.Data[componentId]
if source == nil {
return result, nil
}
typeData := source[typ]
if typeData == nil {
return result, nil
}
for cate := range typeData {
result = append(result, cate)
}
sort.Strings(result)
return result, nil
}
func filterByQuery(payloads []*models.BuiltinPayload, query string) []*models.BuiltinPayload {
if query == "" {
return payloads
}
queryLower := strings.ToLower(query)
var filtered []*models.BuiltinPayload
for _, p := range payloads {
if strings.Contains(strings.ToLower(p.Name), queryLower) || strings.Contains(strings.ToLower(p.Tags), queryLower) {
filtered = append(filtered, p)
}
}
return filtered
}
func (b *BuiltinPayloadInFileType) BuiltinMetricGets(metricsInDB []*models.BuiltinMetric, lang, collector, typ, query, unit string, limit, offset int) ([]*models.BuiltinMetric, int, error) {
var filteredMetrics []*models.BuiltinMetric
expressionSet := set.NewStringSet()
builtinMetricsByDB := convertBuiltinMetricByDB(metricsInDB)
builtinMetricsMap := make(map[string]*models.BuiltinMetric)
for expression, metric := range builtinMetricsByDB {
builtinMetricsMap[expression] = metric
}
for expression, metric := range b.BuiltinMetrics {
builtinMetricsMap[expression] = metric
}
for _, metric := range builtinMetricsMap {
if !applyFilter(metric, collector, typ, query, unit) {
continue
}
// Skip if expression is already in db cache
// NOTE: 忽略重复的expression特别的在旧版本中用户可能已经创建了重复的metrics需要覆盖掉ByFile中相同的Metrics
// NOTE: Ignore duplicate expressions, especially in the old version, users may have created duplicate metrics,
if expressionSet.Exists(metric.Expression) {
continue
}
// Add db expression in set.
expressionSet.Add(metric.Expression)
// Apply language
trans, err := getTranslationWithLanguage(metric, lang)
if err != nil {
logger.Errorf("Error getting translation for metric %s: %v", metric.Name, err)
continue // Skip if translation not found
}
metric.Name = trans.Name
metric.Note = trans.Note
filteredMetrics = append(filteredMetrics, metric)
}
// Sort metrics
sort.Slice(filteredMetrics, func(i, j int) bool {
if filteredMetrics[i].Collector != filteredMetrics[j].Collector {
return filteredMetrics[i].Collector < filteredMetrics[j].Collector
}
if filteredMetrics[i].Typ != filteredMetrics[j].Typ {
return filteredMetrics[i].Typ < filteredMetrics[j].Typ
}
return filteredMetrics[i].Expression < filteredMetrics[j].Expression
})
totalCount := len(filteredMetrics)
// Validate parameters
if offset < 0 {
offset = 0
}
if limit < 0 {
limit = 0
}
// Handle edge cases
if offset >= totalCount || limit == 0 {
return []*models.BuiltinMetric{}, totalCount, nil
}
// Apply pagination
end := offset + limit
if end > totalCount {
end = totalCount
}
return filteredMetrics[offset:end], totalCount, nil
}
func (b *BuiltinPayloadInFileType) BuiltinMetricTypes(lang, collector, query string) []string {
typeSet := set.NewStringSet()
for _, metric := range b.BuiltinMetrics {
if !applyFilter(metric, collector, "", query, "") {
continue
}
typeSet.Add(metric.Typ)
}
return typeSet.ToSlice()
}
func (b *BuiltinPayloadInFileType) BuiltinMetricCollectors(lang, typ, query string) []string {
collectorSet := set.NewStringSet()
for _, metric := range b.BuiltinMetrics {
if !applyFilter(metric, "", typ, query, "") {
continue
}
collectorSet.Add(metric.Collector)
}
return collectorSet.ToSlice()
}
func applyFilter(metric *models.BuiltinMetric, collector, typ, query, unit string) bool {
if collector != "" && collector != metric.Collector {
return false
}
if typ != "" && typ != metric.Typ {
return false
}
if unit != "" && !containsUnit(unit, metric.Unit) {
return false
}
if query != "" && !applyQueryFilter(metric, query) {
return false
}
return true
}
func containsUnit(unit, metricUnit string) bool {
us := strings.Split(unit, ",")
for _, u := range us {
if u == metricUnit {
return true
}
}
return false
}
func applyQueryFilter(metric *models.BuiltinMetric, query string) bool {
qs := strings.Split(query, " ")
for _, q := range qs {
if strings.HasPrefix(q, "-") {
q = strings.TrimPrefix(q, "-")
if strings.Contains(metric.Name, q) || strings.Contains(metric.Note, q) || strings.Contains(metric.Expression, q) {
return false
}
} else {
if !strings.Contains(metric.Name, q) && !strings.Contains(metric.Note, q) && !strings.Contains(metric.Expression, q) {
return false
}
}
}
return true
}
func getTranslationWithLanguage(bm *models.BuiltinMetric, lang string) (*models.Translation, error) {
var defaultTranslation *models.Translation
for _, t := range bm.Translation {
if t.Lang == lang {
return &t, nil
}
if t.Lang == "en_US" {
defaultTranslation = &t
}
}
if defaultTranslation != nil {
return defaultTranslation, nil
}
return nil, errors.Errorf("translation not found for metric %s", bm.Name)
}
func convertBuiltinMetricByDB(metricsInDB []*models.BuiltinMetric) map[string]*models.BuiltinMetric {
builtinMetricsByDB := make(map[string]*models.BuiltinMetric)
builtinMetricsByDBList := make(map[string][]*models.BuiltinMetric)
for _, metric := range metricsInDB {
builtinMetrics, ok := builtinMetricsByDBList[metric.Expression]
if !ok {
builtinMetrics = []*models.BuiltinMetric{}
}
builtinMetrics = append(builtinMetrics, metric)
builtinMetricsByDBList[metric.Expression] = builtinMetrics
}
for expression, builtinMetrics := range builtinMetricsByDBList {
if len(builtinMetrics) == 0 {
continue
}
// NOTE: 为兼容旧版本用户已经创建的 metrics同时将修改 metrics 收敛到同一个记录上,
// 我们选择使用 expression 相同但是 id 最小的 metric 记录作为主要的 Metric。
sort.Slice(builtinMetrics, func(i, j int) bool {
return builtinMetrics[i].ID < builtinMetrics[j].ID
})
currentBuiltinMetric := builtinMetrics[0]
// User has no customized translation, so we can merge it
if len(currentBuiltinMetric.Translation) == 0 {
translationMap := make(map[string]models.Translation)
for _, bm := range builtinMetrics {
for _, t := range getDefaultTranslation(bm) {
translationMap[t.Lang] = t
}
}
currentBuiltinMetric.Translation = make([]models.Translation, 0, len(translationMap))
for _, t := range translationMap {
currentBuiltinMetric.Translation = append(currentBuiltinMetric.Translation, t)
}
}
builtinMetricsByDB[expression] = currentBuiltinMetric
}
return builtinMetricsByDB
}
func getDefaultTranslation(bm *models.BuiltinMetric) []models.Translation {
if len(bm.Translation) != 0 {
return bm.Translation
}
return []models.Translation{{
Lang: bm.Lang,
Name: bm.Name,
Note: bm.Note,
}}
}

View File

@@ -2,9 +2,11 @@ package metas
import (
"context"
"encoding/json"
"sync"
"time"
"github.com/ccfos/nightingale/v6/center/cstats"
"github.com/ccfos/nightingale/v6/models"
"github.com/ccfos/nightingale/v6/storage"
@@ -90,15 +92,49 @@ func (s *Set) updateMeta(items map[string]models.HostMeta) {
}
func (s *Set) updateTargets(m map[string]models.HostMeta) error {
if s.redis == nil {
logger.Warningf("redis is nil")
return nil
}
count := int64(len(m))
if count == 0 {
return nil
}
newMap := make(map[string]interface{}, count)
extendMap := make(map[string]interface{})
for ident, meta := range m {
if meta.ExtendInfo != nil {
extendMeta := meta.ExtendInfo
meta.ExtendInfo = make(map[string]interface{})
extendMetaStr, err := json.Marshal(extendMeta)
if err != nil {
return err
}
extendMap[models.WrapExtendIdent(ident)] = extendMetaStr
}
newMap[models.WrapIdent(ident)] = meta
}
start := time.Now()
err := storage.MSet(context.Background(), s.redis, newMap)
if err != nil {
cstats.RedisOperationLatency.WithLabelValues("mset_target_meta", "fail").Observe(time.Since(start).Seconds())
return err
} else {
cstats.RedisOperationLatency.WithLabelValues("mset_target_meta", "success").Observe(time.Since(start).Seconds())
}
if len(extendMap) > 0 {
err = storage.MSet(context.Background(), s.redis, extendMap)
if err != nil {
cstats.RedisOperationLatency.WithLabelValues("mset_target_extend", "fail").Observe(time.Since(start).Seconds())
return err
} else {
cstats.RedisOperationLatency.WithLabelValues("mset_target_extend", "success").Observe(time.Since(start).Seconds())
}
}
return err
}

View File

@@ -8,12 +8,15 @@ import (
"strings"
"time"
"github.com/ccfos/nightingale/v6/alert/aconf"
"github.com/ccfos/nightingale/v6/center/cconf"
"github.com/ccfos/nightingale/v6/center/cstats"
"github.com/ccfos/nightingale/v6/center/metas"
"github.com/ccfos/nightingale/v6/center/sso"
"github.com/ccfos/nightingale/v6/conf"
_ "github.com/ccfos/nightingale/v6/front/statik"
"github.com/ccfos/nightingale/v6/memsto"
"github.com/ccfos/nightingale/v6/models"
"github.com/ccfos/nightingale/v6/pkg/aop"
"github.com/ccfos/nightingale/v6/pkg/ctx"
"github.com/ccfos/nightingale/v6/pkg/httpx"
@@ -21,7 +24,7 @@ import (
"github.com/ccfos/nightingale/v6/prom"
"github.com/ccfos/nightingale/v6/pushgw/idents"
"github.com/ccfos/nightingale/v6/storage"
"github.com/ccfos/nightingale/v6/tdengine"
"gorm.io/gorm"
"github.com/gin-gonic/gin"
"github.com/rakyll/statik/fs"
@@ -33,11 +36,12 @@ import (
type Router struct {
HTTP httpx.Config
Center cconf.Center
Ibex conf.Ibex
Alert aconf.Alert
Operations cconf.Operation
DatasourceCache *memsto.DatasourceCacheType
NotifyConfigCache *memsto.NotifyConfigCacheType
PromClients *prom.PromClientMap
TdendgineClients *tdengine.TdengineClientMap
Redis storage.Redis
MetaSet *metas.Set
IdentSet *idents.Set
@@ -45,32 +49,40 @@ type Router struct {
Sso *sso.SsoClient
UserCache *memsto.UserCacheType
UserGroupCache *memsto.UserGroupCacheType
UserTokenCache *memsto.UserTokenCacheType
Ctx *ctx.Context
DatasourceCheckHook func(*gin.Context) bool
HeartbeatHook HeartbeatHookFunc
TargetDeleteHook models.TargetDeleteHookFunc
AlertRuleModifyHook AlertRuleModifyHookFunc
}
func New(httpConfig httpx.Config, center cconf.Center, operations cconf.Operation, ds *memsto.DatasourceCacheType, ncc *memsto.NotifyConfigCacheType,
pc *prom.PromClientMap, tdendgineClients *tdengine.TdengineClientMap, redis storage.Redis, sso *sso.SsoClient, ctx *ctx.Context, metaSet *metas.Set, idents *idents.Set, tc *memsto.TargetCacheType,
uc *memsto.UserCacheType, ugc *memsto.UserGroupCacheType) *Router {
func New(httpConfig httpx.Config, center cconf.Center, alert aconf.Alert, ibex conf.Ibex,
operations cconf.Operation, ds *memsto.DatasourceCacheType, ncc *memsto.NotifyConfigCacheType,
pc *prom.PromClientMap, redis storage.Redis,
sso *sso.SsoClient, ctx *ctx.Context, metaSet *metas.Set, idents *idents.Set,
tc *memsto.TargetCacheType, uc *memsto.UserCacheType, ugc *memsto.UserGroupCacheType, utc *memsto.UserTokenCacheType) *Router {
return &Router{
HTTP: httpConfig,
Center: center,
Operations: operations,
DatasourceCache: ds,
NotifyConfigCache: ncc,
PromClients: pc,
TdendgineClients: tdendgineClients,
Redis: redis,
MetaSet: metaSet,
IdentSet: idents,
TargetCache: tc,
Sso: sso,
UserCache: uc,
UserGroupCache: ugc,
Ctx: ctx,
DatasourceCheckHook: func(ctx *gin.Context) bool { return false },
HTTP: httpConfig,
Center: center,
Alert: alert,
Ibex: ibex,
Operations: operations,
DatasourceCache: ds,
NotifyConfigCache: ncc,
PromClients: pc,
Redis: redis,
MetaSet: metaSet,
IdentSet: idents,
TargetCache: tc,
Sso: sso,
UserCache: uc,
UserGroupCache: ugc,
UserTokenCache: utc,
Ctx: ctx,
HeartbeatHook: func(ident string) map[string]interface{} { return nil },
TargetDeleteHook: func(tx *gorm.DB, idents []string) error { return nil },
AlertRuleModifyHook: func(ar *models.AlertRule) {},
}
}
@@ -81,10 +93,9 @@ func stat() gin.HandlerFunc {
code := fmt.Sprintf("%d", c.Writer.Status())
method := c.Request.Method
labels := []string{cstats.Service, code, c.FullPath(), method}
labels := []string{code, c.FullPath(), method}
cstats.RequestCounter.WithLabelValues(labels...).Inc()
cstats.RequestDuration.WithLabelValues(labels...).Observe(float64(time.Since(start).Seconds()))
cstats.RequestDuration.WithLabelValues(labels...).Observe(time.Since(start).Seconds())
}
}
@@ -94,15 +105,17 @@ func languageDetector(i18NHeaderKey string) gin.HandlerFunc {
if headerKey != "" {
lang := c.GetHeader(headerKey)
if lang != "" {
if strings.HasPrefix(lang, "zh") {
c.Request.Header.Set("X-Language", "zh")
if strings.HasPrefix(lang, "zh_HK") {
c.Request.Header.Set("X-Language", "zh_HK")
} else if strings.HasPrefix(lang, "zh") {
c.Request.Header.Set("X-Language", "zh_CN")
} else if strings.HasPrefix(lang, "en") {
c.Request.Header.Set("X-Language", "en")
} else {
c.Request.Header.Set("X-Language", lang)
}
} else {
c.Request.Header.Set("X-Language", "en")
c.Request.Header.Set("X-Language", "zh_CN")
}
}
c.Next()
@@ -115,7 +128,7 @@ func (rt *Router) configNoRoute(r *gin.Engine, fs *http.FileSystem) {
suffix := arr[len(arr)-1]
switch suffix {
case "png", "jpeg", "jpg", "svg", "ico", "gif", "css", "js", "html", "htm", "gz", "zip", "map", "ttf":
case "png", "jpeg", "jpg", "svg", "ico", "gif", "css", "js", "html", "htm", "gz", "zip", "map", "ttf", "md":
if !rt.Center.UseFileAssets {
c.FileFromFS(c.Request.URL.Path, *fs)
} else {
@@ -164,36 +177,69 @@ func (rt *Router) Config(r *gin.Engine) {
pages := r.Group(pagesPrefix)
{
pages.DELETE("/datasource/series", rt.auth(), rt.admin(), rt.deleteDatasourceSeries)
if rt.Center.AnonymousAccess.PromQuerier {
pages.Any("/proxy/:id/*url", rt.dsProxy)
pages.POST("/query-range-batch", rt.promBatchQueryRange)
pages.POST("/query-instant-batch", rt.promBatchQueryInstant)
pages.GET("/datasource/brief", rt.datasourceBriefs)
pages.POST("/datasource/query", rt.datasourceQuery)
pages.POST("/ds-query", rt.QueryData)
pages.POST("/logs-query", rt.QueryLog)
pages.POST("/logs-query", rt.QueryLogV2)
pages.POST("/tdengine-databases", rt.tdengineDatabases)
pages.POST("/tdengine-tables", rt.tdengineTables)
pages.POST("/tdengine-columns", rt.tdengineColumns)
pages.GET("/sql-template", rt.QuerySqlTemplate)
pages.POST("/log-query-batch", rt.QueryLogBatch)
// 数据库元数据接口
pages.POST("/db-databases", rt.ShowDatabases)
pages.POST("/db-tables", rt.ShowTables)
pages.POST("/db-desc-table", rt.DescribeTable)
// es 专用接口
pages.POST("/indices", rt.auth(), rt.user(), rt.QueryIndices)
pages.POST("/es-variable", rt.auth(), rt.user(), rt.QueryESVariable)
pages.POST("/fields", rt.auth(), rt.user(), rt.QueryFields)
pages.POST("/log-query", rt.auth(), rt.user(), rt.QueryLog)
} else {
pages.Any("/proxy/:id/*url", rt.auth(), rt.dsProxy)
pages.POST("/query-range-batch", rt.auth(), rt.promBatchQueryRange)
pages.POST("/query-instant-batch", rt.auth(), rt.promBatchQueryInstant)
pages.GET("/datasource/brief", rt.auth(), rt.datasourceBriefs)
pages.GET("/datasource/brief", rt.auth(), rt.user(), rt.datasourceBriefs)
pages.POST("/datasource/query", rt.auth(), rt.user(), rt.datasourceQuery)
pages.POST("/ds-query", rt.auth(), rt.QueryData)
pages.POST("/logs-query", rt.auth(), rt.QueryLog)
pages.POST("/ds-query", rt.auth(), rt.user(), rt.QueryData)
pages.POST("/logs-query", rt.auth(), rt.user(), rt.QueryLogV2)
pages.POST("/tdengine-databases", rt.auth(), rt.tdengineDatabases)
pages.POST("/tdengine-tables", rt.auth(), rt.tdengineTables)
pages.POST("/tdengine-columns", rt.auth(), rt.tdengineColumns)
pages.POST("/log-query-batch", rt.auth(), rt.user(), rt.QueryLogBatch)
// 数据库元数据接口
pages.POST("/db-databases", rt.auth(), rt.user(), rt.ShowDatabases)
pages.POST("/db-tables", rt.auth(), rt.user(), rt.ShowTables)
pages.POST("/db-desc-table", rt.auth(), rt.user(), rt.DescribeTable)
// es 专用接口
pages.POST("/indices", rt.auth(), rt.user(), rt.QueryIndices)
pages.POST("/es-variable", rt.QueryESVariable)
pages.POST("/fields", rt.QueryFields)
pages.POST("/log-query", rt.QueryLog)
}
// OpenSearch 专用接口
pages.POST("/os-indices", rt.QueryOSIndices)
pages.POST("/os-variable", rt.QueryOSVariable)
pages.POST("/os-fields", rt.QueryOSFields)
pages.GET("/sql-template", rt.QuerySqlTemplate)
pages.POST("/auth/login", rt.jwtMock(), rt.loginPost)
pages.POST("/auth/logout", rt.jwtMock(), rt.auth(), rt.logoutPost)
pages.POST("/auth/logout", rt.jwtMock(), rt.auth(), rt.user(), rt.logoutPost)
pages.POST("/auth/refresh", rt.jwtMock(), rt.refreshPost)
pages.POST("/auth/captcha", rt.jwtMock(), rt.generateCaptcha)
pages.POST("/auth/captcha-verify", rt.jwtMock(), rt.captchaVerify)
@@ -204,9 +250,11 @@ func (rt *Router) Config(r *gin.Engine) {
pages.GET("/auth/redirect", rt.loginRedirect)
pages.GET("/auth/redirect/cas", rt.loginRedirectCas)
pages.GET("/auth/redirect/oauth", rt.loginRedirectOAuth)
pages.GET("/auth/redirect/dingtalk", rt.loginRedirectDingTalk)
pages.GET("/auth/callback", rt.loginCallback)
pages.GET("/auth/callback/cas", rt.loginCallbackCas)
pages.GET("/auth/callback/oauth", rt.loginCallbackOAuth)
pages.GET("/auth/callback/dingtalk", rt.loginCallbackDingTalk)
pages.GET("/auth/perms", rt.allPerms)
pages.GET("/metrics/desc", rt.metricsDescGetFile)
@@ -214,24 +262,42 @@ func (rt *Router) Config(r *gin.Engine) {
pages.GET("/notify-channels", rt.notifyChannelsGets)
pages.GET("/contact-keys", rt.contactKeysGets)
pages.GET("/install-date", rt.installDateGet)
pages.GET("/self/perms", rt.auth(), rt.user(), rt.permsGets)
pages.GET("/self/profile", rt.auth(), rt.user(), rt.selfProfileGet)
pages.PUT("/self/profile", rt.auth(), rt.user(), rt.selfProfilePut)
pages.PUT("/self/password", rt.auth(), rt.user(), rt.selfPasswordPut)
pages.GET("/self/token", rt.auth(), rt.user(), rt.getToken)
pages.POST("/self/token", rt.auth(), rt.user(), rt.addToken)
pages.DELETE("/self/token/:id", rt.auth(), rt.user(), rt.deleteToken)
pages.GET("/users", rt.auth(), rt.user(), rt.perm("/users"), rt.userGets)
pages.POST("/users", rt.auth(), rt.admin(), rt.userAddPost)
pages.POST("/users", rt.auth(), rt.user(), rt.perm("/users/add"), rt.userAddPost)
pages.GET("/user/:id/profile", rt.auth(), rt.userProfileGet)
pages.PUT("/user/:id/profile", rt.auth(), rt.admin(), rt.userProfilePut)
pages.PUT("/user/:id/password", rt.auth(), rt.admin(), rt.userPasswordPut)
pages.DELETE("/user/:id", rt.auth(), rt.admin(), rt.userDel)
pages.PUT("/user/:id/profile", rt.auth(), rt.user(), rt.perm("/users/put"), rt.userProfilePut)
pages.PUT("/user/:id/password", rt.auth(), rt.user(), rt.perm("/users/put"), rt.userPasswordPut)
pages.DELETE("/user/:id", rt.auth(), rt.user(), rt.perm("/users/del"), rt.userDel)
pages.GET("/metric-views", rt.auth(), rt.metricViewGets)
pages.DELETE("/metric-views", rt.auth(), rt.user(), rt.metricViewDel)
pages.POST("/metric-views", rt.auth(), rt.user(), rt.metricViewAdd)
pages.PUT("/metric-views", rt.auth(), rt.user(), rt.metricViewPut)
pages.GET("/builtin-metric-filters", rt.auth(), rt.user(), rt.metricFilterGets)
pages.DELETE("/builtin-metric-filters", rt.auth(), rt.user(), rt.metricFilterDel)
pages.POST("/builtin-metric-filters", rt.auth(), rt.user(), rt.metricFilterAdd)
pages.PUT("/builtin-metric-filters", rt.auth(), rt.user(), rt.metricFilterPut)
pages.POST("/builtin-metric-promql", rt.auth(), rt.user(), rt.getMetricPromql)
pages.POST("/builtin-metrics", rt.auth(), rt.user(), rt.perm("/builtin-metrics/add"), rt.builtinMetricsAdd)
pages.PUT("/builtin-metrics", rt.auth(), rt.user(), rt.perm("/builtin-metrics/put"), rt.builtinMetricsPut)
pages.DELETE("/builtin-metrics", rt.auth(), rt.user(), rt.perm("/builtin-metrics/del"), rt.builtinMetricsDel)
pages.GET("/builtin-metrics", rt.auth(), rt.user(), rt.builtinMetricsGets)
pages.GET("/builtin-metrics/types", rt.auth(), rt.user(), rt.builtinMetricsTypes)
pages.GET("/builtin-metrics/types/default", rt.auth(), rt.user(), rt.builtinMetricsDefaultTypes)
pages.GET("/builtin-metrics/collectors", rt.auth(), rt.user(), rt.builtinMetricsCollectors)
pages.GET("/user-groups", rt.auth(), rt.user(), rt.userGroupGets)
pages.POST("/user-groups", rt.auth(), rt.user(), rt.perm("/user-groups/add"), rt.userGroupAdd)
pages.GET("/user-group/:id", rt.auth(), rt.user(), rt.userGroupGet)
@@ -249,31 +315,39 @@ func (rt *Router) Config(r *gin.Engine) {
pages.DELETE("/busi-group/:id/members", rt.auth(), rt.user(), rt.perm("/busi-groups/put"), rt.bgrw(), rt.busiGroupMemberDel)
pages.DELETE("/busi-group/:id", rt.auth(), rt.user(), rt.perm("/busi-groups/del"), rt.bgrw(), rt.busiGroupDel)
pages.GET("/busi-group/:id/perm/:perm", rt.auth(), rt.user(), rt.checkBusiGroupPerm)
pages.GET("/busi-groups/tags", rt.auth(), rt.user(), rt.busiGroupsGetTags)
pages.GET("/targets", rt.auth(), rt.user(), rt.targetGets)
pages.POST("/target-update", rt.auth(), rt.targetUpdate)
pages.GET("/target/extra-meta", rt.auth(), rt.user(), rt.targetExtendInfoByIdent)
pages.POST("/target/list", rt.auth(), rt.user(), rt.targetGetsByHostFilter)
pages.DELETE("/targets", rt.auth(), rt.user(), rt.perm("/targets/del"), rt.targetDel)
pages.GET("/targets/tags", rt.auth(), rt.user(), rt.targetGetTags)
pages.POST("/targets/tags", rt.auth(), rt.user(), rt.perm("/targets/put"), rt.targetBindTagsByFE)
pages.DELETE("/targets/tags", rt.auth(), rt.user(), rt.perm("/targets/put"), rt.targetUnbindTagsByFE)
pages.PUT("/targets/note", rt.auth(), rt.user(), rt.perm("/targets/put"), rt.targetUpdateNote)
pages.PUT("/targets/bgid", rt.auth(), rt.user(), rt.perm("/targets/put"), rt.targetUpdateBgid)
pages.PUT("/targets/bgids", rt.auth(), rt.user(), rt.perm("/targets/put"), rt.targetBindBgids)
pages.POST("/builtin-cate-favorite", rt.auth(), rt.user(), rt.builtinCateFavoriteAdd)
pages.DELETE("/builtin-cate-favorite/:name", rt.auth(), rt.user(), rt.builtinCateFavoriteDel)
pages.GET("/builtin-boards", rt.builtinBoardGets)
pages.GET("/builtin-board/:name", rt.builtinBoardGet)
pages.GET("/dashboards/builtin/list", rt.builtinBoardGets)
pages.GET("/builtin-boards-cates", rt.auth(), rt.user(), rt.builtinBoardCateGets)
pages.POST("/builtin-boards-detail", rt.auth(), rt.user(), rt.builtinBoardDetailGets)
pages.GET("/integrations/icon/:cate/:name", rt.builtinIcon)
pages.GET("/integrations/makedown/:cate", rt.builtinMarkdown)
// pages.GET("/builtin-boards", rt.builtinBoardGets)
// pages.GET("/builtin-board/:name", rt.builtinBoardGet)
// pages.GET("/dashboards/builtin/list", rt.builtinBoardGets)
// pages.GET("/builtin-boards-cates", rt.auth(), rt.user(), rt.builtinBoardCateGets)
// pages.POST("/builtin-boards-detail", rt.auth(), rt.user(), rt.builtinBoardDetailGets)
// pages.GET("/integrations/makedown/:cate", rt.builtinMarkdown)
pages.GET("/busi-groups/public-boards", rt.auth(), rt.user(), rt.perm("/dashboards"), rt.publicBoardGets)
pages.GET("/busi-groups/boards", rt.auth(), rt.user(), rt.perm("/dashboards"), rt.boardGetsByGids)
pages.GET("/busi-group/:id/boards", rt.auth(), rt.user(), rt.perm("/dashboards"), rt.bgro(), rt.boardGets)
pages.POST("/busi-group/:id/boards", rt.auth(), rt.user(), rt.perm("/dashboards/add"), rt.bgrw(), rt.boardAdd)
pages.POST("/busi-group/:id/board/:bid/clone", rt.auth(), rt.user(), rt.perm("/dashboards/add"), rt.bgrw(), rt.boardClone)
pages.POST("/busi-groups/boards/clones", rt.auth(), rt.user(), rt.perm("/dashboards/add"), rt.boardBatchClone)
pages.GET("/boards", rt.auth(), rt.user(), rt.boardGetsByBids)
pages.GET("/board/:bid", rt.boardGet)
pages.GET("/board/:bid/pure", rt.boardPureGet)
pages.PUT("/board/:bid", rt.auth(), rt.user(), rt.perm("/dashboards/put"), rt.boardPut)
@@ -284,18 +358,34 @@ func (rt *Router) Config(r *gin.Engine) {
pages.GET("/share-charts", rt.chartShareGets)
pages.POST("/share-charts", rt.auth(), rt.chartShareAdd)
pages.GET("/alert-rules/builtin/alerts-cates", rt.auth(), rt.user(), rt.builtinAlertCateGets)
pages.GET("/alert-rules/builtin/list", rt.auth(), rt.user(), rt.builtinAlertRules)
pages.POST("/dashboard-annotations", rt.auth(), rt.user(), rt.perm("/dashboards/put"), rt.dashAnnotationAdd)
pages.GET("/dashboard-annotations", rt.dashAnnotationGets)
pages.PUT("/dashboard-annotation/:id", rt.auth(), rt.user(), rt.perm("/dashboards/put"), rt.dashAnnotationPut)
pages.DELETE("/dashboard-annotation/:id", rt.auth(), rt.user(), rt.perm("/dashboards/del"), rt.dashAnnotationDel)
// pages.GET("/alert-rules/builtin/alerts-cates", rt.auth(), rt.user(), rt.builtinAlertCateGets)
// pages.GET("/alert-rules/builtin/list", rt.auth(), rt.user(), rt.builtinAlertRules)
pages.GET("/alert-rules/callbacks", rt.auth(), rt.user(), rt.alertRuleCallbacks)
pages.GET("/busi-groups/alert-rules", rt.auth(), rt.user(), rt.perm("/alert-rules"), rt.alertRuleGetsByGids)
pages.GET("/busi-group/:id/alert-rules", rt.auth(), rt.user(), rt.perm("/alert-rules"), rt.alertRuleGets)
pages.POST("/busi-group/:id/alert-rules", rt.auth(), rt.user(), rt.perm("/alert-rules/add"), rt.bgrw(), rt.alertRuleAddByFE)
pages.POST("/busi-group/:id/alert-rules/import", rt.auth(), rt.user(), rt.perm("/alert-rules/add"), rt.bgrw(), rt.alertRuleAddByImport)
pages.POST("/busi-group/:id/alert-rules/import-prom-rule", rt.auth(),
rt.user(), rt.perm("/alert-rules/add"), rt.bgrw(), rt.alertRuleAddByImportPromRule)
pages.DELETE("/busi-group/:id/alert-rules", rt.auth(), rt.user(), rt.perm("/alert-rules/del"), rt.bgrw(), rt.alertRuleDel)
pages.PUT("/busi-group/:id/alert-rules/fields", rt.auth(), rt.user(), rt.perm("/alert-rules/put"), rt.bgrw(), rt.alertRulePutFields)
pages.PUT("/busi-group/:id/alert-rule/:arid", rt.auth(), rt.user(), rt.perm("/alert-rules/put"), rt.alertRulePutByFE)
pages.GET("/alert-rule/:arid", rt.auth(), rt.user(), rt.perm("/alert-rules"), rt.alertRuleGet)
pages.GET("/alert-rule/:arid/pure", rt.auth(), rt.user(), rt.perm("/alert-rules"), rt.alertRulePureGet)
pages.PUT("/busi-group/alert-rule/validate", rt.auth(), rt.user(), rt.perm("/alert-rules/put"), rt.alertRuleValidation)
pages.POST("/relabel-test", rt.auth(), rt.user(), rt.relabelTest)
pages.POST("/busi-group/:id/alert-rules/clone", rt.auth(), rt.user(), rt.perm("/alert-rules/add"), rt.bgrw(), rt.cloneToMachine)
pages.POST("/busi-groups/alert-rules/clones", rt.auth(), rt.user(), rt.perm("/alert-rules/add"), rt.batchAlertRuleClone)
pages.POST("/busi-group/alert-rules/notify-tryrun", rt.auth(), rt.user(), rt.perm("/alert-rules/add"), rt.alertRuleNotifyTryRun)
pages.POST("/busi-group/alert-rules/enable-tryrun", rt.auth(), rt.user(), rt.perm("/alert-rules/add"), rt.alertRuleEnableTryRun)
pages.GET("/busi-groups/recording-rules", rt.auth(), rt.user(), rt.perm("/recording-rules"), rt.recordingRuleGetsByGids)
pages.GET("/busi-group/:id/recording-rules", rt.auth(), rt.user(), rt.perm("/recording-rules"), rt.recordingRuleGets)
pages.POST("/busi-group/:id/recording-rules", rt.auth(), rt.user(), rt.perm("/recording-rules/add"), rt.bgrw(), rt.recordingRuleAddByFE)
pages.DELETE("/busi-group/:id/recording-rules", rt.auth(), rt.user(), rt.perm("/recording-rules/del"), rt.bgrw(), rt.recordingRuleDel)
@@ -303,32 +393,34 @@ func (rt *Router) Config(r *gin.Engine) {
pages.GET("/recording-rule/:rrid", rt.auth(), rt.user(), rt.perm("/recording-rules"), rt.recordingRuleGet)
pages.PUT("/busi-group/:id/recording-rules/fields", rt.auth(), rt.user(), rt.perm("/recording-rules/put"), rt.recordingRulePutFields)
pages.GET("/busi-groups/alert-mutes", rt.auth(), rt.user(), rt.perm("/alert-mutes"), rt.alertMuteGetsByGids)
pages.GET("/busi-group/:id/alert-mutes", rt.auth(), rt.user(), rt.perm("/alert-mutes"), rt.bgro(), rt.alertMuteGetsByBG)
pages.POST("/busi-group/:id/alert-mutes/preview", rt.auth(), rt.user(), rt.perm("/alert-mutes/add"), rt.bgrw(), rt.alertMutePreview)
pages.POST("/busi-group/:id/alert-mutes", rt.auth(), rt.user(), rt.perm("/alert-mutes/add"), rt.bgrw(), rt.alertMuteAdd)
pages.DELETE("/busi-group/:id/alert-mutes", rt.auth(), rt.user(), rt.perm("/alert-mutes/del"), rt.bgrw(), rt.alertMuteDel)
pages.PUT("/busi-group/:id/alert-mute/:amid", rt.auth(), rt.user(), rt.perm("/alert-mutes/put"), rt.alertMutePutByFE)
pages.GET("/busi-group/:id/alert-mute/:amid", rt.auth(), rt.user(), rt.perm("/alert-mutes"), rt.alertMuteGet)
pages.PUT("/busi-group/:id/alert-mutes/fields", rt.auth(), rt.user(), rt.perm("/alert-mutes/put"), rt.bgrw(), rt.alertMutePutFields)
pages.POST("/alert-mute-tryrun", rt.auth(), rt.user(), rt.perm("/alert-mutes/add"), rt.alertMuteTryRun)
pages.GET("/busi-groups/alert-subscribes", rt.auth(), rt.user(), rt.perm("/alert-subscribes"), rt.alertSubscribeGetsByGids)
pages.GET("/busi-group/:id/alert-subscribes", rt.auth(), rt.user(), rt.perm("/alert-subscribes"), rt.bgro(), rt.alertSubscribeGets)
pages.GET("/alert-subscribe/:sid", rt.auth(), rt.user(), rt.perm("/alert-subscribes"), rt.alertSubscribeGet)
pages.POST("/busi-group/:id/alert-subscribes", rt.auth(), rt.user(), rt.perm("/alert-subscribes/add"), rt.bgrw(), rt.alertSubscribeAdd)
pages.PUT("/busi-group/:id/alert-subscribes", rt.auth(), rt.user(), rt.perm("/alert-subscribes/put"), rt.bgrw(), rt.alertSubscribePut)
pages.DELETE("/busi-group/:id/alert-subscribes", rt.auth(), rt.user(), rt.perm("/alert-subscribes/del"), rt.bgrw(), rt.alertSubscribeDel)
pages.POST("/alert-subscribe/alert-subscribes-tryrun", rt.auth(), rt.user(), rt.perm("/alert-subscribes/add"), rt.alertSubscribeTryRun)
if rt.Center.AnonymousAccess.AlertDetail {
pages.GET("/alert-cur-event/:eid", rt.alertCurEventGet)
pages.GET("/alert-his-event/:eid", rt.alertHisEventGet)
} else {
pages.GET("/alert-cur-event/:eid", rt.auth(), rt.alertCurEventGet)
pages.GET("/alert-his-event/:eid", rt.auth(), rt.alertHisEventGet)
}
pages.GET("/alert-cur-event/:eid", rt.alertCurEventGet)
pages.GET("/alert-his-event/:eid", rt.alertHisEventGet)
pages.GET("/event-notify-records/:eid", rt.notificationRecordList)
// card logic
pages.GET("/alert-cur-events/list", rt.auth(), rt.alertCurEventsList)
pages.GET("/alert-cur-events/card", rt.auth(), rt.alertCurEventsCard)
pages.GET("/alert-cur-events/list", rt.auth(), rt.user(), rt.alertCurEventsList)
pages.GET("/alert-cur-events/card", rt.auth(), rt.user(), rt.alertCurEventsCard)
pages.POST("/alert-cur-events/card/details", rt.auth(), rt.alertCurEventsCardDetails)
pages.GET("/alert-his-events/list", rt.auth(), rt.alertHisEventsList)
pages.GET("/alert-his-events/list", rt.auth(), rt.user(), rt.alertHisEventsList)
pages.DELETE("/alert-his-events", rt.auth(), rt.admin(), rt.alertHisEventsDelete)
pages.DELETE("/alert-cur-events", rt.auth(), rt.user(), rt.perm("/alert-cur-events/del"), rt.alertCurEventDel)
pages.GET("/alert-cur-events/stats", rt.auth(), rt.alertCurEventsStatistics)
@@ -337,6 +429,7 @@ func (rt *Router) Config(r *gin.Engine) {
pages.POST("/alert-aggr-views", rt.auth(), rt.user(), rt.alertAggrViewAdd)
pages.PUT("/alert-aggr-views", rt.auth(), rt.user(), rt.alertAggrViewPut)
pages.GET("/busi-groups/task-tpls", rt.auth(), rt.user(), rt.perm("/job-tpls"), rt.taskTplGetsByGids)
pages.GET("/busi-group/:id/task-tpls", rt.auth(), rt.user(), rt.perm("/job-tpls"), rt.bgro(), rt.taskTplGets)
pages.POST("/busi-group/:id/task-tpls", rt.auth(), rt.user(), rt.perm("/job-tpls/add"), rt.bgrw(), rt.taskTplAdd)
pages.DELETE("/busi-group/:id/task-tpl/:tid", rt.auth(), rt.user(), rt.perm("/job-tpls/del"), rt.bgrw(), rt.taskTplDel)
@@ -345,70 +438,158 @@ func (rt *Router) Config(r *gin.Engine) {
pages.GET("/busi-group/:id/task-tpl/:tid", rt.auth(), rt.user(), rt.perm("/job-tpls"), rt.bgro(), rt.taskTplGet)
pages.PUT("/busi-group/:id/task-tpl/:tid", rt.auth(), rt.user(), rt.perm("/job-tpls/put"), rt.bgrw(), rt.taskTplPut)
pages.GET("/busi-groups/tasks", rt.auth(), rt.user(), rt.perm("/job-tasks"), rt.taskGetsByGids)
pages.GET("/busi-group/:id/tasks", rt.auth(), rt.user(), rt.perm("/job-tasks"), rt.bgro(), rt.taskGets)
pages.POST("/busi-group/:id/tasks", rt.auth(), rt.user(), rt.perm("/job-tasks/add"), rt.bgrw(), rt.taskAdd)
pages.GET("/busi-group/:id/task/*url", rt.auth(), rt.user(), rt.perm("/job-tasks"), rt.taskProxy)
pages.PUT("/busi-group/:id/task/*url", rt.auth(), rt.user(), rt.perm("/job-tasks/put"), rt.bgrw(), rt.taskProxy)
pages.GET("/servers", rt.auth(), rt.admin(), rt.serversGet)
pages.GET("/server-clusters", rt.auth(), rt.admin(), rt.serverClustersGet)
pages.GET("/servers", rt.auth(), rt.user(), rt.serversGet)
pages.GET("/server-clusters", rt.auth(), rt.user(), rt.serverClustersGet)
pages.POST("/datasource/list", rt.auth(), rt.datasourceList)
pages.POST("/datasource/list", rt.auth(), rt.user(), rt.datasourceList)
pages.POST("/datasource/plugin/list", rt.auth(), rt.pluginList)
pages.POST("/datasource/upsert", rt.auth(), rt.admin(), rt.datasourceUpsert)
pages.POST("/datasource/desc", rt.auth(), rt.admin(), rt.datasourceGet)
pages.POST("/datasource/status/update", rt.auth(), rt.admin(), rt.datasourceUpdataStatus)
pages.DELETE("/datasource/", rt.auth(), rt.admin(), rt.datasourceDel)
pages.GET("/roles", rt.auth(), rt.admin(), rt.roleGets)
pages.POST("/roles", rt.auth(), rt.admin(), rt.roleAdd)
pages.PUT("/roles", rt.auth(), rt.admin(), rt.rolePut)
pages.DELETE("/role/:id", rt.auth(), rt.admin(), rt.roleDel)
pages.GET("/roles", rt.auth(), rt.user(), rt.roleGets)
pages.POST("/roles", rt.auth(), rt.user(), rt.perm("/roles/add"), rt.roleAdd)
pages.PUT("/roles", rt.auth(), rt.user(), rt.perm("/roles/put"), rt.rolePut)
pages.DELETE("/role/:id", rt.auth(), rt.user(), rt.perm("/roles/del"), rt.roleDel)
pages.GET("/role/:id/ops", rt.auth(), rt.admin(), rt.operationOfRole)
pages.PUT("/role/:id/ops", rt.auth(), rt.admin(), rt.roleBindOperation)
pages.GET("/role/:id/ops", rt.auth(), rt.user(), rt.perm("/roles"), rt.operationOfRole)
pages.PUT("/role/:id/ops", rt.auth(), rt.user(), rt.perm("/roles/put"), rt.roleBindOperation)
pages.GET("/operation", rt.operations)
pages.GET("/notify-tpls", rt.auth(), rt.admin(), rt.notifyTplGets)
pages.PUT("/notify-tpl/content", rt.auth(), rt.admin(), rt.notifyTplUpdateContent)
pages.PUT("/notify-tpl", rt.auth(), rt.admin(), rt.notifyTplUpdate)
pages.POST("/notify-tpl", rt.auth(), rt.admin(), rt.notifyTplAdd)
pages.DELETE("/notify-tpl/:id", rt.auth(), rt.admin(), rt.notifyTplDel)
pages.POST("/notify-tpl/preview", rt.auth(), rt.admin(), rt.notifyTplPreview)
pages.GET("/notify-tpls", rt.auth(), rt.user(), rt.notifyTplGets)
pages.PUT("/notify-tpl/content", rt.auth(), rt.user(), rt.notifyTplUpdateContent)
pages.PUT("/notify-tpl", rt.auth(), rt.user(), rt.notifyTplUpdate)
pages.POST("/notify-tpl", rt.auth(), rt.user(), rt.notifyTplAdd)
pages.DELETE("/notify-tpl/:id", rt.auth(), rt.user(), rt.notifyTplDel)
pages.POST("/notify-tpl/preview", rt.auth(), rt.user(), rt.notifyTplPreview)
pages.GET("/sso-configs", rt.auth(), rt.admin(), rt.ssoConfigGets)
pages.PUT("/sso-config", rt.auth(), rt.admin(), rt.ssoConfigUpdate)
pages.GET("/webhooks", rt.auth(), rt.admin(), rt.webhookGets)
pages.GET("/webhooks", rt.auth(), rt.user(), rt.webhookGets)
pages.PUT("/webhooks", rt.auth(), rt.admin(), rt.webhookPuts)
pages.GET("/notify-script", rt.auth(), rt.admin(), rt.notifyScriptGet)
pages.GET("/notify-script", rt.auth(), rt.user(), rt.perm("/help/notification-settings"), rt.notifyScriptGet)
pages.PUT("/notify-script", rt.auth(), rt.admin(), rt.notifyScriptPut)
pages.GET("/notify-channel", rt.auth(), rt.admin(), rt.notifyChannelGets)
pages.GET("/notify-channel", rt.auth(), rt.user(), rt.perm("/help/notification-settings"), rt.notifyChannelGets)
pages.PUT("/notify-channel", rt.auth(), rt.admin(), rt.notifyChannelPuts)
pages.GET("/notify-contact", rt.auth(), rt.admin(), rt.notifyContactGets)
pages.GET("/notify-contact", rt.auth(), rt.user(), rt.notifyContactGets)
pages.PUT("/notify-contact", rt.auth(), rt.admin(), rt.notifyContactPuts)
pages.GET("/notify-config", rt.auth(), rt.admin(), rt.notifyConfigGet)
pages.GET("/notify-config", rt.auth(), rt.user(), rt.perm("/help/notification-settings"), rt.notifyConfigGet)
pages.PUT("/notify-config", rt.auth(), rt.admin(), rt.notifyConfigPut)
pages.PUT("/smtp-config-test", rt.auth(), rt.admin(), rt.attemptSendEmail)
pages.GET("/es-index-pattern", rt.auth(), rt.esIndexPatternGet)
pages.GET("/es-index-pattern-list", rt.auth(), rt.esIndexPatternGetList)
pages.POST("/es-index-pattern", rt.auth(), rt.admin(), rt.esIndexPatternAdd)
pages.PUT("/es-index-pattern", rt.auth(), rt.admin(), rt.esIndexPatternPut)
pages.DELETE("/es-index-pattern", rt.auth(), rt.admin(), rt.esIndexPatternDel)
pages.POST("/es-index-pattern", rt.auth(), rt.user(), rt.perm("/log/index-patterns/add"), rt.esIndexPatternAdd)
pages.PUT("/es-index-pattern", rt.auth(), rt.user(), rt.perm("/log/index-patterns/put"), rt.esIndexPatternPut)
pages.DELETE("/es-index-pattern", rt.auth(), rt.user(), rt.perm("/log/index-patterns/del"), rt.esIndexPatternDel)
pages.GET("/user-variable-configs", rt.auth(), rt.admin(), rt.userVariableConfigGets)
pages.POST("/user-variable-config", rt.auth(), rt.admin(), rt.userVariableConfigAdd)
pages.PUT("/user-variable-config/:id", rt.auth(), rt.admin(), rt.userVariableConfigPut)
pages.DELETE("/user-variable-config/:id", rt.auth(), rt.admin(), rt.userVariableConfigDel)
pages.GET("/embedded-dashboards", rt.auth(), rt.user(), rt.perm("/embedded-dashboards"), rt.embeddedDashboardsGet)
pages.PUT("/embedded-dashboards", rt.auth(), rt.user(), rt.perm("/embedded-dashboards/put"), rt.embeddedDashboardsPut)
// 获取 embedded-product 列表
pages.GET("/embedded-product", rt.auth(), rt.user(), rt.embeddedProductGets)
pages.GET("/embedded-product/:id", rt.auth(), rt.user(), rt.embeddedProductGet)
pages.POST("/embedded-product", rt.auth(), rt.user(), rt.perm("/embedded-product/add"), rt.embeddedProductAdd)
pages.PUT("/embedded-product/:id", rt.auth(), rt.user(), rt.perm("/embedded-product/put"), rt.embeddedProductPut)
pages.DELETE("/embedded-product/:id", rt.auth(), rt.user(), rt.perm("/embedded-product/delete"), rt.embeddedProductDelete)
pages.GET("/user-variable-configs", rt.auth(), rt.user(), rt.perm("/help/variable-configs"), rt.userVariableConfigGets)
pages.POST("/user-variable-config", rt.auth(), rt.user(), rt.perm("/help/variable-configs"), rt.userVariableConfigAdd)
pages.PUT("/user-variable-config/:id", rt.auth(), rt.user(), rt.perm("/help/variable-configs"), rt.userVariableConfigPut)
pages.DELETE("/user-variable-config/:id", rt.auth(), rt.user(), rt.perm("/help/variable-configs"), rt.userVariableConfigDel)
pages.GET("/config", rt.auth(), rt.admin(), rt.configGetByKey)
pages.PUT("/config", rt.auth(), rt.admin(), rt.configPutByKey)
pages.GET("/site-info", rt.siteInfo)
// source token 相关路由
pages.POST("/source-token", rt.auth(), rt.user(), rt.sourceTokenAdd)
// for admin api
pages.GET("/user/busi-groups", rt.auth(), rt.admin(), rt.userBusiGroupsGets)
pages.GET("/builtin-components", rt.auth(), rt.user(), rt.builtinComponentsGets)
pages.POST("/builtin-components", rt.auth(), rt.user(), rt.perm("/components/add"), rt.builtinComponentsAdd)
pages.PUT("/builtin-components", rt.auth(), rt.user(), rt.perm("/components/put"), rt.builtinComponentsPut)
pages.DELETE("/builtin-components", rt.auth(), rt.user(), rt.perm("/components/del"), rt.builtinComponentsDel)
pages.GET("/builtin-payloads", rt.auth(), rt.user(), rt.builtinPayloadsGets)
pages.GET("/builtin-payloads/cates", rt.auth(), rt.user(), rt.builtinPayloadcatesGet)
pages.POST("/builtin-payloads", rt.auth(), rt.user(), rt.perm("/components/add"), rt.builtinPayloadsAdd)
pages.PUT("/builtin-payloads", rt.auth(), rt.user(), rt.perm("/components/put"), rt.builtinPayloadsPut)
pages.DELETE("/builtin-payloads", rt.auth(), rt.user(), rt.perm("/components/del"), rt.builtinPayloadsDel)
pages.GET("/builtin-payload", rt.auth(), rt.user(), rt.builtinPayloadsGetByUUID)
pages.POST("/message-templates", rt.auth(), rt.user(), rt.perm("/notification-templates/add"), rt.messageTemplatesAdd)
pages.DELETE("/message-templates", rt.auth(), rt.user(), rt.perm("/notification-templates/del"), rt.messageTemplatesDel)
pages.PUT("/message-template/:id", rt.auth(), rt.user(), rt.perm("/notification-templates/put"), rt.messageTemplatePut)
pages.GET("/message-template/:id", rt.auth(), rt.user(), rt.perm("/notification-templates"), rt.messageTemplateGet)
pages.GET("/message-templates", rt.auth(), rt.user(), rt.messageTemplatesGet)
pages.POST("/events-message", rt.auth(), rt.user(), rt.eventsMessage)
pages.POST("/notify-rules", rt.auth(), rt.user(), rt.perm("/notification-rules/add"), rt.notifyRulesAdd)
pages.DELETE("/notify-rules", rt.auth(), rt.user(), rt.perm("/notification-rules/del"), rt.notifyRulesDel)
pages.PUT("/notify-rule/:id", rt.auth(), rt.user(), rt.perm("/notification-rules/put"), rt.notifyRulePut)
pages.GET("/notify-rule/:id", rt.auth(), rt.user(), rt.perm("/notification-rules"), rt.notifyRuleGet)
pages.GET("/notify-rules", rt.auth(), rt.user(), rt.perm("/notification-rules"), rt.notifyRulesGet)
pages.POST("/notify-rule/test", rt.auth(), rt.user(), rt.perm("/notification-rules"), rt.notifyTest)
pages.GET("/notify-rule/custom-params", rt.auth(), rt.user(), rt.perm("/notification-rules"), rt.notifyRuleCustomParamsGet)
pages.POST("/notify-rule/event-pipelines-tryrun", rt.auth(), rt.user(), rt.perm("/notification-rules/add"), rt.tryRunEventProcessorByNotifyRule)
pages.GET("/event-tagkeys", rt.auth(), rt.user(), rt.eventTagKeys)
pages.GET("/event-tagvalues", rt.auth(), rt.user(), rt.eventTagValues)
// 事件Pipeline相关路由
pages.GET("/event-pipelines", rt.auth(), rt.user(), rt.perm("/event-pipelines"), rt.eventPipelinesList)
pages.POST("/event-pipeline", rt.auth(), rt.user(), rt.perm("/event-pipelines/add"), rt.addEventPipeline)
pages.PUT("/event-pipeline", rt.auth(), rt.user(), rt.perm("/event-pipelines/put"), rt.updateEventPipeline)
pages.GET("/event-pipeline/:id", rt.auth(), rt.user(), rt.perm("/event-pipelines"), rt.getEventPipeline)
pages.DELETE("/event-pipelines", rt.auth(), rt.user(), rt.perm("/event-pipelines/del"), rt.deleteEventPipelines)
pages.POST("/event-pipeline-tryrun", rt.auth(), rt.user(), rt.perm("/event-pipelines"), rt.tryRunEventPipeline)
pages.POST("/event-processor-tryrun", rt.auth(), rt.user(), rt.perm("/event-pipelines"), rt.tryRunEventProcessor)
// API 触发工作流
pages.POST("/event-pipeline/:id/trigger", rt.auth(), rt.user(), rt.perm("/event-pipelines"), rt.triggerEventPipelineByAPI)
// SSE 流式执行工作流
pages.POST("/event-pipeline/:id/stream", rt.auth(), rt.user(), rt.perm("/event-pipelines"), rt.streamEventPipeline)
// 事件Pipeline执行记录路由
pages.GET("/event-pipeline-executions", rt.auth(), rt.user(), rt.perm("/event-pipelines"), rt.listAllEventPipelineExecutions)
pages.GET("/event-pipeline/:id/executions", rt.auth(), rt.user(), rt.perm("/event-pipelines"), rt.listEventPipelineExecutions)
pages.GET("/event-pipeline/:id/execution/:exec_id", rt.auth(), rt.user(), rt.perm("/event-pipelines"), rt.getEventPipelineExecution)
pages.GET("/event-pipeline-execution/:exec_id", rt.auth(), rt.user(), rt.perm("/event-pipelines"), rt.getEventPipelineExecution)
pages.GET("/event-pipeline/:id/execution-stats", rt.auth(), rt.user(), rt.perm("/event-pipelines"), rt.getEventPipelineExecutionStats)
pages.POST("/event-pipeline-executions/clean", rt.auth(), rt.user(), rt.admin(), rt.cleanEventPipelineExecutions)
pages.POST("/notify-channel-configs", rt.auth(), rt.user(), rt.perm("/notification-channels/add"), rt.notifyChannelsAdd)
pages.DELETE("/notify-channel-configs", rt.auth(), rt.user(), rt.perm("/notification-channels/del"), rt.notifyChannelsDel)
pages.PUT("/notify-channel-config/:id", rt.auth(), rt.user(), rt.perm("/notification-channels/put"), rt.notifyChannelPut)
pages.GET("/notify-channel-config/:id", rt.auth(), rt.user(), rt.perm("/notification-channels"), rt.notifyChannelGet)
pages.GET("/notify-channel-configs", rt.auth(), rt.user(), rt.perm("/notification-channels"), rt.notifyChannelsGet)
pages.GET("/simplified-notify-channel-configs", rt.notifyChannelsGetForNormalUser)
pages.GET("/flashduty-channel-list/:id", rt.auth(), rt.user(), rt.flashDutyNotifyChannelsGet)
pages.GET("/pagerduty-integration-key/:id/:service_id/:integration_id", rt.auth(), rt.user(), rt.pagerDutyIntegrationKeyGet)
pages.GET("/pagerduty-service-list/:id", rt.auth(), rt.user(), rt.pagerDutyNotifyServicesGet)
pages.GET("/notify-channel-config", rt.auth(), rt.user(), rt.notifyChannelGetBy)
pages.GET("/notify-channel-config/idents", rt.notifyChannelIdentsGet)
// saved view 查询条件保存相关路由
pages.GET("/saved-views", rt.auth(), rt.user(), rt.savedViewGets)
pages.POST("/saved-views", rt.auth(), rt.user(), rt.savedViewAdd)
pages.PUT("/saved-view/:id", rt.auth(), rt.user(), rt.savedViewPut)
pages.DELETE("/saved-view/:id", rt.auth(), rt.user(), rt.savedViewDel)
pages.POST("/saved-view/:id/favorite", rt.auth(), rt.user(), rt.savedViewFavoriteAdd)
pages.DELETE("/saved-view/:id/favorite", rt.auth(), rt.user(), rt.savedViewFavoriteDel)
}
r.GET("/api/n9e/versions", func(c *gin.Context) {
@@ -418,7 +599,12 @@ func (rt *Router) Config(r *gin.Engine) {
v = version.Version[:lastIndex]
}
ginx.NewRender(c).Data(gin.H{"version": v, "github_verison": version.GithubVersion.Load().(string)}, nil)
gv := version.GithubVersion.Load()
if gv != nil {
ginx.NewRender(c).Data(gin.H{"version": v, "github_verison": gv.(string)}, nil)
} else {
ginx.NewRender(c).Data(gin.H{"version": v, "github_verison": ""}, nil)
}
})
if rt.HTTP.APIForService.Enable {
@@ -429,18 +615,27 @@ func (rt *Router) Config(r *gin.Engine) {
{
service.Any("/prometheus/*url", rt.dsProxy)
service.POST("/users", rt.userAddPost)
service.PUT("/user/:id", rt.userProfilePutByService)
service.DELETE("/user/:id", rt.userDel)
service.GET("/users", rt.userFindAll)
service.GET("/user-groups", rt.userGroupGetsByService)
service.GET("/user-group-members", rt.userGroupMemberGetsByService)
service.GET("/targets", rt.targetGetsByService)
service.GET("/target/extra-meta", rt.targetExtendInfoByIdent)
service.POST("/target/list", rt.targetGetsByHostFilter)
service.DELETE("/targets", rt.targetDelByService)
service.GET("/targets/tags", rt.targetGetTags)
service.POST("/targets/tags", rt.targetBindTagsByService)
service.DELETE("/targets/tags", rt.targetUnbindTagsByService)
service.PUT("/targets/note", rt.targetUpdateNoteByService)
service.PUT("/targets/bgid", rt.targetUpdateBgidByService)
service.POST("/targets-of-host-query", rt.targetsOfHostQuery)
service.POST("/alert-rules", rt.alertRuleAddByService)
service.POST("/alert-rule-add", rt.alertRuleAddOneByService)
service.DELETE("/alert-rules", rt.alertRuleDelByService)
service.PUT("/alert-rule/:arid", rt.alertRulePutByService)
service.GET("/alert-rule/:arid", rt.alertRuleGet)
@@ -451,6 +646,7 @@ func (rt *Router) Config(r *gin.Engine) {
service.GET("/busi-groups", rt.busiGroupGetsByService)
service.GET("/datasources", rt.datasourceGetsByService)
service.GET("/datasource-rsa-config", rt.datasourceRsaConfigGet)
service.GET("/datasource-ids", rt.getDatasourceIds)
service.POST("/server-heartbeat", rt.serverHeartbeat)
service.GET("/servers-active", rt.serversActive)
@@ -458,6 +654,7 @@ func (rt *Router) Config(r *gin.Engine) {
service.GET("/recording-rules", rt.recordingRuleGetsByService)
service.GET("/alert-mutes", rt.alertMuteGets)
service.GET("/active-alert-mutes", rt.activeAlertMuteGets)
service.POST("/alert-mutes", rt.alertMuteAddByService)
service.DELETE("/alert-mutes", rt.alertMuteDel)
@@ -467,10 +664,13 @@ func (rt *Router) Config(r *gin.Engine) {
service.GET("/alert-his-event/:eid", rt.alertHisEventGet)
service.GET("/task-tpl/:tid", rt.taskTplGetByService)
service.GET("/task-tpls", rt.taskTplGetsByService)
service.GET("/task-tpl/statistics", rt.taskTplStatistics)
service.GET("/config/:id", rt.configGet)
service.GET("/configs", rt.configsGet)
service.GET("/config", rt.configGetByKey)
service.GET("/all-configs", rt.configGetAll)
service.PUT("/configs", rt.configsPut)
service.POST("/configs", rt.configsPost)
service.DELETE("/configs", rt.configsDel)
@@ -483,6 +683,36 @@ func (rt *Router) Config(r *gin.Engine) {
service.GET("/notify-tpls", rt.notifyTplGets)
service.POST("/task-record-add", rt.taskRecordAdd)
service.GET("/user-variable/decrypt", rt.userVariableGetDecryptByService)
service.GET("/targets-of-alert-rule", rt.targetsOfAlertRule)
service.POST("/notify-record", rt.notificationRecordAdd)
service.GET("/alert-cur-events-del-by-hash", rt.alertCurEventDelByHash)
service.POST("/center/heartbeat", rt.heartbeat)
service.GET("/es-index-pattern-list", rt.esIndexPatternGetList)
service.GET("/notify-rules", rt.notifyRulesGetByService)
service.GET("/notify-channels", rt.notifyChannelConfigGets)
service.GET("/message-templates", rt.messageTemplateGets)
service.GET("/event-pipelines", rt.eventPipelinesListByService)
service.POST("/event-pipeline/:id/trigger", rt.triggerEventPipelineByService)
service.POST("/event-pipeline/:id/stream", rt.streamEventPipelineByService)
// 手机号加密存储配置接口
service.POST("/users/phone/encrypt", rt.usersPhoneEncrypt)
service.POST("/users/phone/decrypt", rt.usersPhoneDecrypt)
service.POST("/users/phone/refresh-encryption-config", rt.usersPhoneDecryptRefresh)
service.GET("/builtin-components", rt.builtinComponentsGets)
service.GET("/builtin-payloads", rt.builtinPayloadsGets)
}
}

View File

@@ -1,51 +1,55 @@
package router
import (
"fmt"
"net/http"
"sort"
"strings"
"time"
"github.com/ccfos/nightingale/v6/models"
"github.com/ccfos/nightingale/v6/pkg/ctx"
"github.com/ccfos/nightingale/v6/pkg/strx"
"github.com/gin-gonic/gin"
"github.com/toolkits/pkg/ginx"
"github.com/toolkits/pkg/logger"
)
func parseAggrRules(c *gin.Context) []*models.AggrRule {
aggrRules := strings.Split(ginx.QueryStr(c, "rule", ""), "::") // e.g. field:group_name::field:severity::tagkey:ident
if len(aggrRules) == 0 {
ginx.Bomb(http.StatusBadRequest, "rule empty")
func getUserGroupIds(ctx *gin.Context, rt *Router, myGroups bool) ([]int64, error) {
if !myGroups {
return nil, nil
}
rules := make([]*models.AggrRule, len(aggrRules))
for i := 0; i < len(aggrRules); i++ {
pair := strings.Split(aggrRules[i], ":")
if len(pair) != 2 {
ginx.Bomb(http.StatusBadRequest, "rule invalid")
}
if !(pair[0] == "field" || pair[0] == "tagkey") {
ginx.Bomb(http.StatusBadRequest, "rule invalid")
}
rules[i] = &models.AggrRule{
Type: pair[0],
Value: pair[1],
}
}
return rules
me := ctx.MustGet("user").(*models.User)
return models.MyGroupIds(rt.Ctx, me.Id)
}
func (rt *Router) alertCurEventsCard(c *gin.Context) {
stime, etime := getTimeRange(c)
severity := ginx.QueryInt(c, "severity", -1)
severity := strx.IdsInt64ForAPI(ginx.QueryStr(c, "severity", ""), ",")
query := ginx.QueryStr(c, "query", "")
busiGroupId := ginx.QueryInt64(c, "bgid", 0)
myGroups := ginx.QueryBool(c, "my_groups", false) // 是否只看自己组默认false
var gids []int64
var err error
if myGroups {
gids, err = getUserGroupIds(c, rt, myGroups)
ginx.Dangerous(err)
if len(gids) == 0 {
gids = append(gids, -1)
}
}
viewId := ginx.QueryInt64(c, "view_id")
alertView, err := models.GetAlertAggrViewByViewID(rt.Ctx, viewId)
ginx.Dangerous(err)
if alertView == nil {
ginx.Bomb(http.StatusNotFound, "alert aggr view not found")
}
dsIds := queryDatasourceIds(c)
rules := parseAggrRules(c)
prod := ginx.QueryStr(c, "prods", "")
if prod == "" {
@@ -62,13 +66,18 @@ func (rt *Router) alertCurEventsCard(c *gin.Context) {
cates = strings.Split(cate, ",")
}
bgids, err := GetBusinessGroupIds(c, rt.Ctx, rt.Center.EventHistoryGroupView, myGroups)
ginx.Dangerous(err)
// 最多获取50000个获取太多也没啥意义
list, err := models.AlertCurEventGets(rt.Ctx, prods, busiGroupId, stime, etime, severity, dsIds, cates, query, 50000, 0)
list, err := models.AlertCurEventsGet(rt.Ctx, prods, bgids, stime, etime, severity, dsIds,
cates, 0, query, 50000, 0, []int64{})
ginx.Dangerous(err)
cardmap := make(map[string]*AlertCard)
for _, event := range list {
title := event.GenCardTitle(rules)
title, err := event.GenCardTitle(alertView.Rule)
ginx.Dangerous(err)
if _, has := cardmap[title]; has {
cardmap[title].Total++
cardmap[title].EventIds = append(cardmap[title].EventIds, event.Id)
@@ -83,6 +92,10 @@ func (rt *Router) alertCurEventsCard(c *gin.Context) {
Severity: event.Severity,
}
}
if cardmap[title].Severity < 1 {
cardmap[title].Severity = 3
}
}
titles := make([]string, 0, len(cardmap))
@@ -139,12 +152,15 @@ func (rt *Router) alertCurEventsGetByRid(c *gin.Context) {
// 列表方式,拉取活跃告警
func (rt *Router) alertCurEventsList(c *gin.Context) {
stime, etime := getTimeRange(c)
severity := ginx.QueryInt(c, "severity", -1)
severity := strx.IdsInt64ForAPI(ginx.QueryStr(c, "severity", ""), ",")
query := ginx.QueryStr(c, "query", "")
limit := ginx.QueryInt(c, "limit", 20)
busiGroupId := ginx.QueryInt64(c, "bgid", 0)
myGroups := ginx.QueryBool(c, "my_groups", false) // 是否只看自己组默认false
dsIds := queryDatasourceIds(c)
eventIds := strx.IdsInt64ForAPI(ginx.QueryStr(c, "event_ids", ""), ",")
prod := ginx.QueryStr(c, "prods", "")
if prod == "" {
prod = ginx.QueryStr(c, "rule_prods", "")
@@ -161,13 +177,21 @@ func (rt *Router) alertCurEventsList(c *gin.Context) {
cates = strings.Split(cate, ",")
}
total, err := models.AlertCurEventTotal(rt.Ctx, prods, busiGroupId, stime, etime, severity, dsIds, cates, query)
ruleId := ginx.QueryInt64(c, "rid", 0)
bgids, err := GetBusinessGroupIds(c, rt.Ctx, rt.Center.EventHistoryGroupView, myGroups)
ginx.Dangerous(err)
list, err := models.AlertCurEventGets(rt.Ctx, prods, busiGroupId, stime, etime, severity, dsIds, cates, query, limit, ginx.Offset(c, limit))
total, err := models.AlertCurEventTotal(rt.Ctx, prods, bgids, stime, etime, severity, dsIds,
cates, ruleId, query, eventIds)
ginx.Dangerous(err)
list, err := models.AlertCurEventsGet(rt.Ctx, prods, bgids, stime, etime, severity, dsIds,
cates, ruleId, query, limit, ginx.Offset(c, limit), eventIds)
ginx.Dangerous(err)
cache := make(map[int64]*models.UserGroup)
for i := 0; i < len(list); i++ {
list[i].FillNotifyGroups(rt.Ctx, cache)
}
@@ -197,7 +221,9 @@ func (rt *Router) checkCurEventBusiGroupRWPermission(c *gin.Context, ids []int64
for i := 0; i < len(ids); i++ {
event, err := models.AlertCurEventGetById(rt.Ctx, ids[i])
ginx.Dangerous(err)
if event == nil {
continue
}
if _, has := set[event.GroupId]; !has {
rt.bgrwCheck(c, event.GroupId)
set[event.GroupId] = struct{}{}
@@ -207,17 +233,196 @@ func (rt *Router) checkCurEventBusiGroupRWPermission(c *gin.Context, ids []int64
func (rt *Router) alertCurEventGet(c *gin.Context) {
eid := ginx.UrlParamInt64(c, "eid")
event, err := models.AlertCurEventGetById(rt.Ctx, eid)
ginx.Dangerous(err)
event, err := GetCurEventDetail(rt.Ctx, eid)
if event == nil {
ginx.Bomb(404, "No such active event")
hasPermission := HasPermission(rt.Ctx, c, "event", fmt.Sprintf("%d", eid), rt.Center.AnonymousAccess.AlertDetail)
if !hasPermission {
rt.auth()(c)
rt.user()(c)
rt.bgroCheck(c, event.GroupId)
}
ginx.NewRender(c).Data(event, nil)
ginx.NewRender(c).Data(event, err)
}
func GetCurEventDetail(ctx *ctx.Context, eid int64) (*models.AlertCurEvent, error) {
event, err := models.AlertCurEventGetById(ctx, eid)
if err != nil {
return nil, err
}
if event == nil {
return nil, fmt.Errorf("no such active event")
}
ruleConfig, needReset := models.FillRuleConfigTplName(ctx, event.RuleConfig)
if needReset {
event.RuleConfigJson = ruleConfig
}
event.LastEvalTime = event.TriggerTime
event.NotifyVersion, err = GetEventNotifyVersion(ctx, event.RuleId, event.NotifyRuleIds)
ginx.Dangerous(err)
event.NotifyRules, err = GetEventNotifyRuleNames(ctx, event.NotifyRuleIds)
return event, err
}
func GetEventNotifyRuleNames(ctx *ctx.Context, notifyRuleIds []int64) ([]*models.EventNotifyRule, error) {
notifyRuleNames := make([]*models.EventNotifyRule, 0)
notifyRules, err := models.NotifyRulesGet(ctx, "id in ?", notifyRuleIds)
if err != nil {
return nil, err
}
for _, notifyRule := range notifyRules {
notifyRuleNames = append(notifyRuleNames, &models.EventNotifyRule{
Id: notifyRule.ID,
Name: notifyRule.Name,
})
}
return notifyRuleNames, nil
}
func GetEventNotifyVersion(ctx *ctx.Context, ruleId int64, notifyRuleIds []int64) (int, error) {
if len(notifyRuleIds) != 0 {
// 如果存在 notify_rule_ids则认为使用新的告警通知方式
return 1, nil
}
rule, err := models.AlertRuleGetById(ctx, ruleId)
if err != nil {
return 0, err
}
return rule.NotifyVersion, nil
}
func (rt *Router) alertCurEventsStatistics(c *gin.Context) {
ginx.NewRender(c).Data(models.AlertCurEventStatistics(rt.Ctx, time.Now()), nil)
}
func (rt *Router) alertCurEventDelByHash(c *gin.Context) {
hash := ginx.QueryStr(c, "hash")
ginx.NewRender(c).Message(models.AlertCurEventDelByHash(rt.Ctx, hash))
}
func (rt *Router) eventTagKeys(c *gin.Context) {
// 获取最近1天的活跃告警事件
now := time.Now().Unix()
stime := now - 24*3600
etime := now
// 获取用户可见的业务组ID列表
bgids, err := GetBusinessGroupIds(c, rt.Ctx, rt.Center.EventHistoryGroupView, false)
if err != nil {
logger.Warningf("failed to get business group ids: %v", err)
ginx.NewRender(c).Data([]string{"ident", "app", "service", "instance"}, nil)
return
}
// 查询活跃告警事件,限制数量以提高性能
events, err := models.AlertCurEventsGet(rt.Ctx, []string{}, bgids, stime, etime, []int64{}, []int64{}, []string{}, 0, "", 200, 0, []int64{})
if err != nil {
logger.Warningf("failed to get current alert events: %v", err)
ginx.NewRender(c).Data([]string{"ident", "app", "service", "instance"}, nil)
return
}
// 如果没有查到事件,返回默认标签
if len(events) == 0 {
ginx.NewRender(c).Data([]string{"ident", "app", "service", "instance"}, nil)
return
}
// 收集所有标签键并去重
tagKeys := make(map[string]struct{})
for _, event := range events {
for key := range event.TagsMap {
tagKeys[key] = struct{}{}
}
}
// 转换为字符串切片
var result []string
for key := range tagKeys {
result = append(result, key)
}
// 如果没有收集到任何标签键,返回默认值
if len(result) == 0 {
result = []string{"ident", "app", "service", "instance"}
}
ginx.NewRender(c).Data(result, nil)
}
func (rt *Router) eventTagValues(c *gin.Context) {
// 获取标签key
tagKey := ginx.QueryStr(c, "key")
// 获取最近1天的活跃告警事件
now := time.Now().Unix()
stime := now - 24*3600
etime := now
// 获取用户可见的业务组ID列表
bgids, err := GetBusinessGroupIds(c, rt.Ctx, rt.Center.EventHistoryGroupView, false)
if err != nil {
logger.Warningf("failed to get business group ids: %v", err)
ginx.NewRender(c).Data([]string{}, nil)
return
}
// 查询活跃告警事件,获取更多数据以保证统计准确性
events, err := models.AlertCurEventsGet(rt.Ctx, []string{}, bgids, stime, etime, []int64{}, []int64{}, []string{}, 0, "", 1000, 0, []int64{})
if err != nil {
logger.Warningf("failed to get current alert events: %v", err)
ginx.NewRender(c).Data([]string{}, nil)
return
}
// 如果没有查到事件,返回空数组
if len(events) == 0 {
ginx.NewRender(c).Data([]string{}, nil)
return
}
// 统计标签值出现次数
valueCount := make(map[string]int)
for _, event := range events {
// TagsMap已经在AlertCurEventsGet中处理直接使用
if value, exists := event.TagsMap[tagKey]; exists && value != "" {
valueCount[value]++
}
}
// 转换为切片并按出现次数降序排序
type tagValue struct {
value string
count int
}
tagValues := make([]tagValue, 0, len(valueCount))
for value, count := range valueCount {
tagValues = append(tagValues, tagValue{value, count})
}
// 按出现次数降序排序
sort.Slice(tagValues, func(i, j int) bool {
return tagValues[i].count > tagValues[j].count
})
// 只取Top20并转换为字符串数组
limit := 20
if len(tagValues) < limit {
limit = len(tagValues)
}
result := make([]string, 0, limit)
for i := 0; i < limit; i++ {
result = append(result, tagValues[i].value)
}
ginx.NewRender(c).Data(result, nil)
}

View File

@@ -1,13 +1,18 @@
package router
import (
"fmt"
"net/http"
"strings"
"time"
"github.com/ccfos/nightingale/v6/models"
"github.com/ccfos/nightingale/v6/pkg/ctx"
"github.com/gin-gonic/gin"
"github.com/toolkits/pkg/ginx"
"github.com/toolkits/pkg/logger"
"golang.org/x/exp/slices"
)
func getTimeRange(c *gin.Context) (stime, etime int64) {
@@ -33,7 +38,6 @@ func (rt *Router) alertHisEventsList(c *gin.Context) {
recovered := ginx.QueryInt(c, "is_recovered", -1)
query := ginx.QueryStr(c, "query", "")
limit := ginx.QueryInt(c, "limit", 20)
busiGroupId := ginx.QueryInt64(c, "bgid", 0)
dsIds := queryDatasourceIds(c)
prod := ginx.QueryStr(c, "prods", "")
@@ -52,10 +56,17 @@ func (rt *Router) alertHisEventsList(c *gin.Context) {
cates = strings.Split(cate, ",")
}
total, err := models.AlertHisEventTotal(rt.Ctx, prods, busiGroupId, stime, etime, severity, recovered, dsIds, cates, query)
ruleId := ginx.QueryInt64(c, "rid", 0)
bgids, err := GetBusinessGroupIds(c, rt.Ctx, rt.Center.EventHistoryGroupView, false)
ginx.Dangerous(err)
list, err := models.AlertHisEventGets(rt.Ctx, prods, busiGroupId, stime, etime, severity, recovered, dsIds, cates, query, limit, ginx.Offset(c, limit))
total, err := models.AlertHisEventTotal(rt.Ctx, prods, bgids, stime, etime, severity,
recovered, dsIds, cates, ruleId, query, []int64{})
ginx.Dangerous(err)
list, err := models.AlertHisEventGets(rt.Ctx, prods, bgids, stime, etime, severity, recovered,
dsIds, cates, ruleId, query, limit, ginx.Offset(c, limit), []int64{})
ginx.Dangerous(err)
cache := make(map[int64]*models.UserGroup)
@@ -69,14 +80,123 @@ func (rt *Router) alertHisEventsList(c *gin.Context) {
}, nil)
}
type alertHisEventsDeleteForm struct {
Severities []int `json:"severities"`
Timestamp int64 `json:"timestamp" binding:"required"`
}
func (rt *Router) alertHisEventsDelete(c *gin.Context) {
var f alertHisEventsDeleteForm
ginx.BindJSON(c, &f)
// 校验
if f.Timestamp == 0 {
ginx.Bomb(http.StatusBadRequest, "timestamp parameter is required")
return
}
user := c.MustGet("user").(*models.User)
// 启动后台清理任务
go func() {
limit := 100
for {
n, err := models.AlertHisEventBatchDelete(rt.Ctx, f.Timestamp, f.Severities, limit)
if err != nil {
logger.Errorf("Failed to delete alert history events: operator=%s, timestamp=%d, severities=%v, error=%v",
user.Username, f.Timestamp, f.Severities, err)
break
}
logger.Debugf("Successfully deleted alert history events: operator=%s, timestamp=%d, severities=%v, deleted=%d",
user.Username, f.Timestamp, f.Severities, n)
if n < int64(limit) {
break // 已经删完
}
time.Sleep(100 * time.Millisecond) // 防止锁表
}
}()
ginx.NewRender(c).Data("Alert history events deletion started", nil)
}
var TransferEventToCur func(*ctx.Context, *models.AlertHisEvent) *models.AlertCurEvent
func init() {
TransferEventToCur = transferEventToCur
}
func transferEventToCur(ctx *ctx.Context, event *models.AlertHisEvent) *models.AlertCurEvent {
cur := event.ToCur()
return cur
}
func (rt *Router) alertHisEventGet(c *gin.Context) {
eid := ginx.UrlParamInt64(c, "eid")
event, err := models.AlertHisEventGetById(rt.Ctx, eid)
ginx.Dangerous(err)
if event == nil {
ginx.Bomb(404, "No such alert event")
}
ginx.NewRender(c).Data(event, err)
hasPermission := HasPermission(rt.Ctx, c, "event", fmt.Sprintf("%d", eid), rt.Center.AnonymousAccess.AlertDetail)
if !hasPermission {
rt.auth()(c)
rt.user()(c)
rt.bgroCheck(c, event.GroupId)
}
ruleConfig, needReset := models.FillRuleConfigTplName(rt.Ctx, event.RuleConfig)
if needReset {
event.RuleConfigJson = ruleConfig
}
event.NotifyVersion, err = GetEventNotifyVersion(rt.Ctx, event.RuleId, event.NotifyRuleIds)
ginx.Dangerous(err)
event.NotifyRules, err = GetEventNotifyRuleNames(rt.Ctx, event.NotifyRuleIds)
ginx.NewRender(c).Data(TransferEventToCur(rt.Ctx, event), err)
}
func GetBusinessGroupIds(c *gin.Context, ctx *ctx.Context, onlySelfGroupView bool, myGroups bool) ([]int64, error) {
bgid := ginx.QueryInt64(c, "bgid", 0)
var bgids []int64
if strings.HasPrefix(c.Request.URL.Path, "/v1") {
// 如果请求路径以 /v1 开头,不查询用户信息
if bgid > 0 {
return []int64{bgid}, nil
}
return bgids, nil
}
user := c.MustGet("user").(*models.User)
if myGroups || (onlySelfGroupView && !user.IsAdmin()) {
// 1. 页面上勾选了我的业务组,需要查询用户所属的业务组
// 2. 如果 onlySelfGroupView 为 true表示只允许查询用户所属的业务组
bussGroupIds, err := models.MyBusiGroupIds(ctx, user.Id)
if err != nil {
return nil, err
}
if len(bussGroupIds) == 0 {
// 如果没查到用户属于任何业务组需要返回一个0否则会导致查询到全部告警历史
return []int64{0}, nil
}
if bgid > 0 {
if !slices.Contains(bussGroupIds, bgid) && !user.IsAdmin() {
return nil, fmt.Errorf("business group ID not allowed")
}
return []int64{bgid}, nil
}
return bussGroupIds, nil
}
if bgid > 0 {
return []int64{bgid}, nil
}
return bgids, nil
}

View File

@@ -1,18 +1,32 @@
package router
import (
"encoding/json"
"fmt"
"net/http"
"regexp"
"strconv"
"strings"
"time"
"gopkg.in/yaml.v2"
"github.com/ccfos/nightingale/v6/alert/mute"
"github.com/ccfos/nightingale/v6/models"
"github.com/ccfos/nightingale/v6/pkg/strx"
"github.com/ccfos/nightingale/v6/pushgw/pconf"
"github.com/ccfos/nightingale/v6/pushgw/writer"
"github.com/gin-gonic/gin"
"github.com/jinzhu/copier"
"github.com/pkg/errors"
"github.com/prometheus/prometheus/prompb"
"github.com/toolkits/pkg/ginx"
"github.com/toolkits/pkg/i18n"
)
type AlertRuleModifyHookFunc func(ar *models.AlertRule)
// Return all, front-end search and paging
func (rt *Router) alertRuleGets(c *gin.Context) {
busiGroupId := ginx.UrlParamInt64(c, "id")
@@ -21,7 +35,74 @@ func (rt *Router) alertRuleGets(c *gin.Context) {
cache := make(map[int64]*models.UserGroup)
for i := 0; i < len(ars); i++ {
ars[i].FillNotifyGroups(rt.Ctx, cache)
ars[i].FillSeverities()
}
}
ginx.NewRender(c).Data(ars, err)
}
func GetAlertCueEventTimeRange(c *gin.Context) (stime, etime int64) {
stime = ginx.QueryInt64(c, "stime", 0)
etime = ginx.QueryInt64(c, "etime", 0)
if etime == 0 {
etime = time.Now().Unix()
}
if stime == 0 || stime >= etime {
stime = etime - 30*24*int64(time.Hour.Seconds())
}
return
}
func (rt *Router) alertRuleGetsByGids(c *gin.Context) {
gids := strx.IdsInt64ForAPI(ginx.QueryStr(c, "gids", ""), ",")
if len(gids) > 0 {
for _, gid := range gids {
rt.bgroCheck(c, gid)
}
} else {
me := c.MustGet("user").(*models.User)
if !me.IsAdmin() {
var err error
gids, err = models.MyBusiGroupIds(rt.Ctx, me.Id)
ginx.Dangerous(err)
if len(gids) == 0 {
ginx.NewRender(c).Data([]int{}, nil)
return
}
}
}
ars, err := models.AlertRuleGetsByBGIds(rt.Ctx, gids)
if err == nil {
cache := make(map[int64]*models.UserGroup)
rids := make([]int64, 0, len(ars))
names := make([]string, 0, len(ars))
for i := 0; i < len(ars); i++ {
ars[i].FillNotifyGroups(rt.Ctx, cache)
if len(ars[i].DatasourceQueries) != 0 {
ars[i].DatasourceIdsJson = rt.DatasourceCache.GetIDsByDsCateAndQueries(ars[i].Cate, ars[i].DatasourceQueries)
}
rids = append(rids, ars[i].Id)
names = append(names, ars[i].UpdateBy)
}
stime, etime := GetAlertCueEventTimeRange(c)
cnt := models.AlertCurEventCountByRuleId(rt.Ctx, rids, stime, etime)
if cnt != nil {
for i := 0; i < len(ars); i++ {
ars[i].CurEventCount = cnt[ars[i].Id]
}
}
users := models.UserMapGet(rt.Ctx, "username in (?)", names)
if users != nil {
for i := 0; i < len(ars); i++ {
if user, exist := users[ars[i].UpdateBy]; exist {
ars[i].UpdateByNickname = user.Nickname
}
}
}
}
ginx.NewRender(c).Data(ars, err)
@@ -49,6 +130,10 @@ func (rt *Router) alertRulesGetByService(c *gin.Context) {
cache := make(map[int64]*models.UserGroup)
for i := 0; i < len(ars); i++ {
ars[i].FillNotifyGroups(rt.Ctx, cache)
if len(ars[i].DatasourceQueries) != 0 {
ars[i].DatasourceIdsJson = rt.DatasourceCache.GetIDsByDsCateAndQueries(ars[i].Cate, ars[i].DatasourceQueries)
}
}
}
ginx.NewRender(c).Data(ars, err)
@@ -72,6 +157,120 @@ func (rt *Router) alertRuleAddByFE(c *gin.Context) {
ginx.NewRender(c).Data(reterr, nil)
}
type AlertRuleTryRunForm struct {
EventId int64 `json:"event_id" binding:"required"`
AlertRuleConfig models.AlertRule `json:"config" binding:"required"`
}
func (rt *Router) alertRuleNotifyTryRun(c *gin.Context) {
// check notify channels of old version
var f AlertRuleTryRunForm
ginx.BindJSON(c, &f)
hisEvent, err := models.AlertHisEventGetById(rt.Ctx, f.EventId)
ginx.Dangerous(err)
if hisEvent == nil {
ginx.Bomb(http.StatusNotFound, "event not found")
}
curEvent := *hisEvent.ToCur()
curEvent.SetTagsMap()
if f.AlertRuleConfig.NotifyVersion == 1 {
for _, id := range f.AlertRuleConfig.NotifyRuleIds {
notifyRule, err := models.GetNotifyRule(rt.Ctx, id)
ginx.Dangerous(err)
for _, notifyConfig := range notifyRule.NotifyConfigs {
_, err = SendNotifyChannelMessage(rt.Ctx, rt.UserCache, rt.UserGroupCache, notifyConfig, []*models.AlertCurEvent{&curEvent})
ginx.Dangerous(err)
}
}
ginx.NewRender(c).Data("notification test ok", nil)
return
}
if len(f.AlertRuleConfig.NotifyChannelsJSON) == 0 {
ginx.Bomb(http.StatusOK, "no notify channels selected")
}
if len(f.AlertRuleConfig.NotifyGroupsJSON) == 0 {
ginx.Bomb(http.StatusOK, "no notify groups selected")
}
ancs := make([]string, 0, len(curEvent.NotifyChannelsJSON))
ugids := f.AlertRuleConfig.NotifyGroupsJSON
ngids := make([]int64, 0)
for i := 0; i < len(ugids); i++ {
if gid, err := strconv.ParseInt(ugids[i], 10, 64); err == nil {
ngids = append(ngids, gid)
}
}
userGroups := rt.UserGroupCache.GetByUserGroupIds(ngids)
uids := make([]int64, 0)
for i := range userGroups {
uids = append(uids, userGroups[i].UserIds...)
}
users := rt.UserCache.GetByUserIds(uids)
for _, NotifyChannels := range curEvent.NotifyChannelsJSON {
flag := true
// ignore non-default channels
switch NotifyChannels {
case models.Dingtalk, models.Wecom, models.Feishu, models.Mm,
models.Telegram, models.Email, models.FeishuCard:
// do nothing
default:
continue
}
// default channels
for ui := range users {
if _, b := users[ui].ExtractToken(NotifyChannels); b {
flag = false
break
}
}
if flag {
ancs = append(ancs, NotifyChannels)
}
}
if len(ancs) > 0 {
ginx.Dangerous(errors.New(fmt.Sprintf("All users are missing notify channel configurations. Please check for missing tokens (each channel should be configured with at least one user). %v", ancs)))
}
ginx.NewRender(c).Data("notification test ok", nil)
}
func (rt *Router) alertRuleEnableTryRun(c *gin.Context) {
// check notify channels of old version
var f AlertRuleTryRunForm
ginx.BindJSON(c, &f)
hisEvent, err := models.AlertHisEventGetById(rt.Ctx, f.EventId)
ginx.Dangerous(err)
if hisEvent == nil {
ginx.Bomb(http.StatusNotFound, "event not found")
}
curEvent := *hisEvent.ToCur()
curEvent.SetTagsMap()
if f.AlertRuleConfig.Disabled == 1 {
ginx.Bomb(http.StatusOK, "rule is disabled")
}
if mute.TimeSpanMuteStrategy(&f.AlertRuleConfig, &curEvent) {
ginx.Bomb(http.StatusOK, "event is not match for period of time")
}
if mute.BgNotMatchMuteStrategy(&f.AlertRuleConfig, &curEvent, rt.TargetCache) {
ginx.Bomb(http.StatusOK, "event target busi group not match rule busi group")
}
ginx.NewRender(c).Data("event is effective", nil)
}
func (rt *Router) alertRuleAddByImport(c *gin.Context) {
username := c.MustGet("username").(string)
@@ -83,12 +282,90 @@ func (rt *Router) alertRuleAddByImport(c *gin.Context) {
ginx.Bomb(http.StatusBadRequest, "input json is empty")
}
for i := range lst {
if len(lst[i].DatasourceQueries) == 0 {
lst[i].DatasourceQueries = []models.DatasourceQuery{
models.DataSourceQueryAll,
}
}
// 将导入的规则统一转为新版本的通知规则配置
lst[i].NotifyVersion = 1
lst[i].NotifyChannelsJSON = []string{}
lst[i].NotifyGroupsJSON = []string{}
lst[i].NotifyChannels = ""
lst[i].NotifyGroups = ""
lst[i].Callbacks = ""
lst[i].CallbacksJSON = []string{}
}
bgid := ginx.UrlParamInt64(c, "id")
reterr := rt.alertRuleAdd(lst, username, bgid, c.GetHeader("X-Language"))
ginx.NewRender(c).Data(reterr, nil)
}
type promRuleForm struct {
Payload string `json:"payload" binding:"required"`
DatasourceQueries []models.DatasourceQuery `json:"datasource_queries" binding:"required"`
Disabled int `json:"disabled" binding:"gte=0,lte=1"`
}
func (rt *Router) alertRuleAddByImportPromRule(c *gin.Context) {
var f promRuleForm
ginx.Dangerous(c.BindJSON(&f))
// 首先尝试解析带 groups 的格式
var pr struct {
Groups []models.PromRuleGroup `yaml:"groups"`
}
err := yaml.Unmarshal([]byte(f.Payload), &pr)
var groups []models.PromRuleGroup
if err != nil || len(pr.Groups) == 0 {
// 如果解析失败或没有 groups尝试解析规则数组格式
var rules []models.PromRule
err = yaml.Unmarshal([]byte(f.Payload), &rules)
if err != nil {
// 最后尝试解析单个规则格式
var singleRule models.PromRule
err = yaml.Unmarshal([]byte(f.Payload), &singleRule)
if err != nil {
ginx.Bomb(http.StatusBadRequest, "invalid yaml format. err: %v", err)
}
// 验证单个规则是否有效
if singleRule.Alert == "" && singleRule.Record == "" {
ginx.Bomb(http.StatusBadRequest, "input yaml is empty or invalid")
}
rules = []models.PromRule{singleRule}
}
// 验证规则数组是否为空
if len(rules) == 0 {
ginx.Bomb(http.StatusBadRequest, "input yaml contains no rules")
}
// 将规则数组包装成 group
groups = []models.PromRuleGroup{
{
Name: "imported_rules",
Rules: rules,
},
}
} else {
// 使用已解析的 groups
groups = pr.Groups
}
lst := models.DealPromGroup(groups, f.DatasourceQueries, f.Disabled)
username := c.MustGet("username").(string)
bgid := ginx.UrlParamInt64(c, "id")
ginx.NewRender(c).Data(rt.alertRuleAdd(lst, username, bgid, c.GetHeader("X-Language")), nil)
}
func (rt *Router) alertRuleAddByService(c *gin.Context) {
var lst []models.AlertRule
ginx.BindJSON(c, &lst)
@@ -101,6 +378,17 @@ func (rt *Router) alertRuleAddByService(c *gin.Context) {
ginx.NewRender(c).Data(reterr, nil)
}
func (rt *Router) alertRuleAddOneByService(c *gin.Context) {
var f models.AlertRule
ginx.BindJSON(c, &f)
err := f.FE2DB()
ginx.Dangerous(err)
err = f.Add(rt.Ctx)
ginx.NewRender(c).Data(f.Id, err)
}
func (rt *Router) alertRuleAddForService(lst []models.AlertRule, username string) map[string]string {
count := len(lst)
// alert rule name -> error string
@@ -217,8 +505,8 @@ func (rt *Router) alertRulePutFields(c *gin.Context) {
ginx.Bomb(http.StatusBadRequest, "fields empty")
}
f.Fields["update_by"] = c.MustGet("username").(string)
f.Fields["update_at"] = time.Now().Unix()
updateBy := c.MustGet("username").(string)
updateAt := time.Now().Unix()
for i := 0; i < len(f.Ids); i++ {
ar, err := models.AlertRuleGetById(rt.Ctx, f.Ids[i])
@@ -228,13 +516,46 @@ func (rt *Router) alertRulePutFields(c *gin.Context) {
continue
}
if f.Action == "update_triggers" {
if triggers, has := f.Fields["triggers"]; has {
originRule := ar.RuleConfigJson.(map[string]interface{})
originRule["triggers"] = triggers
b, err := json.Marshal(originRule)
ginx.Dangerous(err)
ginx.Dangerous(ar.UpdateFieldsMap(rt.Ctx, map[string]interface{}{"rule_config": string(b)}))
}
}
if f.Action == "annotations_add" {
if annotations, has := f.Fields["annotations"]; has {
annotationsMap := annotations.(map[string]interface{})
for k, v := range annotationsMap {
ar.AnnotationsJSON[k] = v.(string)
}
b, err := json.Marshal(ar.AnnotationsJSON)
ginx.Dangerous(err)
ginx.Dangerous(ar.UpdateFieldsMap(rt.Ctx, map[string]interface{}{"annotations": string(b)}))
}
}
if f.Action == "annotations_del" {
if annotations, has := f.Fields["annotations"]; has {
annotationsKeys := annotations.(map[string]interface{})
for key := range annotationsKeys {
delete(ar.AnnotationsJSON, key)
}
b, err := json.Marshal(ar.AnnotationsJSON)
ginx.Dangerous(err)
ginx.Dangerous(ar.UpdateFieldsMap(rt.Ctx, map[string]interface{}{"annotations": string(b)}))
}
}
if f.Action == "callback_add" {
// 增加一个 callback 地址
if callbacks, has := f.Fields["callbacks"]; has {
callback := callbacks.(string)
if !strings.Contains(ar.Callbacks, callback) {
ginx.Dangerous(ar.UpdateFieldsMap(rt.Ctx, map[string]interface{}{"callbacks": ar.Callbacks + " " + callback}))
continue
}
}
}
@@ -244,13 +565,36 @@ func (rt *Router) alertRulePutFields(c *gin.Context) {
if callbacks, has := f.Fields["callbacks"]; has {
callback := callbacks.(string)
ginx.Dangerous(ar.UpdateFieldsMap(rt.Ctx, map[string]interface{}{"callbacks": strings.ReplaceAll(ar.Callbacks, callback, "")}))
continue
}
}
if f.Action == "datasource_change" {
// 修改数据源
if datasourceQueries, has := f.Fields["datasource_queries"]; has {
bytes, err := json.Marshal(datasourceQueries)
ginx.Dangerous(err)
ginx.Dangerous(ar.UpdateFieldsMap(rt.Ctx, map[string]interface{}{"datasource_queries": bytes}))
}
}
for k, v := range f.Fields {
ginx.Dangerous(ar.UpdateColumn(rt.Ctx, k, v))
// 检查 v 是否为各种切片类型
switch v.(type) {
case []interface{}, []int64, []int, []string:
// 将切片转换为 JSON 字符串
bytes, err := json.Marshal(v)
ginx.Dangerous(err)
ginx.Dangerous(ar.UpdateColumn(rt.Ctx, k, string(bytes)))
default:
ginx.Dangerous(ar.UpdateColumn(rt.Ctx, k, v))
}
}
// 统一更新更新时间和更新人,只有更新时间变了,告警规则才会被引擎拉取
ginx.Dangerous(ar.UpdateFieldsMap(rt.Ctx, map[string]interface{}{
"update_by": updateBy,
"update_at": updateAt,
}))
}
ginx.NewRender(c).Message(nil)
@@ -267,9 +611,28 @@ func (rt *Router) alertRuleGet(c *gin.Context) {
return
}
if len(ar.DatasourceQueries) != 0 {
ar.DatasourceIdsJson = rt.DatasourceCache.GetIDsByDsCateAndQueries(ar.Cate, ar.DatasourceQueries)
}
err = ar.FillNotifyGroups(rt.Ctx, make(map[int64]*models.UserGroup))
ginx.Dangerous(err)
rt.AlertRuleModifyHook(ar)
ginx.NewRender(c).Data(ar, err)
}
func (rt *Router) alertRulePureGet(c *gin.Context) {
arid := ginx.UrlParamInt64(c, "arid")
ar, err := models.AlertRuleGetById(rt.Ctx, arid)
ginx.Dangerous(err)
if ar == nil {
ginx.NewRender(c, http.StatusNotFound).Message("No such AlertRule")
return
}
ginx.NewRender(c).Data(ar, err)
}
@@ -323,3 +686,206 @@ func (rt *Router) alertRuleValidation(c *gin.Context) {
ginx.NewRender(c).Message("")
}
func (rt *Router) alertRuleCallbacks(c *gin.Context) {
user := c.MustGet("user").(*models.User)
bussGroupIds, err := models.MyBusiGroupIds(rt.Ctx, user.Id)
ginx.Dangerous(err)
ars, err := models.AlertRuleGetsByBGIds(rt.Ctx, bussGroupIds)
ginx.Dangerous(err)
var callbacks []string
callbackFilter := make(map[string]struct{})
for i := range ars {
for _, callback := range ars[i].CallbacksJSON {
if _, ok := callbackFilter[callback]; !ok {
callbackFilter[callback] = struct{}{}
callbacks = append(callbacks, callback)
}
}
}
ginx.NewRender(c).Data(callbacks, nil)
}
type alertRuleTestForm struct {
Configs []*pconf.RelabelConfig `json:"configs"`
Tags []string `json:"tags"`
}
func (rt *Router) relabelTest(c *gin.Context) {
var f alertRuleTestForm
ginx.BindJSON(c, &f)
if len(f.Tags) == 0 || len(f.Configs) == 0 {
ginx.Bomb(http.StatusBadRequest, "relabel config is empty")
}
labels := make([]prompb.Label, len(f.Tags))
for i, tag := range f.Tags {
label := strings.SplitN(tag, "=", 2)
if len(label) != 2 {
ginx.Bomb(http.StatusBadRequest, "tag:%s format error", tag)
}
labels[i] = prompb.Label{Name: label[0], Value: label[1]}
}
for i := 0; i < len(f.Configs); i++ {
if f.Configs[i].Replacement == "" {
f.Configs[i].Replacement = "$1"
}
if f.Configs[i].Separator == "" {
f.Configs[i].Separator = ";"
}
if f.Configs[i].Regex == "" {
f.Configs[i].Regex = "(.*)"
}
}
relabels := writer.Process(labels, f.Configs...)
var tags []string
for _, label := range relabels {
tags = append(tags, fmt.Sprintf("%s=%s", label.Name, label.Value))
}
ginx.NewRender(c).Data(tags, nil)
}
type identListForm struct {
Ids []int64 `json:"ids"`
IdentList []string `json:"ident_list"`
}
func containsIdentOperator(s string) bool {
pattern := `ident\s*(!=|!~|=~)`
matched, err := regexp.MatchString(pattern, s)
if err != nil {
return false
}
return matched
}
func (rt *Router) cloneToMachine(c *gin.Context) {
var f identListForm
ginx.BindJSON(c, &f)
if len(f.IdentList) == 0 {
ginx.Bomb(http.StatusBadRequest, "ident_list is empty")
}
alertRules, err := models.AlertRuleGetsByIds(rt.Ctx, f.Ids)
ginx.Dangerous(err)
re := regexp.MustCompile(`ident\s*=\s*\\".*?\\"`)
user := c.MustGet("username").(string)
now := time.Now().Unix()
newRules := make([]*models.AlertRule, 0)
reterr := make(map[string]map[string]string)
for i := range alertRules {
errMsg := make(map[string]string)
if alertRules[i].Cate != "prometheus" {
errMsg["all"] = "Only Prometheus rule can be cloned to machines"
reterr[alertRules[i].Name] = errMsg
continue
}
if containsIdentOperator(alertRules[i].RuleConfig) {
errMsg["all"] = "promql is missing ident"
reterr[alertRules[i].Name] = errMsg
continue
}
for j := range f.IdentList {
alertRules[i].RuleConfig = re.ReplaceAllString(alertRules[i].RuleConfig, fmt.Sprintf(`ident=\"%s\"`, f.IdentList[j]))
newRule := &models.AlertRule{}
if err := copier.Copy(newRule, alertRules[i]); err != nil {
errMsg[f.IdentList[j]] = fmt.Sprintf("fail to clone rule, err: %s", err)
continue
}
newRule.Id = 0
newRule.Name = alertRules[i].Name + "_" + f.IdentList[j]
newRule.CreateBy = user
newRule.UpdateBy = user
newRule.UpdateAt = now
newRule.CreateAt = now
newRule.RuleConfig = alertRules[i].RuleConfig
exist, err := models.AlertRuleExists(rt.Ctx, 0, newRule.GroupId, newRule.Name)
if err != nil {
errMsg[f.IdentList[j]] = err.Error()
continue
}
if exist {
errMsg[f.IdentList[j]] = fmt.Sprintf("rule already exists, ruleName: %s", newRule.Name)
continue
}
newRules = append(newRules, newRule)
}
if len(errMsg) > 0 {
reterr[alertRules[i].Name] = errMsg
}
}
ginx.NewRender(c).Data(reterr, models.InsertAlertRule(rt.Ctx, newRules))
}
type alertBatchCloneForm struct {
RuleIds []int64 `json:"rule_ids"`
Bgids []int64 `json:"bgids"`
}
// 批量克隆告警规则
func (rt *Router) batchAlertRuleClone(c *gin.Context) {
me := c.MustGet("user").(*models.User)
var f alertBatchCloneForm
ginx.BindJSON(c, &f)
// 校验 bgids 操作权限
for _, bgid := range f.Bgids {
rt.bgrwCheck(c, bgid)
}
reterr := make(map[string]string, len(f.RuleIds))
lang := c.GetHeader("X-Language")
for _, arid := range f.RuleIds {
ar, err := models.AlertRuleGetById(rt.Ctx, arid)
for _, bgid := range f.Bgids {
// 为了让 bgid 和 arid 对应,将上面的 err 放到这里处理
if err != nil {
reterr[fmt.Sprintf("%d-%d", arid, bgid)] = i18n.Sprintf(lang, err.Error())
continue
}
if ar == nil {
reterr[fmt.Sprintf("%d-%d", arid, bgid)] = i18n.Sprintf(lang, "alert rule not found")
continue
}
newAr := ar.Clone(me.Username, bgid)
err = newAr.Add(rt.Ctx)
if err != nil {
reterr[fmt.Sprintf("%d-%d", arid, bgid)] = i18n.Sprintf(lang, err.Error())
continue
}
}
}
ginx.NewRender(c).Data(reterr, nil)
}

View File

@@ -2,12 +2,17 @@ package router
import (
"net/http"
"strconv"
"strings"
"time"
"github.com/ccfos/nightingale/v6/alert/common"
"github.com/ccfos/nightingale/v6/models"
"github.com/ccfos/nightingale/v6/pkg/strx"
"github.com/gin-gonic/gin"
"github.com/toolkits/pkg/ginx"
"github.com/toolkits/pkg/i18n"
)
// Return all, front-end search and paging
@@ -21,7 +26,43 @@ func (rt *Router) alertSubscribeGets(c *gin.Context) {
for i := 0; i < len(lst); i++ {
ginx.Dangerous(lst[i].FillUserGroups(rt.Ctx, ugcache))
ginx.Dangerous(lst[i].FillRuleName(rt.Ctx, rulecache))
ginx.Dangerous(lst[i].FillRuleNames(rt.Ctx, rulecache))
ginx.Dangerous(lst[i].FillDatasourceIds(rt.Ctx))
ginx.Dangerous(lst[i].DB2FE())
}
ginx.NewRender(c).Data(lst, err)
}
func (rt *Router) alertSubscribeGetsByGids(c *gin.Context) {
gids := strx.IdsInt64ForAPI(ginx.QueryStr(c, "gids", ""), ",")
if len(gids) > 0 {
for _, gid := range gids {
rt.bgroCheck(c, gid)
}
} else {
me := c.MustGet("user").(*models.User)
if !me.IsAdmin() {
var err error
gids, err = models.MyBusiGroupIds(rt.Ctx, me.Id)
ginx.Dangerous(err)
if len(gids) == 0 {
ginx.NewRender(c).Data([]int{}, nil)
return
}
}
}
lst, err := models.AlertSubscribeGetsByBGIds(rt.Ctx, gids)
ginx.Dangerous(err)
ugcache := make(map[int64]*models.UserGroup)
rulecache := make(map[int64]string)
for i := 0; i < len(lst); i++ {
ginx.Dangerous(lst[i].FillUserGroups(rt.Ctx, ugcache))
ginx.Dangerous(lst[i].FillRuleNames(rt.Ctx, rulecache))
ginx.Dangerous(lst[i].FillDatasourceIds(rt.Ctx))
ginx.Dangerous(lst[i].DB2FE())
}
@@ -44,7 +85,7 @@ func (rt *Router) alertSubscribeGet(c *gin.Context) {
ginx.Dangerous(sub.FillUserGroups(rt.Ctx, ugcache))
rulecache := make(map[int64]string)
ginx.Dangerous(sub.FillRuleName(rt.Ctx, rulecache))
ginx.Dangerous(sub.FillRuleNames(rt.Ctx, rulecache))
ginx.Dangerous(sub.FillDatasourceIds(rt.Ctx))
ginx.Dangerous(sub.DB2FE())
@@ -67,6 +108,148 @@ func (rt *Router) alertSubscribeAdd(c *gin.Context) {
ginx.NewRender(c).Message(f.Add(rt.Ctx))
}
type SubscribeTryRunForm struct {
EventId int64 `json:"event_id" binding:"required"`
SubscribeConfig models.AlertSubscribe `json:"config" binding:"required"`
}
func (rt *Router) alertSubscribeTryRun(c *gin.Context) {
var f SubscribeTryRunForm
ginx.BindJSON(c, &f)
ginx.Dangerous(f.SubscribeConfig.Verify())
hisEvent, err := models.AlertHisEventGetById(rt.Ctx, f.EventId)
ginx.Dangerous(err)
if hisEvent == nil {
ginx.Bomb(http.StatusNotFound, "event not found")
}
curEvent := *hisEvent.ToCur()
curEvent.SetTagsMap()
lang := c.GetHeader("X-Language")
// 先判断匹配条件
if !f.SubscribeConfig.MatchCluster(curEvent.DatasourceId) {
ginx.Bomb(http.StatusBadRequest, i18n.Sprintf(lang, "event datasource not match"))
}
if len(f.SubscribeConfig.RuleIds) != 0 {
match := false
for _, rid := range f.SubscribeConfig.RuleIds {
if rid == curEvent.RuleId {
match = true
break
}
}
if !match {
ginx.Bomb(http.StatusBadRequest, i18n.Sprintf(lang, "event rule id not match"))
}
}
// 匹配 tag
f.SubscribeConfig.Parse()
if !common.MatchTags(curEvent.TagsMap, f.SubscribeConfig.ITags) {
ginx.Bomb(http.StatusBadRequest, i18n.Sprintf(lang, "event tags not match"))
}
// 匹配group name
if !common.MatchGroupsName(curEvent.GroupName, f.SubscribeConfig.IBusiGroups) {
ginx.Bomb(http.StatusBadRequest, i18n.Sprintf(lang, "event group name not match"))
}
// 检查严重级别Severity匹配
if len(f.SubscribeConfig.SeveritiesJson) != 0 {
match := false
for _, s := range f.SubscribeConfig.SeveritiesJson {
if s == curEvent.Severity || s == 0 {
match = true
break
}
}
if !match {
ginx.Bomb(http.StatusBadRequest, i18n.Sprintf(lang, "event severity not match"))
}
}
// 新版本通知规则
if f.SubscribeConfig.NotifyVersion == 1 {
if len(f.SubscribeConfig.NotifyRuleIds) == 0 {
ginx.Bomb(http.StatusBadRequest, i18n.Sprintf(lang, "no notify rules selected"))
}
for _, id := range f.SubscribeConfig.NotifyRuleIds {
notifyRule, err := models.GetNotifyRule(rt.Ctx, id)
if err != nil {
ginx.Bomb(http.StatusNotFound, i18n.Sprintf(lang, "subscribe notify rule not found: %v", err))
}
for _, notifyConfig := range notifyRule.NotifyConfigs {
_, err = SendNotifyChannelMessage(rt.Ctx, rt.UserCache, rt.UserGroupCache, notifyConfig, []*models.AlertCurEvent{&curEvent})
if err != nil {
ginx.Bomb(http.StatusBadRequest, i18n.Sprintf(lang, "notify rule send error: %v", err))
}
}
}
ginx.NewRender(c).Data(i18n.Sprintf(lang, "event match subscribe and notification test ok"), nil)
return
}
// 旧版通知方式
f.SubscribeConfig.ModifyEvent(&curEvent)
if len(curEvent.NotifyChannelsJSON) == 0 {
ginx.Bomb(http.StatusBadRequest, i18n.Sprintf(lang, "no notify channels selected"))
}
if len(curEvent.NotifyGroupsJSON) == 0 {
ginx.Bomb(http.StatusOK, i18n.Sprintf(lang, "no notify groups selected"))
}
ancs := make([]string, 0, len(curEvent.NotifyChannelsJSON))
ugids := strings.Fields(f.SubscribeConfig.UserGroupIds)
ngids := make([]int64, 0)
for i := 0; i < len(ugids); i++ {
if gid, err := strconv.ParseInt(ugids[i], 10, 64); err == nil {
ngids = append(ngids, gid)
}
}
userGroups := rt.UserGroupCache.GetByUserGroupIds(ngids)
uids := make([]int64, 0)
for i := range userGroups {
uids = append(uids, userGroups[i].UserIds...)
}
users := rt.UserCache.GetByUserIds(uids)
for _, NotifyChannels := range curEvent.NotifyChannelsJSON {
flag := true
// ignore non-default channels
switch NotifyChannels {
case models.Dingtalk, models.Wecom, models.Feishu, models.Mm,
models.Telegram, models.Email, models.FeishuCard:
// do nothing
default:
continue
}
// default channels
for ui := range users {
if _, b := users[ui].ExtractToken(NotifyChannels); b {
flag = false
break
}
}
if flag {
ancs = append(ancs, NotifyChannels)
}
}
if len(ancs) > 0 {
ginx.Bomb(http.StatusBadRequest, i18n.Sprintf(lang, "all users missing notify channel configurations: %v", ancs))
}
ginx.NewRender(c).Data(i18n.Sprintf(lang, "event match subscribe and notify settings ok"), nil)
}
func (rt *Router) alertSubscribePut(c *gin.Context) {
var fs []models.AlertSubscribe
ginx.BindJSON(c, &fs)
@@ -76,6 +259,9 @@ func (rt *Router) alertSubscribePut(c *gin.Context) {
for i := 0; i < len(fs); i++ {
fs[i].UpdateBy = username
fs[i].UpdateAt = timestamp
//After adding the function of batch subscription alert rules, rule_ids is used instead of rule_id.
//When the subscription rules are updated, set rule_id=0 to prevent the wrong subscription caused by the old rule_id.
fs[i].RuleId = 0
ginx.Dangerous(fs[i].Update(
rt.Ctx,
"name",
@@ -85,6 +271,7 @@ func (rt *Router) alertSubscribePut(c *gin.Context) {
"datasource_ids",
"cluster",
"rule_id",
"rule_ids",
"tags",
"redefine_severity",
"new_severity",
@@ -99,6 +286,9 @@ func (rt *Router) alertSubscribePut(c *gin.Context) {
"severities",
"extra_config",
"busi_groups",
"note",
"notify_rule_ids",
"notify_version",
))
}

View File

@@ -1,22 +1,27 @@
package router
import (
"fmt"
"net/http"
"time"
"github.com/ccfos/nightingale/v6/models"
"github.com/ccfos/nightingale/v6/pkg/strx"
"github.com/gin-gonic/gin"
"github.com/google/uuid"
"github.com/toolkits/pkg/ginx"
"github.com/toolkits/pkg/i18n"
)
type boardForm struct {
Name string `json:"name"`
Ident string `json:"ident"`
Tags string `json:"tags"`
Configs string `json:"configs"`
Public int `json:"public"`
Name string `json:"name"`
Ident string `json:"ident"`
Tags string `json:"tags"`
Note string `json:"note"`
Configs string `json:"configs"`
Public int `json:"public"`
PublicCate int `json:"public_cate"`
Bgids []int64 `json:"bgids"`
}
func (rt *Router) boardAdd(c *gin.Context) {
@@ -30,6 +35,7 @@ func (rt *Router) boardAdd(c *gin.Context) {
Name: f.Name,
Ident: f.Ident,
Tags: f.Tags,
Note: f.Note,
Configs: f.Configs,
CreateBy: me.Username,
UpdateBy: me.Username,
@@ -47,9 +53,14 @@ func (rt *Router) boardAdd(c *gin.Context) {
func (rt *Router) boardGet(c *gin.Context) {
bid := ginx.UrlParamStr(c, "bid")
board, err := models.BoardGet(rt.Ctx, "id = ? or ident = ?", bid, bid)
board, err := models.BoardGet(rt.Ctx, "ident = ?", bid)
ginx.Dangerous(err)
if board == nil {
board, err = models.BoardGet(rt.Ctx, "id = ?", bid)
ginx.Dangerous(err)
}
if board == nil {
ginx.Bomb(http.StatusNotFound, "No such dashboard")
}
@@ -65,9 +76,39 @@ func (rt *Router) boardGet(c *gin.Context) {
}
}
if board.PublicCate == models.PublicLogin {
rt.auth()(c)
} else if board.PublicCate == models.PublicBusi {
rt.auth()(c)
rt.user()(c)
me := c.MustGet("user").(*models.User)
if !me.IsAdmin() {
bgids, err := models.MyBusiGroupIds(rt.Ctx, me.Id)
ginx.Dangerous(err)
if len(bgids) == 0 {
ginx.Bomb(http.StatusForbidden, "forbidden")
}
ok, err := models.BoardBusigroupCheck(rt.Ctx, board.Id, bgids)
ginx.Dangerous(err)
if !ok {
ginx.Bomb(http.StatusForbidden, "forbidden")
}
}
}
ginx.NewRender(c).Data(board, nil)
}
// 根据 bids 参数,获取多个 board
func (rt *Router) boardGetsByBids(c *gin.Context) {
bids := strx.IdsInt64ForAPI(ginx.QueryStr(c, "bids", ""), ",")
boards, err := models.BoardGetsByBids(rt.Ctx, bids)
ginx.Dangerous(err)
ginx.NewRender(c).Data(boards, err)
}
func (rt *Router) boardPureGet(c *gin.Context) {
board, err := models.BoardGetByID(rt.Ctx, ginx.UrlParamInt64(c, "bid"))
ginx.Dangerous(err)
@@ -76,6 +117,10 @@ func (rt *Router) boardPureGet(c *gin.Context) {
ginx.Bomb(http.StatusNotFound, "No such dashboard")
}
// 清除创建者和更新者信息
board.CreateBy = ""
board.UpdateBy = ""
ginx.NewRender(c).Data(board, nil)
}
@@ -141,10 +186,11 @@ func (rt *Router) boardPut(c *gin.Context) {
bo.Name = f.Name
bo.Ident = f.Ident
bo.Tags = f.Tags
bo.Note = f.Note
bo.UpdateBy = me.Username
bo.UpdateAt = time.Now().Unix()
err = bo.Update(rt.Ctx, "name", "ident", "tags", "update_by", "update_at")
err = bo.Update(rt.Ctx, "name", "ident", "tags", "note", "update_by", "update_at")
ginx.NewRender(c).Data(bo, err)
}
@@ -192,10 +238,20 @@ func (rt *Router) boardPutPublic(c *gin.Context) {
}
bo.Public = f.Public
bo.PublicCate = f.PublicCate
if bo.PublicCate == models.PublicBusi {
err := models.BoardBusigroupUpdate(rt.Ctx, bo.Id, f.Bgids)
ginx.Dangerous(err)
} else {
err := models.BoardBusigroupDelByBoardId(rt.Ctx, bo.Id)
ginx.Dangerous(err)
}
bo.UpdateBy = me.Username
bo.UpdateAt = time.Now().Unix()
err := bo.Update(rt.Ctx, "public", "update_by", "update_at")
err := bo.Update(rt.Ctx, "public", "public_cate", "update_by", "update_at")
ginx.NewRender(c).Data(bo, err)
}
@@ -207,21 +263,64 @@ func (rt *Router) boardGets(c *gin.Context) {
ginx.NewRender(c).Data(boards, err)
}
func (rt *Router) publicBoardGets(c *gin.Context) {
me := c.MustGet("user").(*models.User)
bgids, err := models.MyBusiGroupIds(rt.Ctx, me.Id)
ginx.Dangerous(err)
boardIds, err := models.BoardIdsByBusiGroupIds(rt.Ctx, bgids)
ginx.Dangerous(err)
boards, err := models.BoardGets(rt.Ctx, "", "public=1 and (public_cate in (?) or id in (?))", []int64{0, 1}, boardIds)
ginx.NewRender(c).Data(boards, err)
}
func (rt *Router) boardGetsByGids(c *gin.Context) {
gids := strx.IdsInt64ForAPI(ginx.QueryStr(c, "gids", ""), ",")
query := ginx.QueryStr(c, "query", "")
if len(gids) > 0 {
for _, gid := range gids {
rt.bgroCheck(c, gid)
}
} else {
me := c.MustGet("user").(*models.User)
if !me.IsAdmin() {
var err error
gids, err = models.MyBusiGroupIds(rt.Ctx, me.Id)
ginx.Dangerous(err)
if len(gids) == 0 {
ginx.NewRender(c).Data([]int{}, nil)
return
}
}
}
boardBusigroups, err := models.BoardBusigroupGets(rt.Ctx)
ginx.Dangerous(err)
m := make(map[int64][]int64)
for _, boardBusigroup := range boardBusigroups {
m[boardBusigroup.BoardId] = append(m[boardBusigroup.BoardId], boardBusigroup.BusiGroupId)
}
boards, err := models.BoardGetsByBGIds(rt.Ctx, gids, query)
ginx.Dangerous(err)
for i := 0; i < len(boards); i++ {
if ids, ok := m[boards[i].Id]; ok {
boards[i].Bgids = ids
}
}
ginx.NewRender(c).Data(boards, err)
}
func (rt *Router) boardClone(c *gin.Context) {
me := c.MustGet("user").(*models.User)
bo := rt.Board(ginx.UrlParamInt64(c, "bid"))
newBoard := &models.Board{
Name: bo.Name + " Copy",
Tags: bo.Tags,
GroupId: bo.GroupId,
CreateBy: me.Username,
UpdateBy: me.Username,
}
if bo.Ident != "" {
newBoard.Ident = uuid.NewString()
}
newBoard := bo.Clone(me.Username, bo.GroupId, " Cloned")
ginx.Dangerous(newBoard.Add(rt.Ctx))
@@ -235,3 +334,39 @@ func (rt *Router) boardClone(c *gin.Context) {
ginx.NewRender(c).Message(nil)
}
type boardsForm struct {
BoardIds []int64 `json:"board_ids"`
Bgids []int64 `json:"bgids"`
}
func (rt *Router) boardBatchClone(c *gin.Context) {
me := c.MustGet("user").(*models.User)
var f boardsForm
ginx.BindJSON(c, &f)
for _, bgid := range f.Bgids {
rt.bgrwCheck(c, bgid)
}
reterr := make(map[string]string, len(f.BoardIds))
lang := c.GetHeader("X-Language")
for _, bgid := range f.Bgids {
for _, bid := range f.BoardIds {
bo := rt.Board(bid)
newBoard := bo.Clone(me.Username, bgid, "")
payload, err := models.BoardPayloadGet(rt.Ctx, bo.Id)
if err != nil {
reterr[fmt.Sprintf("%s-%d", newBoard.Name, bgid)] = i18n.Sprintf(lang, err.Error())
continue
}
if err = newBoard.AtomicAdd(rt.Ctx, payload); err != nil {
reterr[fmt.Sprintf("%s-%d", newBoard.Name, bgid)] = i18n.Sprintf(lang, err.Error())
}
}
}
ginx.NewRender(c).Data(reterr, nil)
}

View File

@@ -0,0 +1,93 @@
package router
import (
"net/http"
"github.com/ccfos/nightingale/v6/models"
"github.com/ccfos/nightingale/v6/pkg/ctx"
"github.com/gin-gonic/gin"
"github.com/toolkits/pkg/ginx"
"gorm.io/gorm"
)
const SYSTEM = "system"
func (rt *Router) builtinComponentsAdd(c *gin.Context) {
var lst []models.BuiltinComponent
ginx.BindJSON(c, &lst)
username := Username(c)
count := len(lst)
if count == 0 {
ginx.Bomb(http.StatusBadRequest, "input json is empty")
}
reterr := make(map[string]string)
for i := 0; i < count; i++ {
if err := lst[i].Add(rt.Ctx, username); err != nil {
reterr[lst[i].Ident] = err.Error()
}
}
ginx.NewRender(c).Data(reterr, nil)
}
func (rt *Router) builtinComponentsGets(c *gin.Context) {
query := ginx.QueryStr(c, "query", "")
disabled := ginx.QueryInt(c, "disabled", -1)
bc, err := models.BuiltinComponentGets(rt.Ctx, query, disabled)
ginx.Dangerous(err)
ginx.NewRender(c).Data(bc, nil)
}
func (rt *Router) builtinComponentsPut(c *gin.Context) {
var req models.BuiltinComponent
ginx.BindJSON(c, &req)
bc, err := models.BuiltinComponentGet(rt.Ctx, "id = ?", req.ID)
ginx.Dangerous(err)
if bc == nil {
ginx.NewRender(c, http.StatusNotFound).Message("No such builtin component")
return
}
if bc.CreatedBy == SYSTEM {
req.Ident = bc.Ident
}
username := Username(c)
req.UpdatedBy = username
err = models.DB(rt.Ctx).Transaction(func(tx *gorm.DB) error {
tCtx := &ctx.Context{
DB: tx,
}
txErr := models.BuiltinMetricBatchUpdateColumn(tCtx, "typ", bc.Ident, req.Ident, req.UpdatedBy)
if txErr != nil {
return txErr
}
txErr = bc.Update(tCtx, req)
if txErr != nil {
return txErr
}
return nil
})
ginx.NewRender(c).Message(err)
}
func (rt *Router) builtinComponentsDel(c *gin.Context) {
var req idsForm
ginx.BindJSON(c, &req)
req.Verify()
ginx.NewRender(c).Message(models.BuiltinComponentDels(rt.Ctx, req.Ids))
}

View File

@@ -0,0 +1,120 @@
package router
import (
"github.com/ccfos/nightingale/v6/models"
"github.com/ccfos/nightingale/v6/pkg/prom"
"github.com/gin-gonic/gin"
"github.com/toolkits/pkg/ginx"
)
func (rt *Router) metricFilterGets(c *gin.Context) {
lst, err := models.MetricFilterGets(rt.Ctx, "")
ginx.Dangerous(err)
me := c.MustGet("user").(*models.User)
gids, err := models.MyGroupIds(rt.Ctx, me.Id)
ginx.Dangerous(err)
arr := make([]models.MetricFilter, 0)
for _, f := range lst {
if me.Username == f.CreateBy {
arr = append(arr, f)
continue
}
if HasPerm(gids, f.GroupsPerm, false) {
arr = append(arr, f)
}
}
ginx.NewRender(c).Data(arr, err)
}
func (rt *Router) metricFilterAdd(c *gin.Context) {
var f models.MetricFilter
ginx.BindJSON(c, &f)
me := c.MustGet("user").(*models.User)
f.CreateBy = me.Username
f.UpdateBy = me.Username
ginx.Dangerous(f.Add(rt.Ctx))
ginx.NewRender(c).Data(f, nil)
}
func (rt *Router) metricFilterDel(c *gin.Context) {
var f idsForm
ginx.BindJSON(c, &f)
f.Verify()
me := c.MustGet("user").(*models.User)
for _, id := range f.Ids {
old, err := models.MetricFilterGet(rt.Ctx, id)
ginx.Dangerous(err)
if me.Username != old.CreateBy {
gids, err := models.MyGroupIds(rt.Ctx, me.Id)
ginx.Dangerous(err)
if !HasPerm(gids, old.GroupsPerm, true) {
ginx.NewRender(c).Message("forbidden")
return
}
}
}
ginx.NewRender(c).Message(models.MetricFilterDel(rt.Ctx, f.Ids))
}
func (rt *Router) metricFilterPut(c *gin.Context) {
var f models.MetricFilter
ginx.BindJSON(c, &f)
me := c.MustGet("user").(*models.User)
old, err := models.MetricFilterGet(rt.Ctx, f.ID)
ginx.Dangerous(err)
if me.Username != old.CreateBy {
gids, err := models.MyGroupIds(rt.Ctx, me.Id)
ginx.Dangerous(err)
if !HasPerm(gids, old.GroupsPerm, true) {
ginx.NewRender(c).Message("forbidden")
return
}
}
f.UpdateBy = me.Username
ginx.NewRender(c).Message(f.Update(rt.Ctx))
}
type metricPromqlReq struct {
LabelFilter string `json:"label_filter"`
Promql string `json:"promql"`
}
func (rt *Router) getMetricPromql(c *gin.Context) {
var req metricPromqlReq
ginx.BindJSON(c, &req)
promql := prom.AddLabelToPromQL(req.LabelFilter, req.Promql)
ginx.NewRender(c).Data(promql, nil)
}
func HasPerm(gids []int64, gps []models.GroupPerm, checkWrite bool) bool {
gmap := make(map[int64]struct{})
for _, gp := range gps {
if checkWrite && !gp.Write {
continue
}
gmap[gp.Gid] = struct{}{}
}
for _, gid := range gids {
if _, ok := gmap[gid]; ok {
return true
}
}
return false
}

View File

@@ -0,0 +1,153 @@
package router
import (
"net/http"
"sort"
"time"
"github.com/ccfos/nightingale/v6/center/integration"
"github.com/ccfos/nightingale/v6/models"
"github.com/gin-gonic/gin"
"github.com/toolkits/pkg/ginx"
"github.com/toolkits/pkg/i18n"
)
// single or import
func (rt *Router) builtinMetricsAdd(c *gin.Context) {
var lst []models.BuiltinMetric
ginx.BindJSON(c, &lst)
username := Username(c)
count := len(lst)
if count == 0 {
ginx.Bomb(http.StatusBadRequest, "input json is empty")
}
lang := c.GetHeader("X-Language")
if lang == "" {
lang = "zh_CN"
}
reterr := make(map[string]string)
for i := 0; i < count; i++ {
lst[i].Lang = lang
lst[i].UUID = time.Now().UnixMicro()
if err := lst[i].Add(rt.Ctx, username); err != nil {
reterr[lst[i].Name] = i18n.Sprintf(c.GetHeader("X-Language"), err.Error())
}
}
ginx.NewRender(c).Data(reterr, nil)
}
func (rt *Router) builtinMetricsGets(c *gin.Context) {
collector := ginx.QueryStr(c, "collector", "")
typ := ginx.QueryStr(c, "typ", "")
query := ginx.QueryStr(c, "query", "")
limit := ginx.QueryInt(c, "limit", 20)
lang := c.GetHeader("X-Language")
unit := ginx.QueryStr(c, "unit", "")
if lang == "" {
lang = "zh_CN"
}
bmInDB, err := models.BuiltinMetricGets(rt.Ctx, "", collector, typ, query, unit)
ginx.Dangerous(err)
bm, total, err := integration.BuiltinPayloadInFile.BuiltinMetricGets(bmInDB, lang, collector, typ, query, unit, limit, ginx.Offset(c, limit))
ginx.Dangerous(err)
ginx.NewRender(c).Data(gin.H{
"list": bm,
"total": total,
}, nil)
}
func (rt *Router) builtinMetricsPut(c *gin.Context) {
var req models.BuiltinMetric
ginx.BindJSON(c, &req)
bm, err := models.BuiltinMetricGet(rt.Ctx, "id = ?", req.ID)
ginx.Dangerous(err)
if bm == nil {
ginx.NewRender(c, http.StatusNotFound).Message("No such builtin metric")
return
}
username := Username(c)
req.UpdatedBy = username
ginx.NewRender(c).Message(bm.Update(rt.Ctx, req))
}
func (rt *Router) builtinMetricsDel(c *gin.Context) {
var req idsForm
ginx.BindJSON(c, &req)
req.Verify()
ginx.NewRender(c).Message(models.BuiltinMetricDels(rt.Ctx, req.Ids))
}
func (rt *Router) builtinMetricsDefaultTypes(c *gin.Context) {
lst := []string{
"Linux",
"Procstat",
"cAdvisor",
"Ping",
"MySQL",
"ClickHouse",
}
ginx.NewRender(c).Data(lst, nil)
}
func (rt *Router) builtinMetricsTypes(c *gin.Context) {
collector := ginx.QueryStr(c, "collector", "")
query := ginx.QueryStr(c, "query", "")
lang := c.GetHeader("X-Language")
metricTypeListInDB, err := models.BuiltinMetricTypes(rt.Ctx, lang, collector, query)
ginx.Dangerous(err)
metricTypeListInFile := integration.BuiltinPayloadInFile.BuiltinMetricTypes(lang, collector, query)
typeMap := make(map[string]struct{})
for _, metricType := range metricTypeListInDB {
typeMap[metricType] = struct{}{}
}
for _, metricType := range metricTypeListInFile {
typeMap[metricType] = struct{}{}
}
metricTypeList := make([]string, 0, len(typeMap))
for metricType := range typeMap {
metricTypeList = append(metricTypeList, metricType)
}
sort.Strings(metricTypeList)
ginx.NewRender(c).Data(metricTypeList, nil)
}
func (rt *Router) builtinMetricsCollectors(c *gin.Context) {
typ := ginx.QueryStr(c, "typ", "")
query := ginx.QueryStr(c, "query", "")
lang := c.GetHeader("X-Language")
collectorListInDB, err := models.BuiltinMetricCollectors(rt.Ctx, lang, typ, query)
ginx.Dangerous(err)
collectorListInFile := integration.BuiltinPayloadInFile.BuiltinMetricCollectors(lang, typ, query)
collectorMap := make(map[string]struct{})
for _, collector := range collectorListInDB {
collectorMap[collector] = struct{}{}
}
for _, collector := range collectorListInFile {
collectorMap[collector] = struct{}{}
}
collectorList := make([]string, 0, len(collectorMap))
for collector := range collectorMap {
collectorList = append(collectorList, collector)
}
sort.Strings(collectorList)
ginx.NewRender(c).Data(collectorList, nil)
}

View File

@@ -0,0 +1,315 @@
package router
import (
"encoding/json"
"net/http"
"strings"
"time"
"github.com/BurntSushi/toml"
"github.com/ccfos/nightingale/v6/center/integration"
"github.com/ccfos/nightingale/v6/models"
"github.com/gin-gonic/gin"
"github.com/toolkits/pkg/ginx"
"github.com/toolkits/pkg/i18n"
)
type Board struct {
Name string `json:"name"`
Tags string `json:"tags"`
Configs interface{} `json:"configs"`
UUID int64 `json:"uuid"`
Note string `json:"note"`
}
func (rt *Router) builtinPayloadsAdd(c *gin.Context) {
var lst []models.BuiltinPayload
ginx.BindJSON(c, &lst)
username := Username(c)
count := len(lst)
if count == 0 {
ginx.Bomb(http.StatusBadRequest, "input json is empty")
}
reterr := make(map[string]string)
for i := 0; i < count; i++ {
if lst[i].Type == "alert" {
if strings.HasPrefix(strings.TrimSpace(lst[i].Content), "[") {
// 处理多个告警规则模板的情况
alertRules := []models.AlertRule{}
if err := json.Unmarshal([]byte(lst[i].Content), &alertRules); err != nil {
reterr[lst[i].Name] = err.Error()
}
for _, rule := range alertRules {
if rule.UUID == 0 {
rule.UUID = time.Now().UnixMicro()
}
contentBytes, err := json.Marshal(rule)
if err != nil {
reterr[rule.Name] = err.Error()
continue
}
bp := models.BuiltinPayload{
Type: lst[i].Type,
ComponentID: lst[i].ComponentID,
Cate: lst[i].Cate,
Name: rule.Name,
Tags: rule.AppendTags,
UUID: rule.UUID,
Content: string(contentBytes),
CreatedBy: username,
UpdatedBy: username,
}
if err := bp.Add(rt.Ctx, username); err != nil {
reterr[bp.Name] = i18n.Sprintf(c.GetHeader("X-Language"), err.Error())
}
}
continue
}
alertRule := models.AlertRule{}
if err := json.Unmarshal([]byte(lst[i].Content), &alertRule); err != nil {
reterr[lst[i].Name] = err.Error()
continue
}
if alertRule.UUID == 0 {
alertRule.UUID = time.Now().UnixMicro()
}
contentBytes, err := json.Marshal(alertRule)
if err != nil {
reterr[alertRule.Name] = err.Error()
continue
}
bp := models.BuiltinPayload{
Type: lst[i].Type,
ComponentID: lst[i].ComponentID,
Cate: lst[i].Cate,
Name: alertRule.Name,
Tags: alertRule.AppendTags,
UUID: alertRule.UUID,
Content: string(contentBytes),
CreatedBy: username,
UpdatedBy: username,
}
if err := bp.Add(rt.Ctx, username); err != nil {
reterr[bp.Name] = i18n.Sprintf(c.GetHeader("X-Language"), err.Error())
}
} else if lst[i].Type == "dashboard" {
if strings.HasPrefix(strings.TrimSpace(lst[i].Content), "[") {
// 处理多个告警规则模板的情况
dashboards := []Board{}
if err := json.Unmarshal([]byte(lst[i].Content), &dashboards); err != nil {
reterr[lst[i].Name] = err.Error()
}
for _, dashboard := range dashboards {
if dashboard.UUID == 0 {
dashboard.UUID = time.Now().UnixMicro()
}
contentBytes, err := json.Marshal(dashboard)
if err != nil {
reterr[dashboard.Name] = err.Error()
continue
}
bp := models.BuiltinPayload{
Type: lst[i].Type,
ComponentID: lst[i].ComponentID,
Cate: lst[i].Cate,
Name: dashboard.Name,
Tags: dashboard.Tags,
UUID: dashboard.UUID,
Note: dashboard.Note,
Content: string(contentBytes),
CreatedBy: username,
UpdatedBy: username,
}
if err := bp.Add(rt.Ctx, username); err != nil {
reterr[bp.Name] = i18n.Sprintf(c.GetHeader("X-Language"), err.Error())
}
}
continue
}
dashboard := Board{}
if err := json.Unmarshal([]byte(lst[i].Content), &dashboard); err != nil {
reterr[lst[i].Name] = i18n.Sprintf(c.GetHeader("X-Language"), err.Error())
continue
}
if dashboard.UUID == 0 {
dashboard.UUID = time.Now().UnixMicro()
}
contentBytes, err := json.Marshal(dashboard)
if err != nil {
reterr[dashboard.Name] = err.Error()
continue
}
bp := models.BuiltinPayload{
Type: lst[i].Type,
ComponentID: lst[i].ComponentID,
Cate: lst[i].Cate,
Name: dashboard.Name,
Tags: dashboard.Tags,
UUID: dashboard.UUID,
Note: dashboard.Note,
Content: string(contentBytes),
CreatedBy: username,
UpdatedBy: username,
}
if err := bp.Add(rt.Ctx, username); err != nil {
reterr[bp.Name] = i18n.Sprintf(c.GetHeader("X-Language"), err.Error())
}
} else {
if lst[i].Type == "collect" {
c := make(map[string]interface{})
if _, err := toml.Decode(lst[i].Content, &c); err != nil {
reterr[lst[i].Name] = err.Error()
continue
}
}
if err := lst[i].Add(rt.Ctx, username); err != nil {
reterr[lst[i].Name] = i18n.Sprintf(c.GetHeader("X-Language"), err.Error())
}
}
}
ginx.NewRender(c).Data(reterr, nil)
}
func (rt *Router) builtinPayloadsGets(c *gin.Context) {
typ := ginx.QueryStr(c, "type", "")
if typ == "" {
ginx.Bomb(http.StatusBadRequest, "type is required")
return
}
ComponentID := ginx.QueryInt64(c, "component_id", 0)
cate := ginx.QueryStr(c, "cate", "")
query := ginx.QueryStr(c, "query", "")
lst, err := models.BuiltinPayloadGets(rt.Ctx, uint64(ComponentID), typ, cate, query)
ginx.Dangerous(err)
lstInFile, err := integration.BuiltinPayloadInFile.GetBuiltinPayload(typ, cate, query, uint64(ComponentID))
ginx.Dangerous(err)
if len(lstInFile) > 0 {
lst = append(lst, lstInFile...)
}
ginx.NewRender(c).Data(lst, nil)
}
func (rt *Router) builtinPayloadcatesGet(c *gin.Context) {
typ := ginx.QueryStr(c, "type", "")
ComponentID := ginx.QueryInt64(c, "component_id", 0)
cates, err := models.BuiltinPayloadCates(rt.Ctx, typ, uint64(ComponentID))
ginx.Dangerous(err)
catesInFile, err := integration.BuiltinPayloadInFile.GetBuiltinPayloadCates(typ, uint64(ComponentID))
ginx.Dangerous(err)
// 使用 map 进行去重
cateMap := make(map[string]bool)
// 添加数据库中的分类
for _, cate := range cates {
cateMap[cate] = true
}
// 添加文件中的分类
for _, cate := range catesInFile {
cateMap[cate] = true
}
// 将去重后的结果转换回切片
result := make([]string, 0, len(cateMap))
for cate := range cateMap {
result = append(result, cate)
}
ginx.NewRender(c).Data(result, nil)
}
func (rt *Router) builtinPayloadsPut(c *gin.Context) {
var req models.BuiltinPayload
ginx.BindJSON(c, &req)
bp, err := models.BuiltinPayloadGet(rt.Ctx, "id = ?", req.ID)
ginx.Dangerous(err)
if bp == nil {
ginx.NewRender(c, http.StatusNotFound).Message("No such builtin payload")
return
}
if req.Type == "alert" {
alertRule := models.AlertRule{}
if err := json.Unmarshal([]byte(req.Content), &alertRule); err != nil {
ginx.Bomb(http.StatusBadRequest, err.Error())
}
req.Name = alertRule.Name
req.Tags = alertRule.AppendTags
} else if req.Type == "dashboard" {
dashboard := Board{}
if err := json.Unmarshal([]byte(req.Content), &dashboard); err != nil {
ginx.Bomb(http.StatusBadRequest, err.Error())
}
req.Name = dashboard.Name
req.Tags = dashboard.Tags
req.Note = dashboard.Note
} else if req.Type == "collect" {
c := make(map[string]interface{})
if _, err := toml.Decode(req.Content, &c); err != nil {
ginx.Bomb(http.StatusBadRequest, err.Error())
}
}
username := Username(c)
req.UpdatedBy = username
ginx.NewRender(c).Message(bp.Update(rt.Ctx, req))
}
func (rt *Router) builtinPayloadsDel(c *gin.Context) {
var req idsForm
ginx.BindJSON(c, &req)
req.Verify()
ginx.NewRender(c).Message(models.BuiltinPayloadDels(rt.Ctx, req.Ids))
}
func (rt *Router) builtinPayloadsGetByUUID(c *gin.Context) {
uuid := ginx.QueryInt64(c, "uuid")
bp, err := models.BuiltinPayloadGet(rt.Ctx, "uuid = ?", uuid)
ginx.Dangerous(err)
if bp != nil {
ginx.NewRender(c).Data(bp, nil)
} else {
ginx.NewRender(c).Data(integration.BuiltinPayloadInFile.IndexData[uuid], nil)
}
}

View File

@@ -4,11 +4,11 @@ import (
"net/http"
"github.com/ccfos/nightingale/v6/models"
"github.com/ccfos/nightingale/v6/pkg/strx"
"github.com/gin-gonic/gin"
"github.com/toolkits/pkg/ginx"
"github.com/toolkits/pkg/logger"
"github.com/toolkits/pkg/str"
)
type busiGroupForm struct {
@@ -131,7 +131,7 @@ func (rt *Router) busiGroupGetsByService(c *gin.Context) {
// 这个接口只有在活跃告警页面才调用获取各个BG的活跃告警数量
func (rt *Router) busiGroupAlertingsGets(c *gin.Context) {
ids := ginx.QueryStr(c, "ids", "")
ret, err := models.AlertNumbers(rt.Ctx, str.IdsInt64(ids))
ret, err := models.AlertNumbers(rt.Ctx, strx.IdsInt64ForAPI(ids))
ginx.NewRender(c).Data(ret, err)
}
@@ -140,3 +140,12 @@ func (rt *Router) busiGroupGet(c *gin.Context) {
ginx.Dangerous(bg.FillUserGroups(rt.Ctx))
ginx.NewRender(c).Data(bg, nil)
}
func (rt *Router) busiGroupsGetTags(c *gin.Context) {
bgids := strx.IdsInt64ForAPI(ginx.QueryStr(c, "gids", ""), ",")
targetIdents, err := models.TargetIndentsGetByBgids(rt.Ctx, bgids)
ginx.Dangerous(err)
tags, err := models.TargetGetTags(rt.Ctx, targetIdents, true, "busigroup")
ginx.Dangerous(err)
ginx.NewRender(c).Data(tags, nil)
}

View File

@@ -65,7 +65,7 @@ func (rt *Router) generateCaptcha(c *gin.Context) {
var driver = captcha.NewDriverMath(60, 200, 0, captcha.OptionShowHollowLine, nil, nil, []string{"wqy-microhei.ttc"})
cc := captcha.NewCaptcha(driver, rt.newCaptchaRedisStore())
//data:image/png;base64
id, b64s, err := cc.Generate()
id, b64s, _, err := cc.Generate()
if err != nil {
ginx.NewRender(c).Message(err)

View File

@@ -4,15 +4,15 @@ import (
"time"
"github.com/ccfos/nightingale/v6/models"
"github.com/ccfos/nightingale/v6/pkg/strx"
"github.com/gin-gonic/gin"
"github.com/toolkits/pkg/ginx"
"github.com/toolkits/pkg/str"
)
func (rt *Router) chartShareGets(c *gin.Context) {
ids := ginx.QueryStr(c, "ids", "")
lst, err := models.ChartShareGetsByIds(rt.Ctx, str.IdsInt64(ids, ","))
lst, err := models.ChartShareGetsByIds(rt.Ctx, strx.IdsInt64ForAPI(ids, ","))
ginx.NewRender(c).Data(lst, err)
}

View File

@@ -62,3 +62,8 @@ func (rt *Router) contactKeysGets(c *gin.Context) {
ginx.NewRender(c).Data(labelAndKeys, nil)
}
func (rt *Router) siteInfo(c *gin.Context) {
config, err := models.ConfigsGet(rt.Ctx, "site_info")
ginx.NewRender(c).Data(config, err)
}

View File

@@ -1,12 +1,16 @@
package router
import (
"time"
"github.com/ccfos/nightingale/v6/models"
"github.com/gin-gonic/gin"
"github.com/toolkits/pkg/ginx"
)
const EMBEDDEDDASHBOARD = "embedded-dashboards"
func (rt *Router) configsGet(c *gin.Context) {
prefix := ginx.QueryStr(c, "prefix", "")
limit := ginx.QueryInt(c, "limit", 10)
@@ -20,6 +24,11 @@ func (rt *Router) configGet(c *gin.Context) {
ginx.NewRender(c).Data(configs, err)
}
func (rt *Router) configGetAll(c *gin.Context) {
config, err := models.ConfigsGetAll(rt.Ctx)
ginx.NewRender(c).Data(config, err)
}
func (rt *Router) configGetByKey(c *gin.Context) {
config, err := models.ConfigsGet(rt.Ctx, ginx.QueryStr(c, "key"))
ginx.NewRender(c).Data(config, err)
@@ -28,7 +37,20 @@ func (rt *Router) configGetByKey(c *gin.Context) {
func (rt *Router) configPutByKey(c *gin.Context) {
var f models.Configs
ginx.BindJSON(c, &f)
ginx.NewRender(c).Message(models.ConfigsSet(rt.Ctx, f.Ckey, f.Cval))
username := c.MustGet("username").(string)
ginx.NewRender(c).Message(models.ConfigsSetWithUname(rt.Ctx, f.Ckey, f.Cval, username))
}
func (rt *Router) embeddedDashboardsGet(c *gin.Context) {
config, err := models.ConfigsGet(rt.Ctx, EMBEDDEDDASHBOARD)
ginx.NewRender(c).Data(config, err)
}
func (rt *Router) embeddedDashboardsPut(c *gin.Context) {
var f models.Configs
ginx.BindJSON(c, &f)
username := c.MustGet("username").(string)
ginx.NewRender(c).Message(models.ConfigsSetWithUname(rt.Ctx, EMBEDDEDDASHBOARD, f.Cval, username))
}
func (rt *Router) configsDel(c *gin.Context) {
@@ -37,22 +59,36 @@ func (rt *Router) configsDel(c *gin.Context) {
ginx.NewRender(c).Message(models.ConfigsDel(rt.Ctx, f.Ids))
}
func (rt *Router) configsPut(c *gin.Context) {
func (rt *Router) configsPut(c *gin.Context) { //for APIForService
var arr []models.Configs
ginx.BindJSON(c, &arr)
username := c.GetString("user")
if username == "" {
username = "default"
}
now := time.Now().Unix()
for i := 0; i < len(arr); i++ {
arr[i].UpdateBy = username
arr[i].UpdateAt = now
ginx.Dangerous(arr[i].Update(rt.Ctx))
}
ginx.NewRender(c).Message(nil)
}
func (rt *Router) configsPost(c *gin.Context) {
func (rt *Router) configsPost(c *gin.Context) { //for APIForService
var arr []models.Configs
ginx.BindJSON(c, &arr)
username := c.GetString("user")
if username == "" {
username = "default"
}
now := time.Now().Unix()
for i := 0; i < len(arr); i++ {
arr[i].CreateBy = username
arr[i].UpdateBy = username
arr[i].CreateAt = now
arr[i].UpdateAt = now
ginx.Dangerous(arr[i].Add(rt.Ctx))
}

View File

@@ -0,0 +1,99 @@
package router
import (
"fmt"
"net/http"
"time"
"github.com/ccfos/nightingale/v6/models"
"github.com/ccfos/nightingale/v6/pkg/ctx"
"github.com/gin-gonic/gin"
"github.com/toolkits/pkg/ginx"
)
func checkAnnotationPermission(c *gin.Context, ctx *ctx.Context, dashboardId int64) {
dashboard, err := models.BoardGetByID(ctx, dashboardId)
if err != nil {
ginx.Bomb(http.StatusInternalServerError, "failed to get dashboard: %v", err)
}
if dashboard == nil {
ginx.Bomb(http.StatusNotFound, "dashboard not found")
}
bg := BusiGroup(ctx, dashboard.GroupId)
me := c.MustGet("user").(*models.User)
can, err := me.CanDoBusiGroup(ctx, bg, "rw")
ginx.Dangerous(err)
if !can {
ginx.Bomb(http.StatusForbidden, "forbidden")
}
}
func (rt *Router) dashAnnotationAdd(c *gin.Context) {
var f models.DashAnnotation
ginx.BindJSON(c, &f)
username := c.MustGet("username").(string)
now := time.Now().Unix()
checkAnnotationPermission(c, rt.Ctx, f.DashboardId)
f.CreateBy = username
f.CreateAt = now
f.UpdateBy = username
f.UpdateAt = now
ginx.NewRender(c).Data(f.Id, f.Add(rt.Ctx))
}
func (rt *Router) dashAnnotationGets(c *gin.Context) {
dashboardId := ginx.QueryInt64(c, "dashboard_id")
from := ginx.QueryInt64(c, "from")
to := ginx.QueryInt64(c, "to")
limit := ginx.QueryInt(c, "limit", 100)
lst, err := models.DashAnnotationGets(rt.Ctx, dashboardId, from, to, limit)
ginx.NewRender(c).Data(lst, err)
}
func (rt *Router) dashAnnotationPut(c *gin.Context) {
var f models.DashAnnotation
ginx.BindJSON(c, &f)
id := ginx.UrlParamInt64(c, "id")
annotation, err := getAnnotationById(rt.Ctx, id)
ginx.Dangerous(err)
checkAnnotationPermission(c, rt.Ctx, annotation.DashboardId)
f.Id = id
f.UpdateAt = time.Now().Unix()
f.UpdateBy = c.MustGet("username").(string)
ginx.NewRender(c).Message(f.Update(rt.Ctx))
}
func (rt *Router) dashAnnotationDel(c *gin.Context) {
id := ginx.UrlParamInt64(c, "id")
annotation, err := getAnnotationById(rt.Ctx, id)
ginx.Dangerous(err)
checkAnnotationPermission(c, rt.Ctx, annotation.DashboardId)
ginx.NewRender(c).Message(models.DashAnnotationDel(rt.Ctx, id))
}
// 可以提取获取注释的通用方法
func getAnnotationById(ctx *ctx.Context, id int64) (*models.DashAnnotation, error) {
annotation, err := models.DashAnnotationGet(ctx, "id=?", id)
if err != nil {
return nil, err
}
if annotation == nil {
return nil, fmt.Errorf("annotation not found")
}
return annotation, nil
}

View File

@@ -1,17 +1,23 @@
package router
import (
"context"
"crypto/tls"
"encoding/base64"
"encoding/json"
"fmt"
"io"
"net/http"
"net/url"
"strings"
"time"
"github.com/ccfos/nightingale/v6/datasource/opensearch"
"github.com/ccfos/nightingale/v6/dskit/clickhouse"
"github.com/ccfos/nightingale/v6/models"
"github.com/gin-gonic/gin"
"github.com/toolkits/pkg/ginx"
"github.com/toolkits/pkg/i18n"
"github.com/toolkits/pkg/logger"
)
@@ -26,7 +32,7 @@ type listReq struct {
}
func (rt *Router) datasourceList(c *gin.Context) {
if rt.DatasourceCheckHook(c) {
if rt.DatasourceCache.DatasourceCheckHook(c) {
Render(c, []int{}, nil)
return
}
@@ -38,40 +44,86 @@ func (rt *Router) datasourceList(c *gin.Context) {
category := req.Category
name := req.Name
user := c.MustGet("user").(*models.User)
list, err := models.GetDatasourcesGetsBy(rt.Ctx, typ, category, name, "")
Render(c, list, err)
Render(c, rt.DatasourceCache.DatasourceFilter(list, user), err)
}
func (rt *Router) datasourceGetsByService(c *gin.Context) {
typ := ginx.QueryStr(c, "typ", "")
lst, err := models.GetDatasourcesGetsBy(rt.Ctx, typ, "", "", "")
openRsa := rt.Center.RSA.OpenRSA
for _, item := range lst {
if err := item.Encrypt(openRsa, rt.HTTP.RSA.RSAPublicKey); err != nil {
logger.Errorf("datasource %+v encrypt failed: %v", item, err)
continue
}
}
ginx.NewRender(c).Data(lst, err)
}
type datasourceBrief struct {
Id int64 `json:"id"`
Name string `json:"name"`
PluginType string `json:"plugin_type"`
func (rt *Router) datasourceRsaConfigGet(c *gin.Context) {
if rt.Center.RSA.OpenRSA {
publicKey := ""
privateKey := ""
if len(rt.HTTP.RSA.RSAPublicKey) > 0 {
publicKey = base64.StdEncoding.EncodeToString(rt.HTTP.RSA.RSAPublicKey)
}
if len(rt.HTTP.RSA.RSAPrivateKey) > 0 {
privateKey = base64.StdEncoding.EncodeToString(rt.HTTP.RSA.RSAPrivateKey)
}
logger.Debugf("OpenRSA=%v", rt.Center.RSA.OpenRSA)
ginx.NewRender(c).Data(models.RsaConfig{
OpenRSA: rt.Center.RSA.OpenRSA,
RSAPublicKey: publicKey,
RSAPrivateKey: privateKey,
RSAPassWord: rt.HTTP.RSA.RSAPassWord,
}, nil)
} else {
ginx.NewRender(c).Data(models.RsaConfig{
OpenRSA: rt.Center.RSA.OpenRSA,
}, nil)
}
}
func (rt *Router) datasourceBriefs(c *gin.Context) {
var dss []datasourceBrief
var dss []*models.Datasource
list, err := models.GetDatasourcesGetsBy(rt.Ctx, "", "", "", "")
ginx.Dangerous(err)
for i := range list {
dss = append(dss, datasourceBrief{
Id: list[i].Id,
Name: list[i].Name,
PluginType: list[i].PluginType,
})
for _, item := range list {
item.AuthJson.BasicAuthPassword = ""
if item.PluginType == models.PROMETHEUS {
for k, v := range item.SettingsJson {
if strings.HasPrefix(k, "prometheus.") {
item.SettingsJson[strings.TrimPrefix(k, "prometheus.")] = v
delete(item.SettingsJson, k)
}
}
} else if item.PluginType == "cloudwatch" {
for k := range item.SettingsJson {
if !strings.Contains(k, "region") {
delete(item.SettingsJson, k)
}
}
} else {
item.SettingsJson = nil
}
dss = append(dss, item)
}
if !rt.Center.AnonymousAccess.PromQuerier {
user := c.MustGet("user").(*models.User)
dss = rt.DatasourceCache.DatasourceFilter(dss, user)
}
ginx.NewRender(c).Data(dss, err)
}
func (rt *Router) datasourceUpsert(c *gin.Context) {
if rt.DatasourceCheckHook(c) {
if rt.DatasourceCache.DatasourceCheckHook(c) {
Render(c, []int{}, nil)
return
}
@@ -84,10 +136,129 @@ func (rt *Router) datasourceUpsert(c *gin.Context) {
var err error
var count int64
err = DatasourceCheck(req)
if err != nil {
Dangerous(c, err)
return
if !req.ForceSave {
if req.PluginType == models.PROMETHEUS || req.PluginType == models.LOKI || req.PluginType == models.TDENGINE {
err = DatasourceCheck(c, req)
if err != nil {
Dangerous(c, err)
return
}
}
}
for k, v := range req.SettingsJson {
if strings.Contains(k, "cluster_name") {
req.ClusterName = v.(string)
break
}
}
if req.PluginType == models.OPENSEARCH {
b, err := json.Marshal(req.SettingsJson)
if err != nil {
logger.Warningf("marshal settings fail: %v", err)
return
}
var os opensearch.OpenSearch
err = json.Unmarshal(b, &os)
if err != nil {
logger.Warningf("unmarshal settings fail: %v", err)
return
}
if len(os.Nodes) == 0 {
logger.Warningf("nodes empty, %+v", req)
return
}
req.HTTPJson = models.HTTP{
Timeout: os.Timeout,
Url: os.Nodes[0],
Headers: os.Headers,
TLS: models.TLS{
SkipTlsVerify: os.TLS.SkipTlsVerify,
},
}
req.AuthJson = models.Auth{
BasicAuth: os.Basic.Enable,
BasicAuthUser: os.Basic.Username,
BasicAuthPassword: os.Basic.Password,
}
}
if req.PluginType == models.CLICKHOUSE {
b, err := json.Marshal(req.SettingsJson)
if err != nil {
logger.Warningf("marshal clickhouse settings failed: %v", err)
Dangerous(c, err)
return
}
var ckConfig clickhouse.Clickhouse
err = json.Unmarshal(b, &ckConfig)
if err != nil {
logger.Warningf("unmarshal clickhouse settings failed: %v", err)
Dangerous(c, err)
return
}
// 检查ckconfig的nodes不应该以http://或https://开头
for _, addr := range ckConfig.Nodes {
if strings.HasPrefix(addr, "http://") || strings.HasPrefix(addr, "https://") {
err = fmt.Errorf("clickhouse node address should not start with http:// or https:// : %s", addr)
logger.Warningf("clickhouse node address invalid: %v", err)
Dangerous(c, err)
return
}
}
// InitCli 会自动检测并选择 HTTP 或 Native 协议
err = ckConfig.InitCli()
if err != nil {
logger.Warningf("clickhouse connection failed: %v", err)
Dangerous(c, err)
return
}
// 执行 SHOW DATABASES 测试连通性
_, err = ckConfig.ShowDatabases(context.Background())
if err != nil {
logger.Warningf("clickhouse test query failed: %v", err)
Dangerous(c, err)
return
}
}
if req.PluginType == models.ELASTICSEARCH {
skipAuto := false
// 若用户输入了versionversion字符串存在且不为空则不自动获取
if req.SettingsJson != nil {
if v, ok := req.SettingsJson["version"]; ok {
switch vv := v.(type) {
case string:
if strings.TrimSpace(vv) != "" {
skipAuto = true
}
default:
if strings.TrimSpace(fmt.Sprint(vv)) != "" {
skipAuto = true
}
}
}
}
if !skipAuto {
version, err := getElasticsearchVersion(req, 10*time.Second)
if err != nil {
logger.Warningf("failed to get elasticsearch version: %v", err)
} else {
if req.SettingsJson == nil {
req.SettingsJson = make(map[string]interface{})
}
req.SettingsJson["version"] = version
}
}
}
if req.Id == 0 {
@@ -105,34 +276,41 @@ func (rt *Router) datasourceUpsert(c *gin.Context) {
}
err = req.Add(rt.Ctx)
} else {
err = req.Update(rt.Ctx, "name", "description", "cluster_name", "settings", "http", "auth", "updated_by", "updated_at")
err = req.Update(rt.Ctx, "name", "identifier", "description", "cluster_name", "settings", "http", "auth", "updated_by", "updated_at", "is_default")
}
Render(c, nil, err)
}
func DatasourceCheck(ds models.Datasource) error {
if ds.HTTPJson.Url == "" {
return fmt.Errorf("url is empty")
func DatasourceCheck(c *gin.Context, ds models.Datasource) error {
if ds.PluginType == models.PROMETHEUS || ds.PluginType == models.LOKI || ds.PluginType == models.TDENGINE {
if ds.HTTPJson.Url == "" {
return fmt.Errorf("url is empty")
}
if !strings.HasPrefix(ds.HTTPJson.Url, "http") {
return fmt.Errorf("url must start with http or https")
}
}
if !strings.HasPrefix(ds.HTTPJson.Url, "http") {
return fmt.Errorf("url must start with http or https")
// 使用 TLS 配置(支持 mTLS
tlsConfig, err := ds.HTTPJson.TLS.TLSConfig()
if err != nil {
return fmt.Errorf("failed to create TLS config: %v", err)
}
client := &http.Client{
Transport: &http.Transport{
TLSClientConfig: &tls.Config{
InsecureSkipVerify: ds.HTTPJson.TLS.SkipTlsVerify,
},
TLSClientConfig: tlsConfig,
},
}
fullURL := ds.HTTPJson.Url
req, err := http.NewRequest("GET", fullURL, nil)
ds.HTTPJson.Url = strings.TrimRight(ds.HTTPJson.Url, "/")
var fullURL string
req, err := ds.HTTPJson.NewReq(&fullURL)
if err != nil {
logger.Errorf("Error creating request: %v", err)
return fmt.Errorf("request url:%s failed", fullURL)
return fmt.Errorf("request urls:%v failed: %v", ds.HTTPJson.GetUrls(), err)
}
if ds.PluginType == models.PROMETHEUS {
@@ -148,14 +326,14 @@ func DatasourceCheck(ds models.Datasource) error {
req, err = http.NewRequest("GET", fullURL, nil)
if err != nil {
logger.Errorf("Error creating request: %v", err)
return fmt.Errorf("request url:%s failed", fullURL)
return fmt.Errorf("request url:%s failed: %v", fullURL, err)
}
} else if ds.PluginType == models.TDENGINE {
fullURL = fmt.Sprintf("%s/rest/sql", ds.HTTPJson.Url)
req, err = http.NewRequest("POST", fullURL, strings.NewReader("show databases"))
if err != nil {
logger.Errorf("Error creating request: %v", err)
return fmt.Errorf("request url:%s failed", fullURL)
return fmt.Errorf("request url:%s failed: %v", fullURL, err)
}
}
@@ -167,7 +345,11 @@ func DatasourceCheck(ds models.Datasource) error {
req, err = http.NewRequest("GET", fullURL, nil)
if err != nil {
logger.Errorf("Error creating request: %v", err)
return fmt.Errorf("request url:%s failed", fullURL)
if !strings.Contains(ds.HTTPJson.Url, "/loki") {
lang := c.GetHeader("X-Language")
return fmt.Errorf(i18n.Sprintf(lang, "/loki suffix is miss, please add /loki to the url: %s", ds.HTTPJson.Url+"/loki"))
}
return fmt.Errorf("request url:%s failed: %v", fullURL, err)
}
}
@@ -182,12 +364,16 @@ func DatasourceCheck(ds models.Datasource) error {
resp, err := client.Do(req)
if err != nil {
logger.Errorf("Error making request: %v\n", err)
return fmt.Errorf("request url:%s failed", fullURL)
return fmt.Errorf("request url:%s failed: %v", fullURL, err)
}
defer resp.Body.Close()
if resp.StatusCode != 200 {
logger.Errorf("Error making request: %v\n", resp.StatusCode)
if resp.StatusCode == 404 && ds.PluginType == models.LOKI && !strings.Contains(ds.HTTPJson.Url, "/loki") {
lang := c.GetHeader("X-Language")
return fmt.Errorf(i18n.Sprintf(lang, "/loki suffix is miss, please add /loki to the url: %s", ds.HTTPJson.Url+"/loki"))
}
body, _ := io.ReadAll(resp.Body)
return fmt.Errorf("request url:%s failed code:%d body:%s", fullURL, resp.StatusCode, string(body))
}
@@ -196,7 +382,7 @@ func DatasourceCheck(ds models.Datasource) error {
}
func (rt *Router) datasourceGet(c *gin.Context) {
if rt.DatasourceCheckHook(c) {
if rt.DatasourceCache.DatasourceCheckHook(c) {
Render(c, []int{}, nil)
return
}
@@ -208,7 +394,7 @@ func (rt *Router) datasourceGet(c *gin.Context) {
}
func (rt *Router) datasourceUpdataStatus(c *gin.Context) {
if rt.DatasourceCheckHook(c) {
if rt.DatasourceCache.DatasourceCheckHook(c) {
Render(c, []int{}, nil)
return
}
@@ -222,7 +408,7 @@ func (rt *Router) datasourceUpdataStatus(c *gin.Context) {
}
func (rt *Router) datasourceDel(c *gin.Context) {
if rt.DatasourceCheckHook(c) {
if rt.DatasourceCache.DatasourceCheckHook(c) {
Render(c, []int{}, nil)
return
}
@@ -240,7 +426,115 @@ func (rt *Router) getDatasourceIds(c *gin.Context) {
ginx.NewRender(c).Data(datasourceIds, err)
}
func Username(c *gin.Context) string {
return c.MustGet("username").(string)
type datasourceQueryForm struct {
Cate string `json:"datasource_cate"`
DatasourceQueries []models.DatasourceQuery `json:"datasource_queries"`
}
type datasourceQueryResp struct {
ID int64 `json:"id"`
Name string `json:"name"`
}
func (rt *Router) datasourceQuery(c *gin.Context) {
var dsf datasourceQueryForm
ginx.BindJSON(c, &dsf)
datasources, err := models.GetDatasourcesGetsByTypes(rt.Ctx, []string{dsf.Cate})
ginx.Dangerous(err)
nameToID := make(map[string]int64)
IDToName := make(map[int64]string)
for _, ds := range datasources {
nameToID[ds.Name] = ds.Id
IDToName[ds.Id] = ds.Name
}
ids := models.GetDatasourceIDsByDatasourceQueries(dsf.DatasourceQueries, IDToName, nameToID)
var req []datasourceQueryResp
for _, id := range ids {
req = append(req, datasourceQueryResp{
ID: id,
Name: IDToName[id],
})
}
ginx.NewRender(c).Data(req, err)
}
// getElasticsearchVersion 该函数尝试从提供的Elasticsearch数据源中获取版本号遍历所有URL
// 直到成功获取版本号或所有URL均尝试失败为止。
func getElasticsearchVersion(ds models.Datasource, timeout time.Duration) (string, error) {
client := &http.Client{
Timeout: timeout,
Transport: &http.Transport{
TLSClientConfig: &tls.Config{
InsecureSkipVerify: ds.HTTPJson.TLS.SkipTlsVerify,
},
},
}
urls := make([]string, 0)
if len(ds.HTTPJson.Urls) > 0 {
urls = append(urls, ds.HTTPJson.Urls...)
}
if ds.HTTPJson.Url != "" {
urls = append(urls, ds.HTTPJson.Url)
}
if len(urls) == 0 {
return "", fmt.Errorf("no url provided")
}
var lastErr error
for _, raw := range urls {
baseURL := strings.TrimRight(raw, "/") + "/"
req, err := http.NewRequest("GET", baseURL, nil)
if err != nil {
lastErr = err
continue
}
if ds.AuthJson.BasicAuthUser != "" {
req.SetBasicAuth(ds.AuthJson.BasicAuthUser, ds.AuthJson.BasicAuthPassword)
}
for k, v := range ds.HTTPJson.Headers {
req.Header.Set(k, v)
}
resp, err := client.Do(req)
if err != nil {
lastErr = err
continue
}
body, err := io.ReadAll(resp.Body)
resp.Body.Close()
if err != nil {
lastErr = err
continue
}
if resp.StatusCode != 200 {
lastErr = fmt.Errorf("request to %s failed with status: %d body:%s", baseURL, resp.StatusCode, string(body))
continue
}
var result map[string]interface{}
if err := json.Unmarshal(body, &result); err != nil {
lastErr = err
continue
}
if version, ok := result["version"].(map[string]interface{}); ok {
if number, ok := version["number"].(string); ok && number != "" {
return number, nil
}
}
lastErr = fmt.Errorf("version not found in response from %s", baseURL)
}
if lastErr != nil {
return "", lastErr
}
return "", fmt.Errorf("failed to get elasticsearch version")
}

View File

@@ -0,0 +1,101 @@
package router
import (
"context"
"github.com/ccfos/nightingale/v6/dscache"
"github.com/ccfos/nightingale/v6/dskit/types"
"github.com/ccfos/nightingale/v6/models"
"github.com/gin-gonic/gin"
"github.com/toolkits/pkg/ginx"
"github.com/toolkits/pkg/logger"
)
func (rt *Router) ShowDatabases(c *gin.Context) {
var f models.QueryParam
ginx.BindJSON(c, &f)
plug, exists := dscache.DsCache.Get(f.Cate, f.DatasourceId)
if !exists {
logger.Warningf("cluster:%d not exists", f.DatasourceId)
ginx.Bomb(200, "cluster not exists")
}
var databases []string
var err error
type DatabaseShower interface {
ShowDatabases(context.Context) ([]string, error)
}
switch plug.(type) {
case DatabaseShower:
databases, err = plug.(DatabaseShower).ShowDatabases(c.Request.Context())
ginx.Dangerous(err)
default:
ginx.Bomb(200, "datasource not exists")
}
if len(databases) == 0 {
databases = make([]string, 0)
}
ginx.NewRender(c).Data(databases, nil)
}
func (rt *Router) ShowTables(c *gin.Context) {
var f models.QueryParam
ginx.BindJSON(c, &f)
plug, exists := dscache.DsCache.Get(f.Cate, f.DatasourceId)
if !exists {
logger.Warningf("cluster:%d not exists", f.DatasourceId)
ginx.Bomb(200, "cluster not exists")
}
// 只接受一个入参
tables := make([]string, 0)
var err error
type TableShower interface {
ShowTables(ctx context.Context, database string) ([]string, error)
}
switch plug.(type) {
case TableShower:
if len(f.Queries) > 0 {
database, ok := f.Queries[0].(string)
if ok {
tables, err = plug.(TableShower).ShowTables(c.Request.Context(), database)
}
}
default:
ginx.Bomb(200, "datasource not exists")
}
ginx.NewRender(c).Data(tables, err)
}
func (rt *Router) DescribeTable(c *gin.Context) {
var f models.QueryParam
ginx.BindJSON(c, &f)
plug, exists := dscache.DsCache.Get(f.Cate, f.DatasourceId)
if !exists {
logger.Warningf("cluster:%d not exists", f.DatasourceId)
ginx.Bomb(200, "cluster not exists")
}
// 只接受一个入参
columns := make([]*types.ColumnProperty, 0)
var err error
type TableDescriber interface {
DescribeTable(context.Context, interface{}) ([]*types.ColumnProperty, error)
}
switch plug.(type) {
case TableDescriber:
client := plug.(TableDescriber)
if len(f.Queries) > 0 {
columns, err = client.DescribeTable(c.Request.Context(), f.Queries[0])
}
default:
ginx.Bomb(200, "datasource not exists")
}
ginx.NewRender(c).Data(columns, err)
}

View File

@@ -0,0 +1,141 @@
package router
import (
"time"
"github.com/ccfos/nightingale/v6/models"
"github.com/ccfos/nightingale/v6/pkg/ctx"
"github.com/gin-gonic/gin"
"github.com/toolkits/pkg/ginx"
)
func (rt *Router) embeddedProductGets(c *gin.Context) {
products, err := models.EmbeddedProductGets(rt.Ctx)
ginx.Dangerous(err)
// 获取当前用户可访问的Group ID 列表
me := c.MustGet("user").(*models.User)
if me.IsAdmin() {
ginx.NewRender(c).Data(products, err)
return
}
gids, err := models.MyGroupIds(rt.Ctx, me.Id)
bgSet := make(map[int64]struct{}, len(gids))
for _, id := range gids {
bgSet[id] = struct{}{}
}
// 过滤出公开或有权限访问的私有 product link
var result []*models.EmbeddedProduct
for _, product := range products {
if !product.IsPrivate {
result = append(result, product)
continue
}
for _, tid := range product.TeamIDs {
if _, ok := bgSet[tid]; ok {
result = append(result, product)
break
}
}
}
ginx.NewRender(c).Data(result, err)
}
func (rt *Router) embeddedProductGet(c *gin.Context) {
id := ginx.UrlParamInt64(c, "id")
if id <= 0 {
ginx.Bomb(400, "invalid id")
}
data, err := models.GetEmbeddedProductByID(rt.Ctx, id)
ginx.Dangerous(err)
me := c.MustGet("user").(*models.User)
hashPermission, err := hasEmbeddedProductAccess(rt.Ctx, me, data)
ginx.Dangerous(err)
if !hashPermission {
ginx.Bomb(403, "forbidden")
}
ginx.NewRender(c).Data(data, nil)
}
func (rt *Router) embeddedProductAdd(c *gin.Context) {
var eps []models.EmbeddedProduct
ginx.BindJSON(c, &eps)
me := c.MustGet("user").(*models.User)
for i := range eps {
eps[i].CreateBy = me.Nickname
eps[i].UpdateBy = me.Nickname
}
err := models.AddEmbeddedProduct(rt.Ctx, eps)
ginx.NewRender(c).Message(err)
}
func (rt *Router) embeddedProductPut(c *gin.Context) {
var ep models.EmbeddedProduct
id := ginx.UrlParamInt64(c, "id")
ginx.BindJSON(c, &ep)
if id <= 0 {
ginx.Bomb(400, "invalid id")
}
oldProduct, err := models.GetEmbeddedProductByID(rt.Ctx, id)
ginx.Dangerous(err)
me := c.MustGet("user").(*models.User)
now := time.Now().Unix()
oldProduct.Name = ep.Name
oldProduct.URL = ep.URL
oldProduct.IsPrivate = ep.IsPrivate
oldProduct.TeamIDs = ep.TeamIDs
oldProduct.UpdateBy = me.Username
oldProduct.UpdateAt = now
err = models.UpdateEmbeddedProduct(rt.Ctx, oldProduct)
ginx.NewRender(c).Message(err)
}
func (rt *Router) embeddedProductDelete(c *gin.Context) {
id := ginx.UrlParamInt64(c, "id")
if id <= 0 {
ginx.Bomb(400, "invalid id")
}
err := models.DeleteEmbeddedProduct(rt.Ctx, id)
ginx.NewRender(c).Message(err)
}
func hasEmbeddedProductAccess(ctx *ctx.Context, user *models.User, ep *models.EmbeddedProduct) (bool, error) {
if user.IsAdmin() || !ep.IsPrivate {
return true, nil
}
gids, err := models.MyGroupIds(ctx, user.Id)
if err != nil {
return false, err
}
groupSet := make(map[int64]struct{}, len(gids))
for _, gid := range gids {
groupSet[gid] = struct{}{}
}
for _, tid := range ep.TeamIDs {
if _, ok := groupSet[tid]; ok {
return true, nil
}
}
return false, nil
}

View File

@@ -0,0 +1,77 @@
package router
import (
"github.com/ccfos/nightingale/v6/datasource/es"
"github.com/ccfos/nightingale/v6/dscache"
"github.com/gin-gonic/gin"
"github.com/toolkits/pkg/ginx"
"github.com/toolkits/pkg/logger"
)
type IndexReq struct {
Cate string `json:"cate"`
DatasourceId int64 `json:"datasource_id"`
Index string `json:"index"`
}
type FieldValueReq struct {
Cate string `json:"cate"`
DatasourceId int64 `json:"datasource_id"`
Index string `json:"index"`
Query FieldObj `json:"query"`
}
type FieldObj struct {
Find string `json:"find"`
Field string `json:"field"`
Query string `json:"query"`
}
func (rt *Router) QueryIndices(c *gin.Context) {
var f IndexReq
ginx.BindJSON(c, &f)
plug, exists := dscache.DsCache.Get(f.Cate, f.DatasourceId)
if !exists {
logger.Warningf("cluster:%d not exists", f.DatasourceId)
ginx.Bomb(200, "cluster not exists")
}
indices, err := plug.(*es.Elasticsearch).QueryIndices()
ginx.Dangerous(err)
ginx.NewRender(c).Data(indices, nil)
}
func (rt *Router) QueryFields(c *gin.Context) {
var f IndexReq
ginx.BindJSON(c, &f)
plug, exists := dscache.DsCache.Get(f.Cate, f.DatasourceId)
if !exists {
logger.Warningf("cluster:%d not exists", f.DatasourceId)
ginx.Bomb(200, "cluster not exists")
}
fields, err := plug.(*es.Elasticsearch).QueryFields([]string{f.Index})
ginx.Dangerous(err)
ginx.NewRender(c).Data(fields, nil)
}
func (rt *Router) QueryESVariable(c *gin.Context) {
var f FieldValueReq
ginx.BindJSON(c, &f)
plug, exists := dscache.DsCache.Get(f.Cate, f.DatasourceId)
if !exists {
logger.Warningf("cluster:%d not exists", f.DatasourceId)
ginx.Bomb(200, "cluster not exists")
}
fields, err := plug.(*es.Elasticsearch).QueryFieldValue([]string{f.Index}, f.Query.Field, f.Query.Query)
ginx.Dangerous(err)
ginx.NewRender(c).Data(fields, nil)
}

View File

@@ -0,0 +1,604 @@
package router
import (
"encoding/json"
"fmt"
"net/http"
"time"
"github.com/ccfos/nightingale/v6/alert/pipeline/engine"
"github.com/ccfos/nightingale/v6/models"
"github.com/gin-gonic/gin"
"github.com/google/uuid"
"github.com/toolkits/pkg/ginx"
"github.com/toolkits/pkg/i18n"
"github.com/toolkits/pkg/logger"
)
// 获取事件Pipeline列表
func (rt *Router) eventPipelinesList(c *gin.Context) {
me := c.MustGet("user").(*models.User)
pipelines, err := models.ListEventPipelines(rt.Ctx)
ginx.Dangerous(err)
allTids := make([]int64, 0)
for _, pipeline := range pipelines {
allTids = append(allTids, pipeline.TeamIds...)
}
ugMap, err := models.UserGroupIdAndNameMap(rt.Ctx, allTids)
ginx.Dangerous(err)
for _, pipeline := range pipelines {
for _, tid := range pipeline.TeamIds {
pipeline.TeamNames = append(pipeline.TeamNames, ugMap[tid])
}
// 兼容处理:自动填充工作流字段
pipeline.FillWorkflowFields()
}
gids, err := models.MyGroupIdsMap(rt.Ctx, me.Id)
ginx.Dangerous(err)
if me.IsAdmin() {
ginx.NewRender(c).Data(pipelines, nil)
return
}
res := make([]*models.EventPipeline, 0)
for _, pipeline := range pipelines {
for _, tid := range pipeline.TeamIds {
if _, ok := gids[tid]; ok {
res = append(res, pipeline)
break
}
}
}
ginx.NewRender(c).Data(res, nil)
}
// 获取单个事件Pipeline详情
func (rt *Router) getEventPipeline(c *gin.Context) {
me := c.MustGet("user").(*models.User)
id := ginx.UrlParamInt64(c, "id")
pipeline, err := models.GetEventPipeline(rt.Ctx, id)
ginx.Dangerous(err)
ginx.Dangerous(me.CheckGroupPermission(rt.Ctx, pipeline.TeamIds))
err = pipeline.FillTeamNames(rt.Ctx)
ginx.Dangerous(err)
// 兼容处理:自动填充工作流字段
pipeline.FillWorkflowFields()
ginx.NewRender(c).Data(pipeline, nil)
}
// 创建事件Pipeline
func (rt *Router) addEventPipeline(c *gin.Context) {
var pipeline models.EventPipeline
ginx.BindJSON(c, &pipeline)
user := c.MustGet("user").(*models.User)
now := time.Now().Unix()
pipeline.CreateBy = user.Username
pipeline.CreateAt = now
pipeline.UpdateAt = now
pipeline.UpdateBy = user.Username
err := pipeline.Verify()
if err != nil {
ginx.Bomb(http.StatusBadRequest, err.Error())
}
ginx.Dangerous(user.CheckGroupPermission(rt.Ctx, pipeline.TeamIds))
err = models.CreateEventPipeline(rt.Ctx, &pipeline)
ginx.NewRender(c).Message(err)
}
// 更新事件Pipeline
func (rt *Router) updateEventPipeline(c *gin.Context) {
var f models.EventPipeline
ginx.BindJSON(c, &f)
me := c.MustGet("user").(*models.User)
f.UpdateBy = me.Username
f.UpdateAt = time.Now().Unix()
pipeline, err := models.GetEventPipeline(rt.Ctx, f.ID)
if err != nil {
ginx.Bomb(http.StatusNotFound, "No such event pipeline")
}
ginx.Dangerous(me.CheckGroupPermission(rt.Ctx, pipeline.TeamIds))
ginx.NewRender(c).Message(pipeline.Update(rt.Ctx, &f))
}
// 删除事件Pipeline
func (rt *Router) deleteEventPipelines(c *gin.Context) {
var f struct {
Ids []int64 `json:"ids"`
}
ginx.BindJSON(c, &f)
if len(f.Ids) == 0 {
ginx.Bomb(http.StatusBadRequest, "ids required")
}
me := c.MustGet("user").(*models.User)
for _, id := range f.Ids {
pipeline, err := models.GetEventPipeline(rt.Ctx, id)
ginx.Dangerous(err)
ginx.Dangerous(me.CheckGroupPermission(rt.Ctx, pipeline.TeamIds))
}
err := models.DeleteEventPipelines(rt.Ctx, f.Ids)
ginx.NewRender(c).Message(err)
}
// 测试事件Pipeline
func (rt *Router) tryRunEventPipeline(c *gin.Context) {
var f struct {
EventId int64 `json:"event_id"`
PipelineConfig models.EventPipeline `json:"pipeline_config"`
EnvVariables map[string]string `json:"env_variables,omitempty"`
}
ginx.BindJSON(c, &f)
hisEvent, err := models.AlertHisEventGetById(rt.Ctx, f.EventId)
if err != nil || hisEvent == nil {
ginx.Bomb(http.StatusBadRequest, "event not found")
}
event := hisEvent.ToCur()
lang := c.GetHeader("X-Language")
me := c.MustGet("user").(*models.User)
// 统一使用工作流引擎执行(兼容线性模式和工作流模式)
workflowEngine := engine.NewWorkflowEngine(rt.Ctx)
triggerCtx := &models.WorkflowTriggerContext{
Mode: models.TriggerModeAPI,
TriggerBy: me.Username,
EnvOverrides: f.EnvVariables,
}
resultEvent, result, err := workflowEngine.Execute(&f.PipelineConfig, event, triggerCtx)
if err != nil {
ginx.Bomb(http.StatusBadRequest, "pipeline execute error: %v", err)
}
m := map[string]interface{}{
"event": resultEvent,
"result": i18n.Sprintf(lang, result.Message),
"status": result.Status,
"node_results": result.NodeResults,
}
if resultEvent == nil {
m["result"] = i18n.Sprintf(lang, "event is dropped")
}
ginx.NewRender(c).Data(m, nil)
}
// 测试事件处理器
func (rt *Router) tryRunEventProcessor(c *gin.Context) {
var f struct {
EventId int64 `json:"event_id"`
ProcessorConfig models.ProcessorConfig `json:"processor_config"`
}
ginx.BindJSON(c, &f)
hisEvent, err := models.AlertHisEventGetById(rt.Ctx, f.EventId)
if err != nil || hisEvent == nil {
ginx.Bomb(http.StatusBadRequest, "event not found")
}
event := hisEvent.ToCur()
processor, err := models.GetProcessorByType(f.ProcessorConfig.Typ, f.ProcessorConfig.Config)
if err != nil {
ginx.Bomb(200, "get processor err: %+v", err)
}
wfCtx := &models.WorkflowContext{
Event: event,
Vars: make(map[string]interface{}),
}
wfCtx, res, err := processor.Process(rt.Ctx, wfCtx)
if err != nil {
ginx.Bomb(200, "processor err: %+v", err)
}
lang := c.GetHeader("X-Language")
ginx.NewRender(c).Data(map[string]interface{}{
"event": wfCtx.Event,
"result": i18n.Sprintf(lang, res),
}, nil)
}
func (rt *Router) tryRunEventProcessorByNotifyRule(c *gin.Context) {
var f struct {
EventId int64 `json:"event_id"`
PipelineConfigs []models.PipelineConfig `json:"pipeline_configs"`
}
ginx.BindJSON(c, &f)
hisEvent, err := models.AlertHisEventGetById(rt.Ctx, f.EventId)
if err != nil || hisEvent == nil {
ginx.Bomb(http.StatusBadRequest, "event not found")
}
event := hisEvent.ToCur()
pids := make([]int64, 0)
for _, pc := range f.PipelineConfigs {
if pc.Enable {
pids = append(pids, pc.PipelineId)
}
}
pipelines, err := models.GetEventPipelinesByIds(rt.Ctx, pids)
if err != nil {
ginx.Bomb(http.StatusBadRequest, "processors not found")
}
wfCtx := &models.WorkflowContext{
Event: event,
Vars: make(map[string]interface{}),
}
for _, pl := range pipelines {
for _, p := range pl.ProcessorConfigs {
processor, err := models.GetProcessorByType(p.Typ, p.Config)
if err != nil {
ginx.Bomb(http.StatusBadRequest, "get processor: %+v err: %+v", p, err)
}
wfCtx, _, err = processor.Process(rt.Ctx, wfCtx)
if err != nil {
ginx.Bomb(http.StatusBadRequest, "processor: %+v err: %+v", p, err)
}
if wfCtx == nil || wfCtx.Event == nil {
lang := c.GetHeader("X-Language")
ginx.NewRender(c).Data(map[string]interface{}{
"event": nil,
"result": i18n.Sprintf(lang, "event is dropped"),
}, nil)
return
}
}
}
ginx.NewRender(c).Data(wfCtx.Event, nil)
}
func (rt *Router) eventPipelinesListByService(c *gin.Context) {
pipelines, err := models.ListEventPipelines(rt.Ctx)
ginx.NewRender(c).Data(pipelines, err)
}
type EventPipelineRequest struct {
// 事件数据(可选,如果不传则使用空事件)
Event *models.AlertCurEvent `json:"event,omitempty"`
// 环境变量覆盖
EnvOverrides map[string]string `json:"env_overrides,omitempty"`
Username string `json:"username,omitempty"`
}
// executePipelineTrigger 执行 Pipeline 触发的公共逻辑
func (rt *Router) executePipelineTrigger(pipeline *models.EventPipeline, req *EventPipelineRequest, triggerBy string) (string, error) {
// 准备事件数据
var event *models.AlertCurEvent
if req.Event != nil {
event = req.Event
} else {
// 创建空事件
event = &models.AlertCurEvent{
TriggerTime: time.Now().Unix(),
}
}
// 校验必填环境变量
if err := pipeline.ValidateEnvVariables(req.EnvOverrides); err != nil {
return "", fmt.Errorf("env validation failed: %v", err)
}
// 生成执行ID
executionID := uuid.New().String()
// 创建触发上下文
triggerCtx := &models.WorkflowTriggerContext{
Mode: models.TriggerModeAPI,
TriggerBy: triggerBy,
EnvOverrides: req.EnvOverrides,
RequestID: executionID,
}
// 异步执行工作流
go func() {
workflowEngine := engine.NewWorkflowEngine(rt.Ctx)
_, _, err := workflowEngine.Execute(pipeline, event, triggerCtx)
if err != nil {
logger.Errorf("async workflow execute error: pipeline_id=%d execution_id=%s err=%v",
pipeline.ID, executionID, err)
}
}()
return executionID, nil
}
// triggerEventPipelineByService Service 调用触发工作流执行
func (rt *Router) triggerEventPipelineByService(c *gin.Context) {
pipelineID := ginx.UrlParamInt64(c, "id")
var f EventPipelineRequest
ginx.BindJSON(c, &f)
// 获取 Pipeline
pipeline, err := models.GetEventPipeline(rt.Ctx, pipelineID)
if err != nil {
ginx.Bomb(http.StatusNotFound, "pipeline not found: %v", err)
}
executionID, err := rt.executePipelineTrigger(pipeline, &f, f.Username)
if err != nil {
ginx.Bomb(http.StatusBadRequest, "%v", err)
}
ginx.NewRender(c).Data(gin.H{
"execution_id": executionID,
"message": "workflow execution started",
}, nil)
}
// triggerEventPipelineByAPI API 触发工作流执行
func (rt *Router) triggerEventPipelineByAPI(c *gin.Context) {
pipelineID := ginx.UrlParamInt64(c, "id")
var f EventPipelineRequest
ginx.BindJSON(c, &f)
// 获取 Pipeline
pipeline, err := models.GetEventPipeline(rt.Ctx, pipelineID)
if err != nil {
ginx.Bomb(http.StatusNotFound, "pipeline not found: %v", err)
}
// 检查权限
me := c.MustGet("user").(*models.User)
ginx.Dangerous(me.CheckGroupPermission(rt.Ctx, pipeline.TeamIds))
executionID, err := rt.executePipelineTrigger(pipeline, &f, me.Username)
if err != nil {
ginx.Bomb(http.StatusBadRequest, err.Error())
}
ginx.NewRender(c).Data(gin.H{
"execution_id": executionID,
"message": "workflow execution started",
}, nil)
}
func (rt *Router) listAllEventPipelineExecutions(c *gin.Context) {
pipelineName := ginx.QueryStr(c, "pipeline_name", "")
mode := ginx.QueryStr(c, "mode", "")
status := ginx.QueryStr(c, "status", "")
limit := ginx.QueryInt(c, "limit", 20)
offset := ginx.QueryInt(c, "p", 1)
if limit <= 0 || limit > 1000 {
limit = 20
}
if offset <= 0 {
offset = 1
}
executions, total, err := models.ListAllEventPipelineExecutions(rt.Ctx, pipelineName, mode, status, limit, (offset-1)*limit)
ginx.Dangerous(err)
ginx.NewRender(c).Data(gin.H{
"list": executions,
"total": total,
}, nil)
}
func (rt *Router) listEventPipelineExecutions(c *gin.Context) {
pipelineID := ginx.UrlParamInt64(c, "id")
mode := ginx.QueryStr(c, "mode", "")
status := ginx.QueryStr(c, "status", "")
limit := ginx.QueryInt(c, "limit", 20)
offset := ginx.QueryInt(c, "p", 1)
if limit <= 0 || limit > 1000 {
limit = 20
}
if offset <= 0 {
offset = 1
}
executions, total, err := models.ListEventPipelineExecutions(rt.Ctx, pipelineID, mode, status, limit, (offset-1)*limit)
ginx.Dangerous(err)
ginx.NewRender(c).Data(gin.H{
"list": executions,
"total": total,
}, nil)
}
func (rt *Router) getEventPipelineExecution(c *gin.Context) {
execID := ginx.UrlParamStr(c, "exec_id")
detail, err := models.GetEventPipelineExecutionDetail(rt.Ctx, execID)
if err != nil {
ginx.Bomb(http.StatusNotFound, "execution not found: %v", err)
}
ginx.NewRender(c).Data(detail, nil)
}
func (rt *Router) getEventPipelineExecutionStats(c *gin.Context) {
pipelineID := ginx.UrlParamInt64(c, "id")
stats, err := models.GetEventPipelineExecutionStatistics(rt.Ctx, pipelineID)
ginx.Dangerous(err)
ginx.NewRender(c).Data(stats, nil)
}
func (rt *Router) cleanEventPipelineExecutions(c *gin.Context) {
var f struct {
BeforeDays int `json:"before_days"`
}
ginx.BindJSON(c, &f)
if f.BeforeDays <= 0 {
f.BeforeDays = 30
}
beforeTime := time.Now().AddDate(0, 0, -f.BeforeDays).Unix()
affected, err := models.DeleteEventPipelineExecutions(rt.Ctx, beforeTime)
ginx.Dangerous(err)
ginx.NewRender(c).Data(gin.H{
"deleted": affected,
}, nil)
}
func (rt *Router) streamEventPipeline(c *gin.Context) {
pipelineID := ginx.UrlParamInt64(c, "id")
var f EventPipelineRequest
ginx.BindJSON(c, &f)
pipeline, err := models.GetEventPipeline(rt.Ctx, pipelineID)
if err != nil {
ginx.Bomb(http.StatusNotFound, "pipeline not found: %v", err)
}
me := c.MustGet("user").(*models.User)
ginx.Dangerous(me.CheckGroupPermission(rt.Ctx, pipeline.TeamIds))
var event *models.AlertCurEvent
if f.Event != nil {
event = f.Event
} else {
event = &models.AlertCurEvent{
TriggerTime: time.Now().Unix(),
}
}
triggerCtx := &models.WorkflowTriggerContext{
Mode: models.TriggerModeAPI,
TriggerBy: me.Username,
EnvOverrides: f.EnvOverrides,
RequestID: uuid.New().String(),
Stream: true, // 流式端点强制启用流式输出
}
workflowEngine := engine.NewWorkflowEngine(rt.Ctx)
_, result, err := workflowEngine.Execute(pipeline, event, triggerCtx)
if err != nil {
ginx.Bomb(http.StatusInternalServerError, "execute failed: %v", err)
}
if result.Stream && result.StreamChan != nil {
rt.handleStreamResponse(c, result, triggerCtx.RequestID)
return
}
ginx.NewRender(c).Data(result, nil)
}
func (rt *Router) handleStreamResponse(c *gin.Context, result *models.WorkflowResult, requestID string) {
// 设置 SSE 响应头
c.Header("Content-Type", "text/event-stream")
c.Header("Cache-Control", "no-cache")
c.Header("Connection", "keep-alive")
c.Header("X-Accel-Buffering", "no") // 禁用 nginx 缓冲
c.Header("X-Request-ID", requestID)
flusher, ok := c.Writer.(http.Flusher)
if !ok {
ginx.Bomb(http.StatusInternalServerError, "streaming not supported")
return
}
// 发送初始连接成功消息
initData := fmt.Sprintf(`{"type":"connected","request_id":"%s","timestamp":%d}`, requestID, time.Now().UnixMilli())
fmt.Fprintf(c.Writer, "data: %s\n\n", initData)
flusher.Flush()
// 从 channel 读取并发送 SSE
timeout := time.After(30 * time.Minute) // 最长流式输出时间
for {
select {
case chunk, ok := <-result.StreamChan:
if !ok {
// channel 关闭,发送结束标记
return
}
data, err := json.Marshal(chunk)
if err != nil {
logger.Errorf("stream: failed to marshal chunk: %v", err)
continue
}
fmt.Fprintf(c.Writer, "data: %s\n\n", data)
flusher.Flush()
if chunk.Done {
return
}
case <-c.Request.Context().Done():
// 客户端断开连接
logger.Infof("stream: client disconnected, request_id=%s", requestID)
return
case <-timeout:
logger.Errorf("stream: timeout, request_id=%s", requestID)
return
}
}
}
func (rt *Router) streamEventPipelineByService(c *gin.Context) {
pipelineID := ginx.UrlParamInt64(c, "id")
var f EventPipelineRequest
ginx.BindJSON(c, &f)
pipeline, err := models.GetEventPipeline(rt.Ctx, pipelineID)
if err != nil {
ginx.Bomb(http.StatusNotFound, "pipeline not found: %v", err)
}
var event *models.AlertCurEvent
if f.Event != nil {
event = f.Event
} else {
event = &models.AlertCurEvent{
TriggerTime: time.Now().Unix(),
}
}
triggerCtx := &models.WorkflowTriggerContext{
Mode: models.TriggerModeAPI,
TriggerBy: f.Username,
EnvOverrides: f.EnvOverrides,
RequestID: uuid.New().String(),
Stream: true, // 流式端点强制启用流式输出
}
workflowEngine := engine.NewWorkflowEngine(rt.Ctx)
_, result, err := workflowEngine.Execute(pipeline, event, triggerCtx)
if err != nil {
ginx.Bomb(http.StatusInternalServerError, "execute failed: %v", err)
}
// 检查是否是流式输出
if result.Stream && result.StreamChan != nil {
rt.handleStreamResponse(c, result, triggerCtx.RequestID)
return
}
ginx.NewRender(c).Data(result, nil)
}

View File

@@ -1,17 +1,14 @@
package router
import (
"fmt"
"net/http"
"strconv"
"strings"
"github.com/ccfos/nightingale/v6/alert/aconf"
"github.com/ccfos/nightingale/v6/models"
"github.com/ccfos/nightingale/v6/pkg/ctx"
"github.com/ccfos/nightingale/v6/pkg/ibex"
"github.com/gin-gonic/gin"
"github.com/gin-gonic/gin"
"github.com/toolkits/pkg/ginx"
)
@@ -39,11 +36,31 @@ func (rt *Router) statistic(c *gin.Context) {
model = models.User{}
case "user_group":
model = models.UserGroup{}
case "notify_rule":
model = models.NotifyRule{}
case "notify_channel":
model = models.NotifyChannel{}
case "event_pipeline":
statistics, err = models.EventPipelineStatistics(rt.Ctx)
ginx.NewRender(c).Data(statistics, err)
return
case "datasource":
// datasource update_at is different from others
statistics, err = models.DatasourceStatistics(rt.Ctx)
ginx.NewRender(c).Data(statistics, err)
return
case "user_variable":
statistics, err = models.ConfigsUserVariableStatistics(rt.Ctx)
ginx.NewRender(c).Data(statistics, err)
return
case "cval":
statistics, err = models.ConfigCvalStatistics(rt.Ctx)
ginx.NewRender(c).Data(statistics, err)
return
case "message_template":
statistics, err = models.MessageTemplateStatistics(rt.Ctx)
ginx.NewRender(c).Data(statistics, err)
return
default:
ginx.Bomb(http.StatusBadRequest, "invalid name")
}
@@ -64,8 +81,26 @@ func queryDatasourceIds(c *gin.Context) []int64 {
return ids
}
func queryStrListField(c *gin.Context, fieldName string, sep ...string) []string {
str := ginx.QueryStr(c, fieldName, "")
if str == "" {
return nil
}
lst := []string{str}
for _, s := range sep {
var newLst []string
for _, str := range lst {
newLst = append(newLst, strings.Split(str, s)...)
}
lst = newLst
}
return lst
}
type idsForm struct {
Ids []int64 `json:"ids"`
Ids []int64 `json:"ids"`
IsSyncToFlashDuty bool `json:"is_sync_to_flashduty"`
}
func (f idsForm) Verify() {
@@ -93,6 +128,12 @@ func UserGroup(ctx *ctx.Context, id int64) *models.UserGroup {
ginx.Bomb(http.StatusNotFound, "No such UserGroup")
}
bgids, err := models.BusiGroupIds(ctx, []int64{id})
ginx.Dangerous(err)
obj.BusiGroups, err = models.BusiGroupGetByIds(ctx, bgids)
ginx.Dangerous(err)
return obj
}
@@ -130,27 +171,46 @@ type TaskCreateReply struct {
Dat int64 `json:"dat"` // task.id
}
// return task.id, error
func TaskCreate(v interface{}, ibexc aconf.Ibex) (int64, error) {
var res TaskCreateReply
err := ibex.New(
ibexc.Address,
ibexc.BasicAuthUser,
ibexc.BasicAuthPass,
ibexc.Timeout,
).
Path("/ibex/v1/tasks").
In(v).
Out(&res).
POST()
if err != nil {
return 0, err
func Username(c *gin.Context) string {
username := c.GetString(gin.AuthUserKey)
if username == "" {
user := c.MustGet("user").(*models.User)
username = user.Username
}
if res.Err != "" {
return 0, fmt.Errorf("response.err: %v", res.Err)
}
return res.Dat, nil
return username
}
func HasPermission(ctx *ctx.Context, c *gin.Context, sourceType, sourceId string, isAnonymousAccess bool) bool {
if sourceType == "event" && isAnonymousAccess {
return true
}
// 尝试从请求中获取 __token 参数
token := ginx.QueryStr(c, "__token", "")
// 如果有 __token 参数,验证其合法性
if token != "" {
return ValidateSourceToken(ctx, sourceType, sourceId, token)
}
return false
}
func ValidateSourceToken(ctx *ctx.Context, sourceType, sourceId, token string) bool {
if token == "" {
return false
}
// 根据源类型、源ID和令牌获取源令牌记录
sourceToken, err := models.GetSourceTokenBySource(ctx, sourceType, sourceId, token)
if err != nil {
return false
}
// 检查令牌是否过期
if sourceToken.IsExpired() {
return false
}
return true
}

View File

@@ -3,18 +3,35 @@ package router
import (
"compress/gzip"
"encoding/json"
"errors"
"io/ioutil"
"sort"
"strconv"
"strings"
"time"
"github.com/ccfos/nightingale/v6/center/metas"
"github.com/ccfos/nightingale/v6/memsto"
"github.com/ccfos/nightingale/v6/models"
"github.com/ccfos/nightingale/v6/pkg/ctx"
"github.com/ccfos/nightingale/v6/pushgw/idents"
"github.com/gin-gonic/gin"
"github.com/toolkits/pkg/ginx"
"github.com/toolkits/pkg/logger"
)
type HeartbeatHookFunc func(ident string) map[string]interface{}
func (rt *Router) heartbeat(c *gin.Context) {
req, err := HandleHeartbeat(c, rt.Ctx, rt.Alert.Heartbeat.EngineName, rt.MetaSet, rt.IdentSet, rt.TargetCache)
ginx.Dangerous(err)
m := rt.HeartbeatHook(req.Hostname)
ginx.NewRender(c).Data(m, err)
}
func HandleHeartbeat(c *gin.Context, ctx *ctx.Context, engineName string, metaSet *metas.Set, identSet *idents.Set, targetCache *memsto.TargetCacheType) (models.HostMeta, error) {
var bs []byte
var err error
var r *gzip.Reader
@@ -23,7 +40,7 @@ func (rt *Router) heartbeat(c *gin.Context) {
r, err = gzip.NewReader(c.Request.Body)
if err != nil {
c.String(400, err.Error())
return
return req, err
}
defer r.Close()
bs, err = ioutil.ReadAll(r)
@@ -31,11 +48,19 @@ func (rt *Router) heartbeat(c *gin.Context) {
} else {
defer c.Request.Body.Close()
bs, err = ioutil.ReadAll(c.Request.Body)
ginx.Dangerous(err)
if err != nil {
return req, err
}
}
err = json.Unmarshal(bs, &req)
ginx.Dangerous(err)
if err != nil {
return req, err
}
if req.Hostname == "" {
return req, errors.New("hostname is required")
}
// maybe from pushgw
if req.Offset == 0 {
@@ -46,23 +71,133 @@ func (rt *Router) heartbeat(c *gin.Context) {
req.RemoteAddr = c.ClientIP()
}
rt.MetaSet.Set(req.Hostname, req)
var items = make(map[string]struct{})
items[req.Hostname] = struct{}{}
rt.IdentSet.MSet(items)
if target, has := rt.TargetCache.Get(req.Hostname); has && target != nil {
var defGid int64 = -1
gid := ginx.QueryInt64(c, "gid", defGid)
hostIpStr := strings.TrimSpace(req.HostIp)
if gid == defGid { //set gid value from cache
gid = target.GroupId
}
logger.Debugf("heartbeat gid: %v, host_ip: '%v', target: %v", gid, hostIpStr, *target)
if gid != target.GroupId || hostIpStr != target.HostIp { // if either gid or host_ip has a new value
err = models.TargetUpdateHostIpAndBgid(rt.Ctx, req.Hostname, hostIpStr, gid)
}
if req.EngineName == "" {
req.EngineName = engineName
}
ginx.NewRender(c).Message(err)
metaSet.Set(req.Hostname, req)
var items = make(map[string]struct{})
items[req.Hostname] = struct{}{}
identSet.MSet(items)
if target, has := targetCache.Get(req.Hostname); has && target != nil {
gidsStr := ginx.QueryStr(c, "gid", "")
overwriteGids := ginx.QueryBool(c, "overwrite_gids", false)
hostIp := strings.TrimSpace(req.HostIp)
gids := strings.Split(gidsStr, ",")
if overwriteGids {
groupIds := make([]int64, 0)
for i := range gids {
if gids[i] == "" {
continue
}
groupId, err := strconv.ParseInt(gids[i], 10, 64)
if err != nil {
logger.Warningf("update target:%s group ids failed, err: %v", req.Hostname, err)
continue
}
groupIds = append(groupIds, groupId)
}
err := models.TargetOverrideBgids(ctx, []string{target.Ident}, groupIds, nil)
if err != nil {
logger.Warningf("update target:%s group ids failed, err: %v", target.Ident, err)
}
} else if gidsStr != "" {
for i := range gids {
groupId, err := strconv.ParseInt(gids[i], 10, 64)
if err != nil {
logger.Warningf("update target:%s group ids failed, err: %v", req.Hostname, err)
continue
}
if !target.MatchGroupId(groupId) {
err := models.TargetBindBgids(ctx, []string{target.Ident}, []int64{groupId}, nil)
if err != nil {
logger.Warningf("update target:%s group ids failed, err: %v", target.Ident, err)
}
}
}
}
newTarget := models.Target{}
targetNeedUpdate := false
if hostIp != "" && hostIp != target.HostIp {
newTarget.HostIp = hostIp
targetNeedUpdate = true
}
hostTagsMap := target.GetHostTagsMap()
hostTagNeedUpdate := false
if len(hostTagsMap) != len(req.GlobalLabels) {
hostTagNeedUpdate = true
} else {
for k, v := range req.GlobalLabels {
if v == "" {
continue
}
if tagv, ok := hostTagsMap[k]; !ok || tagv != v {
hostTagNeedUpdate = true
break
}
}
}
if hostTagNeedUpdate {
lst := []string{}
for k, v := range req.GlobalLabels {
lst = append(lst, k+"="+v)
}
sort.Strings(lst)
newTarget.HostTags = lst
targetNeedUpdate = true
}
userTagsMap := target.GetTagsMap()
userTagNeedUpdate := false
userTags := []string{}
for k, v := range userTagsMap {
if v == "" {
continue
}
if _, ok := req.GlobalLabels[k]; !ok {
userTags = append(userTags, k+"="+v)
} else { // 该key在hostTags中已经存在
userTagNeedUpdate = true
}
}
if userTagNeedUpdate {
newTarget.Tags = strings.Join(userTags, " ") + " "
targetNeedUpdate = true
}
if req.EngineName != "" && req.EngineName != target.EngineName {
newTarget.EngineName = req.EngineName
targetNeedUpdate = true
}
if req.AgentVersion != "" && req.AgentVersion != target.AgentVersion {
newTarget.AgentVersion = req.AgentVersion
targetNeedUpdate = true
}
if req.OS != "" && req.OS != target.OS {
newTarget.OS = req.OS
targetNeedUpdate = true
}
if targetNeedUpdate {
err := models.DB(ctx).Model(&target).Updates(newTarget).Error
if err != nil {
logger.Errorf("update target fields failed, err: %v", err)
}
}
logger.Debugf("heartbeat field:%+v target: %v", newTarget, *target)
}
return req, nil
}

View File

@@ -2,6 +2,7 @@ package router
import (
"encoding/base64"
"encoding/json"
"fmt"
"net/http"
"strconv"
@@ -10,16 +11,19 @@ import (
"github.com/ccfos/nightingale/v6/models"
"github.com/ccfos/nightingale/v6/pkg/cas"
"github.com/ccfos/nightingale/v6/pkg/dingtalk"
"github.com/ccfos/nightingale/v6/pkg/ldapx"
"github.com/ccfos/nightingale/v6/pkg/oauth2x"
"github.com/ccfos/nightingale/v6/pkg/oidcx"
"github.com/ccfos/nightingale/v6/pkg/secu"
"github.com/pelletier/go-toml/v2"
"github.com/dgrijalva/jwt-go"
"github.com/gin-gonic/gin"
"github.com/pelletier/go-toml/v2"
"github.com/pkg/errors"
"github.com/toolkits/pkg/ginx"
"github.com/toolkits/pkg/logger"
"gorm.io/gorm"
)
type loginForm struct {
@@ -51,22 +55,26 @@ func (rt *Router) loginPost(c *gin.Context) {
}
authPassWord = decPassWord
}
user, err := models.PassLogin(rt.Ctx, f.Username, authPassWord)
if err != nil {
// pass validate fail, try ldap
if rt.Sso.LDAP.Enable {
roles := strings.Join(rt.Sso.LDAP.DefaultRoles, " ")
user, err = models.LdapLogin(rt.Ctx, f.Username, authPassWord, roles, rt.Sso.LDAP)
if err != nil {
logger.Debugf("ldap login failed: %v username: %s", err, f.Username)
ginx.NewRender(c).Message(err)
var user *models.User
var err error
lc := rt.Sso.LDAP.Copy()
if lc.Enable {
user, err = ldapx.LdapLogin(rt.Ctx, f.Username, authPassWord, lc.DefaultRoles, lc.DefaultTeams, lc)
if err != nil {
logger.Debugf("ldap login failed: %v username: %s", err, f.Username)
var errLoginInN9e error
// to use n9e as the minimum guarantee for login
if user, errLoginInN9e = models.PassLogin(rt.Ctx, rt.Redis, f.Username, authPassWord); errLoginInN9e != nil {
ginx.NewRender(c).Message("ldap login failed: %v; n9e login failed: %v", err, errLoginInN9e)
return
}
user.RolesLst = strings.Fields(user.Roles)
} else {
ginx.NewRender(c).Message(err)
return
user.RolesLst = strings.Fields(user.Roles)
}
} else {
user, err = models.PassLogin(rt.Ctx, rt.Redis, f.Username, authPassWord)
ginx.Dangerous(err)
}
if user == nil {
@@ -89,7 +97,7 @@ func (rt *Router) loginPost(c *gin.Context) {
}
func (rt *Router) logoutPost(c *gin.Context) {
logger.Infof("username:%s login from:%s", c.GetString("username"), c.ClientIP())
logger.Infof("username:%s logout from:%s", c.GetString("username"), c.ClientIP())
metadata, err := rt.extractTokenMetadata(c.Request)
if err != nil {
ginx.NewRender(c, http.StatusBadRequest).Message("failed to parse jwt token")
@@ -102,7 +110,29 @@ func (rt *Router) logoutPost(c *gin.Context) {
return
}
ginx.NewRender(c).Message("")
var logoutAddr string
user := c.MustGet("user").(*models.User)
// 获取用户的 id_token
idToken, err := rt.fetchIdToken(c.Request.Context(), user.Id)
if err != nil {
logger.Debugf("fetch id_token failed: %v, user_id: %d", err, user.Id)
idToken = "" // 如果获取失败,使用空字符串
}
// 删除 id_token
rt.deleteIdToken(c.Request.Context(), user.Id)
switch user.Belong {
case "oidc":
logoutAddr = rt.Sso.OIDC.GetSsoLogoutAddr(idToken)
case "cas":
logoutAddr = rt.Sso.CAS.GetSsoLogoutAddr()
case "oauth2":
logoutAddr = rt.Sso.OAuth2.GetSsoLogoutAddr()
}
ginx.NewRender(c).Data(logoutAddr, nil)
}
type refreshForm struct {
@@ -138,6 +168,13 @@ func (rt *Router) refreshPost(c *gin.Context) {
return
}
// 看这个 token 是否还存在 redis 中
val, err := rt.fetchAuth(c.Request.Context(), refreshUuid)
if err != nil || val == "" {
ginx.NewRender(c, http.StatusUnauthorized).Message("refresh token expired")
return
}
userIdentity, ok := claims["user_identity"].(string)
if !ok {
// Theoretically impossible
@@ -178,6 +215,14 @@ func (rt *Router) refreshPost(c *gin.Context) {
ginx.Dangerous(err)
ginx.Dangerous(rt.createAuth(c.Request.Context(), userIdentity, ts))
// 延长 id_token 的过期时间,使其与新的 refresh token 生命周期保持一致
// 注意:这里不会获取新的 id_token只是延长 Redis 中现有 id_token 的 TTL
if idToken, err := rt.fetchIdToken(c.Request.Context(), userid); err == nil && idToken != "" {
if err := rt.saveIdToken(c.Request.Context(), userid, idToken); err != nil {
logger.Debugf("refresh id_token ttl failed: %v, user_id: %d", err, userid)
}
}
ginx.NewRender(c).Data(gin.H{
"access_token": ts.AccessToken,
"refresh_token": ts.RefreshToken,
@@ -240,41 +285,23 @@ func (rt *Router) loginCallback(c *gin.Context) {
if user != nil {
if rt.Sso.OIDC.CoverAttributes {
if ret.Nickname != "" {
user.Nickname = ret.Nickname
}
if ret.Email != "" {
user.Email = ret.Email
}
if ret.Phone != "" {
user.Phone = ret.Phone
}
user.UpdateAt = time.Now().Unix()
user.Update(rt.Ctx, "email", "nickname", "phone", "update_at")
updatedFields := user.UpdateSsoFields("oidc", ret.Nickname, ret.Phone, ret.Email)
ginx.Dangerous(user.Update(rt.Ctx, "update_at", updatedFields...))
}
} else {
now := time.Now().Unix()
user = &models.User{
Username: ret.Username,
Password: "******",
Nickname: ret.Nickname,
Phone: ret.Phone,
Email: ret.Email,
Portrait: "",
Roles: strings.Join(rt.Sso.OIDC.DefaultRoles, " "),
RolesLst: rt.Sso.OIDC.DefaultRoles,
Contacts: []byte("{}"),
CreateAt: now,
UpdateAt: now,
CreateBy: "oidc",
UpdateBy: "oidc",
}
user = new(models.User)
user.FullSsoFields("oidc", ret.Username, ret.Nickname, ret.Phone, ret.Email, rt.Sso.OIDC.DefaultRoles)
// create user from oidc
ginx.Dangerous(user.Add(rt.Ctx))
if len(rt.Sso.OIDC.DefaultTeams) > 0 {
for _, gid := range rt.Sso.OIDC.DefaultTeams {
err = models.UserGroupMemberAdd(rt.Ctx, gid, user.Id)
if err != nil {
logger.Errorf("user:%v UserGroupMemberAdd: %s", user, err)
}
}
}
}
// set user login state
@@ -283,6 +310,13 @@ func (rt *Router) loginCallback(c *gin.Context) {
ginx.Dangerous(err)
ginx.Dangerous(rt.createAuth(c.Request.Context(), userIdentity, ts))
// 保存 id_token 到 Redis用于登出时使用
if ret.IdToken != "" {
if err := rt.saveIdToken(c.Request.Context(), user.Id, ret.IdToken); err != nil {
logger.Errorf("save id_token failed: %v, user_id: %d", err, user.Id)
}
}
redirect := "/"
if ret.Redirect != "/login" {
redirect = ret.Redirect
@@ -350,38 +384,12 @@ func (rt *Router) loginCallbackCas(c *gin.Context) {
ginx.Dangerous(err)
if user != nil {
if rt.Sso.CAS.CoverAttributes {
if ret.Nickname != "" {
user.Nickname = ret.Nickname
}
if ret.Email != "" {
user.Email = ret.Email
}
if ret.Phone != "" {
user.Phone = ret.Phone
}
user.UpdateAt = time.Now().Unix()
ginx.Dangerous(user.Update(rt.Ctx, "email", "nickname", "phone", "update_at"))
updatedFields := user.UpdateSsoFields("cas", ret.Nickname, ret.Phone, ret.Email)
ginx.Dangerous(user.Update(rt.Ctx, "update_at", updatedFields...))
}
} else {
now := time.Now().Unix()
user = &models.User{
Username: ret.Username,
Password: "******",
Nickname: ret.Nickname,
Portrait: "",
Roles: strings.Join(rt.Sso.CAS.DefaultRoles, " "),
RolesLst: rt.Sso.CAS.DefaultRoles,
Contacts: []byte("{}"),
Phone: ret.Phone,
Email: ret.Email,
CreateAt: now,
UpdateAt: now,
CreateBy: "CAS",
UpdateBy: "CAS",
}
user = new(models.User)
user.FullSsoFields("cas", ret.Username, ret.Nickname, ret.Phone, ret.Email, rt.Sso.CAS.DefaultRoles)
// create user from cas
ginx.Dangerous(user.Add(rt.Ctx))
}
@@ -436,6 +444,81 @@ func (rt *Router) loginRedirectOAuth(c *gin.Context) {
ginx.NewRender(c).Data(redirect, err)
}
func (rt *Router) loginRedirectDingTalk(c *gin.Context) {
redirect := ginx.QueryStr(c, "redirect", "/")
v, exists := c.Get("userid")
if exists {
userid := v.(int64)
user, err := models.UserGetById(rt.Ctx, userid)
ginx.Dangerous(err)
if user == nil {
ginx.Bomb(200, "user not found")
}
if user.Username != "" { // already login
ginx.NewRender(c).Data(redirect, nil)
return
}
}
if !rt.Sso.DingTalk.Enable {
ginx.NewRender(c).Data("", nil)
return
}
redirect, err := rt.Sso.DingTalk.Authorize(rt.Redis, redirect)
ginx.Dangerous(err)
ginx.NewRender(c).Data(redirect, err)
}
func (rt *Router) loginCallbackDingTalk(c *gin.Context) {
code := ginx.QueryStr(c, "code", "")
state := ginx.QueryStr(c, "state", "")
ret, err := rt.Sso.DingTalk.Callback(rt.Redis, c.Request.Context(), code, state)
if err != nil {
logger.Errorf("sso_callback DingTalk fail. code:%s, state:%s, get ret: %+v. error: %v", code, state, ret, err)
ginx.NewRender(c).Data(CallbackOutput{}, err)
return
}
user, err := models.UserGet(rt.Ctx, "username=?", ret.Username)
ginx.Dangerous(err)
if user != nil {
if rt.Sso.DingTalk.DingTalkConfig.CoverAttributes {
updatedFields := user.UpdateSsoFields(dingtalk.SsoTypeName, ret.Nickname, ret.Phone, ret.Email)
ginx.Dangerous(user.Update(rt.Ctx, "update_at", updatedFields...))
}
} else {
user = new(models.User)
user.FullSsoFields(dingtalk.SsoTypeName, ret.Username, ret.Nickname, ret.Phone, ret.Email, rt.Sso.DingTalk.DingTalkConfig.DefaultRoles)
// create user from dingtalk
ginx.Dangerous(user.Add(rt.Ctx))
}
// set user login state
userIdentity := fmt.Sprintf("%d-%s", user.Id, user.Username)
ts, err := rt.createTokens(rt.HTTP.JWTAuth.SigningKey, userIdentity)
ginx.Dangerous(err)
ginx.Dangerous(rt.createAuth(c.Request.Context(), userIdentity, ts))
redirect := "/"
if ret.Redirect != "/login" {
redirect = ret.Redirect
}
ginx.NewRender(c).Data(CallbackOutput{
Redirect: redirect,
User: user,
AccessToken: ts.AccessToken,
RefreshToken: ts.RefreshToken,
}, nil)
}
func (rt *Router) loginCallbackOAuth(c *gin.Context) {
code := ginx.QueryStr(c, "code", "")
state := ginx.QueryStr(c, "state", "")
@@ -452,39 +535,12 @@ func (rt *Router) loginCallbackOAuth(c *gin.Context) {
if user != nil {
if rt.Sso.OAuth2.CoverAttributes {
if ret.Nickname != "" {
user.Nickname = ret.Nickname
}
if ret.Email != "" {
user.Email = ret.Email
}
if ret.Phone != "" {
user.Phone = ret.Phone
}
user.UpdateAt = time.Now().Unix()
user.Update(rt.Ctx, "email", "nickname", "phone", "update_at")
updatedFields := user.UpdateSsoFields("oauth2", ret.Nickname, ret.Phone, ret.Email)
ginx.Dangerous(user.Update(rt.Ctx, "update_at", updatedFields...))
}
} else {
now := time.Now().Unix()
user = &models.User{
Username: ret.Username,
Password: "******",
Nickname: ret.Nickname,
Phone: ret.Phone,
Email: ret.Email,
Portrait: "",
Roles: strings.Join(rt.Sso.OAuth2.DefaultRoles, " "),
RolesLst: rt.Sso.OAuth2.DefaultRoles,
Contacts: []byte("{}"),
CreateAt: now,
UpdateAt: now,
CreateBy: "oauth2",
UpdateBy: "oauth2",
}
user = new(models.User)
user.FullSsoFields("oauth2", ret.Username, ret.Nickname, ret.Phone, ret.Email, rt.Sso.OAuth2.DefaultRoles)
// create user from oidc
ginx.Dangerous(user.Add(rt.Ctx))
}
@@ -509,13 +565,14 @@ func (rt *Router) loginCallbackOAuth(c *gin.Context) {
}
type SsoConfigOutput struct {
OidcDisplayName string `json:"oidcDisplayName"`
CasDisplayName string `json:"casDisplayName"`
OauthDisplayName string `json:"oauthDisplayName"`
OidcDisplayName string `json:"oidcDisplayName"`
CasDisplayName string `json:"casDisplayName"`
OauthDisplayName string `json:"oauthDisplayName"`
DingTalkDisplayName string `json:"dingTalkDisplayName"`
}
func (rt *Router) ssoConfigNameGet(c *gin.Context) {
var oidcDisplayName, casDisplayName, oauthDisplayName string
var oidcDisplayName, casDisplayName, oauthDisplayName, dingTalkDisplayName string
if rt.Sso.OIDC != nil {
oidcDisplayName = rt.Sso.OIDC.GetDisplayName()
}
@@ -528,23 +585,85 @@ func (rt *Router) ssoConfigNameGet(c *gin.Context) {
oauthDisplayName = rt.Sso.OAuth2.GetDisplayName()
}
if rt.Sso.DingTalk != nil {
dingTalkDisplayName = rt.Sso.DingTalk.GetDisplayName()
}
ginx.NewRender(c).Data(SsoConfigOutput{
OidcDisplayName: oidcDisplayName,
CasDisplayName: casDisplayName,
OauthDisplayName: oauthDisplayName,
OidcDisplayName: oidcDisplayName,
CasDisplayName: casDisplayName,
OauthDisplayName: oauthDisplayName,
DingTalkDisplayName: dingTalkDisplayName,
}, nil)
}
func (rt *Router) ssoConfigGets(c *gin.Context) {
ginx.NewRender(c).Data(models.SsoConfigGets(rt.Ctx))
var ssoConfigs []models.SsoConfig
lst, err := models.SsoConfigGets(rt.Ctx)
ginx.Dangerous(err)
if len(lst) == 0 {
ginx.NewRender(c).Data(ssoConfigs, nil)
return
}
// TODO: dingTalkExist 为了兼容当前前端配置, 后期单点登陆统一调整后不在预先设置默认内容
dingTalkExist := false
for _, config := range lst {
var ssoReqConfig models.SsoConfig
ssoReqConfig.Id = config.Id
ssoReqConfig.Name = config.Name
ssoReqConfig.UpdateAt = config.UpdateAt
switch config.Name {
case dingtalk.SsoTypeName:
dingTalkExist = true
err := json.Unmarshal([]byte(config.Content), &ssoReqConfig.SettingJson)
ginx.Dangerous(err)
default:
ssoReqConfig.Content = config.Content
}
ssoConfigs = append(ssoConfigs, ssoReqConfig)
}
// TODO: dingTalkExist 为了兼容当前前端配置, 后期单点登陆统一调整后不在预先设置默认内容
if !dingTalkExist {
var ssoConfig models.SsoConfig
ssoConfig.Name = dingtalk.SsoTypeName
ssoConfigs = append(ssoConfigs, ssoConfig)
}
ginx.NewRender(c).Data(ssoConfigs, nil)
}
func (rt *Router) ssoConfigUpdate(c *gin.Context) {
var f models.SsoConfig
ginx.BindJSON(c, &f)
var ssoConfig models.SsoConfig
ginx.BindJSON(c, &ssoConfig)
err := f.Update(rt.Ctx)
ginx.Dangerous(err)
switch ssoConfig.Name {
case dingtalk.SsoTypeName:
f.Name = ssoConfig.Name
setting, err := json.Marshal(ssoConfig.SettingJson)
ginx.Dangerous(err)
f.Content = string(setting)
f.UpdateAt = time.Now().Unix()
sso, err := f.Query(rt.Ctx)
if !errors.Is(err, gorm.ErrRecordNotFound) {
ginx.Dangerous(err)
}
if errors.Is(err, gorm.ErrRecordNotFound) {
err = f.Create(rt.Ctx)
} else {
f.Id = sso.Id
err = f.Update(rt.Ctx)
}
ginx.Dangerous(err)
default:
f.Id = ssoConfig.Id
f.Name = ssoConfig.Name
f.Content = ssoConfig.Content
err := f.Update(rt.Ctx)
ginx.Dangerous(err)
}
switch f.Name {
case "LDAP":
@@ -568,6 +687,14 @@ func (rt *Router) ssoConfigUpdate(c *gin.Context) {
err := toml.Unmarshal([]byte(f.Content), &config)
ginx.Dangerous(err)
rt.Sso.OAuth2.Reload(config)
case dingtalk.SsoTypeName:
var config dingtalk.Config
err := json.Unmarshal([]byte(f.Content), &config)
ginx.Dangerous(err)
if rt.Sso.DingTalk == nil {
rt.Sso.DingTalk = dingtalk.New(config)
}
rt.Sso.DingTalk.Reload(config)
}
ginx.NewRender(c).Message(nil)

View File

@@ -0,0 +1,218 @@
package router
import (
"bytes"
"fmt"
"html/template"
"net/http"
"strings"
"time"
"github.com/ccfos/nightingale/v6/models"
"github.com/ccfos/nightingale/v6/pkg/slice"
"github.com/ccfos/nightingale/v6/pkg/strx"
"github.com/ccfos/nightingale/v6/pkg/tplx"
"github.com/gin-gonic/gin"
"github.com/google/uuid"
"github.com/toolkits/pkg/ginx"
)
func (rt *Router) messageTemplatesAdd(c *gin.Context) {
var lst []*models.MessageTemplate
ginx.BindJSON(c, &lst)
if len(lst) == 0 {
ginx.Bomb(http.StatusBadRequest, "input json is empty")
}
me := c.MustGet("user").(*models.User)
isAdmin := me.IsAdmin()
idents := make([]string, 0, len(lst))
gids, err := models.MyGroupIds(rt.Ctx, me.Id)
ginx.Dangerous(err)
now := time.Now().Unix()
for _, tpl := range lst {
// 生成一个唯一的标识符,以后也不允许修改,前端不需要传这个参数
tpl.Ident = uuid.New().String()
ginx.Dangerous(tpl.Verify())
if !isAdmin && !slice.HaveIntersection(gids, tpl.UserGroupIds) {
ginx.Bomb(http.StatusForbidden, "forbidden")
}
idents = append(idents, tpl.Ident)
tpl.CreateBy = me.Username
tpl.CreateAt = now
tpl.UpdateBy = me.Username
tpl.UpdateAt = now
}
lstWithSameId, err := models.MessageTemplatesGet(rt.Ctx, "ident IN ?", idents)
ginx.Dangerous(err)
if len(lstWithSameId) > 0 {
ginx.Bomb(http.StatusBadRequest, "ident already exists")
}
ids := make([]int64, 0, len(lst))
for _, tpl := range lst {
err := models.Insert(rt.Ctx, tpl)
ginx.Dangerous(err)
ids = append(ids, tpl.ID)
}
ginx.NewRender(c).Data(ids, nil)
}
func (rt *Router) messageTemplatesDel(c *gin.Context) {
var f idsForm
ginx.BindJSON(c, &f)
f.Verify()
lst, err := models.MessageTemplatesGet(rt.Ctx, "id in (?)", f.Ids)
ginx.Dangerous(err)
notifyRuleIds, err := models.UsedByNotifyRule(rt.Ctx, models.MsgTplList(lst))
ginx.Dangerous(err)
if len(notifyRuleIds) > 0 {
ginx.NewRender(c).Message(fmt.Errorf("used by notify rule: %v", notifyRuleIds))
return
}
if me := c.MustGet("user").(*models.User); !me.IsAdmin() {
gids, err := models.MyGroupIds(rt.Ctx, me.Id)
ginx.Dangerous(err)
for _, t := range lst {
if !slice.HaveIntersection(gids, t.UserGroupIds) {
ginx.Bomb(http.StatusForbidden, "forbidden")
}
}
}
ginx.NewRender(c).Message(models.DB(rt.Ctx).Delete(
&models.MessageTemplate{}, "id in (?)", f.Ids).Error)
}
func (rt *Router) messageTemplatePut(c *gin.Context) {
var f models.MessageTemplate
ginx.BindJSON(c, &f)
mt, err := models.MessageTemplateGet(rt.Ctx, "id <> ? and ident = ?", ginx.UrlParamInt64(c, "id"), f.Ident)
ginx.Dangerous(err)
if mt != nil {
ginx.Bomb(http.StatusBadRequest, "message template ident already exists")
}
mt, err = models.MessageTemplateGet(rt.Ctx, "id = ?", ginx.UrlParamInt64(c, "id"))
ginx.Dangerous(err)
if mt == nil {
ginx.Bomb(http.StatusNotFound, "message template not found")
}
me := c.MustGet("user").(*models.User)
if !me.IsAdmin() {
gids, err := models.MyGroupIds(rt.Ctx, me.Id)
ginx.Dangerous(err)
if !slice.HaveIntersection(gids, mt.UserGroupIds) {
ginx.Bomb(http.StatusForbidden, "forbidden")
}
}
f.UpdateBy = me.Username
ginx.NewRender(c).Message(mt.Update(rt.Ctx, f))
}
func (rt *Router) messageTemplateGet(c *gin.Context) {
me := c.MustGet("user").(*models.User)
gids, err := models.MyGroupIds(rt.Ctx, me.Id)
ginx.Dangerous(err)
tid := ginx.UrlParamInt64(c, "id")
mt, err := models.MessageTemplateGet(rt.Ctx, "id = ?", tid)
ginx.Dangerous(err)
if mt == nil {
ginx.Bomb(http.StatusNotFound, "message template not found")
}
if mt.Private == 1 && !slice.HaveIntersection(gids, mt.UserGroupIds) {
ginx.Bomb(http.StatusForbidden, "forbidden")
}
ginx.NewRender(c).Data(mt, nil)
}
func (rt *Router) messageTemplatesGet(c *gin.Context) {
var notifyChannelIdents []string
if tmp := ginx.QueryStr(c, "notify_channel_idents", ""); tmp != "" {
notifyChannelIdents = strings.Split(tmp, ",")
}
notifyChannelIds := strx.IdsInt64ForAPI(ginx.QueryStr(c, "notify_channel_ids", ""))
if len(notifyChannelIds) > 0 {
ginx.Dangerous(models.DB(rt.Ctx).Model(models.NotifyChannelConfig{}).
Where("id in (?)", notifyChannelIds).Pluck("ident", &notifyChannelIdents).Error)
}
me := c.MustGet("user").(*models.User)
gids, err := models.MyGroupIds(rt.Ctx, me.Id)
ginx.Dangerous(err)
lst, err := models.MessageTemplatesGetBy(rt.Ctx, notifyChannelIdents)
ginx.Dangerous(err)
if me.IsAdmin() {
ginx.NewRender(c).Data(lst, nil)
return
}
res := make([]*models.MessageTemplate, 0)
for _, t := range lst {
if slice.HaveIntersection[int64](gids, t.UserGroupIds) || t.Private == 0 {
res = append(res, t)
}
}
ginx.NewRender(c).Data(res, nil)
}
type evtMsgReq struct {
EventIds []int64 `json:"event_ids"`
Tpl struct {
Content map[string]string `json:"content"`
} `json:"tpl"`
}
func (rt *Router) eventsMessage(c *gin.Context) {
var req evtMsgReq
ginx.BindJSON(c, &req)
hisEvents, err := models.AlertHisEventGetByIds(rt.Ctx, req.EventIds)
ginx.Dangerous(err)
if len(hisEvents) == 0 {
ginx.Bomb(http.StatusBadRequest, "event not found")
}
ginx.Dangerous(err)
events := make([]*models.AlertCurEvent, len(hisEvents))
for i, he := range hisEvents {
events[i] = he.ToCur()
}
renderData := make(map[string]interface{})
renderData["events"] = events
defs := models.GetDefs(renderData)
ret := make(map[string]string, len(req.Tpl.Content))
for k, v := range req.Tpl.Content {
text := strings.Join(append(defs, v), "")
tpl, err := template.New(k).Funcs(tplx.TemplateFuncMap).Parse(text)
if err != nil {
ret[k] = err.Error()
continue
}
var buf bytes.Buffer
err = tpl.Execute(&buf, renderData)
if err != nil {
ret[k] = err.Error()
continue
}
ret[k] = buf.String()
}
ginx.NewRender(c).Data(ret, nil)
}

View File

@@ -6,16 +6,47 @@ import (
"time"
"github.com/ccfos/nightingale/v6/alert/common"
"github.com/ccfos/nightingale/v6/alert/mute"
"github.com/ccfos/nightingale/v6/models"
"github.com/ccfos/nightingale/v6/pkg/strx"
"github.com/gin-gonic/gin"
"github.com/toolkits/pkg/ginx"
"github.com/toolkits/pkg/i18n"
)
// Return all, front-end search and paging
func (rt *Router) alertMuteGetsByBG(c *gin.Context) {
bgid := ginx.UrlParamInt64(c, "id")
lst, err := models.AlertMuteGetsByBG(rt.Ctx, bgid)
prods := strings.Fields(ginx.QueryStr(c, "prods", ""))
query := ginx.QueryStr(c, "query", "")
expired := ginx.QueryInt(c, "expired", -1)
lst, err := models.AlertMuteGets(rt.Ctx, prods, bgid, -1, expired, query)
ginx.NewRender(c).Data(lst, err)
}
func (rt *Router) alertMuteGetsByGids(c *gin.Context) {
gids := strx.IdsInt64ForAPI(ginx.QueryStr(c, "gids", ""), ",")
if len(gids) > 0 {
for _, gid := range gids {
rt.bgroCheck(c, gid)
}
} else {
me := c.MustGet("user").(*models.User)
if !me.IsAdmin() {
var err error
gids, err = models.MyBusiGroupIds(rt.Ctx, me.Id)
ginx.Dangerous(err)
if len(gids) == 0 {
ginx.NewRender(c).Data([]int{}, nil)
return
}
}
}
lst, err := models.AlertMuteGetsByBGIds(rt.Ctx, gids)
ginx.NewRender(c).Data(lst, err)
}
@@ -24,11 +55,18 @@ func (rt *Router) alertMuteGets(c *gin.Context) {
prods := strings.Fields(ginx.QueryStr(c, "prods", ""))
bgid := ginx.QueryInt64(c, "bgid", -1)
query := ginx.QueryStr(c, "query", "")
lst, err := models.AlertMuteGets(rt.Ctx, prods, bgid, query)
disabled := ginx.QueryInt(c, "disabled", -1)
expired := ginx.QueryInt(c, "expired", -1)
lst, err := models.AlertMuteGets(rt.Ctx, prods, bgid, disabled, expired, query)
ginx.NewRender(c).Data(lst, err)
}
func (rt *Router) activeAlertMuteGets(c *gin.Context) {
lst, err := models.AlertMuteGetsAll(rt.Ctx)
ginx.NewRender(c).Data(lst, err)
}
func (rt *Router) alertMuteAdd(c *gin.Context) {
var f models.AlertMute
@@ -36,8 +74,58 @@ func (rt *Router) alertMuteAdd(c *gin.Context) {
username := c.MustGet("username").(string)
f.CreateBy = username
f.UpdateBy = username
f.GroupId = ginx.UrlParamInt64(c, "id")
ginx.NewRender(c).Message(f.Add(rt.Ctx))
ginx.Dangerous(f.Add(rt.Ctx))
ginx.NewRender(c).Data(f.Id, nil)
}
type MuteTestForm struct {
EventId int64 `json:"event_id" binding:"required"`
AlertMute models.AlertMute `json:"config" binding:"required"`
PassTimeCheck bool `json:"pass_time_check"`
}
func (rt *Router) alertMuteTryRun(c *gin.Context) {
var f MuteTestForm
ginx.BindJSON(c, &f)
ginx.Dangerous(f.AlertMute.Verify())
hisEvent, err := models.AlertHisEventGetById(rt.Ctx, f.EventId)
ginx.Dangerous(err)
if hisEvent == nil {
ginx.Bomb(http.StatusNotFound, "event not found")
}
curEvent := *hisEvent.ToCur()
curEvent.SetTagsMap()
if f.PassTimeCheck {
f.AlertMute.MuteTimeType = models.Periodic
f.AlertMute.PeriodicMutesJson = []models.PeriodicMute{
{
EnableDaysOfWeek: "0 1 2 3 4 5 6",
EnableStime: "00:00",
EnableEtime: "00:00",
},
}
}
match, err := mute.MatchMute(&curEvent, &f.AlertMute)
if err != nil {
// 对错误信息进行 i18n 翻译
translatedErr := i18n.Sprintf(c.GetHeader("X-Language"), err.Error())
ginx.Bomb(http.StatusBadRequest, translatedErr)
}
if !match {
ginx.NewRender(c).Data("event not match mute", nil)
return
}
ginx.NewRender(c).Data("event match mute", nil)
}
// Preview events (alert_cur_event) that match the mute strategy based on the following criteria:
@@ -69,7 +157,8 @@ func (rt *Router) alertMuteAddByService(c *gin.Context) {
var f models.AlertMute
ginx.BindJSON(c, &f)
ginx.NewRender(c).Message(f.Add(rt.Ctx))
err := f.Add(rt.Ctx)
ginx.NewRender(c).Data(f.Id, err)
}
func (rt *Router) alertMuteDel(c *gin.Context) {
@@ -80,6 +169,14 @@ func (rt *Router) alertMuteDel(c *gin.Context) {
ginx.NewRender(c).Message(models.AlertMuteDel(rt.Ctx, f.Ids))
}
// alertMuteGet returns the alert mute by ID
func (rt *Router) alertMuteGet(c *gin.Context) {
amid := ginx.UrlParamInt64(c, "amid")
am, err := models.AlertMuteGetById(rt.Ctx, amid)
am.DB2FE()
ginx.NewRender(c).Data(am, err)
}
func (rt *Router) alertMutePutByFE(c *gin.Context) {
var f models.AlertMute
ginx.BindJSON(c, &f)

View File

@@ -9,6 +9,7 @@ import (
"strings"
"time"
"github.com/ccfos/nightingale/v6/center/cstats"
"github.com/ccfos/nightingale/v6/models"
"github.com/gin-gonic/gin"
@@ -17,6 +18,10 @@ import (
"github.com/toolkits/pkg/ginx"
)
const (
DefaultTokenKey = "X-User-Token"
)
type AccessDetails struct {
AccessUuid string
UserIdentity string
@@ -62,8 +67,29 @@ func (rt *Router) proxyAuth() gin.HandlerFunc {
}
}
func (rt *Router) jwtAuth() gin.HandlerFunc {
// tokenAuth 支持两种方式的认证,固定 token 和 jwt token
// 因为不太好区分用户使用哪个方式,所以两种方式放在一个中间件里
func (rt *Router) tokenAuth() gin.HandlerFunc {
return func(c *gin.Context) {
// 先验证固定 token
if rt.HTTP.TokenAuth.Enable {
tokenKey := rt.HTTP.TokenAuth.HeaderUserTokenKey
if tokenKey == "" {
tokenKey = DefaultTokenKey
}
token := c.GetHeader(tokenKey)
if token != "" {
user := rt.UserTokenCache.GetByToken(token)
if user != nil && user.Username != "" {
c.Set("userid", user.Id)
c.Set("username", user.Username)
c.Next()
return
}
}
}
// 再验证 jwt token
metadata, err := rt.extractTokenMetadata(c.Request)
if err != nil {
ginx.Bomb(http.StatusUnauthorized, "unauthorized")
@@ -92,11 +118,15 @@ func (rt *Router) jwtAuth() gin.HandlerFunc {
}
}
func (rt *Router) Auth() gin.HandlerFunc {
return rt.auth()
}
func (rt *Router) auth() gin.HandlerFunc {
if rt.HTTP.ProxyAuth.Enable {
return rt.proxyAuth()
} else {
return rt.jwtAuth()
return rt.tokenAuth()
}
}
@@ -120,11 +150,15 @@ func (rt *Router) jwtMock() gin.HandlerFunc {
}
}
func (rt *Router) User() gin.HandlerFunc {
return rt.user()
}
func (rt *Router) user() gin.HandlerFunc {
return func(c *gin.Context) {
userid := c.MustGet("userid").(int64)
username := c.MustGet("username").(string)
user, err := models.UserGetById(rt.Ctx, userid)
user, err := models.UserGetByUsername(rt.Ctx, username)
if err != nil {
ginx.Bomb(http.StatusUnauthorized, "unauthorized")
}
@@ -135,6 +169,8 @@ func (rt *Router) user() gin.HandlerFunc {
c.Set("user", user)
c.Set("isadmin", user.IsAdmin())
// Update user.LastActiveTime
rt.UserCache.SetLastActiveTime(user.Id, time.Now().Unix())
c.Next()
}
}
@@ -174,6 +210,10 @@ func (rt *Router) bgro() gin.HandlerFunc {
}
// bgrw 逐步要被干掉,不安全
func (rt *Router) Bgrw() gin.HandlerFunc {
return rt.bgrw()
}
func (rt *Router) bgrw() gin.HandlerFunc {
return func(c *gin.Context) {
me := c.MustGet("user").(*models.User)
@@ -233,6 +273,10 @@ func (rt *Router) bgroCheck(c *gin.Context, bgid int64) {
c.Set("busi_group", bg)
}
func (rt *Router) Perm(operation string) gin.HandlerFunc {
return rt.perm(operation)
}
func (rt *Router) perm(operation string) gin.HandlerFunc {
return func(c *gin.Context) {
me := c.MustGet("user").(*models.User)
@@ -292,6 +336,12 @@ func (rt *Router) extractTokenMetadata(r *http.Request) (*AccessDetails, error)
return nil, errors.New("failed to parse access_uuid from jwt")
}
// accessUuid 在 redis 里存在才放行
val, err := rt.fetchAuth(r.Context(), accessUuid)
if err != nil || val == "" {
return nil, errors.New("unauthorized")
}
return &AccessDetails{
AccessUuid: accessUuid,
UserIdentity: claims["user_identity"].(string),
@@ -312,29 +362,72 @@ func (rt *Router) extractToken(r *http.Request) string {
}
func (rt *Router) createAuth(ctx context.Context, userIdentity string, td *TokenDetails) error {
username := strings.Split(userIdentity, "-")[1]
// 如果只能有一个账号登录,那么就删除之前的 token
if rt.HTTP.JWTAuth.SingleLogin {
delKeys, err := rt.Redis.SMembers(ctx, rt.wrapJwtKey(username)).Result()
if err != nil {
return err
}
if len(delKeys) > 0 {
errDel := rt.Redis.Del(ctx, delKeys...).Err()
if errDel != nil {
return errDel
}
}
if errDel := rt.Redis.Del(ctx, rt.wrapJwtKey(username)).Err(); errDel != nil {
return errDel
}
}
at := time.Unix(td.AtExpires, 0)
rte := time.Unix(td.RtExpires, 0)
now := time.Now()
errAccess := rt.Redis.Set(ctx, rt.wrapJwtKey(td.AccessUuid), userIdentity, at.Sub(now)).Err()
if errAccess != nil {
return errAccess
if err := rt.Redis.Set(ctx, rt.wrapJwtKey(td.AccessUuid), userIdentity, at.Sub(now)).Err(); err != nil {
cstats.RedisOperationLatency.WithLabelValues("set_token", "fail").Observe(time.Since(now).Seconds())
return err
}
errRefresh := rt.Redis.Set(ctx, rt.wrapJwtKey(td.RefreshUuid), userIdentity, rte.Sub(now)).Err()
if errRefresh != nil {
return errRefresh
if err := rt.Redis.Set(ctx, rt.wrapJwtKey(td.RefreshUuid), userIdentity, rte.Sub(now)).Err(); err != nil {
cstats.RedisOperationLatency.WithLabelValues("set_token", "fail").Observe(time.Since(now).Seconds())
return err
}
cstats.RedisOperationLatency.WithLabelValues("set_token", "success").Observe(time.Since(now).Seconds())
if rt.HTTP.JWTAuth.SingleLogin {
if err := rt.Redis.SAdd(ctx, rt.wrapJwtKey(username), rt.wrapJwtKey(td.AccessUuid), rt.wrapJwtKey(td.RefreshUuid)).Err(); err != nil {
return err
}
}
return nil
}
func (rt *Router) fetchAuth(ctx context.Context, givenUuid string) (string, error) {
return rt.Redis.Get(ctx, rt.wrapJwtKey(givenUuid)).Result()
now := time.Now()
ret, err := rt.Redis.Get(ctx, rt.wrapJwtKey(givenUuid)).Result()
if err != nil {
cstats.RedisOperationLatency.WithLabelValues("get_token", "fail").Observe(time.Since(now).Seconds())
} else {
cstats.RedisOperationLatency.WithLabelValues("get_token", "success").Observe(time.Since(now).Seconds())
}
return ret, err
}
func (rt *Router) deleteAuth(ctx context.Context, givenUuid string) error {
return rt.Redis.Del(ctx, rt.wrapJwtKey(givenUuid)).Err()
err := rt.Redis.Del(ctx, rt.wrapJwtKey(givenUuid)).Err()
if err != nil {
cstats.RedisOperationLatency.WithLabelValues("del_token", "fail").Observe(time.Since(time.Now()).Seconds())
} else {
cstats.RedisOperationLatency.WithLabelValues("del_token", "success").Observe(time.Since(time.Now()).Seconds())
}
return err
}
func (rt *Router) deleteTokens(ctx context.Context, authD *AccessDetails) error {
@@ -360,6 +453,30 @@ func (rt *Router) wrapJwtKey(key string) string {
return rt.HTTP.JWTAuth.RedisKeyPrefix + key
}
func (rt *Router) wrapIdTokenKey(userId int64) string {
return fmt.Sprintf("n9e_id_token_%d", userId)
}
// saveIdToken 保存用户的 id_token 到 Redis
func (rt *Router) saveIdToken(ctx context.Context, userId int64, idToken string) error {
if idToken == "" {
return nil
}
// id_token 的过期时间应该与 RefreshToken 保持一致,确保在整个会话期间都可用于登出
expiration := time.Minute * time.Duration(rt.HTTP.JWTAuth.RefreshExpired)
return rt.Redis.Set(ctx, rt.wrapIdTokenKey(userId), idToken, expiration).Err()
}
// fetchIdToken 从 Redis 获取用户的 id_token
func (rt *Router) fetchIdToken(ctx context.Context, userId int64) (string, error) {
return rt.Redis.Get(ctx, rt.wrapIdTokenKey(userId)).Result()
}
// deleteIdToken 从 Redis 删除用户的 id_token
func (rt *Router) deleteIdToken(ctx context.Context, userId int64) error {
return rt.Redis.Del(ctx, rt.wrapIdTokenKey(userId)).Err()
}
type TokenDetails struct {
AccessToken string
RefreshToken string

View File

@@ -0,0 +1,206 @@
package router
import (
"strings"
"github.com/ccfos/nightingale/v6/alert/sender"
"github.com/ccfos/nightingale/v6/models"
"github.com/ccfos/nightingale/v6/pkg/ctx"
"github.com/gin-gonic/gin"
"github.com/toolkits/pkg/ginx"
"github.com/toolkits/pkg/logger"
)
type NotificationResponse struct {
SubRules []SubRule `json:"sub_rules"`
Notifies map[string][]Record `json:"notifies"`
}
type SubRule struct {
SubID int64 `json:"sub_id"`
NotifyRuleId int64 `json:"notify_rule_id"`
Notifies map[string][]Record `json:"notifies"`
}
type Record struct {
NotifyRuleId int64 `json:"notify_rule_id"`
Target string `json:"target"`
Username string `json:"username"`
Status int `json:"status"`
Detail string `json:"detail"`
}
// notificationRecordAdd
func (rt *Router) notificationRecordAdd(c *gin.Context) {
var req []*models.NotificationRecord
ginx.BindJSON(c, &req)
err := sender.PushNotifyRecords(req)
ginx.Dangerous(err, 429)
ginx.NewRender(c).Data(nil, err)
}
func (rt *Router) notificationRecordList(c *gin.Context) {
eid := ginx.UrlParamInt64(c, "eid")
lst, err := models.NotificationRecordsGetByEventId(rt.Ctx, eid)
ginx.Dangerous(err)
response := buildNotificationResponse(rt.Ctx, lst)
ginx.NewRender(c).Data(response, nil)
}
func buildNotificationResponse(ctx *ctx.Context, nl []*models.NotificationRecord) NotificationResponse {
response := NotificationResponse{
SubRules: []SubRule{},
Notifies: make(map[string][]Record),
}
subRuleMap := make(map[int64]*SubRule)
// Collect all group IDs
groupIdSet := make(map[int64]struct{})
// map[SubId]map[Channel]map[Target]index
filter := make(map[int64]map[string]map[string]int)
for i, n := range nl {
// 对相同的 channel-target 进行合并
for _, gid := range n.GetGroupIds(ctx) {
groupIdSet[gid] = struct{}{}
}
if _, exists := filter[n.SubId]; !exists {
filter[n.SubId] = make(map[string]map[string]int)
}
if _, exists := filter[n.SubId][n.Channel]; !exists {
filter[n.SubId][n.Channel] = make(map[string]int)
}
idx, exists := filter[n.SubId][n.Channel][n.Target]
if !exists {
filter[n.SubId][n.Channel][n.Target] = i
} else {
if nl[idx].Status < n.Status {
nl[idx].Status = n.Status
}
nl[idx].Details = nl[idx].Details + ", " + n.Details
nl[i] = nil
}
}
// Fill usernames only once
usernameByTarget := fillUserNames(ctx, groupIdSet)
for _, n := range nl {
if n == nil {
continue
}
m := usernameByTarget[n.Target]
usernames := make([]string, 0, len(m))
for k := range m {
usernames = append(usernames, k)
}
if !checkChannel(n.Channel) {
// Hide sensitive information
n.Target = replaceLastEightChars(n.Target)
}
record := Record{
Target: n.Target,
Status: n.Status,
Detail: n.Details,
NotifyRuleId: n.NotifyRuleID,
}
record.Username = strings.Join(usernames, ",")
if n.SubId > 0 {
// Handle SubRules
subRule, ok := subRuleMap[n.SubId]
if !ok {
newSubRule := &SubRule{
NotifyRuleId: n.NotifyRuleID,
SubID: n.SubId,
}
newSubRule.Notifies = make(map[string][]Record)
newSubRule.Notifies[n.Channel] = []Record{record}
subRuleMap[n.SubId] = newSubRule
} else {
if _, exists := subRule.Notifies[n.Channel]; !exists {
subRule.Notifies[n.Channel] = []Record{record}
} else {
subRule.Notifies[n.Channel] = append(subRule.Notifies[n.Channel], record)
}
}
continue
}
if response.Notifies == nil {
response.Notifies = make(map[string][]Record)
}
if _, exists := response.Notifies[n.Channel]; !exists {
response.Notifies[n.Channel] = []Record{record}
} else {
response.Notifies[n.Channel] = append(response.Notifies[n.Channel], record)
}
}
for _, subRule := range subRuleMap {
response.SubRules = append(response.SubRules, *subRule)
}
return response
}
// check channel is one of the following: tx-sms, tx-voice, ali-sms, ali-voice, email, script
func checkChannel(channel string) bool {
switch channel {
case "tx-sms", "tx-voice", "ali-sms", "ali-voice", "email", "script":
return true
}
return false
}
func replaceLastEightChars(s string) string {
if len(s) <= 8 {
return strings.Repeat("*", len(s))
}
return s[:len(s)-8] + strings.Repeat("*", 8)
}
func fillUserNames(ctx *ctx.Context, groupIdSet map[int64]struct{}) map[string]map[string]struct{} {
userNameByTarget := make(map[string]map[string]struct{})
gids := make([]int64, 0, len(groupIdSet))
for gid := range groupIdSet {
gids = append(gids, gid)
}
users, err := models.UsersGetByGroupIds(ctx, gids)
if err != nil {
logger.Errorf("UsersGetByGroupIds failed, err: %v", err)
return userNameByTarget
}
for _, user := range users {
logger.Warningf("user: %s", user.Username)
for _, ch := range models.DefaultChannels {
target, exist := user.ExtractToken(ch)
if exist {
if _, ok := userNameByTarget[target]; !ok {
userNameByTarget[target] = make(map[string]struct{})
}
userNameByTarget[target][user.Username] = struct{}{}
}
}
}
return userNameByTarget
}

View File

@@ -0,0 +1,414 @@
package router
import (
"bytes"
"encoding/json"
"fmt"
"io"
"net/http"
"net/url"
"sort"
"time"
"github.com/ccfos/nightingale/v6/models"
"github.com/gin-gonic/gin"
"github.com/toolkits/pkg/ginx"
)
func (rt *Router) notifyChannelsAdd(c *gin.Context) {
me := c.MustGet("user").(*models.User)
var lst []*models.NotifyChannelConfig
ginx.BindJSON(c, &lst)
if len(lst) == 0 {
ginx.Bomb(http.StatusBadRequest, "input json is empty")
}
names := make([]string, 0, len(lst))
for i := range lst {
ginx.Dangerous(lst[i].Verify())
names = append(names, lst[i].Name)
lst[i].CreateBy = me.Username
lst[i].CreateAt = time.Now().Unix()
lst[i].UpdateBy = me.Username
lst[i].UpdateAt = time.Now().Unix()
}
lstWithSameName, err := models.NotifyChannelsGet(rt.Ctx, "name IN ?", names)
ginx.Dangerous(err)
if len(lstWithSameName) > 0 {
ginx.Bomb(http.StatusBadRequest, "name already exists")
}
ids := make([]int64, 0, len(lst))
for _, item := range lst {
err := models.Insert(rt.Ctx, item)
ginx.Dangerous(err)
ids = append(ids, item.ID)
}
ginx.NewRender(c).Data(ids, nil)
}
func (rt *Router) notifyChannelsDel(c *gin.Context) {
var f idsForm
ginx.BindJSON(c, &f)
f.Verify()
lst, err := models.NotifyChannelsGet(rt.Ctx, "id in (?)", f.Ids)
ginx.Dangerous(err)
notifyRuleIds, err := models.UsedByNotifyRule(rt.Ctx, models.NotiChList(lst))
ginx.Dangerous(err)
if len(notifyRuleIds) > 0 {
ginx.NewRender(c).Message(fmt.Errorf("used by notify rule: %v", notifyRuleIds))
return
}
ginx.NewRender(c).Message(models.DB(rt.Ctx).
Delete(&models.NotifyChannelConfig{}, "id in (?)", f.Ids).Error)
}
func (rt *Router) notifyChannelPut(c *gin.Context) {
me := c.MustGet("user").(*models.User)
var f models.NotifyChannelConfig
ginx.BindJSON(c, &f)
lstWithSameName, err := models.NotifyChannelsGet(rt.Ctx, "name = ? and id <> ?", f.Name, f.ID)
ginx.Dangerous(err)
if len(lstWithSameName) > 0 {
ginx.Bomb(http.StatusBadRequest, "name already exists")
}
nc, err := models.NotifyChannelGet(rt.Ctx, "id = ?", ginx.UrlParamInt64(c, "id"))
ginx.Dangerous(err)
if nc == nil {
ginx.Bomb(http.StatusNotFound, "notify channel not found")
}
f.UpdateBy = me.Username
ginx.NewRender(c).Message(nc.Update(rt.Ctx, f))
}
func (rt *Router) notifyChannelGet(c *gin.Context) {
cid := ginx.UrlParamInt64(c, "id")
nc, err := models.NotifyChannelGet(rt.Ctx, "id = ?", cid)
ginx.Dangerous(err)
if nc == nil {
ginx.Bomb(http.StatusNotFound, "notify channel not found")
}
ginx.NewRender(c).Data(nc, nil)
}
func (rt *Router) notifyChannelGetBy(c *gin.Context) {
ident := ginx.QueryStr(c, "ident")
nc, err := models.NotifyChannelGet(rt.Ctx, "ident = ?", ident)
ginx.Dangerous(err)
if nc == nil {
ginx.Bomb(http.StatusNotFound, "notify channel not found")
}
nc.ParamConfig = &models.NotifyParamConfig{}
nc.RequestConfig = &models.RequestConfig{}
ginx.NewRender(c).Data(nc, nil)
}
func (rt *Router) notifyChannelsGet(c *gin.Context) {
lst, err := models.NotifyChannelsGet(rt.Ctx, "", nil)
ginx.NewRender(c).Data(lst, err)
}
func (rt *Router) notifyChannelsGetForNormalUser(c *gin.Context) {
lst, err := models.NotifyChannelsGet(rt.Ctx, "")
ginx.Dangerous(err)
newLst := make([]*models.NotifyChannelConfig, 0, len(lst))
for _, c := range lst {
newLst = append(newLst, &models.NotifyChannelConfig{
ID: c.ID,
Name: c.Name,
Ident: c.Ident,
Enable: c.Enable,
RequestType: c.RequestType,
ParamConfig: c.ParamConfig,
})
}
ginx.NewRender(c).Data(newLst, nil)
}
func (rt *Router) notifyChannelIdentsGet(c *gin.Context) {
// 获取所有通知渠道
channels, err := models.NotifyChannelsGet(rt.Ctx, "", nil)
ginx.Dangerous(err)
// ident 去重
idents := make(map[string]struct{})
for _, channel := range channels {
if channel.Ident != "" {
idents[channel.Ident] = struct{}{}
}
}
lst := make([]string, 0, len(idents))
for ident := range idents {
lst = append(lst, ident)
}
sort.Strings(lst)
ginx.NewRender(c).Data(lst, nil)
}
func (rt *Router) flashDutyNotifyChannelsGet(c *gin.Context) {
cid := ginx.UrlParamInt64(c, "id")
nc, err := models.NotifyChannelGet(rt.Ctx, "id = ?", cid)
ginx.Dangerous(err)
if nc == nil {
ginx.Bomb(http.StatusNotFound, "notify channel not found")
}
configs, err := models.ConfigsSelectByCkey(rt.Ctx, "flashduty_app_key")
if err != nil {
ginx.Bomb(http.StatusInternalServerError, "failed to get flashduty app key")
}
jsonData := []byte("{}")
if len(configs) > 0 {
me := c.MustGet("user").(*models.User)
jsonData = []byte(fmt.Sprintf(`{"member_name":"%s","email":"%s","phone":"%s"}`, me.Username, me.Email, me.Phone))
}
items, err := getFlashDutyChannels(nc.RequestConfig.FlashDutyRequestConfig.IntegrationUrl, jsonData, time.Duration(nc.RequestConfig.FlashDutyRequestConfig.Timeout)*time.Millisecond)
ginx.Dangerous(err)
ginx.NewRender(c).Data(items, nil)
}
type flushDutyChannelsResponse struct {
Error struct {
Code string `json:"code"`
Message string `json:"message"`
} `json:"error"`
Data struct {
Items []FlashDutyChannel `json:"items"`
Total int `json:"total"`
} `json:"data"`
}
type FlashDutyChannel struct {
ChannelID int `json:"channel_id"`
ChannelName string `json:"channel_name"`
Status string `json:"status"`
}
// getFlashDutyChannels 从FlashDuty API获取频道列表
func getFlashDutyChannels(integrationUrl string, jsonData []byte, timeout time.Duration) ([]FlashDutyChannel, error) {
// 解析URL提取baseUrl和参数
baseUrl, integrationKey, err := parseIntegrationUrl(integrationUrl)
if err != nil {
return nil, err
}
if integrationKey == "" {
return nil, fmt.Errorf("integration_key not found in URL")
}
// 构建新的API URL保持原始路径
url := fmt.Sprintf("%s/channel/list-by-integration?integration_key=%s", baseUrl, integrationKey)
req, err := http.NewRequest("POST", url, bytes.NewBuffer(jsonData))
if err != nil {
return nil, err
}
req.Header.Set("Content-Type", "application/json")
httpResp, err := (&http.Client{
Timeout: timeout,
}).Do(req)
if err != nil {
return nil, err
}
defer httpResp.Body.Close()
body, err := io.ReadAll(httpResp.Body)
if err != nil {
return nil, err
}
var res flushDutyChannelsResponse
if err := json.Unmarshal(body, &res); err != nil {
return nil, err
}
if res.Error.Message != "" {
return nil, fmt.Errorf(res.Error.Message)
}
return res.Data.Items, nil
}
// parseIntegrationUrl 从URL中提取baseUrl和参数
func parseIntegrationUrl(urlStr string) (baseUrl string, integrationKey string, err error) {
// 解析URL
parsedUrl, err := url.Parse(urlStr)
if err != nil {
return "", "", err
}
host := fmt.Sprintf("%s://%s", parsedUrl.Scheme, parsedUrl.Host)
// 提取查询参数
queryParams := parsedUrl.Query()
integrationKey = queryParams.Get("integration_key")
return host, integrationKey, nil
}
func (rt *Router) pagerDutyNotifyServicesGet(c *gin.Context) {
cid := ginx.UrlParamInt64(c, "id")
nc, err := models.NotifyChannelGet(rt.Ctx, "id = ?", cid)
ginx.Dangerous(err)
if err != nil || nc == nil {
ginx.Bomb(http.StatusNotFound, "notify channel not found")
}
items, err := getPagerDutyServices(nc.RequestConfig.PagerDutyRequestConfig.ApiKey, time.Duration(nc.RequestConfig.PagerDutyRequestConfig.Timeout)*time.Millisecond)
if err != nil {
ginx.Bomb(http.StatusInternalServerError, fmt.Sprintf("failed to get pagerduty services: %v", err))
}
// 服务: []集成,扁平化为服务-集成
var flattenedItems []map[string]string
for _, svc := range items {
for _, integ := range svc.Integrations {
flattenedItems = append(flattenedItems, map[string]string{
"service_id": svc.ID,
"service_name": svc.Name,
"integration_summary": integ.Summary,
"integration_id": integ.ID,
"integration_url": integ.Self,
})
}
}
ginx.NewRender(c).Data(flattenedItems, nil)
}
func (rt *Router) pagerDutyIntegrationKeyGet(c *gin.Context) {
serviceId := ginx.UrlParamStr(c, "service_id")
integrationId := ginx.UrlParamStr(c, "integration_id")
cid := ginx.UrlParamInt64(c, "id")
nc, err := models.NotifyChannelGet(rt.Ctx, "id = ?", cid)
ginx.Dangerous(err)
if err != nil || nc == nil {
ginx.Bomb(http.StatusNotFound, "notify channel not found")
}
integrationUrl := fmt.Sprintf("https://api.pagerduty.com/services/%s/integrations/%s", serviceId, integrationId)
integrationKey, err := getPagerDutyIntegrationKey(integrationUrl, nc.RequestConfig.PagerDutyRequestConfig.ApiKey, time.Duration(nc.RequestConfig.PagerDutyRequestConfig.Timeout)*time.Millisecond)
if err != nil {
ginx.Bomb(http.StatusInternalServerError, fmt.Sprintf("failed to get pagerduty integration key: %v", err))
}
ginx.NewRender(c).Data(map[string]string{
"integration_key": integrationKey,
}, nil)
}
type PagerDutyIntegration struct {
ID string `json:"id"`
IntegrationKey string `json:"integration_key"`
Self string `json:"self"` // integration 的 API URL
Summary string `json:"summary"`
}
type PagerDutyService struct {
Name string `json:"name"`
ID string `json:"id"`
Integrations []PagerDutyIntegration `json:"integrations"`
}
// getPagerDutyServices 从 PagerDuty API 分页获取所有服务及其集成信息
func getPagerDutyServices(apiKey string, timeout time.Duration) ([]PagerDutyService, error) {
const limit = 100 // 每页最大数量
var offset uint // 分页偏移量
var allServices []PagerDutyService
for {
// 构建带分页参数的 URL
url := fmt.Sprintf("https://api.pagerduty.com/services?limit=%d&offset=%d", limit, offset)
req, err := http.NewRequest("GET", url, nil)
if err != nil {
return nil, err
}
req.Header.Set("Authorization", fmt.Sprintf("Token token=%s", apiKey))
req.Header.Set("Accept", "application/vnd.pagerduty+json;version=2")
httpResp, err := (&http.Client{Timeout: timeout}).Do(req)
if err != nil {
return nil, err
}
body, err := io.ReadAll(httpResp.Body)
httpResp.Body.Close()
if err != nil {
return nil, err
}
// 定义包含分页信息的响应结构
var serviceRes struct {
Services []PagerDutyService `json:"services"`
More bool `json:"more"` // 是否还有更多数据
Limit uint `json:"limit"`
Offset uint `json:"offset"`
}
if err := json.Unmarshal(body, &serviceRes); err != nil {
return nil, err
}
allServices = append(allServices, serviceRes.Services...)
// 判断是否还有更多数据
if !serviceRes.More || len(serviceRes.Services) < int(limit) {
break
}
offset += limit // 准备请求下一页
}
return allServices, nil
}
// getPagerDutyIntegrationKey 通过 integration 的 API URL 获取 integration key
func getPagerDutyIntegrationKey(integrationUrl, apiKey string, timeout time.Duration) (string, error) {
req, err := http.NewRequest("GET", integrationUrl, nil)
if err != nil {
return "", err
}
req.Header.Set("Authorization", fmt.Sprintf("Token token=%s", apiKey))
httpResp, err := (&http.Client{
Timeout: timeout,
}).Do(req)
if err != nil {
return "", err
}
defer httpResp.Body.Close()
body, err := io.ReadAll(httpResp.Body)
if err != nil {
return "", err
}
var integRes struct {
Integration struct {
IntegrationKey string `json:"integration_key"`
} `json:"integration"`
}
if err := json.Unmarshal(body, &integRes); err != nil {
return "", err
}
return integRes.Integration.IntegrationKey, nil
}

View File

@@ -0,0 +1,17 @@
package router
import (
"fmt"
"testing"
)
func TestGetFlashDutyChannels(t *testing.T) {
// 构造测试数据
integrationUrl := "https://api.flashcat.cloud/event/push/alert/n9e?integration_key=xxx"
jsonData := []byte(`{}`)
// 调用被测试的函数
channels, err := getFlashDutyChannels(integrationUrl, jsonData, 5000)
fmt.Println(channels, err)
}

View File

@@ -2,12 +2,14 @@ package router
import (
"encoding/json"
"fmt"
"strings"
"github.com/ccfos/nightingale/v6/alert/aconf"
"github.com/ccfos/nightingale/v6/alert/sender"
"github.com/ccfos/nightingale/v6/memsto"
"github.com/ccfos/nightingale/v6/models"
"github.com/ccfos/nightingale/v6/pkg/tplx"
"github.com/gin-gonic/gin"
"github.com/pelletier/go-toml/v2"
@@ -43,8 +45,8 @@ func (rt *Router) webhookPuts(c *gin.Context) {
data, err := json.Marshal(webhooks)
ginx.Dangerous(err)
ginx.NewRender(c).Message(models.ConfigsSet(rt.Ctx, models.WEBHOOKKEY, string(data)))
username := c.MustGet("username").(string)
ginx.NewRender(c).Message(models.ConfigsSetWithUname(rt.Ctx, models.WEBHOOKKEY, string(data), username))
}
func (rt *Router) notifyScriptGet(c *gin.Context) {
@@ -67,8 +69,8 @@ func (rt *Router) notifyScriptPut(c *gin.Context) {
data, err := json.Marshal(notifyScript)
ginx.Dangerous(err)
ginx.NewRender(c).Message(models.ConfigsSet(rt.Ctx, models.NOTIFYSCRIPT, string(data)))
username := c.MustGet("username").(string)
ginx.NewRender(c).Message(models.ConfigsSetWithUname(rt.Ctx, models.NOTIFYSCRIPT, string(data), username))
}
func (rt *Router) notifyChannelGets(c *gin.Context) {
@@ -88,7 +90,8 @@ func (rt *Router) notifyChannelPuts(c *gin.Context) {
var notifyChannels []models.NotifyChannel
ginx.BindJSON(c, &notifyChannels)
channels := []string{models.Dingtalk, models.Wecom, models.Feishu, models.Mm, models.Telegram, models.Email}
channels := []string{models.Dingtalk, models.Wecom, models.Feishu, models.Mm, models.Telegram,
models.Email, models.Lark, models.LarkCard}
m := make(map[string]struct{})
for _, v := range notifyChannels {
@@ -103,12 +106,12 @@ func (rt *Router) notifyChannelPuts(c *gin.Context) {
data, err := json.Marshal(notifyChannels)
ginx.Dangerous(err)
ginx.NewRender(c).Message(models.ConfigsSet(rt.Ctx, models.NOTIFYCHANNEL, string(data)))
username := c.MustGet("username").(string)
ginx.NewRender(c).Message(models.ConfigsSetWithUname(rt.Ctx, models.NOTIFYCHANNEL, string(data), username))
}
func (rt *Router) notifyContactGets(c *gin.Context) {
var notifyContacts []models.NotifyContact
notifyContacts := []models.NotifyContact{}
cval, err := models.ConfigsGet(rt.Ctx, models.NOTIFYCONTACT)
ginx.Dangerous(err)
if cval == "" {
@@ -117,6 +120,7 @@ func (rt *Router) notifyContactGets(c *gin.Context) {
}
err = json.Unmarshal([]byte(cval), &notifyContacts)
ginx.NewRender(c).Data(notifyContacts, err)
}
@@ -124,23 +128,10 @@ func (rt *Router) notifyContactPuts(c *gin.Context) {
var notifyContacts []models.NotifyContact
ginx.BindJSON(c, &notifyContacts)
keys := []string{models.DingtalkKey, models.WecomKey, models.FeishuKey, models.MmKey, models.TelegramKey}
m := make(map[string]struct{})
for _, v := range notifyContacts {
m[v.Ident] = struct{}{}
}
for _, v := range keys {
if _, ok := m[v]; !ok {
ginx.Bomb(200, "contact %s ident can not modify", v)
}
}
data, err := json.Marshal(notifyContacts)
ginx.Dangerous(err)
ginx.NewRender(c).Message(models.ConfigsSet(rt.Ctx, models.NOTIFYCONTACT, string(data)))
username := c.MustGet("username").(string)
ginx.NewRender(c).Message(models.ConfigsSetWithUname(rt.Ctx, models.NOTIFYCONTACT, string(data), username))
}
func (rt *Router) notifyConfigGet(c *gin.Context) {
@@ -160,42 +151,41 @@ func (rt *Router) notifyConfigGet(c *gin.Context) {
func (rt *Router) notifyConfigPut(c *gin.Context) {
var f models.Configs
ginx.BindJSON(c, &f)
userVariableMap := rt.NotifyConfigCache.ConfigCache.Get()
text := tplx.ReplaceTemplateUseText(f.Ckey, f.Cval, userVariableMap)
switch f.Ckey {
case models.SMTP:
var smtp aconf.SMTPConfig
err := toml.Unmarshal([]byte(f.Cval), &smtp)
ginx.Dangerous(err)
case models.IBEX:
var ibex aconf.Ibex
err := toml.Unmarshal([]byte(f.Cval), &ibex)
err := toml.Unmarshal([]byte(text), &smtp)
ginx.Dangerous(err)
default:
ginx.Bomb(200, "key %s can not modify", f.Ckey)
}
err := models.ConfigsSet(rt.Ctx, f.Ckey, f.Cval)
if err != nil {
ginx.Bomb(200, err.Error())
}
username := c.MustGet("username").(string)
//insert or update built-in config
ginx.Dangerous(models.ConfigsSetWithUname(rt.Ctx, f.Ckey, f.Cval, username))
if f.Ckey == models.SMTP {
// 重置邮件发送器
smtp := smtpValidate(f.Cval)
go sender.RestartEmailSender(smtp)
smtp, errSmtp := SmtpValidate(text)
ginx.Dangerous(errSmtp)
go sender.RestartEmailSender(rt.Ctx, smtp)
}
ginx.NewRender(c).Message(nil)
}
func smtpValidate(smtpStr string) aconf.SMTPConfig {
func SmtpValidate(text string) (aconf.SMTPConfig, error) {
var smtp aconf.SMTPConfig
ginx.Dangerous(toml.Unmarshal([]byte(smtpStr), &smtp))
var err error
if smtp.Host == "" || smtp.Port == 0 {
ginx.Bomb(200, "smtp host or port can not be empty")
err = toml.Unmarshal([]byte(text), &smtp)
if err != nil {
return smtp, err
}
return smtp
if smtp.Host == "" || smtp.Port == 0 {
return smtp, fmt.Errorf("smtp host or port can not be empty")
}
return smtp, err
}
type form struct {
@@ -215,7 +205,22 @@ func (rt *Router) attemptSendEmail(c *gin.Context) {
if f.Ckey != models.SMTP {
ginx.Bomb(200, "config(%v) invalid", f)
}
smtp := smtpValidate(f.Cval)
userVariableMap := rt.NotifyConfigCache.ConfigCache.Get()
text := tplx.ReplaceTemplateUseText(f.Ckey, f.Cval, userVariableMap)
smtp, err := SmtpValidate(text)
ginx.Dangerous(err)
ginx.NewRender(c).Message(sender.SendEmail("Email test", "email content", []string{f.Email}, smtp))
}
func (rt *Router) notifyChannelConfigGets(c *gin.Context) {
id := ginx.QueryInt64(c, "id", 0)
name := ginx.QueryStr(c, "name", "")
ident := ginx.QueryStr(c, "ident", "")
enabled := ginx.QueryInt(c, "enabled", -1)
notifyChannels, err := models.NotifyChannelGets(rt.Ctx, id, name, ident, enabled)
ginx.NewRender(c).Data(notifyChannels, err)
}

View File

@@ -0,0 +1,360 @@
package router
import (
"fmt"
"net/http"
"time"
"github.com/ccfos/nightingale/v6/alert/dispatch"
"github.com/ccfos/nightingale/v6/memsto"
"github.com/ccfos/nightingale/v6/models"
"github.com/ccfos/nightingale/v6/pkg/ctx"
"github.com/ccfos/nightingale/v6/pkg/slice"
"github.com/gin-gonic/gin"
"github.com/toolkits/pkg/ginx"
"github.com/toolkits/pkg/logger"
)
func (rt *Router) notifyRulesAdd(c *gin.Context) {
var lst []*models.NotifyRule
ginx.BindJSON(c, &lst)
if len(lst) == 0 {
ginx.Bomb(http.StatusBadRequest, "input json is empty")
}
me := c.MustGet("user").(*models.User)
isAdmin := me.IsAdmin()
gids, err := models.MyGroupIds(rt.Ctx, me.Id)
ginx.Dangerous(err)
now := time.Now().Unix()
for _, nr := range lst {
ginx.Dangerous(nr.Verify())
if !isAdmin && !slice.HaveIntersection(gids, nr.UserGroupIds) {
ginx.Bomb(http.StatusForbidden, "forbidden")
}
nr.CreateBy = me.Username
nr.CreateAt = now
nr.UpdateBy = me.Username
nr.UpdateAt = now
err := models.Insert(rt.Ctx, nr)
ginx.Dangerous(err)
}
ginx.NewRender(c).Data(lst, nil)
}
func (rt *Router) notifyRulesDel(c *gin.Context) {
var f idsForm
ginx.BindJSON(c, &f)
f.Verify()
if me := c.MustGet("user").(*models.User); !me.IsAdmin() {
lst, err := models.NotifyRulesGet(rt.Ctx, "id in (?)", f.Ids)
ginx.Dangerous(err)
gids, err := models.MyGroupIds(rt.Ctx, me.Id)
ginx.Dangerous(err)
for _, t := range lst {
if !slice.HaveIntersection(gids, t.UserGroupIds) {
ginx.Bomb(http.StatusForbidden, "forbidden")
}
}
}
ginx.NewRender(c).Message(models.DB(rt.Ctx).
Delete(&models.NotifyRule{}, "id in (?)", f.Ids).Error)
}
func (rt *Router) notifyRulePut(c *gin.Context) {
var f models.NotifyRule
ginx.BindJSON(c, &f)
nr, err := models.NotifyRuleGet(rt.Ctx, "id = ?", ginx.UrlParamInt64(c, "id"))
ginx.Dangerous(err)
if nr == nil {
ginx.Bomb(http.StatusNotFound, "notify rule not found")
}
me := c.MustGet("user").(*models.User)
gids, err := models.MyGroupIds(rt.Ctx, me.Id)
ginx.Dangerous(err)
if !slice.HaveIntersection(gids, nr.UserGroupIds) && !me.IsAdmin() {
ginx.Bomb(http.StatusForbidden, "forbidden")
}
f.UpdateBy = me.Username
ginx.NewRender(c).Message(nr.Update(rt.Ctx, f))
}
func (rt *Router) notifyRuleGet(c *gin.Context) {
me := c.MustGet("user").(*models.User)
gids, err := models.MyGroupIds(rt.Ctx, me.Id)
ginx.Dangerous(err)
tid := ginx.UrlParamInt64(c, "id")
nr, err := models.NotifyRuleGet(rt.Ctx, "id = ?", tid)
ginx.Dangerous(err)
if nr == nil {
ginx.Bomb(http.StatusNotFound, "notify rule not found")
}
if !slice.HaveIntersection(gids, nr.UserGroupIds) && !me.IsAdmin() {
ginx.Bomb(http.StatusForbidden, "forbidden")
}
ginx.NewRender(c).Data(nr, nil)
}
func (rt *Router) notifyRulesGetByService(c *gin.Context) {
ginx.NewRender(c).Data(models.NotifyRulesGet(rt.Ctx, "enable = ?", true))
}
func (rt *Router) notifyRulesGet(c *gin.Context) {
me := c.MustGet("user").(*models.User)
gids, err := models.MyGroupIds(rt.Ctx, me.Id)
ginx.Dangerous(err)
lst, err := models.NotifyRulesGet(rt.Ctx, "", nil)
ginx.Dangerous(err)
if me.IsAdmin() {
ginx.NewRender(c).Data(lst, nil)
return
}
res := make([]*models.NotifyRule, 0)
for _, nr := range lst {
if slice.HaveIntersection[int64](gids, nr.UserGroupIds) {
res = append(res, nr)
}
}
ginx.NewRender(c).Data(res, nil)
}
type NotifyTestForm struct {
EventIDs []int64 `json:"event_ids" binding:"required"`
NotifyConfig models.NotifyConfig `json:"notify_config" binding:"required"`
}
func (rt *Router) notifyTest(c *gin.Context) {
var f NotifyTestForm
ginx.BindJSON(c, &f)
hisEvents, err := models.AlertHisEventGetByIds(rt.Ctx, f.EventIDs)
ginx.Dangerous(err)
if len(hisEvents) == 0 {
ginx.Bomb(http.StatusBadRequest, "event not found")
}
ginx.Dangerous(err)
events := []*models.AlertCurEvent{}
for _, he := range hisEvents {
event := he.ToCur()
event.SetTagsMap()
if err := dispatch.NotifyRuleMatchCheck(&f.NotifyConfig, event); err != nil {
ginx.Bomb(http.StatusBadRequest, err.Error())
}
events = append(events, event)
}
resp, err := SendNotifyChannelMessage(rt.Ctx, rt.UserCache, rt.UserGroupCache, f.NotifyConfig, events)
if resp == "" {
resp = "success"
}
ginx.NewRender(c).Data(resp, err)
}
func SendNotifyChannelMessage(ctx *ctx.Context, userCache *memsto.UserCacheType, userGroup *memsto.UserGroupCacheType, notifyConfig models.NotifyConfig, events []*models.AlertCurEvent) (string, error) {
notifyChannels, err := models.NotifyChannelGets(ctx, notifyConfig.ChannelID, "", "", -1)
if err != nil {
return "", fmt.Errorf("failed to get notify channels: %v", err)
}
if len(notifyChannels) == 0 {
return "", fmt.Errorf("notify channel not found")
}
notifyChannel := notifyChannels[0]
if !notifyChannel.Enable {
return "", fmt.Errorf("notify channel not enabled, please enable it first")
}
// 获取站点URL用于模板渲染
siteUrl, _ := models.ConfigsGetSiteUrl(ctx)
if siteUrl == "" {
siteUrl = "http://127.0.0.1:17000"
}
tplContent := make(map[string]interface{})
if notifyChannel.RequestType != "flashduty" {
messageTemplates, err := models.MessageTemplateGets(ctx, notifyConfig.TemplateID, "", "")
if err != nil {
return "", fmt.Errorf("failed to get message templates: %v", err)
}
if len(messageTemplates) == 0 {
return "", fmt.Errorf("message template not found")
}
tplContent = messageTemplates[0].RenderEvent(events, siteUrl)
}
var contactKey string
if notifyChannel.ParamConfig != nil && notifyChannel.ParamConfig.UserInfo != nil {
contactKey = notifyChannel.ParamConfig.UserInfo.ContactKey
}
sendtos, flashDutyChannelIDs, pagerDutyRoutingKeys, customParams := dispatch.GetNotifyConfigParams(&notifyConfig, contactKey, userCache, userGroup)
var resp string
switch notifyChannel.RequestType {
case "flashduty":
client, err := models.GetHTTPClient(notifyChannel)
if err != nil {
return "", fmt.Errorf("failed to get http client: %v", err)
}
for i := range flashDutyChannelIDs {
resp, err = notifyChannel.SendFlashDuty(events, flashDutyChannelIDs[i], client)
if err != nil {
return "", fmt.Errorf("failed to send flashduty notify: %v", err)
}
}
logger.Infof("channel_name: %v, event:%+v, tplContent:%s, customParams:%v, respBody: %v, err: %v", notifyChannel.Name, events[0], tplContent, customParams, resp, err)
return resp, nil
case "pagerduty":
client, err := models.GetHTTPClient(notifyChannel)
if err != nil {
return "", fmt.Errorf("failed to get http client: %v", err)
}
for _, routingKey := range pagerDutyRoutingKeys {
resp, err = notifyChannel.SendPagerDuty(events, routingKey, siteUrl, client)
if err != nil {
return "", fmt.Errorf("failed to send pagerduty notify: %v", err)
}
}
logger.Infof("channel_name: %v, event:%+v, tplContent:%s, customParams:%v, respBody: %v, err: %v", notifyChannel.Name, events[0], tplContent, customParams, resp, err)
return resp, nil
case "http":
client, err := models.GetHTTPClient(notifyChannel)
if err != nil {
return "", fmt.Errorf("failed to get http client: %v", err)
}
if notifyChannel.RequestConfig == nil {
return "", fmt.Errorf("request config is nil")
}
if notifyChannel.RequestConfig.HTTPRequestConfig == nil {
return "", fmt.Errorf("http request config is nil")
}
if dispatch.NeedBatchContacts(notifyChannel.RequestConfig.HTTPRequestConfig) || len(sendtos) == 0 {
resp, err = notifyChannel.SendHTTP(events, tplContent, customParams, sendtos, client)
logger.Infof("channel_name: %v, event:%+v, sendtos:%+v, tplContent:%s, customParams:%v, respBody: %v, err: %v", notifyChannel.Name, events[0], sendtos, tplContent, customParams, resp, err)
if err != nil {
return "", fmt.Errorf("failed to send http notify: %v", err)
}
return resp, nil
} else {
for i := range sendtos {
resp, err = notifyChannel.SendHTTP(events, tplContent, customParams, []string{sendtos[i]}, client)
logger.Infof("channel_name: %v, event:%+v, tplContent:%s, customParams:%v, sendto:%+v, respBody: %v, err: %v", notifyChannel.Name, events[0], tplContent, customParams, sendtos[i], resp, err)
if err != nil {
return "", fmt.Errorf("failed to send http notify: %v", err)
}
}
return resp, nil
}
case "smtp":
if len(sendtos) == 0 {
return "", fmt.Errorf("no valid email address in the user and team")
}
err := notifyChannel.SendEmailNow(events, tplContent, sendtos)
if err != nil {
return "", fmt.Errorf("failed to send email notify: %v", err)
}
return resp, nil
case "script":
resp, _, err := notifyChannel.SendScript(events, tplContent, customParams, sendtos)
logger.Infof("channel_name: %v, event:%+v, tplContent:%s, customParams:%v, respBody: %v, err: %v", notifyChannel.Name, events[0], tplContent, customParams, resp, err)
return resp, err
default:
logger.Errorf("unsupported request type: %v", notifyChannel.RequestType)
return "", fmt.Errorf("unsupported request type")
}
}
type paramList struct {
Name string `json:"name"`
CName string `json:"cname"`
Value interface{} `json:"value"`
}
func (rt *Router) notifyRuleCustomParamsGet(c *gin.Context) {
notifyChannelID := ginx.QueryInt64(c, "notify_channel_id")
me := c.MustGet("user").(*models.User)
gids, err := models.MyGroupIds(rt.Ctx, me.Id)
ginx.Dangerous(err)
notifyChannel, err := models.NotifyChannelGet(rt.Ctx, "id=?", notifyChannelID)
ginx.Dangerous(err)
keyMap := make(map[string]string)
if notifyChannel == nil {
ginx.NewRender(c).Data([][]paramList{}, nil)
return
}
if notifyChannel.ParamConfig == nil {
ginx.NewRender(c).Data([][]paramList{}, nil)
return
}
for _, param := range notifyChannel.ParamConfig.Custom.Params {
keyMap[param.Key] = param.CName
}
lst, err := models.NotifyRulesGet(rt.Ctx, "", nil)
ginx.Dangerous(err)
res := make([][]paramList, 0)
filter := make(map[string]struct{})
for _, nr := range lst {
if !slice.HaveIntersection[int64](gids, nr.UserGroupIds) {
continue
}
for _, nc := range nr.NotifyConfigs {
if nc.ChannelID != notifyChannelID {
continue
}
list := make([]paramList, 0)
filterKey := ""
for key, value := range nc.Params {
// 找到在通知媒介中的自定义变量配置项,进行 cname 转换
cname, exists := keyMap[key]
if exists {
list = append(list, paramList{
Name: key,
CName: cname,
Value: value,
})
}
filterKey += fmt.Sprintf("%s:%s,", key, value)
}
if _, ok := filter[filterKey]; ok {
continue
}
filter[filterKey] = struct{}{}
res = append(res, list)
}
}
ginx.NewRender(c).Data(res, nil)
}

View File

@@ -6,6 +6,7 @@ import (
"fmt"
"html/template"
"strings"
"time"
"github.com/ccfos/nightingale/v6/center/cconf"
"github.com/ccfos/nightingale/v6/models"
@@ -34,10 +35,22 @@ func (rt *Router) notifyTplGets(c *gin.Context) {
}
func (rt *Router) notifyTplUpdateContent(c *gin.Context) {
user := c.MustGet("user").(*models.User)
var f models.NotifyTpl
ginx.BindJSON(c, &f)
ginx.Dangerous(templateValidate(f))
notifyTpl, err := models.NotifyTplGet(rt.Ctx, f.Id)
ginx.Dangerous(err)
if notifyTpl.CreateBy != user.Username && !user.IsAdmin() {
ginx.Bomb(403, "forbidden")
}
f.UpdateAt = time.Now().Unix()
f.UpdateBy = user.Username
ginx.NewRender(c).Message(f.UpdateContent(rt.Ctx))
}
@@ -45,8 +58,27 @@ func (rt *Router) notifyTplUpdate(c *gin.Context) {
var f models.NotifyTpl
ginx.BindJSON(c, &f)
ginx.Dangerous(templateValidate(f))
user := c.MustGet("user").(*models.User)
ginx.NewRender(c).Message(f.Update(rt.Ctx))
notifyTpl, err := models.NotifyTplGet(rt.Ctx, f.Id)
ginx.Dangerous(err)
if notifyTpl.CreateBy != user.Username && !user.IsAdmin() {
ginx.Bomb(403, "forbidden")
}
// get the count of the same channel and name but different id
count, err := models.Count(models.DB(rt.Ctx).Model(&models.NotifyTpl{}).Where("(channel = ? or name = ?) and id <> ?", f.Channel, f.Name, f.Id))
ginx.Dangerous(err)
if count != 0 {
ginx.Bomb(200, "Refuse to create duplicate channel or name")
}
notifyTpl.UpdateAt = time.Now().Unix()
notifyTpl.UpdateBy = user.Username
notifyTpl.Name = f.Name
ginx.NewRender(c).Message(notifyTpl.Update(rt.Ctx))
}
func templateValidate(f models.NotifyTpl) error {
@@ -106,7 +138,7 @@ func (rt *Router) notifyTplPreview(c *gin.Context) {
continue
}
arr := strings.Split(pair, "=")
arr := strings.SplitN(pair, "=", 2)
if len(arr) != 2 {
continue
}
@@ -129,14 +161,20 @@ func (rt *Router) notifyTplPreview(c *gin.Context) {
func (rt *Router) notifyTplAdd(c *gin.Context) {
var f models.NotifyTpl
ginx.BindJSON(c, &f)
user := c.MustGet("user").(*models.User)
f.CreateBy = user.Username
f.Channel = strings.TrimSpace(f.Channel)
ginx.Dangerous(templateValidate(f))
count, err := models.NotifyTplCountByChannel(rt.Ctx, f.Channel)
count, err := models.Count(models.DB(rt.Ctx).Model(&models.NotifyTpl{}).Where("channel = ? or name = ?", f.Channel, f.Name))
ginx.Dangerous(err)
if count != 0 {
ginx.Bomb(200, "Refuse to create duplicate channel(unique)")
}
f.CreateAt = time.Now().Unix()
ginx.NewRender(c).Message(f.Create(rt.Ctx))
}
@@ -144,5 +182,24 @@ func (rt *Router) notifyTplAdd(c *gin.Context) {
func (rt *Router) notifyTplDel(c *gin.Context) {
f := new(models.NotifyTpl)
id := ginx.UrlParamInt64(c, "id")
user := c.MustGet("user").(*models.User)
notifyTpl, err := models.NotifyTplGet(rt.Ctx, id)
ginx.Dangerous(err)
if notifyTpl.CreateBy != user.Username && !user.IsAdmin() {
ginx.Bomb(403, "forbidden")
}
ginx.NewRender(c).Message(f.NotifyTplDelete(rt.Ctx, id))
}
func (rt *Router) messageTemplateGets(c *gin.Context) {
id := ginx.QueryInt64(c, "id", 0)
name := ginx.QueryStr(c, "name", "")
ident := ginx.QueryStr(c, "ident", "")
tpls, err := models.MessageTemplateGets(rt.Ctx, id, name, ident)
ginx.NewRender(c).Data(tpls, err)
}

View File

@@ -0,0 +1,58 @@
package router
import (
"github.com/ccfos/nightingale/v6/datasource/opensearch"
"github.com/ccfos/nightingale/v6/dscache"
"github.com/gin-gonic/gin"
"github.com/toolkits/pkg/ginx"
"github.com/toolkits/pkg/logger"
)
func (rt *Router) QueryOSIndices(c *gin.Context) {
var f IndexReq
ginx.BindJSON(c, &f)
plug, exists := dscache.DsCache.Get(f.Cate, f.DatasourceId)
if !exists {
logger.Warningf("cluster:%d not exists", f.DatasourceId)
ginx.Bomb(200, "cluster not exists")
}
indices, err := plug.(*opensearch.OpenSearch).QueryIndices()
ginx.Dangerous(err)
ginx.NewRender(c).Data(indices, nil)
}
func (rt *Router) QueryOSFields(c *gin.Context) {
var f IndexReq
ginx.BindJSON(c, &f)
plug, exists := dscache.DsCache.Get(f.Cate, f.DatasourceId)
if !exists {
logger.Warningf("cluster:%d not exists", f.DatasourceId)
ginx.Bomb(200, "cluster not exists")
}
fields, err := plug.(*opensearch.OpenSearch).QueryFields([]string{f.Index})
ginx.Dangerous(err)
ginx.NewRender(c).Data(fields, nil)
}
func (rt *Router) QueryOSVariable(c *gin.Context) {
var f FieldValueReq
ginx.BindJSON(c, &f)
plug, exists := dscache.DsCache.Get(f.Cate, f.DatasourceId)
if !exists {
logger.Warningf("cluster:%d not exists", f.DatasourceId)
ginx.Bomb(200, "cluster not exists")
}
fields, err := plug.(*opensearch.OpenSearch).QueryFieldValue([]string{f.Index}, f.Query.Field, f.Query.Query)
ginx.Dangerous(err)
ginx.NewRender(c).Data(fields, nil)
}

Some files were not shown because too many files have changed in this diff Show More