Compare commits

...

307 Commits

Author SHA1 Message Date
Yening Qin
b49ab44818 refactor: http support tracing (#3083) 2026-02-12 17:00:45 +08:00
yuansheng
4d37594a0a feat: alert time tz support (#3081) 2026-02-11 19:15:09 +08:00
ning
5b941d2ce5 optimize event detail api 2026-02-11 11:45:54 +08:00
Yening Qin
932199fde1 refactor: add alert eval detail debug api (#3080) 2026-02-10 22:40:23 +08:00
ning
5d1636d1a5 fix: delete dup api 2026-02-10 20:13:29 +08:00
Yening Qin
6167eb3b13 refactor: event debug api (#3079) 2026-02-10 18:35:00 +08:00
huangjie
5beee98cde refactor: feishu sso support default usergroup (#3078) 2026-02-10 17:49:18 +08:00
Yening Qin
c34c008080 update list api 2026-02-10 10:38:09 +08:00
laiwei
75218f9d5a Revise README_zh.md for clarity and new features
Updated the description to emphasize the open-source monitoring alert management aspect and added information about the MCP-Server.
2026-02-07 17:15:57 +08:00
laiwei
341f82ecde Update README with MCP-Server launch details
Added information about the MCP-Server and its capabilities.
2026-02-07 17:12:40 +08:00
liufuniu
a6056a5fab fix: doris datasource equal (#3074) 2026-02-05 17:45:47 +08:00
liufuniu
01e8370882 refactor: update doris datasource (#3071) 2026-02-05 13:42:17 +08:00
ning
8b11e18754 refactor: optimize es query data 2026-02-05 12:10:41 +08:00
liufuniu
aa749065da refactor: doris datasource add write user (#3067) 2026-02-04 17:27:46 +08:00
ning
f5811bc5f7 refactor: datasource add weight 2026-02-04 16:00:13 +08:00
ning
5de63d7307 refactor: optimize es query error 2026-02-03 11:52:34 +08:00
ning
6a44da4dda refactor: optimize es query error 2026-02-03 11:33:01 +08:00
ning
0a65616fbb refactor: recording rule api 2026-02-02 16:29:46 +08:00
ning
a0e8c5f764 refactor: change ident meta mset 2026-02-02 15:14:19 +08:00
ning
d64dbb6909 refactor: recording rule api 2026-02-02 14:42:29 +08:00
Yening Qin
656b91e976 refactor: update heartbeat (#3060) 2026-01-29 16:03:00 +08:00
ning
fe6dce403f brain fix get datasource 2026-01-28 10:51:18 +08:00
zhang fugui
faa348a086 refactor(cfg): replace ioutil.ReadFile with os.ReadFile (#3050)
Replace ioutil.ReadFile with os.ReadFile, as the ioutil package has been deprecated.
2026-01-24 21:30:24 +08:00
ning
635b781ae1 refactor: alert rule check append tags 2026-01-24 21:21:14 +08:00
ning
f60771ad9c optimize drop sample 2026-01-23 15:28:38 +08:00
ning
6bd2f9a89f optimize drop sample 2026-01-23 15:27:43 +08:00
Yening Qin
a76049822c refactor: update workflow (#3052) 2026-01-22 19:56:33 +08:00
ning
97746f7469 refactor: update init metrics tpl 2026-01-21 19:46:19 +08:00
z
903d75e4b8 refactor: add base64 encoding and decoding tpl functions (#3044)
* Add base64 encoding and decoding functions

* Add base64 encoding package import

* Return original string on base64 decode error

---------

Co-authored-by: Yening Qin <710leo@gmail.com>
2026-01-21 16:30:45 +08:00
ning
42637e546d refactor: update doris check max rows 2026-01-21 16:02:42 +08:00
ning
7bf000932d fix save workflow execution 2026-01-20 21:49:06 +08:00
yuansheng
3202cd1410 refactor: record_rule support writeback_enabled (#3048) 2026-01-20 19:16:08 +08:00
ning
e28dd079f9 refactor: update doris check max rows 2026-01-20 16:34:24 +08:00
huangjie
72cb35a4ed sso add feishu (#3046) 2026-01-19 14:32:27 +08:00
ning
80d0193ac0 docs: add migrate sql 2026-01-16 17:29:35 +08:00
ning
54a8e2590e refactor: optimize tplx printf 2026-01-15 21:06:53 +08:00
ning
b296d5bcc3 refactor: update trigger value 2026-01-14 16:15:39 +08:00
ning
996c9812bd refactor: cron delete pipeline execution 2026-01-13 15:35:14 +08:00
ning
0f8bb8b2af refactor: cron delete pipeline execution 2026-01-13 15:15:46 +08:00
ning
8c54a97292 refactor: event pipeline api 2026-01-13 12:08:27 +08:00
ning
47cab69088 fix: event drop processor 2026-01-12 19:53:45 +08:00
ning
c432636d8d fix: send flashduty set proxy 2026-01-12 19:39:03 +08:00
Yening Qin
959b0389c6 refactor: udpate workflow (#3028) 2026-01-11 19:37:09 +08:00
ning
3d8f1b3ef5 refactor: update database init 2026-01-09 17:25:16 +08:00
ning
ce838036ad refactor: database init 2026-01-09 17:19:16 +08:00
ning
578ac096e5 docs: fix dash tpl 2026-01-09 15:45:51 +08:00
promalert
48ee6117e9 chore: execute goimports to format the code (#3031)
Signed-off-by: promalert <promalert@outlook.com>
2026-01-07 16:54:35 +08:00
ning
5afd6a60e9 update site info 2026-01-06 15:22:36 +08:00
ning
37372ae9ea refactor: notify channel weight 2026-01-06 10:56:36 +08:00
Snowykami
48e7c34ebf feat: notify channel support JIRA Issue and JSM Alert (#2989) 2026-01-06 10:31:02 +08:00
ning
acd0ec4bef refactor: update change-pass api 2026-01-05 19:35:32 +08:00
ning
c1ad946bc5 Merge branch 'main' of github.com:ccfos/nightingale 2026-01-05 18:02:27 +08:00
ning
4c2affc7da refactor: notify record add status code 2026-01-05 18:01:50 +08:00
SenCoder
273d282beb feat: alert eval sql support go template (#3024) 2026-01-05 15:47:55 +08:00
Yening Qin
3e86656381 feat: prom support mtls (#3029) 2026-01-05 10:40:07 +08:00
Yening Qin
f942772d2b refactor: builtin metrics add params (#3027) 2026-01-04 14:11:30 +08:00
ning
fbc0c22d7a Merge branch 'main' of github.com:ccfos/nightingale 2026-01-04 10:18:21 +08:00
ning
abd452a6df fix: migrate BuiltinPayload 2026-01-03 23:19:22 +08:00
Ulric Qin
47f05627d9 refactor the start command of nightingale 2026-01-03 08:30:21 +08:00
Ulric Qin
edd8e2a3db Merge branch 'main' of https://github.com/ccfos/nightingale 2026-01-03 08:20:17 +08:00
Ulric Qin
c4ca2920ef no need to init Admin operations 2026-01-03 08:20:10 +08:00
ning
afc8d7d21c fix: query data 2025-12-30 19:25:24 +08:00
ning
c0e13e2870 update ds perm check 2025-12-30 16:47:42 +08:00
ning
4f186a71ba fix: datasource delete 2025-12-30 16:35:36 +08:00
ning
104c275f2d refactor: doris datasource conf 2025-12-29 20:36:17 +08:00
pioneerlfn
2ba7a970e8 fix: doris exec sql timeout unit: s -> ms (#3019) 2025-12-29 16:08:45 +08:00
ning
c98241b3fd fix: search view api 2025-12-26 14:47:57 +08:00
ning
b30caf625b refactor: save view check name 2025-12-26 12:11:12 +08:00
SenCoder
32e8b961c2 refactor: add args parameter to CallIbex 2025-12-25 14:51:27 +08:00
ning
2ff0a8fdbb Merge branch 'main' of github.com:ccfos/nightingale 2025-12-25 14:42:33 +08:00
ning
7ff74d0948 fix: es query use ASCII control character as label separator to avoid truncation when user data contains -- 2025-12-25 14:42:16 +08:00
pioneerlfn
da58d825c0 refactor: doris add method showIndexes (#3011) 2025-12-25 11:48:29 +08:00
SenCoder
0014b77c4d refactor: change canDoIbex func to public (#3010) 2025-12-24 21:21:01 +08:00
Yening Qin
fc7fdde2d5 feat: support search view save (#3009) 2025-12-24 17:52:45 +08:00
pioneerlfn
61b63fc75c fix: doris query logs with interval (#3008) 2025-12-24 12:14:12 +08:00
pioneerlfn
80f564ec63 refactor: doris query data (#3003) 2025-12-22 16:04:36 +08:00
Ulric Qin
203c2a885b eval log: use Warn level when error message is not blank 2025-12-22 14:39:07 +08:00
ning
9bee3e1379 fix: vlogs query 2025-12-20 19:52:29 +08:00
ning
c214580e87 refactor: trim prom param 2025-12-18 19:45:06 +08:00
ning
f6faed0659 fix: webhook connection leak 2025-12-17 18:07:43 +08:00
ning
990819d6c1 fix: webhook connection leak 2025-12-17 18:01:28 +08:00
SenCoder
5fff517cce refactor: callibex return task Id (#2994) 2025-12-12 18:45:09 +08:00
ning
db1bb34277 refactor: index pattern delete check 2025-12-11 15:31:49 +08:00
ning
81e37c9ed4 refactor: removes invisible characters from user contacts 2025-12-11 14:52:57 +08:00
pioneerlfn
27ec6a2d04 fix: doris macro/time (#2990) 2025-12-11 12:27:54 +08:00
ning
372a8cff2f refactor: builtin tpl add 2025-12-08 16:53:58 +08:00
Yening Qin
68850800ed feat: support victorialogs alert (#2988) 2025-12-05 14:51:58 +08:00
Busyster996
717f7f1c4b docs: dockerfile add jinja2 (#2982) 2025-12-05 14:47:58 +08:00
Snowykami
82e1e715ad fix: remove elasticsearch version validate (#2986) 2025-12-05 14:47:10 +08:00
ning
d1058639fc fix: event concurrent map writes 2025-12-04 20:16:21 +08:00
ning
709eda93a8 fix: edge get datasource config 2025-12-03 11:41:20 +08:00
SenCoder
48e69449c5 fix: edge panic (#2983) 2025-12-01 21:47:08 +08:00
SenCoder
e5218bdba0 refactor: add config OpenRSA in Center.RSA. if open, settings/auth will encode in api "/v1/n9e/datasources" (#2981) 2025-11-29 13:33:21 +08:00
ning
543b334e64 refactor: update process handle log 2025-11-28 11:51:57 +08:00
ning
3644200488 Merge branch 'main' of github.com:ccfos/nightingale 2025-11-27 21:04:22 +08:00
ning
ceddf1f552 docs: update alert_rule model 2025-11-27 20:59:24 +08:00
SenCoder
faa4c4f438 refactor: es datasource QueryMapData (#2978) 2025-11-27 20:16:06 +08:00
laiwei
4f8b6157a3 Revise CCF ODTC and Nightingale project descriptions of ZH 2025-11-25 20:13:18 +08:00
ning
7fd7040c7f refactor: update gomod 2025-11-24 14:48:02 +08:00
jie210
7fa1a41437 feat: sso support dingtalk (#2968) 2025-11-24 14:19:23 +08:00
ning
f7b406078f refactor: user group api 2025-11-21 11:30:11 +08:00
dependabot[bot]
f6b10403d9 chore(deps): bump golang.org/x/crypto from 0.32.0 to 0.45.0 (#2966)
Bumps [golang.org/x/crypto](https://github.com/golang/crypto) from 0.32.0 to 0.45.0.
- [Commits](https://github.com/golang/crypto/compare/v0.32.0...v0.45.0)

---
updated-dependencies:
- dependency-name: golang.org/x/crypto
  dependency-version: 0.45.0
  dependency-type: indirect
...

Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
2025-11-20 19:22:52 +08:00
cyberslack_lee
f4ce0bccfc chore: fix error typos (#2958) 2025-11-20 19:20:46 +08:00
Snowykami
f26ce4487d feat: notify config support target biz group filter (#2964) 2025-11-20 17:02:40 +08:00
Ulric Qin
9f31f3b57d update integrations: delete unnecessary docs 2025-11-17 16:04:37 +08:00
co63oc
c7a97a9767 fix nvidia_smi.toml (#2962) 2025-11-17 15:56:47 +08:00
Ulric Qin
f94068e611 Merge branch 'main' of https://github.com/ccfos/nightingale 2025-11-17 10:39:44 +08:00
Ulric Qin
2cd5edf691 fix typo in zookeeper dashboard 2025-11-17 10:39:30 +08:00
cyberslack_lee
0ffc67f35f chore: fix typos (#2951) 2025-11-14 22:16:45 +08:00
co63oc
6dc5ac47b7 chore: fix some words (#2952) 2025-11-14 22:14:58 +08:00
cyberslack_lee
2526440efa chore: fix typo error (#2950) 2025-11-14 22:13:30 +08:00
cyberslack_lee
2f8b8fad62 chore: fix typos (#2948) 2025-11-14 22:12:25 +08:00
ning
9c19201c13 Merge branch 'main' of github.com:ccfos/nightingale 2025-11-14 18:27:34 +08:00
ning
4758c14a46 refactor: validate query_configs length to prevent data truncation in recording_rule 2025-11-14 18:26:32 +08:00
Snowykami
2e54ab8c2f fix: builtin metrics pagination total (#2953) 2025-11-14 18:03:42 +08:00
ning
67f79c2f88 refactor: optimize init salt 2025-11-14 16:37:15 +08:00
ning
749ae70bd7 refactor: optimize db query 2025-11-14 16:25:31 +08:00
ning
e2dba9b3d3 Merge branch 'main' of github.com:ccfos/nightingale 2025-11-14 15:59:04 +08:00
ning
2228842b2f refactor: update migrate BuiltinPayloads 2025-11-14 14:59:49 +08:00
Snowykami
38fe37a286 feat: add secure connection for ClickHouse native and http (#2945) 2025-11-14 14:18:00 +08:00
ning
7daf1e8c43 Merge branch 'main' of github.com:ccfos/nightingale 2025-11-14 11:54:20 +08:00
ning
8706ded776 refactor: es query data time range 2025-11-14 11:51:19 +08:00
Snowykami
f637078dd9 fix: skip processing pagerduty_integration_ids in GetNotifyConfigParams (#2949) 2025-11-14 11:10:50 +08:00
ning
8aa7b1060d chore: fix typos 2025-11-14 11:09:23 +08:00
co63oc
18634a33b2 chore: fix some words (#2946) 2025-11-14 10:29:52 +08:00
cyberslack_lee
7ed1b80759 chore: fix typos (#2947) 2025-11-14 10:21:22 +08:00
ning
3d240704f6 Merge branch 'main' of github.com:ccfos/nightingale 2025-11-13 20:27:32 +08:00
ning
ce0322bbd7 refactor: update message tpl, add domain 2025-11-13 20:25:45 +08:00
ning
66f62ca8c5 docs: update message tpl 2025-11-13 19:39:36 +08:00
cyberslack_lee
d11d73f6bc chore: fix typos about HostUpdteTime (#2942) 2025-11-13 17:18:46 +08:00
cyberslack_lee
dee1fe2d61 chore: fix typos about NotificaitonRecord to NotificationRecord (#2943) 2025-11-13 17:12:43 +08:00
ning
b3da24f18a refactor: update builtin payload api 2025-11-13 16:54:14 +08:00
ning
29ea4f6ed2 refactor: board add note 2025-11-13 16:49:30 +08:00
co63oc
5272b11efc chore: fix typos (#2941) 2025-11-13 11:40:22 +08:00
Snowykami
c322601138 feat: clickhouse protocol support (#2938) 2025-11-13 11:37:53 +08:00
ning
f1357d6f33 Merge branch 'main' of github.com:ccfos/nightingale 2025-11-12 20:10:54 +08:00
co63oc
728d70c707 fix repo path (#2933) 2025-11-12 19:46:29 +08:00
ning
bf93932b22 refactor: update board api 2025-11-12 19:12:06 +08:00
co63oc
57581be350 chore: fix typos mesurement (#2934) 2025-11-12 16:25:22 +08:00
Snowykami
5793f089f6 feat: pagerduty support (#2930) 2025-11-12 16:24:19 +08:00
ning
fa49449588 fix: user phone encrypt 2025-11-11 16:47:49 +08:00
ning
876f1d1084 fix prom datasource update 2025-11-10 10:51:44 +08:00
ning
678830be37 refactor: add /builtin-component service api 2025-11-10 10:29:14 +08:00
ning
5e30f3a00d Merge branch 'main' of github.com:ccfos/nightingale 2025-11-06 01:39:58 +08:00
ning
7f1eefd033 fix: dsn 2025-11-05 15:08:08 +08:00
Wenyu Su
c8dd26ca4c fix(notify): http callback retry interval in millisecond (#2928) 2025-11-04 18:11:30 +08:00
ning
37c57e66ea refactor: optimize db dsn 2025-11-04 15:09:03 +08:00
ning
878e940325 docs: update migrate.sql 2025-11-03 19:16:00 +08:00
Ulric Qin
cbc715305d Merge branch 'main' of https://github.com/ccfos/nightingale 2025-10-31 18:35:39 +08:00
Ulric Qin
5011766c70 delete single config dir 2025-10-31 18:35:32 +08:00
Snowykami
b3ed8a1e8c fix: elasticsearch v9 range query compatibility (#2692) (#2922) 2025-10-31 15:05:45 +08:00
Ulric Qin
814ded90b6 remove no use configuration: EnableAutoMigrate and add etc/single/config.toml 2025-10-31 11:38:17 +08:00
ning
43e89040eb refactor: update oidc logout 2025-10-31 10:26:41 +08:00
710leo
3d339fe03c update issue-translator 2025-10-30 22:50:53 +08:00
710leo
7618858912 update issue-translator 2025-10-30 22:24:49 +08:00
710leo
15b4ef8611 update issue-translator 2025-10-30 22:19:20 +08:00
710leo
5083a5cc96 add issue-translator 2025-10-30 22:10:09 +08:00
Yening Qin
d51e83d7d4 feat: alert rule add event process (#2921) 2025-10-28 16:18:12 +08:00
ning
601d4f0c95 update built in 2025-10-28 15:42:37 +08:00
Ulric Qin
90fac12953 update readme 2025-10-26 22:25:16 +08:00
ning
19d76824d9 update user group api 2025-10-25 16:45:31 +08:00
ning
1341554bbc refactor: optimize push data log 2025-10-19 12:12:06 +08:00
Ulric Qin
fd3ce338cb Merge branch 'main' of https://github.com/ccfos/nightingale 2025-10-15 17:21:58 +08:00
Ulric Qin
b8f36ce3cb add more prometheus rule severity choices 2025-10-15 17:21:52 +08:00
ning
037112a9e6 refactor: update alert mute api 2025-10-15 15:19:01 +08:00
zjxpsetp
c6e75d31a1 update jmx dashboard to support multi gc mode 2025-10-15 12:41:01 +08:00
zjxpsetp
bd24f5b056 Merge remote-tracking branch 'origin/main'
# Conflicts:
#	integrations/Java/dashboards/jmx_by_kubernetes.json
2025-10-15 12:36:14 +08:00
zjxpsetp
89551c8edb update jmx dashboard to support multi gc mode 2025-10-15 12:29:11 +08:00
ning
042b44940d docs: update i18n 2025-10-14 17:38:25 +08:00
ning
8cd8674848 refactor: optimize alert mute match 2025-10-14 16:59:57 +08:00
ning
7bb6ac8a03 refactor: update alert mute sync 2025-10-14 16:45:15 +08:00
ning
76b35276af refactor: update event api 2025-10-13 20:24:52 +08:00
SaladDay
439a21b784 feat: add expired filter for alert mute rules (#2907) 2025-10-13 14:04:32 +08:00
Yening Qin
47e70a2dba fix: sql order (#2908) 2025-10-13 12:18:16 +08:00
Yening Qin
16b3cb1abc feat: support encrypt user phone (#2906)
* feature: add a new configuration option to encrypt user phone numbers in the database (#2902)

Co-authored-by: yuanzaiping_dxm <yuanzaiping@duxiaoman.com>
---------

Co-authored-by: zaipingY <30775871+zaipingy@users.noreply.github.com>
Co-authored-by: yuanzaiping_dxm <yuanzaiping@duxiaoman.com>
2025-10-11 14:37:14 +08:00
ning
32995c1b2d fix: event pipeline insert by PostgreSQL 2025-09-23 16:40:27 +08:00
ning
b4fa36fa0e refactor: update message tpl 2025-09-22 18:18:19 +08:00
ning
f412f82eb8 fix: event value is inf 2025-09-19 19:32:40 +08:00
ning
9da1cd506b fix: datasource sync 2025-09-19 17:41:57 +08:00
ning
99ea838863 refactor: optimize query target 2025-09-19 15:00:07 +08:00
ning
7feb003b72 refactor: change feishucard body 2025-09-19 10:37:58 +08:00
ning
b0a053361f refactor: change log 2025-09-17 20:28:22 +08:00
ning
959f75394b refactor: alert rule api 2025-09-17 12:10:43 +08:00
ning
03e95973b2 refactor: message tpl api 2025-09-16 20:13:25 +08:00
ning
e890705167 refactor: event notify 2025-09-16 19:24:05 +08:00
ning
6716f1bdf1 refactor: update event notify 2025-09-15 20:19:18 +08:00
ning
739b9406a4 Merge branch 'main' of github.com:ccfos/nightingale 2025-09-15 17:37:38 +08:00
ning
77f280d1cc fix: event delete api 2025-09-15 17:37:25 +08:00
pioneerlfn
04fe1b9dd6 for slice, when marshal, return [] instead null (#2878) 2025-09-15 17:28:57 +08:00
ning
552758e0e1 refactor: pushgw writer support async 2025-09-14 15:00:40 +08:00
ning
68bc474c1b refactor: update message tpl 2025-09-11 20:38:17 +08:00
ning
f692035deb refactor: change datasource log 2025-09-11 16:14:59 +08:00
ning
eb441353c3 refactor: message tpl 2025-09-11 15:33:02 +08:00
ning
b606b22ae6 fix: opensearch alert 2025-09-10 22:24:50 +08:00
ning
1de0428860 docs: update init.sql 2025-09-09 18:51:23 +08:00
ning
3d0c288c9f refactor: event api 2025-09-09 16:36:40 +08:00
ning
343814a802 refactor: notify rule update 2025-09-09 11:05:58 +08:00
ning
12e2761467 fix: dingtalk message tpl 2025-09-09 10:18:08 +08:00
Yening Qin
0edd5ee772 refactor: event notify (#2869) 2025-09-08 15:04:33 +08:00
ning
5e430cedc7 fix: build, router_target miss idents 2025-09-03 14:17:15 +08:00
ning
a791a9901e refactor: push ts to kafka 2025-09-03 12:17:32 +08:00
totoro
222cdd76f0 refactor : es support search_after (#2859) 2025-09-03 12:12:07 +08:00
arch3754
ed4e3937e0 feat: add target update (#2853) 2025-09-03 12:05:38 +08:00
Yening Qin
60f9e1c48e refactor: dscache sync add datasource process hook (#2792) 2025-09-02 18:12:36 +08:00
Ulric Qin
276dfe7372 update linux dash 2025-09-02 08:57:36 +08:00
Ulric Qin
4a6dacbe30 add host table ng 2025-09-02 08:54:54 +08:00
Ulric Qin
48eebba11a update linux dashboard 2025-09-02 08:52:58 +08:00
ning
eca82e5ec2 change ops update 2025-09-01 14:24:17 +08:00
Yening Qin
21478fcf3d fix: send http notify retry (#2849) 2025-08-30 01:57:32 +08:00
ulricqin
a87c856299 fix: call flashduty to push event (#2848) 2025-08-30 00:06:53 +08:00
ning
ba035a446d refactor: change some log 2025-08-29 16:32:57 +08:00
ning
bf840e6bb2 docs: update dashboard tpl 2025-08-29 10:14:59 +08:00
ning
cd01092aed refactor: update alert rule import api 2025-08-28 19:44:05 +08:00
ning
e202fd50c8 refactor: datasource api 2025-08-28 16:48:04 +08:00
ning
f0e5062485 refactor: optimize edge ident update 2025-08-28 16:24:33 +08:00
ning
861fe96de5 add UpdateDBTargetTimestampDisable 2025-08-27 19:12:56 +08:00
Ulric Qin
5b66ada96d Merge branch 'main' of https://github.com/ccfos/nightingale 2025-08-27 10:06:19 +08:00
Ulric Qin
d5a98debff upgrade ibex 2025-08-27 10:06:11 +08:00
ning
4977052a67 Merge branch 'main' of github.com:ccfos/nightingale 2025-08-22 15:03:17 +08:00
ning
dcc461e587 refactor: push writer support sync 2025-08-22 15:00:49 +08:00
Ulric Qin
f5ce1733bb update minio dashboard 2025-08-21 18:58:47 +08:00
Ulric Qin
436cf25409 Merge branch 'main' of https://github.com/ccfos/nightingale 2025-08-21 10:34:56 +08:00
Ulric Qin
038f68b0b7 add minio dashboard for new version 2025-08-21 10:34:50 +08:00
ning
96ef1895b7 refactor: event_script_notify_result log add stdin 2025-08-20 14:24:28 +08:00
zjxpsetp
eeaa7b46f1 update es dashboard for categraf version bigger than 0.3.102 2025-08-19 23:43:54 +08:00
zjxpsetp
dc525352f1 Merge remote-tracking branch 'origin/main' 2025-08-19 17:36:12 +08:00
zjxpsetp
98a3fe9375 update jmx dashboard in kubernetes 2025-08-19 17:35:55 +08:00
ning
74b0f802ec Merge branch 'main' of github.com:ccfos/nightingale 2025-08-18 11:19:07 +08:00
ning
85bd3148d5 refactor: add update db metric 2025-08-18 11:18:52 +08:00
ning
0931fa9603 fix: target update ts 2025-08-18 11:18:39 +08:00
zjxpsetp
65cdb2da9e 更新 jmx 的仪表盘,新的jmx Exporter 指标和之前有一些差别 2025-08-17 17:23:39 +08:00
ning
9ad6514af6 refactor: ds query api 2025-08-13 11:50:01 +08:00
ning
302c6549e4 refactor: ds query api 2025-08-13 11:12:57 +08:00
ning
a3122270e6 refactor: ds query api 2025-08-13 11:02:00 +08:00
Yening Qin
1245c453bb refactor: send flashduty (#2824) 2025-08-12 20:55:23 +08:00
Ulric Qin
9c5ccf0c8f fix: update update_at when batch-updating-rules 2025-08-06 20:21:57 +08:00
Ulric Qin
cd468af250 refactor batch updating rules 2025-08-06 17:16:02 +08:00
Ulric Qin
2d3449c0ec code refactor for batch updating 2025-08-06 15:45:36 +08:00
ning
e15bdbce92 refactor: optimize import prom alert rule 2025-08-06 11:09:53 +08:00
ning
3890243d42 fix: new mysql db client 2025-08-04 18:40:04 +08:00
ning
37fb4ee867 add case-insensitive search for builtin payload filtering 2025-08-04 16:31:28 +08:00
ning
6db63eafc1 refactor: change import prom rule 2025-08-01 19:00:06 +08:00
ning
1e9cbfc316 fix: event query log 2025-08-01 16:47:14 +08:00
ning
4f95554fe3 refactor: update msg tpl 2025-07-31 18:08:12 +08:00
ning
8eba9aa92f refactor: update msg tpl 2025-07-31 15:31:29 +08:00
ning
6ba74b8e21 fix: pgsql cross database query 2025-07-31 11:51:27 +08:00
ning
8ea4632681 refactor: update duty user sync 2025-07-28 14:36:08 +08:00
ning
f958f27de1 fix: AlertRuleExists 2025-07-27 13:28:46 +08:00
ning
1bdfa3e032 refactor: update TargetDel 2025-07-27 12:46:22 +08:00
ning
143880cd46 Merge branch 'main' of github.com:ccfos/nightingale 2025-07-25 13:21:55 +08:00
ning
38f0b4f1bb refactor: modify add loki api resp 2025-07-25 13:01:27 +08:00
dependabot[bot]
2bccd5be99 build(deps): bump golang.org/x/oauth2 from 0.23.0 to 0.27.0 (#2793)
Bumps [golang.org/x/oauth2](https://github.com/golang/oauth2) from 0.23.0 to 0.27.0.
- [Commits](https://github.com/golang/oauth2/compare/v0.23.0...v0.27.0)

---
updated-dependencies:
- dependency-name: golang.org/x/oauth2
  dependency-version: 0.27.0
  dependency-type: direct:production
...

Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
2025-07-24 21:21:29 +08:00
ning
7b328b3eaa refactor: update prom rule import 2025-07-24 21:17:12 +08:00
Ulric Qin
8bd5b90e94 fix: support old rule format when importing 2025-07-23 09:36:28 +08:00
ning
96629e284f refactor: add email notify record 2025-07-17 11:45:58 +08:00
ning
67d2875690 fix: update record rule datasource ids api 2025-07-15 16:29:30 +08:00
ning
238895a1f8 refactor: init tpl 2025-07-15 15:52:38 +08:00
ning
fb341b645d refactor: sub alert add host filter 2025-07-15 14:46:09 +08:00
Haobo Zhang
2d84fd8cf3 fix: ai summary customize parameter parse from interface (#2788) 2025-07-14 14:54:15 +08:00
ning
2611f87c41 refactor: drop builtin_components idx_ident 2025-07-11 19:12:24 +08:00
ning
a5b7aa7a26 refactor: drop builtin_components idx_ident 2025-07-11 18:57:03 +08:00
ning
0714a0f8f1 refactor: change log level 2025-07-11 16:41:14 +08:00
ning
063cc750e1 refactor: update notify channel api 2025-07-11 12:25:08 +08:00
ning
b2a912d72f refactor: log level 2025-07-11 12:06:18 +08:00
ning
4ba745f442 fix: alert rule batch update notify rule 2025-07-11 11:38:09 +08:00
smx_Morgan
fa7d46ecad fix: compatible user_token table with postgresql (#2785) 2025-07-10 11:03:33 +08:00
pioneerlfn
a5a43df44f refactor: doris search sql (#2778)
* doris:support search sql with macro

* Update doris.go

---------

Co-authored-by: Yening Qin <710leo@gmail.com>
2025-07-09 21:33:17 +08:00
smx_Morgan
fbf1d68b84 fix: update postgresql init sql (#2784) 2025-07-09 20:53:56 +08:00
ulricqin
ca712f62a4 fix execution of notify script (#2769) 2025-07-06 08:40:13 +08:00
ulricqin
84ee14d21e add img (#2767) 2025-07-03 19:48:39 +08:00
ning
c9cf1cfdd2 refactor: change alert rule update api 2025-07-02 20:32:56 +08:00
ning
9d1c01107f refactor: builtin tpl 2025-07-02 20:12:43 +08:00
ning
7ea31b5c6d refactor: builtin tpl 2025-07-02 19:37:25 +08:00
ning
e8e1c67cc8 refactor: event notify filter 2025-07-02 15:44:02 +08:00
ning
8079bcd288 docs: enbale token auth 2025-07-01 18:38:28 +08:00
ning
33b178ce82 refactor: roles api 2025-07-01 15:37:42 +08:00
ning
28c9cd7b43 refactor: change eval for duration check 2025-06-30 15:10:20 +08:00
ning
b771e8a3e8 refactor: change eval for duration check 2025-06-30 12:06:48 +08:00
Yening Qin
4945e98200 refactor: builtin tpl gets api (#2760) 2025-06-27 19:45:28 +08:00
ning
a938ea3e56 docs: update migrate.sql 2025-06-26 18:44:55 +08:00
ning
25c339025b Merge branch 'main' of github.com:ccfos/nightingale 2025-06-25 18:05:46 +08:00
ning
bb0ee35275 refactor: optimize notify rule check api 2025-06-25 18:05:34 +08:00
Ulric Qin
0fc54ad173 add some icon 2025-06-25 16:57:59 +08:00
Ulric Qin
1f95e2df94 Merge branch 'main' of https://github.com/ccfos/nightingale 2025-06-25 16:52:00 +08:00
Ulric Qin
d2969f34ef update READE. set en version as default 2025-06-25 16:51:52 +08:00
Yening Qin
d9a34959dc feat: support doris and opensearch alert (#2758) 2025-06-25 16:34:08 +08:00
smx_Morgan
bc6ff7f4ba fix : add offset to es date_histogam (#2757) 2025-06-25 15:37:51 +08:00
Asklv
514913a97a feat: support deletion in datasource series. (#2753) 2025-06-23 19:20:50 +08:00
ning
affc610b7b refactor: change rulename tag handling in alert processing 2025-06-23 16:04:37 +08:00
ning
a098d5d39c refactor: update subscribe api 2025-06-20 18:01:01 +08:00
ning
05c3f1e0e4 refactor: update message tpl 2025-06-20 16:31:30 +08:00
ning
d5740164f2 refactor: update mute tryrun api 2025-06-20 14:42:10 +08:00
ning
8c2383c410 refactor: api add i18n 2025-06-20 14:31:03 +08:00
ning
9af024fb99 refactor: api add i18n 2025-06-20 14:23:03 +08:00
ning
12f3cc21e1 refactor: change rule test api 2025-06-19 16:18:30 +08:00
Asklv
0b3bb54eb4 feat: add time cost in alert history. (#2744)
Signed-off-by: Asklv <boironic@gmail.com>
2025-06-18 21:57:53 +08:00
Yening Qin
da813e2b0c refactor: optimize event notify (#2750) 2025-06-18 18:39:33 +08:00
Ulric Qin
50fa2499b7 add some alert_eval log 2025-06-18 15:15:56 +08:00
Ulric Qin
2c5ae5b3a9 delete some info log 2025-06-18 15:14:58 +08:00
ning
522932aeb4 refactor: api auth check 2025-06-17 20:57:32 +08:00
Yening Qin
35ac0ddea5 fix: api for agent auth (#2749) 2025-06-17 20:52:55 +08:00
ning
26fa750309 refactor: event process test api 2025-06-17 11:58:39 +08:00
710leo
1eba607aeb feat: add install date api 2025-06-16 22:46:47 +08:00
ning
6aadd159af fix: optimize api for agent auth 2025-06-16 20:22:24 +08:00
xtan
b6ad87523e feat: support redis password encryption (#2739) 2025-06-16 20:11:28 +08:00
Yening Qin
ea5b6845de refactor: optimize event processor (#2742) 2025-06-16 16:46:53 +08:00
Yening Qin
5ba5096da2 feat: add mute and sub rule tryrun api (#2737) 2025-06-13 18:08:46 +08:00
371 changed files with 31476 additions and 4272 deletions

22
.github/workflows/issue-translator.yml vendored Normal file
View File

@@ -0,0 +1,22 @@
name: 'Issue Translator'
on:
issues:
types: [opened]
jobs:
translate:
runs-on: ubuntu-latest
permissions:
issues: write
contents: read
steps:
- name: Translate Issues
uses: usthe/issues-translate-action@v2.7
with:
# 是否翻译 issue 标题
IS_MODIFY_TITLE: true
# GitHub Token
BOT_GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
# 自定义翻译标注(可选)
# CUSTOM_BOT_NOTE: "Translation by bot"

4
.gitignore vendored
View File

@@ -58,6 +58,10 @@ _test
.idea
.index
.vscode
.issue
.issue/*
.cursor
.claude
.DS_Store
.cache-loader
.payload

41
.typos.toml Normal file
View File

@@ -0,0 +1,41 @@
# Configuration for typos tool
[files]
extend-exclude = [
# Ignore auto-generated easyjson files
"*_easyjson.go",
# Ignore binary files
"*.gz",
"*.tar",
"n9e",
"n9e-*"
]
[default.extend-identifiers]
# Didi is a company name (DiDi), not a typo
Didi = "Didi"
# datas is intentionally used as plural of data (slice variable)
datas = "datas"
# pendings is intentionally used as plural
pendings = "pendings"
pendingsUseByRecover = "pendingsUseByRecover"
pendingsUseByRecoverMap = "pendingsUseByRecoverMap"
# typs is intentionally used as shorthand for types (parameter name)
typs = "typs"
[default.extend-words]
# Some false positives
ba = "ba"
# Specific corrections for ambiguous typos
contigious = "contiguous"
onw = "own"
componet = "component"
Patten = "Pattern"
Requets = "Requests"
Mis = "Miss"
exporer = "exporter"
soruce = "source"
verison = "version"
Configations = "Configurations"
emmited = "emitted"
Utlization = "Utilization"
serie = "series"

109
README.md
View File

@@ -3,7 +3,7 @@
<img src="doc/img/Nightingale_L_V.png" alt="nightingale - cloud native monitoring" width="100" /></a>
</p>
<p align="center">
<b>开源告警管理专家</b>
<b>Open-Source Alerting Expert</b>
</p>
<p align="center">
@@ -25,94 +25,93 @@
[English](./README_en.md) | [中文](./README.md)
[English](./README.md) | [中文](./README_zh.md)
## 夜莺是什么
## 🎯 What is Nightingale
夜莺监控(Nightingale)是一款侧重告警的监控类开源项目。类似 Grafana 的数据源集成方式,夜莺也是对接多种既有的数据源,不过 Grafana 侧重在可视化,夜莺是侧重在告警引擎、告警事件的处理和分发。
Nightingale is an open-source monitoring project that focuses on alerting. Similar to Grafana, Nightingale also connects with various existing data sources. However, while Grafana emphasizes visualization, Nightingale places greater emphasis on the alerting engine, as well as the processing and distribution of alarms.
夜莺监控项目,最初由滴滴开发和开源,并于 2022 年 5 月 11 日捐赠予中国计算机学会开源发展委员会CCF ODC为 CCF ODC 成立后接受捐赠的第一个开源项目。
> 💡 Nightingale has now officially launched the [MCP-Server](https://github.com/n9e/n9e-mcp-server/). This MCP Server enables AI assistants to interact with the Nightingale API using natural language, facilitating alert management, monitoring, and observability tasks.
>
> The Nightingale project was initially developed and open-sourced by DiDi.inc. On May 11, 2022, it was donated to the Open Source Development Committee of the China Computer Federation (CCF ODTC).
## 夜莺的工作逻辑
![](https://n9e.github.io/img/global/arch-bg.png)
很多用户已经自行采集了指标、日志数据此时就把存储库VictoriaMetrics、ElasticSearch等作为数据源接入夜莺即可在夜莺里配置告警规则、通知规则完成告警事件的生成和派发。
## 💡 How Nightingale Works
![夜莺产品架构](doc/img/readme/20240221152601.png)
Many users have already collected metrics and log data. In this case, you can connect your storage repositories (such as VictoriaMetrics, ElasticSearch, etc.) as data sources in Nightingale. This allows you to configure alerting rules and notification rules within Nightingale, enabling the generation and distribution of alarms.
夜莺项目本身不提供监控数据采集能力。推荐您使用 [Categraf](https://github.com/flashcatcloud/categraf) 作为采集器,可以和夜莺丝滑对接。
![Nightingale Product Architecture](doc/img/readme/20240221152601.png)
[Categraf](https://github.com/flashcatcloud/categraf) 可以采集操作系统、网络设备、各类中间件、数据库的监控数据,通过 Remote Write 协议推送给夜莺,夜莺把监控数据转存到时序库(如 Prometheus、VictoriaMetrics 等),并提供告警和可视化能力。
Nightingale itself does not provide monitoring data collection capabilities. We recommend using [Categraf](https://github.com/flashcatcloud/categraf) as the collector, which integrates seamlessly with Nightingale.
对于个别边缘机房,如果和中心夜莺服务端网络链路不好,希望提升告警可用性,夜莺也提供边缘机房告警引擎下沉部署模式,这个模式下,即便边缘和中心端网络割裂,告警功能也不受影响。
[Categraf](https://github.com/flashcatcloud/categraf) can collect monitoring data from operating systems, network devices, various middleware, and databases. It pushes this data to Nightingale via the `Prometheus Remote Write` protocol. Nightingale then stores the monitoring data in a time-series database (such as Prometheus, VictoriaMetrics, etc.) and provides alerting and visualization capabilities.
![边缘部署模式](doc/img/readme/20240222102119.png)
For certain edge data centers with poor network connectivity to the central Nightingale server, we offer a distributed deployment mode for the alerting engine. In this mode, even if the network is disconnected, the alerting functionality remains unaffected.
> 上图中机房A和中心机房的网络链路很好所以直接由中心端的夜莺进程做告警引擎机房B和中心机房的网络链路不好所以在机房B部署了 `n9e-edge` 做告警引擎对机房B的数据源做告警判定。
![Edge Deployment Mode](doc/img/readme/multi-region-arch.png)
## 告警降噪、升级、协同
> In the above diagram, Data Center A has a good network with the central data center, so it uses the Nightingale process in the central data center as the alerting engine. Data Center B has a poor network with the central data center, so it deploys `n9e-edge` as the alerting engine to handle alerting for its own data sources.
夜莺的侧重点是做告警引擎,即负责产生告警事件,并根据规则做灵活派发,内置支持 20 种通知媒介电话、短信、邮件、钉钉、飞书、企微、Slack 等)。
## 🔕 Alert Noise Reduction, Escalation, and Collaboration
如果您有更高级的需求,比如:
Nightingale focuses on being an alerting engine, responsible for generating alarms and flexibly distributing them based on rules. It supports 20 built-in notification medias (such as phone calls, SMS, email, DingTalk, Slack, etc.).
- 想要把公司的多套监控系统产生的事件聚拢到一个平台,统一做收敛降噪、响应处理、数据分析
- 想要支持人员的排班,践行 On-call 文化,想要支持告警认领、升级(避免遗漏)、协同处理
If you have more advanced requirements, such as:
- Want to consolidate events from multiple monitoring systems into one platform for unified noise reduction, response handling, and data analysis.
- Want to support personnel scheduling, practice on-call culture, and support alert escalation (to avoid missing alerts) and collaborative handling.
那夜莺是不合适的,推荐您选用 [FlashDuty](https://flashcat.cloud/product/flashcat-duty/) 这样的 On-call 产品,产品简单易用,也有免费套餐。
Then Nightingale is not suitable. It is recommended that you choose on-call products such as PagerDuty and FlashDuty. These products are simple and easy to use.
## 🗨️ Communication Channels
## 相关资料 & 交流渠道
- 📚 [夜莺介绍PPT](https://mp.weixin.qq.com/s/Mkwx_46xrltSq8NLqAIYow) 对您了解夜莺各项关键特性会有帮助PPT链接在文末
- 👉 [文档中心](https://flashcat.cloud/docs/) 为了更快的访问速度,站点托管在 [FlashcatCloud](https://flashcat.cloud)
- ❤️ [报告 Bug](https://github.com/ccfos/nightingale/issues/new?assignees=&labels=&projects=&template=question.yml) 写清楚问题描述、复现步骤、截图等信息,更容易得到答案
- 💡 前后端代码分离,前端代码仓库:[https://github.com/n9e/fe](https://github.com/n9e/fe)
- 🎯 关注[这个公众号](https://gitlink.org.cn/UlricQin)了解更多夜莺动态和知识
- 🌟 加我微信:`picobyte`(我已关闭好友验证)拉入微信群,备注:`夜莺互助群`,如果已经把夜莺上到生产环境,可联系我拉入资深监控用户群
- **Report Bugs:** It is highly recommended to submit issues via the [Nightingale GitHub Issue tracker](https://github.com/ccfos/nightingale/issues/new?assignees=&labels=kind%2Fbug&projects=&template=bug_report.yml).
- **Documentation:** For more information, we recommend thoroughly browsing the [Nightingale Documentation Site](https://n9e.github.io/).
## 🔑 Key Features
## 关键特性简介
![Nightingale Alerting rules](doc/img/readme/alerting-rules-en.png)
![夜莺告警规则](doc/img/readme/2025-05-23_18-43-37.png)
- Nightingale supports alerting rules, mute rules, subscription rules, and notification rules. It natively supports 20 types of notification media and allows customization of message templates.
- It supports event pipelines for Pipeline processing of alarms, facilitating automated integration with in-house systems. For example, it can append metadata to alarms or perform relabeling on events.
- It introduces the concept of business groups and a permission system to manage various rules in a categorized manner.
- Many databases and middleware come with built-in alert rules that can be directly imported and used. It also supports direct import of Prometheus alerting rules.
- It supports alerting self-healing, which automatically triggers a script to execute predefined logic after an alarm is generated—such as cleaning up disk space or capturing the current system state.
- 夜莺支持告警规则、屏蔽规则、订阅规则、通知规则,内置支持 20 种通知媒介,支持消息模板自定义
- 支持事件管道,对告警事件做 Pipeline 处理,方便和自有系统做自动化整合,比如给告警事件附加一些元信息,对事件做 relabel
- 支持业务组概念,引入权限体系,分门别类管理各类规则
- 很多数据库、中间件内置了告警规则,可以直接导入使用,也可以直接导入 Prometheus 的告警规则
- 支持告警自愈,即告警之后自动触发一个脚本执行一些预定义的逻辑,比如清理一下磁盘、抓一下现场等
![Nightingale Alarm Dashboard](doc/img/readme/active-events-en.png)
![夜莺事件大盘](doc/img/readme/2025-05-30_08-49-28.png)
- Nightingale archives historical alarms and supports multi-dimensional query and statistics.
- It supports flexible aggregation grouping, allowing a clear view of the distribution of alarms across the company.
- 夜莺存档了历史告警事件,支持多维度的查询和统计
- 支持灵活的聚合分组,一目了然看到公司的告警事件分布情况
![Nightingale Integration Center](doc/img/readme/integration-components-en.png)
![夜莺集成中心](doc/img/readme/2025-05-23_18-46-06.png)
- Nightingale has built-in metric descriptions, dashboards, and alerting rules for common operating systems, middleware, and databases, which are contributed by the community with varying quality.
- It directly receives data via multiple protocols such as Remote Write, OpenTSDB, Datadog, and Falcon, integrates with various Agents.
- It supports data sources like Prometheus, ElasticSearch, Loki, ClickHouse, MySQL, Postgres, allowing alerting based on data from these sources.
- Nightingale can be easily embedded into internal enterprise systems (e.g. Grafana, CMDB), and even supports configuring menu visibility for these embedded systems.
- 夜莺内置常用操作系统、中间件、数据库的的指标说明、仪表盘、告警规则,不过都是社区贡献的,整体也是参差不齐
- 夜莺直接接收 Remote Write、OpenTSDB、Datadog、Falcon 等多种协议的数据,故而可以和各类 Agent 对接
- 夜莺支持 Prometheus、ElasticSearch、Loki、TDEngine 等多种数据源,可以对其中的数据做告警
- 夜莺可以很方便内嵌企业内部系统,比如 Grafana、CMDB 等,甚至可以配置这些内嵌系统的菜单可见性
![Nightingale dashboards](doc/img/readme/dashboard-en.png)
- Nightingale supports dashboard functionality, including common chart types, and comes with pre-built dashboards. The image above is a screenshot of one of these dashboards.
- If you are already accustomed to Grafana, it is recommended to continue using Grafana for visualization, as Grafana has deeper expertise in this area.
- For machine-related monitoring data collected by Categraf, it is advisable to use Nightingale's built-in dashboards for viewing. This is because Categraf's metric naming follows Telegraf's convention, which differs from that of Node Exporter.
- Due to Nightingale's concept of business groups (where machines can belong to different groups), there may be scenarios where you only want to view machines within the current business group on the dashboard. Thus, Nightingale's dashboards can be linked with business groups for interactive filtering.
![夜莺仪表盘](doc/img/readme/2025-05-23_18-49-02.png)
## 🌟 Stargazers over time
- 夜莺支持仪表盘功能,支持常见的图表类型,也内置了一些仪表盘,上图是其中一个仪表盘的截图。
- 如果你已经习惯了 Grafana建议仍然使用 Grafana 看图。Grafana 在看图方面道行更深。
- 机器相关的监控数据,如果是 Categraf 采集的,建议使用夜莺自带的仪表盘查看,因为 Categraf 的指标命名 Follow 的是 Telegraf 的命名方式,和 Node Exporter 不同
- 因为夜莺有个业务组的概念,机器可以归属不同的业务组,有时在仪表盘里只想查看当前所属业务组的机器,所以夜莺的仪表盘可以和业务组联动
## 广受关注
[![Stargazers over time](https://api.star-history.com/svg?repos=ccfos/nightingale&type=Date)](https://star-history.com/#ccfos/nightingale&Date)
## 感谢众多企业的信赖
## 🔥 Users
![夜莺客户](doc/img/readme/logos.png)
![User Logos](doc/img/readme/logos.png)
## 社区共建
- ❇️ 请阅读浏览[夜莺开源项目和社区治理架构草案](./doc/community-governance.md),真诚欢迎每一位用户、开发者、公司以及组织,使用夜莺监控、积极反馈 Bug、提交功能需求、分享最佳实践共建专业、活跃的夜莺开源社区。
- 夜莺贡献者
## 🤝 Community Co-Building
- Please read the [Nightingale Open Source Project and Community Governance Draft](./doc/community-governance.md). We sincerely welcome every user, developer, company, and organization to use Nightingale, actively report bugs, submit feature requests, share best practices, and help build a professional and active open-source community.
- ❤️ Nightingale Contributors
<a href="https://github.com/ccfos/nightingale/graphs/contributors">
<img src="https://contrib.rocks/image?repo=ccfos/nightingale" />
</a>
## License
- [Apache License V2.0](https://github.com/didi/nightingale/blob/main/LICENSE)
## 📜 License
- [Apache License V2.0](https://github.com/ccfos/nightingale/blob/main/LICENSE)

View File

@@ -1,113 +0,0 @@
<p align="center">
<a href="https://github.com/ccfos/nightingale">
<img src="doc/img/Nightingale_L_V.png" alt="nightingale - cloud native monitoring" width="100" /></a>
</p>
<p align="center">
<b>Open-source Alert Management Expert, an Integrated Observability Platform</b>
</p>
<p align="center">
<a href="https://flashcat.cloud/docs/">
<img alt="Docs" src="https://img.shields.io/badge/docs-get%20started-brightgreen"/></a>
<a href="https://hub.docker.com/u/flashcatcloud">
<img alt="Docker pulls" src="https://img.shields.io/docker/pulls/flashcatcloud/nightingale"/></a>
<a href="https://github.com/ccfos/nightingale/graphs/contributors">
<img alt="GitHub contributors" src="https://img.shields.io/github/contributors-anon/ccfos/nightingale"/></a>
<img alt="GitHub Repo stars" src="https://img.shields.io/github/stars/ccfos/nightingale">
<img alt="GitHub forks" src="https://img.shields.io/github/forks/ccfos/nightingale">
<br/><img alt="GitHub Repo issues" src="https://img.shields.io/github/issues/ccfos/nightingale">
<img alt="GitHub Repo issues closed" src="https://img.shields.io/github/issues-closed/ccfos/nightingale">
<img alt="GitHub latest release" src="https://img.shields.io/github/v/release/ccfos/nightingale"/>
<img alt="License" src="https://img.shields.io/badge/license-Apache--2.0-blue"/>
<a href="https://n9e-talk.slack.com/">
<img alt="GitHub contributors" src="https://img.shields.io/badge/join%20slack-%23n9e-brightgreen.svg"/></a>
</p>
[English](./README_en.md) | [中文](./README.md)
## What is Nightingale
Nightingale is an open-source project focused on alerting. Similar to Grafana's data source integration approach, Nightingale also connects with various existing data sources. However, while Grafana focuses on visualization, Nightingale focuses on alerting engines.
Originally developed and open-sourced by Didi, Nightingale was donated to the China Computer Federation Open Source Development Committee (CCF ODC) on May 11, 2022, becoming the first open-source project accepted by the CCF ODC after its establishment.
## Quick Start
- 👉 [Documentation](https://flashcat.cloud/docs/) | [Download](https://flashcat.cloud/download/nightingale/)
- ❤️ [Report a Bug](https://github.com/ccfos/nightingale/issues/new?assignees=&labels=&projects=&template=question.yml)
- For faster access, the above documentation and download sites are hosted on [FlashcatCloud](https://flashcat.cloud).
## Features
- **Integration with Multiple Time-Series Databases:** Supports integration with various time-series databases such as Prometheus, VictoriaMetrics, Thanos, Mimir, M3DB, and TDengine, enabling unified alert management.
- **Advanced Alerting Capabilities:** Comes with built-in support for multiple alerting rules, extensible to common notification channels. It also supports alert suppression, silencing, subscription, self-healing, and alert event management.
- **High-Performance Visualization Engine:** Offers various chart styles with numerous built-in dashboard templates and the ability to import Grafana templates. Ready to use with a business-friendly open-source license.
- **Support for Common Collectors:** Compatible with [Categraf](https://flashcat.cloud/product/categraf), Telegraf, Grafana-agent, Datadog-agent, and various exporters as collectors—there's no data that can't be monitored.
- **Seamless Integration with [Flashduty](https://flashcat.cloud/product/flashcat-duty/):** Enables alert aggregation, acknowledgment, escalation, scheduling, and IM integration, ensuring no alerts are missed, reducing unnecessary interruptions, and enhancing efficient collaboration.
## Screenshots
You can switch languages and themes in the top right corner. We now support English, Simplified Chinese, and Traditional Chinese.
![18n switch](doc/img/readme/n9e-switch-i18n.png)
### Instant Query
Similar to the built-in query analysis page in Prometheus, Nightingale offers an ad-hoc query feature with UI enhancements. It also provides built-in PromQL metrics, allowing users unfamiliar with PromQL to quickly perform queries.
![Instant Query](doc/img/readme/20240513103305.png)
### Metric View
Alternatively, you can use the Metric View to access data. With this feature, Instant Query becomes less necessary, as it caters more to advanced users. Regular users can easily perform queries using the Metric View.
![Metric View](doc/img/readme/20240513103530.png)
### Built-in Dashboards
Nightingale includes commonly used dashboards that can be imported and used directly. You can also import Grafana dashboards, although compatibility is limited to basic Grafana charts. If youre accustomed to Grafana, its recommended to continue using it for visualization, with Nightingale serving as an alerting engine.
![Built-in Dashboards](doc/img/readme/20240513103628.png)
### Built-in Alert Rules
In addition to the built-in dashboards, Nightingale also comes with numerous alert rules that are ready to use out of the box.
![Built-in Alert Rules](doc/img/readme/20240513103825.png)
## Architecture
In most community scenarios, Nightingale is primarily used as an alert engine, integrating with multiple time-series databases to unify alert rule management. Grafana remains the preferred tool for visualization. As an alert engine, the product architecture of Nightingale is as follows:
![Product Architecture](doc/img/readme/20240221152601.png)
For certain edge data centers with poor network connectivity to the central Nightingale server, we offer a distributed deployment mode for the alert engine. In this mode, even if the network is disconnected, the alerting functionality remains unaffected.
![Edge Deployment Mode](doc/img/readme/20240222102119.png)
## Communication Channels
- **Report Bugs:** It is highly recommended to submit issues via the [Nightingale GitHub Issue tracker](https://github.com/ccfos/nightingale/issues/new?assignees=&labels=kind%2Fbug&projects=&template=bug_report.yml).
- **Documentation:** For more information, we recommend thoroughly browsing the [Nightingale Documentation Site](https://flashcat.cloud/docs/content/flashcat-monitor/nightingale-v7/introduction/).
## Stargazers over time
[![Stargazers over time](https://api.star-history.com/svg?repos=ccfos/nightingale&type=Date)](https://star-history.com/#ccfos/nightingale&Date)
## Community Co-Building
- ❇️ Please read the [Nightingale Open Source Project and Community Governance Draft](./doc/community-governance.md). We sincerely welcome every user, developer, company, and organization to use Nightingale, actively report bugs, submit feature requests, share best practices, and help build a professional and active open-source community.
- ❤️ Nightingale Contributors
<a href="https://github.com/ccfos/nightingale/graphs/contributors">
<img src="https://contrib.rocks/image?repo=ccfos/nightingale" />
</a>
## License
- [Apache License V2.0](https://github.com/didi/nightingale/blob/main/LICENSE)

123
README_zh.md Normal file
View File

@@ -0,0 +1,123 @@
<p align="center">
<a href="https://github.com/ccfos/nightingale">
<img src="doc/img/Nightingale_L_V.png" alt="nightingale - cloud native monitoring" width="100" /></a>
</p>
<p align="center">
<b>开源监控告警管理专家</b>
</p>
<p align="center">
<a href="https://flashcat.cloud/docs/">
<img alt="Docs" src="https://img.shields.io/badge/docs-get%20started-brightgreen"/></a>
<a href="https://hub.docker.com/u/flashcatcloud">
<img alt="Docker pulls" src="https://img.shields.io/docker/pulls/flashcatcloud/nightingale"/></a>
<a href="https://github.com/ccfos/nightingale/graphs/contributors">
<img alt="GitHub contributors" src="https://img.shields.io/github/contributors-anon/ccfos/nightingale"/></a>
<img alt="GitHub Repo stars" src="https://img.shields.io/github/stars/ccfos/nightingale">
<img alt="GitHub forks" src="https://img.shields.io/github/forks/ccfos/nightingale">
<br/><img alt="GitHub Repo issues" src="https://img.shields.io/github/issues/ccfos/nightingale">
<img alt="GitHub Repo issues closed" src="https://img.shields.io/github/issues-closed/ccfos/nightingale">
<img alt="GitHub latest release" src="https://img.shields.io/github/v/release/ccfos/nightingale"/>
<img alt="License" src="https://img.shields.io/badge/license-Apache--2.0-blue"/>
<a href="https://n9e-talk.slack.com/">
<img alt="GitHub contributors" src="https://img.shields.io/badge/join%20slack-%23n9e-brightgreen.svg"/></a>
</p>
[English](./README.md) | [中文](./README_zh.md)
## 夜莺是什么
夜莺 Nightingale 是一款开源云原生监控告警工具,是中国计算机学会接受捐赠并托管的第一个开源项目,在 GitHub 上有超过 12000 颗星,广受关注和使用。夜莺的统一告警引擎,可以对接 Prometheus、Elasticsearch、ClickHouse、Loki、MySQL 等多种数据源,提供全面的告警判定、丰富的事件处理和灵活的告警分发及通知能力。
夜莺侧重于监控告警,类似于 Grafana 的数据源集成方式,夜莺也是对接多种既有的数据源,不过 Grafana 侧重于可视化,夜莺则是侧重于告警引擎、告警事件的处理和分发。
> - 💡夜莺正式推出了 [MCP-Server](https://github.com/n9e/n9e-mcp-server/),此 MCP Server 允许 AI 助手通过自然语言与夜莺 API 交互,实现告警管理、监控和可观测性任务。
> - 夜莺监控项目,最初由滴滴开发和开源,并于 2022 年 5 月 11 日捐赠予中国计算机学会开源发展技术委员会CCF ODTC为 CCF ODTC 成立后接受捐赠的第一个开源项目。
![](https://n9e.github.io/img/global/arch-bg.png)
## 夜莺的工作逻辑
很多用户已经自行采集了指标、日志数据此时就把存储库VictoriaMetrics、ElasticSearch等作为数据源接入夜莺即可在夜莺里配置告警规则、通知规则完成告警事件的生成和派发。
![夜莺产品架构](doc/img/readme/20240221152601.png)
夜莺项目本身不提供监控数据采集能力。推荐您使用 [Categraf](https://github.com/flashcatcloud/categraf) 作为采集器,可以和夜莺丝滑对接。
[Categraf](https://github.com/flashcatcloud/categraf) 可以采集操作系统、网络设备、各类中间件、数据库的监控数据,通过 Remote Write 协议推送给夜莺,夜莺把监控数据转存到时序库(如 Prometheus、VictoriaMetrics 等),并提供告警和可视化能力。
对于个别边缘机房,如果和中心夜莺服务端网络链路不好,希望提升告警可用性,夜莺也提供边缘机房告警引擎下沉部署模式,这个模式下,即便边缘和中心端网络割裂,告警功能也不受影响。
![边缘部署模式](doc/img/readme/20240222102119.png)
> 上图中机房A和中心机房的网络链路很好所以直接由中心端的夜莺进程做告警引擎机房B和中心机房的网络链路不好所以在机房B部署了 `n9e-edge` 做告警引擎对机房B的数据源做告警判定。
## 告警降噪、升级、协同
夜莺的侧重点是做告警引擎,即负责产生告警事件,并根据规则做灵活派发,内置支持 20 种通知媒介电话、短信、邮件、钉钉、飞书、企微、Slack 等)。
如果您有更高级的需求,比如:
- 想要把公司的多套监控系统产生的事件聚拢到一个平台,统一做收敛降噪、响应处理、数据分析
- 想要支持人员的排班,践行 On-call 文化,想要支持告警认领、升级(避免遗漏)、协同处理
那夜莺是不合适的,推荐您选用 [FlashDuty](https://flashcat.cloud/product/flashcat-duty/) 这样的 On-call 产品,产品简单易用,也有免费套餐。
## 相关资料 & 交流渠道
- 📚 [夜莺介绍PPT](https://mp.weixin.qq.com/s/Mkwx_46xrltSq8NLqAIYow) 对您了解夜莺各项关键特性会有帮助PPT链接在文末
- 👉 [文档中心](https://flashcat.cloud/docs/) 为了更快的访问速度,站点托管在 [FlashcatCloud](https://flashcat.cloud)
- ❤️ [报告 Bug](https://github.com/ccfos/nightingale/issues/new?assignees=&labels=&projects=&template=question.yml) 写清楚问题描述、复现步骤、截图等信息,更容易得到答案
- 💡 前后端代码分离,前端代码仓库:[https://github.com/n9e/fe](https://github.com/n9e/fe)
- 🎯 关注[这个公众号](https://gitlink.org.cn/UlricQin)了解更多夜莺动态和知识
- 🌟 加我微信:`picobyte`(我已关闭好友验证)拉入微信群,备注:`夜莺互助群`,如果已经把夜莺上到生产环境,可联系我拉入资深监控用户群
## 关键特性简介
![夜莺告警规则](doc/img/readme/2025-05-23_18-43-37.png)
- 夜莺支持告警规则、屏蔽规则、订阅规则、通知规则,内置支持 20 种通知媒介,支持消息模板自定义
- 支持事件管道,对告警事件做 Pipeline 处理,方便和自有系统做自动化整合,比如给告警事件附加一些元信息,对事件做 relabel
- 支持业务组概念,引入权限体系,分门别类管理各类规则
- 很多数据库、中间件内置了告警规则,可以直接导入使用,也可以直接导入 Prometheus 的告警规则
- 支持告警自愈,即告警之后自动触发一个脚本执行一些预定义的逻辑,比如清理一下磁盘、抓一下现场等
![夜莺事件大盘](doc/img/readme/2025-05-30_08-49-28.png)
- 夜莺存档了历史告警事件,支持多维度的查询和统计
- 支持灵活的聚合分组,一目了然看到公司的告警事件分布情况
![夜莺集成中心](doc/img/readme/2025-05-23_18-46-06.png)
- 夜莺内置常用操作系统、中间件、数据库的的指标说明、仪表盘、告警规则,不过都是社区贡献的,整体也是参差不齐
- 夜莺直接接收 Remote Write、OpenTSDB、Datadog、Falcon 等多种协议的数据,故而可以和各类 Agent 对接
- 夜莺支持 Prometheus、ElasticSearch、Loki、TDEngine 等多种数据源,可以对其中的数据做告警
- 夜莺可以很方便内嵌企业内部系统,比如 Grafana、CMDB 等,甚至可以配置这些内嵌系统的菜单可见性
![夜莺仪表盘](doc/img/readme/2025-05-23_18-49-02.png)
- 夜莺支持仪表盘功能,支持常见的图表类型,也内置了一些仪表盘,上图是其中一个仪表盘的截图。
- 如果你已经习惯了 Grafana建议仍然使用 Grafana 看图。Grafana 在看图方面道行更深。
- 机器相关的监控数据,如果是 Categraf 采集的,建议使用夜莺自带的仪表盘查看,因为 Categraf 的指标命名 Follow 的是 Telegraf 的命名方式,和 Node Exporter 不同
- 因为夜莺有个业务组的概念,机器可以归属不同的业务组,有时在仪表盘里只想查看当前所属业务组的机器,所以夜莺的仪表盘可以和业务组联动
## 广受关注
[![Stargazers over time](https://api.star-history.com/svg?repos=ccfos/nightingale&type=Date)](https://star-history.com/#ccfos/nightingale&Date)
## 感谢众多企业的信赖
![夜莺客户](doc/img/readme/logos.png)
## 社区共建
- ❇️ 请阅读浏览[夜莺开源项目和社区治理架构草案](./doc/community-governance.md),真诚欢迎每一位用户、开发者、公司以及组织,使用夜莺监控、积极反馈 Bug、提交功能需求、分享最佳实践共建专业、活跃的夜莺开源社区。
- ❤️ 夜莺贡献者
<a href="https://github.com/ccfos/nightingale/graphs/contributors">
<img src="https://contrib.rocks/image?repo=ccfos/nightingale" />
</a>
## License
- [Apache License V2.0](https://github.com/ccfos/nightingale/blob/main/LICENSE)

View File

@@ -75,11 +75,11 @@ func Initialize(configDir string, cryptoKey string) (func(), error) {
macros.RegisterMacro(macros.MacroInVain)
dscache.Init(ctx, false)
Start(config.Alert, config.Pushgw, syncStats, alertStats, externalProcessors, targetCache, busiGroupCache, alertMuteCache, alertRuleCache, notifyConfigCache, taskTplsCache, dsCache, ctx, promClients, userCache, userGroupCache, notifyRuleCache, notifyChannelCache, messageTemplateCache)
Start(config.Alert, config.Pushgw, syncStats, alertStats, externalProcessors, targetCache, busiGroupCache, alertMuteCache, alertRuleCache, notifyConfigCache, taskTplsCache, dsCache, ctx, promClients, userCache, userGroupCache, notifyRuleCache, notifyChannelCache, messageTemplateCache, configCvalCache)
r := httpx.GinEngine(config.Global.RunMode, config.HTTP,
configCvalCache.PrintBodyPaths, configCvalCache.PrintAccessLog)
rt := router.New(config.HTTP, config.Alert, alertMuteCache, targetCache, busiGroupCache, alertStats, ctx, externalProcessors)
rt := router.New(config.HTTP, config.Alert, alertMuteCache, targetCache, busiGroupCache, alertStats, ctx, externalProcessors, config.Log.Dir)
if config.Ibex.Enable {
ibex.ServerStart(false, nil, redis, config.HTTP.APIForService.BasicAuth, config.Alert.Heartbeat, &config.CenterApi, r, nil, config.Ibex, config.HTTP.Port)
@@ -98,7 +98,7 @@ func Initialize(configDir string, cryptoKey string) (func(), error) {
func Start(alertc aconf.Alert, pushgwc pconf.Pushgw, syncStats *memsto.Stats, alertStats *astats.Stats, externalProcessors *process.ExternalProcessorsType, targetCache *memsto.TargetCacheType, busiGroupCache *memsto.BusiGroupCacheType,
alertMuteCache *memsto.AlertMuteCacheType, alertRuleCache *memsto.AlertRuleCacheType, notifyConfigCache *memsto.NotifyConfigCacheType, taskTplsCache *memsto.TaskTplCache, datasourceCache *memsto.DatasourceCacheType, ctx *ctx.Context,
promClients *prom.PromClientMap, userCache *memsto.UserCacheType, userGroupCache *memsto.UserGroupCacheType, notifyRuleCache *memsto.NotifyRuleCacheType, notifyChannelCache *memsto.NotifyChannelCacheType, messageTemplateCache *memsto.MessageTemplateCacheType) {
promClients *prom.PromClientMap, userCache *memsto.UserCacheType, userGroupCache *memsto.UserGroupCacheType, notifyRuleCache *memsto.NotifyRuleCacheType, notifyChannelCache *memsto.NotifyChannelCacheType, messageTemplateCache *memsto.MessageTemplateCacheType, configCvalCache *memsto.CvalCache) {
alertSubscribeCache := memsto.NewAlertSubscribeCache(ctx, syncStats)
recordingRuleCache := memsto.NewRecordingRuleCache(ctx, syncStats)
targetsOfAlertRulesCache := memsto.NewTargetOfAlertRuleCache(ctx, alertc.Heartbeat.EngineName, syncStats)
@@ -117,14 +117,14 @@ func Start(alertc aconf.Alert, pushgwc pconf.Pushgw, syncStats *memsto.Stats, al
eventProcessorCache := memsto.NewEventProcessorCache(ctx, syncStats)
dp := dispatch.NewDispatch(alertRuleCache, userCache, userGroupCache, alertSubscribeCache, targetCache, notifyConfigCache, taskTplsCache, notifyRuleCache, notifyChannelCache, messageTemplateCache, eventProcessorCache, alertc.Alerting, ctx, alertStats)
consumer := dispatch.NewConsumer(alertc.Alerting, ctx, dp, promClients)
dp := dispatch.NewDispatch(alertRuleCache, userCache, userGroupCache, alertSubscribeCache, targetCache, notifyConfigCache, taskTplsCache, notifyRuleCache, notifyChannelCache, messageTemplateCache, eventProcessorCache, configCvalCache, alertc.Alerting, ctx, alertStats)
consumer := dispatch.NewConsumer(alertc.Alerting, ctx, dp, promClients, alertMuteCache)
notifyRecordComsumer := sender.NewNotifyRecordConsumer(ctx)
notifyRecordConsumer := sender.NewNotifyRecordConsumer(ctx)
go dp.ReloadTpls()
go consumer.LoopConsume()
go notifyRecordComsumer.LoopConsume()
go notifyRecordConsumer.LoopConsume()
go queue.ReportQueueSize(alertStats)
go sender.ReportNotifyRecordQueueSize(alertStats)

View File

@@ -1,6 +1,7 @@
package common
import (
"encoding/json"
"fmt"
"strings"
@@ -13,6 +14,20 @@ func RuleKey(datasourceId, id int64) string {
func MatchTags(eventTagsMap map[string]string, itags []models.TagFilter) bool {
for _, filter := range itags {
// target_group in和not in优先特殊处理匹配通过则继续下一个 filter匹配失败则整组不匹配
if filter.Key == "target_group" {
// target 字段从 event.JsonTagsAndValue() 中获取的
v, ok := eventTagsMap["target"]
if !ok {
return false
}
if !targetGroupMatch(v, filter) {
return false
}
continue
}
// 普通标签按原逻辑处理
value, has := eventTagsMap[filter.Key]
if !has {
return false
@@ -35,9 +50,9 @@ func MatchGroupsName(groupName string, groupFilter []models.TagFilter) bool {
func matchTag(value string, filter models.TagFilter) bool {
switch filter.Func {
case "==":
return strings.TrimSpace(filter.Value) == strings.TrimSpace(value)
return strings.TrimSpace(fmt.Sprintf("%v", filter.Value)) == strings.TrimSpace(value)
case "!=":
return strings.TrimSpace(filter.Value) != strings.TrimSpace(value)
return strings.TrimSpace(fmt.Sprintf("%v", filter.Value)) != strings.TrimSpace(value)
case "in":
_, has := filter.Vset[value]
return has
@@ -49,6 +64,65 @@ func matchTag(value string, filter models.TagFilter) bool {
case "!~":
return !filter.Regexp.MatchString(value)
}
// unexpect func
// unexpected func
return false
}
// targetGroupMatch 处理 target_group 的特殊匹配逻辑
func targetGroupMatch(value string, filter models.TagFilter) bool {
var valueMap map[string]interface{}
if err := json.Unmarshal([]byte(value), &valueMap); err != nil {
return false
}
switch filter.Func {
case "in", "not in":
// float64 类型的 id 切片
filterValueIds, ok := filter.Value.([]interface{})
if !ok {
return false
}
filterValueIdsMap := make(map[float64]struct{})
for _, id := range filterValueIds {
filterValueIdsMap[id.(float64)] = struct{}{}
}
// float64 类型的 groupIds 切片
groupIds, ok := valueMap["group_ids"].([]interface{})
if !ok {
return false
}
// in 只要 groupIds 中有一个在 filterGroupIds 中出现,就返回 true
// not in 则相反
found := false
for _, gid := range groupIds {
if _, found = filterValueIdsMap[gid.(float64)]; found {
break
}
}
if filter.Func == "in" {
return found
}
// filter.Func == "not in"
return !found
case "=~", "!~":
// 正则满足一个就认为 matched
groupNames, ok := valueMap["group_names"].([]interface{})
if !ok {
return false
}
matched := false
for _, gname := range groupNames {
if filter.Regexp.MatchString(fmt.Sprintf("%v", gname)) {
matched = true
break
}
}
if filter.Func == "=~" {
return matched
}
// "!~": 只要有一个匹配就返回 false否则返回 true
return !matched
default:
return false
}
}

View File

@@ -8,8 +8,8 @@ import (
"time"
"github.com/ccfos/nightingale/v6/alert/aconf"
"github.com/ccfos/nightingale/v6/alert/common"
"github.com/ccfos/nightingale/v6/alert/queue"
"github.com/ccfos/nightingale/v6/memsto"
"github.com/ccfos/nightingale/v6/models"
"github.com/ccfos/nightingale/v6/pkg/ctx"
"github.com/ccfos/nightingale/v6/pkg/poster"
@@ -26,10 +26,15 @@ type Consumer struct {
alerting aconf.Alerting
ctx *ctx.Context
dispatch *Dispatch
promClients *prom.PromClientMap
dispatch *Dispatch
promClients *prom.PromClientMap
alertMuteCache *memsto.AlertMuteCacheType
}
type EventMuteHookFunc func(event *models.AlertCurEvent) bool
var EventMuteHook EventMuteHookFunc = func(event *models.AlertCurEvent) bool { return false }
func InitRegisterQueryFunc(promClients *prom.PromClientMap) {
tplx.RegisterQueryFunc(func(datasourceID int64, promql string) model.Value {
if promClients.IsNil(datasourceID) {
@@ -43,12 +48,14 @@ func InitRegisterQueryFunc(promClients *prom.PromClientMap) {
}
// 创建一个 Consumer 实例
func NewConsumer(alerting aconf.Alerting, ctx *ctx.Context, dispatch *Dispatch, promClients *prom.PromClientMap) *Consumer {
func NewConsumer(alerting aconf.Alerting, ctx *ctx.Context, dispatch *Dispatch, promClients *prom.PromClientMap, alertMuteCache *memsto.AlertMuteCacheType) *Consumer {
return &Consumer{
alerting: alerting,
ctx: ctx,
dispatch: dispatch,
promClients: promClients,
alertMuteCache: alertMuteCache,
}
}
@@ -91,12 +98,12 @@ func (e *Consumer) consumeOne(event *models.AlertCurEvent) {
e.dispatch.Astats.CounterAlertsTotal.WithLabelValues(event.Cluster, eventType, event.GroupName).Inc()
if err := event.ParseRule("rule_name"); err != nil {
logger.Warningf("ruleid:%d failed to parse rule name: %v", event.RuleId, err)
logger.Warningf("alert_eval_%d datasource_%d failed to parse rule name: %v", event.RuleId, event.DatasourceId, err)
event.RuleName = fmt.Sprintf("failed to parse rule name: %v", err)
}
if err := event.ParseRule("annotations"); err != nil {
logger.Warningf("ruleid:%d failed to parse annotations: %v", event.RuleId, err)
logger.Warningf("alert_eval_%d datasource_%d failed to parse annotations: %v", event.RuleId, event.DatasourceId, err)
event.Annotations = fmt.Sprintf("failed to parse annotations: %v", err)
event.AnnotationsJSON["error"] = event.Annotations
}
@@ -104,16 +111,12 @@ func (e *Consumer) consumeOne(event *models.AlertCurEvent) {
e.queryRecoveryVal(event)
if err := event.ParseRule("rule_note"); err != nil {
logger.Warningf("ruleid:%d failed to parse rule note: %v", event.RuleId, err)
logger.Warningf("alert_eval_%d datasource_%d failed to parse rule note: %v", event.RuleId, event.DatasourceId, err)
event.RuleNote = fmt.Sprintf("failed to parse rule note: %v", err)
}
e.persist(event)
if event.IsRecovered && event.NotifyRecovered == 0 {
return
}
e.dispatch.HandleEventNotify(event, false)
}
@@ -127,7 +130,7 @@ func (e *Consumer) persist(event *models.AlertCurEvent) {
var err error
event.Id, err = poster.PostByUrlsWithResp[int64](e.ctx, "/v1/n9e/event-persist", event)
if err != nil {
logger.Errorf("event:%+v persist err:%v", event, err)
logger.Errorf("event:%s persist err:%v", event.Hash, err)
e.dispatch.Astats.CounterRuleEvalErrorTotal.WithLabelValues(fmt.Sprintf("%v", event.DatasourceId), "persist_event", event.GroupName, fmt.Sprintf("%v", event.RuleId)).Inc()
}
return
@@ -135,7 +138,7 @@ func (e *Consumer) persist(event *models.AlertCurEvent) {
err := models.EventPersist(e.ctx, event)
if err != nil {
logger.Errorf("event%+v persist err:%v", event, err)
logger.Errorf("event:%s persist err:%v", event.Hash, err)
e.dispatch.Astats.CounterRuleEvalErrorTotal.WithLabelValues(fmt.Sprintf("%v", event.DatasourceId), "persist_event", event.GroupName, fmt.Sprintf("%v", event.RuleId)).Inc()
}
}
@@ -153,12 +156,12 @@ func (e *Consumer) queryRecoveryVal(event *models.AlertCurEvent) {
promql = strings.TrimSpace(promql)
if promql == "" {
logger.Warningf("rule_eval:%s promql is blank", getKey(event))
logger.Warningf("alert_eval_%d datasource_%d promql is blank", event.RuleId, event.DatasourceId)
return
}
if e.promClients.IsNil(event.DatasourceId) {
logger.Warningf("rule_eval:%s error reader client is nil", getKey(event))
logger.Warningf("alert_eval_%d datasource_%d error reader client is nil", event.RuleId, event.DatasourceId)
return
}
@@ -167,7 +170,7 @@ func (e *Consumer) queryRecoveryVal(event *models.AlertCurEvent) {
var warnings promsdk.Warnings
value, warnings, err := readerClient.Query(e.ctx.Ctx, promql, time.Now())
if err != nil {
logger.Errorf("rule_eval:%s promql:%s, error:%v", getKey(event), promql, err)
logger.Errorf("alert_eval_%d datasource_%d promql:%s, error:%v", event.RuleId, event.DatasourceId, promql, err)
event.AnnotationsJSON["recovery_promql_error"] = fmt.Sprintf("promql:%s error:%v", promql, err)
b, err := json.Marshal(event.AnnotationsJSON)
@@ -181,12 +184,12 @@ func (e *Consumer) queryRecoveryVal(event *models.AlertCurEvent) {
}
if len(warnings) > 0 {
logger.Errorf("rule_eval:%s promql:%s, warnings:%v", getKey(event), promql, warnings)
logger.Errorf("alert_eval_%d datasource_%d promql:%s, warnings:%v", event.RuleId, event.DatasourceId, promql, warnings)
}
anomalyPoints := models.ConvertAnomalyPoints(value)
if len(anomalyPoints) == 0 {
logger.Warningf("rule_eval:%s promql:%s, result is empty", getKey(event), promql)
logger.Warningf("alert_eval_%d datasource_%d promql:%s, result is empty", event.RuleId, event.DatasourceId, promql)
event.AnnotationsJSON["recovery_promql_error"] = fmt.Sprintf("promql:%s error:%s", promql, "result is empty")
} else {
event.AnnotationsJSON["recovery_value"] = fmt.Sprintf("%v", anomalyPoints[0].Value)
@@ -201,6 +204,3 @@ func (e *Consumer) queryRecoveryVal(event *models.AlertCurEvent) {
}
}
func getKey(event *models.AlertCurEvent) string {
return common.RuleKey(event.DatasourceId, event.RuleId)
}

View File

@@ -16,6 +16,7 @@ import (
"github.com/ccfos/nightingale/v6/alert/astats"
"github.com/ccfos/nightingale/v6/alert/common"
"github.com/ccfos/nightingale/v6/alert/pipeline"
"github.com/ccfos/nightingale/v6/alert/pipeline/engine"
"github.com/ccfos/nightingale/v6/alert/sender"
"github.com/ccfos/nightingale/v6/memsto"
"github.com/ccfos/nightingale/v6/models"
@@ -24,6 +25,17 @@ import (
"github.com/toolkits/pkg/logger"
)
var ShouldSkipNotify func(*ctx.Context, *models.AlertCurEvent, int64) bool
var SendByNotifyRule func(*ctx.Context, *memsto.UserCacheType, *memsto.UserGroupCacheType, *memsto.NotifyChannelCacheType, *memsto.CvalCache,
[]*models.AlertCurEvent, int64, *models.NotifyConfig, *models.NotifyChannelConfig, *models.MessageTemplate)
var EventProcessorCache *memsto.EventProcessorCacheType
func init() {
ShouldSkipNotify = shouldSkipNotify
SendByNotifyRule = SendNotifyRuleMessage
}
type Dispatch struct {
alertRuleCache *memsto.AlertRuleCacheType
userCache *memsto.UserCacheType
@@ -32,6 +44,7 @@ type Dispatch struct {
targetCache *memsto.TargetCacheType
notifyConfigCache *memsto.NotifyConfigCacheType
taskTplsCache *memsto.TaskTplCache
configCvalCache *memsto.CvalCache
notifyRuleCache *memsto.NotifyRuleCacheType
notifyChannelCache *memsto.NotifyChannelCacheType
@@ -45,9 +58,8 @@ type Dispatch struct {
tpls map[string]*template.Template
ExtraSenders map[string]sender.Sender
BeforeSenderHook func(*models.AlertCurEvent) bool
ctx *ctx.Context
Astats *astats.Stats
ctx *ctx.Context
Astats *astats.Stats
RwLock sync.RWMutex
}
@@ -56,7 +68,7 @@ type Dispatch struct {
func NewDispatch(alertRuleCache *memsto.AlertRuleCacheType, userCache *memsto.UserCacheType, userGroupCache *memsto.UserGroupCacheType,
alertSubscribeCache *memsto.AlertSubscribeCacheType, targetCache *memsto.TargetCacheType, notifyConfigCache *memsto.NotifyConfigCacheType,
taskTplsCache *memsto.TaskTplCache, notifyRuleCache *memsto.NotifyRuleCacheType, notifyChannelCache *memsto.NotifyChannelCacheType,
messageTemplateCache *memsto.MessageTemplateCacheType, eventProcessorCache *memsto.EventProcessorCacheType, alerting aconf.Alerting, ctx *ctx.Context, astats *astats.Stats) *Dispatch {
messageTemplateCache *memsto.MessageTemplateCacheType, eventProcessorCache *memsto.EventProcessorCacheType, configCvalCache *memsto.CvalCache, alerting aconf.Alerting, c *ctx.Context, astats *astats.Stats) *Dispatch {
notify := &Dispatch{
alertRuleCache: alertRuleCache,
userCache: userCache,
@@ -69,6 +81,7 @@ func NewDispatch(alertRuleCache *memsto.AlertRuleCacheType, userCache *memsto.Us
notifyChannelCache: notifyChannelCache,
messageTemplateCache: messageTemplateCache,
eventProcessorCache: eventProcessorCache,
configCvalCache: configCvalCache,
alerting: alerting,
@@ -77,11 +90,16 @@ func NewDispatch(alertRuleCache *memsto.AlertRuleCacheType, userCache *memsto.Us
ExtraSenders: make(map[string]sender.Sender),
BeforeSenderHook: func(*models.AlertCurEvent) bool { return true },
ctx: ctx,
ctx: c,
Astats: astats,
}
pipeline.Init()
EventProcessorCache = eventProcessorCache
// 设置通知记录回调函数
notifyChannelCache.SetNotifyRecordFunc(sender.NotifyRecord)
return notify
}
@@ -153,7 +171,7 @@ func (e *Dispatch) HandleEventWithNotifyRule(eventOrigin *models.AlertCurEvent)
// 深拷贝新的 event避免并发修改 event 冲突
eventCopy := eventOrigin.DeepCopy()
logger.Infof("notify rule ids: %v, event: %+v", notifyRuleId, eventCopy)
logger.Infof("notify rule ids: %v, event: %s", notifyRuleId, eventCopy.Hash)
notifyRule := e.notifyRuleCache.Get(notifyRuleId)
if notifyRule == nil {
continue
@@ -162,71 +180,106 @@ func (e *Dispatch) HandleEventWithNotifyRule(eventOrigin *models.AlertCurEvent)
if !notifyRule.Enable {
continue
}
eventCopy.NotifyRuleId = notifyRuleId
eventCopy.NotifyRuleName = notifyRule.Name
var processors []models.Processor
for _, pipelineConfig := range notifyRule.PipelineConfigs {
if !pipelineConfig.Enable {
continue
}
eventPipeline := e.eventProcessorCache.Get(pipelineConfig.PipelineId)
if eventPipeline == nil {
logger.Warningf("notify_id: %d, event:%+v, processor not found", notifyRuleId, eventCopy)
continue
}
if !pipelineApplicable(eventPipeline, eventCopy) {
logger.Debugf("notify_id: %d, event:%+v, pipeline_id: %d, not applicable", notifyRuleId, eventCopy, pipelineConfig.PipelineId)
continue
}
processors = append(processors, e.eventProcessorCache.GetProcessorsById(pipelineConfig.PipelineId)...)
}
for _, processor := range processors {
logger.Infof("before processor notify_id: %d, event:%+v, processor:%+v", notifyRuleId, eventCopy, processor)
eventCopy = processor.Process(e.ctx, eventCopy)
logger.Infof("after processor notify_id: %d, event:%+v, processor:%+v", notifyRuleId, eventCopy, processor)
if eventCopy == nil {
logger.Warningf("notify_id: %d, event:%+v, processor:%+v, event is nil", notifyRuleId, eventCopy, processor)
break
}
}
if eventCopy == nil {
// 如果 eventCopy 为 nil说明 eventCopy 被 processor drop 掉了, 不再发送通知
eventCopy = HandleEventPipeline(notifyRule.PipelineConfigs, eventOrigin, eventCopy, e.eventProcessorCache, e.ctx, notifyRuleId, "notify_rule")
if ShouldSkipNotify(e.ctx, eventCopy, notifyRuleId) {
logger.Infof("notify_id: %d, event:%s, should skip notify", notifyRuleId, eventCopy.Hash)
continue
}
// notify
for i := range notifyRule.NotifyConfigs {
if !NotifyRuleApplicable(&notifyRule.NotifyConfigs[i], eventCopy) {
err := NotifyRuleMatchCheck(&notifyRule.NotifyConfigs[i], eventCopy)
if err != nil {
logger.Errorf("notify_id: %d, event:%s, channel_id:%d, template_id: %d, notify_config:%+v, err:%v", notifyRuleId, eventCopy.Hash, notifyRule.NotifyConfigs[i].ChannelID, notifyRule.NotifyConfigs[i].TemplateID, notifyRule.NotifyConfigs[i], err)
continue
}
notifyChannel := e.notifyChannelCache.Get(notifyRule.NotifyConfigs[i].ChannelID)
messageTemplate := e.messageTemplateCache.Get(notifyRule.NotifyConfigs[i].TemplateID)
if notifyChannel == nil {
sender.NotifyRecord(e.ctx, []*models.AlertCurEvent{eventCopy}, notifyRuleId, fmt.Sprintf("notify_channel_id:%d", notifyRule.NotifyConfigs[i].ChannelID), "", "", errors.New("notify_channel not found"))
logger.Warningf("notify_id: %d, event:%+v, channel_id:%d, template_id: %d, notify_channel not found", notifyRuleId, eventCopy, notifyRule.NotifyConfigs[i].ChannelID, notifyRule.NotifyConfigs[i].TemplateID)
logger.Warningf("notify_id: %d, event:%s, channel_id:%d, template_id: %d, notify_channel not found", notifyRuleId, eventCopy.Hash, notifyRule.NotifyConfigs[i].ChannelID, notifyRule.NotifyConfigs[i].TemplateID)
continue
}
if notifyChannel.RequestType != "flashduty" && messageTemplate == nil {
logger.Warningf("notify_id: %d, channel_name: %v, event:%+v, template_id: %d, message_template not found", notifyRuleId, notifyChannel.Ident, eventCopy, notifyRule.NotifyConfigs[i].TemplateID)
if notifyChannel.RequestType != "flashduty" && notifyChannel.RequestType != "pagerduty" && messageTemplate == nil {
logger.Warningf("notify_id: %d, channel_name: %v, event:%s, template_id: %d, message_template not found", notifyRuleId, notifyChannel.Ident, eventCopy.Hash, notifyRule.NotifyConfigs[i].TemplateID)
sender.NotifyRecord(e.ctx, []*models.AlertCurEvent{eventCopy}, notifyRuleId, notifyChannel.Name, "", "", errors.New("message_template not found"))
continue
}
// todo go send
// todo 聚合 event
go e.sendV2([]*models.AlertCurEvent{eventCopy}, notifyRuleId, &notifyRule.NotifyConfigs[i], notifyChannel, messageTemplate)
go SendByNotifyRule(e.ctx, e.userCache, e.userGroupCache, e.notifyChannelCache, e.configCvalCache, []*models.AlertCurEvent{eventCopy}, notifyRuleId, &notifyRule.NotifyConfigs[i], notifyChannel, messageTemplate)
}
}
}
}
func pipelineApplicable(pipeline *models.EventPipeline, event *models.AlertCurEvent) bool {
func shouldSkipNotify(ctx *ctx.Context, event *models.AlertCurEvent, notifyRuleId int64) bool {
if event == nil {
// 如果 eventCopy 为 nil说明 eventCopy 被 processor drop 掉了, 不再发送通知
return true
}
if event.IsRecovered && event.NotifyRecovered == 0 {
// 如果 eventCopy 是恢复事件,且 NotifyRecovered 为 0则不发送通知
return true
}
return false
}
func HandleEventPipeline(pipelineConfigs []models.PipelineConfig, eventOrigin, event *models.AlertCurEvent, eventProcessorCache *memsto.EventProcessorCacheType, ctx *ctx.Context, id int64, from string) *models.AlertCurEvent {
workflowEngine := engine.NewWorkflowEngine(ctx)
for _, pipelineConfig := range pipelineConfigs {
if !pipelineConfig.Enable {
continue
}
eventPipeline := eventProcessorCache.Get(pipelineConfig.PipelineId)
if eventPipeline == nil {
logger.Warningf("processor_by_%s_id:%d pipeline_id:%d, event pipeline not found, event: %s", from, id, pipelineConfig.PipelineId, event.Hash)
continue
}
if !PipelineApplicable(eventPipeline, event) {
logger.Debugf("processor_by_%s_id:%d pipeline_id:%d, event pipeline not applicable, event: %s", from, id, pipelineConfig.PipelineId, event.Hash)
continue
}
// 统一使用工作流引擎执行(兼容线性模式和工作流模式)
triggerCtx := &models.WorkflowTriggerContext{
Mode: models.TriggerModeEvent,
TriggerBy: from + "_" + strconv.FormatInt(id, 10),
}
resultEvent, result, err := workflowEngine.Execute(eventPipeline, event, triggerCtx)
if err != nil {
logger.Errorf("processor_by_%s_id:%d pipeline_id:%d, pipeline execute error: %v", from, id, pipelineConfig.PipelineId, err)
continue
}
if resultEvent == nil {
logger.Infof("processor_by_%s_id:%d pipeline_id:%d, event dropped, event: %s", from, id, pipelineConfig.PipelineId, eventOrigin.Hash)
if from == "notify_rule" {
sender.NotifyRecord(ctx, []*models.AlertCurEvent{eventOrigin}, id, "", "", result.Message, fmt.Errorf("processor_by_%s_id:%d pipeline_id:%d, drop by pipeline", from, id, pipelineConfig.PipelineId))
}
return nil
}
event = resultEvent
logger.Infof("processor_by_%s_id:%d pipeline_id:%d, pipeline executed, status:%s, message:%s", from, id, pipelineConfig.PipelineId, result.Status, result.Message)
}
event.FE2DB()
event.FillTagsMap()
return event
}
func PipelineApplicable(pipeline *models.EventPipeline, event *models.AlertCurEvent) bool {
if pipeline == nil {
return true
}
@@ -237,15 +290,18 @@ func pipelineApplicable(pipeline *models.EventPipeline, event *models.AlertCurEv
tagMatch := true
if len(pipeline.LabelFilters) > 0 {
for i := range pipeline.LabelFilters {
if pipeline.LabelFilters[i].Func == "" {
pipeline.LabelFilters[i].Func = pipeline.LabelFilters[i].Op
// Deep copy to avoid concurrent map writes on cached objects
labelFiltersCopy := make([]models.TagFilter, len(pipeline.LabelFilters))
copy(labelFiltersCopy, pipeline.LabelFilters)
for i := range labelFiltersCopy {
if labelFiltersCopy[i].Func == "" {
labelFiltersCopy[i].Func = labelFiltersCopy[i].Op
}
}
tagFilters, err := models.ParseTagFilter(pipeline.LabelFilters)
tagFilters, err := models.ParseTagFilter(labelFiltersCopy)
if err != nil {
logger.Errorf("pipeline applicable failed to parse tag filter: %v event:%+v pipeline:%+v", err, event, pipeline)
logger.Errorf("pipeline applicable failed to parse tag filter: %v event:%s pipeline:%+v", err, event.Hash, pipeline)
return false
}
tagMatch = common.MatchTags(event.TagsMap, tagFilters)
@@ -253,9 +309,13 @@ func pipelineApplicable(pipeline *models.EventPipeline, event *models.AlertCurEv
attributesMatch := true
if len(pipeline.AttrFilters) > 0 {
tagFilters, err := models.ParseTagFilter(pipeline.AttrFilters)
// Deep copy to avoid concurrent map writes on cached objects
attrFiltersCopy := make([]models.TagFilter, len(pipeline.AttrFilters))
copy(attrFiltersCopy, pipeline.AttrFilters)
tagFilters, err := models.ParseTagFilter(attrFiltersCopy)
if err != nil {
logger.Errorf("pipeline applicable failed to parse tag filter: %v event:%+v pipeline:%+v err:%v", tagFilters, event, pipeline, err)
logger.Errorf("pipeline applicable failed to parse tag filter: %v event:%s pipeline:%+v err:%v", tagFilters, event.Hash, pipeline, err)
return false
}
@@ -265,7 +325,7 @@ func pipelineApplicable(pipeline *models.EventPipeline, event *models.AlertCurEv
return tagMatch && attributesMatch
}
func NotifyRuleApplicable(notifyConfig *models.NotifyConfig, event *models.AlertCurEvent) bool {
func NotifyRuleMatchCheck(notifyConfig *models.NotifyConfig, event *models.AlertCurEvent) error {
tm := time.Unix(event.TriggerTime, 0)
triggerTime := tm.Format("15:04")
triggerWeek := int(tm.Weekday())
@@ -317,6 +377,10 @@ func NotifyRuleApplicable(notifyConfig *models.NotifyConfig, event *models.Alert
}
}
if !timeMatch {
return fmt.Errorf("event time not match time filter")
}
severityMatch := false
for i := range notifyConfig.Severities {
if notifyConfig.Severities[i] == event.Severity {
@@ -324,39 +388,60 @@ func NotifyRuleApplicable(notifyConfig *models.NotifyConfig, event *models.Alert
}
}
if !severityMatch {
return fmt.Errorf("event severity not match severity filter")
}
tagMatch := true
if len(notifyConfig.LabelKeys) > 0 {
for i := range notifyConfig.LabelKeys {
if notifyConfig.LabelKeys[i].Func == "" {
notifyConfig.LabelKeys[i].Func = notifyConfig.LabelKeys[i].Op
// Deep copy to avoid concurrent map writes on cached objects
labelKeysCopy := make([]models.TagFilter, len(notifyConfig.LabelKeys))
copy(labelKeysCopy, notifyConfig.LabelKeys)
for i := range labelKeysCopy {
if labelKeysCopy[i].Func == "" {
labelKeysCopy[i].Func = labelKeysCopy[i].Op
}
}
tagFilters, err := models.ParseTagFilter(notifyConfig.LabelKeys)
tagFilters, err := models.ParseTagFilter(labelKeysCopy)
if err != nil {
logger.Errorf("notify send failed to parse tag filter: %v event:%+v notify_config:%+v", err, event, notifyConfig)
return false
logger.Errorf("notify send failed to parse tag filter: %v event:%s notify_config:%+v", err, event.Hash, notifyConfig)
return fmt.Errorf("failed to parse tag filter: %v", err)
}
tagMatch = common.MatchTags(event.TagsMap, tagFilters)
}
if !tagMatch {
return fmt.Errorf("event tag not match tag filter")
}
attributesMatch := true
if len(notifyConfig.Attributes) > 0 {
tagFilters, err := models.ParseTagFilter(notifyConfig.Attributes)
// Deep copy to avoid concurrent map writes on cached objects
attributesCopy := make([]models.TagFilter, len(notifyConfig.Attributes))
copy(attributesCopy, notifyConfig.Attributes)
tagFilters, err := models.ParseTagFilter(attributesCopy)
if err != nil {
logger.Errorf("notify send failed to parse tag filter: %v event:%+v notify_config:%+v err:%v", tagFilters, event, notifyConfig, err)
return false
logger.Errorf("notify send failed to parse tag filter: %v event:%s notify_config:%+v err:%v", tagFilters, event.Hash, notifyConfig, err)
return fmt.Errorf("failed to parse tag filter: %v", err)
}
attributesMatch = common.MatchTags(event.JsonTagsAndValue(), tagFilters)
}
logger.Infof("notify send timeMatch:%v severityMatch:%v tagMatch:%v attributesMatch:%v event:%+v notify_config:%+v", timeMatch, severityMatch, tagMatch, attributesMatch, event, notifyConfig)
return timeMatch && severityMatch && tagMatch && attributesMatch
if !attributesMatch {
return fmt.Errorf("event attributes not match attributes filter")
}
logger.Infof("notify send timeMatch:%v severityMatch:%v tagMatch:%v attributesMatch:%v event:%s notify_config:%+v", timeMatch, severityMatch, tagMatch, attributesMatch, event.Hash, notifyConfig)
return nil
}
func GetNotifyConfigParams(notifyConfig *models.NotifyConfig, contactKey string, userCache *memsto.UserCacheType, userGroupCache *memsto.UserGroupCacheType) ([]string, []int64, map[string]string) {
func GetNotifyConfigParams(notifyConfig *models.NotifyConfig, contactKey string, userCache *memsto.UserCacheType, userGroupCache *memsto.UserGroupCacheType) ([]string, []int64, []string, map[string]string) {
customParams := make(map[string]string)
var flashDutyChannelIDs []int64
var pagerDutyRoutingKeys []string
var userInfoParams models.CustomParams
for key, value := range notifyConfig.Params {
@@ -374,13 +459,26 @@ func GetNotifyConfigParams(notifyConfig *models.NotifyConfig, contactKey string,
}
}
}
case "pagerduty_integration_keys", "pagerduty_integration_ids":
if key == "pagerduty_integration_ids" {
// 不处理ids直接跳过这个字段只给前端标记用
continue
}
if data, err := json.Marshal(value); err == nil {
var keys []string
if json.Unmarshal(data, &keys) == nil {
pagerDutyRoutingKeys = keys
break
}
}
default:
// 避免直接 value.(string) 导致 panic支持多种类型并统一为字符串
customParams[key] = value.(string)
}
}
if len(userInfoParams.UserIDs) == 0 && len(userInfoParams.UserGroupIDs) == 0 {
return []string{}, flashDutyChannelIDs, customParams
return []string{}, flashDutyChannelIDs, pagerDutyRoutingKeys, customParams
}
userIds := make([]int64, 0)
@@ -416,18 +514,20 @@ func GetNotifyConfigParams(notifyConfig *models.NotifyConfig, contactKey string,
visited[user.Id] = true
}
return sendtos, flashDutyChannelIDs, customParams
return sendtos, flashDutyChannelIDs, pagerDutyRoutingKeys, customParams
}
func (e *Dispatch) sendV2(events []*models.AlertCurEvent, notifyRuleId int64, notifyConfig *models.NotifyConfig, notifyChannel *models.NotifyChannelConfig, messageTemplate *models.MessageTemplate) {
func SendNotifyRuleMessage(ctx *ctx.Context, userCache *memsto.UserCacheType, userGroupCache *memsto.UserGroupCacheType, notifyChannelCache *memsto.NotifyChannelCacheType, configCvalCache *memsto.CvalCache,
events []*models.AlertCurEvent, notifyRuleId int64, notifyConfig *models.NotifyConfig, notifyChannel *models.NotifyChannelConfig, messageTemplate *models.MessageTemplate) {
if len(events) == 0 {
logger.Errorf("notify_id: %d events is empty", notifyRuleId)
return
}
siteInfo := configCvalCache.GetSiteInfo()
tplContent := make(map[string]interface{})
if notifyChannel.RequestType != "flashduty" {
tplContent = messageTemplate.RenderEvent(events)
tplContent = messageTemplate.RenderEvent(events, siteInfo.SiteUrl)
}
var contactKey string
@@ -435,10 +535,7 @@ func (e *Dispatch) sendV2(events []*models.AlertCurEvent, notifyRuleId int64, no
contactKey = notifyChannel.ParamConfig.UserInfo.ContactKey
}
sendtos, flashDutyChannelIDs, customParams := GetNotifyConfigParams(notifyConfig, contactKey, e.userCache, e.userGroupCache)
e.Astats.GaugeNotifyRecordQueueSize.Inc()
defer e.Astats.GaugeNotifyRecordQueueSize.Dec()
sendtos, flashDutyChannelIDs, pagerdutyRoutingKeys, customParams := GetNotifyConfigParams(notifyConfig, contactKey, userCache, userGroupCache)
switch notifyChannel.RequestType {
case "flashduty":
@@ -447,45 +544,53 @@ func (e *Dispatch) sendV2(events []*models.AlertCurEvent, notifyRuleId int64, no
}
for i := range flashDutyChannelIDs {
respBody, err := notifyChannel.SendFlashDuty(events, flashDutyChannelIDs[i], e.notifyChannelCache.GetHttpClient(notifyChannel.ID))
logger.Infof("notify_id: %d, channel_name: %v, event:%+v, IntegrationUrl: %v dutychannel_id: %v, respBody: %v, err: %v", notifyRuleId, notifyChannel.Name, events[0], notifyChannel.RequestConfig.FlashDutyRequestConfig.IntegrationUrl, flashDutyChannelIDs[i], respBody, err)
sender.NotifyRecord(e.ctx, events, notifyRuleId, notifyChannel.Name, strconv.FormatInt(flashDutyChannelIDs[i], 10), respBody, err)
start := time.Now()
respBody, err := notifyChannel.SendFlashDuty(events, flashDutyChannelIDs[i], notifyChannelCache.GetHttpClient(notifyChannel.ID))
respBody = fmt.Sprintf("send_time: %s duration: %d ms %s", time.Now().Format("2006-01-02 15:04:05"), time.Since(start).Milliseconds(), respBody)
logger.Infof("duty_sender notify_id: %d, channel_name: %v, event:%s, IntegrationUrl: %v dutychannel_id: %v, respBody: %v, err: %v", notifyRuleId, notifyChannel.Name, events[0].Hash, notifyChannel.RequestConfig.FlashDutyRequestConfig.IntegrationUrl, flashDutyChannelIDs[i], respBody, err)
sender.NotifyRecord(ctx, events, notifyRuleId, notifyChannel.Name, strconv.FormatInt(flashDutyChannelIDs[i], 10), respBody, err)
}
return
case "pagerduty":
for _, routingKey := range pagerdutyRoutingKeys {
start := time.Now()
respBody, err := notifyChannel.SendPagerDuty(events, routingKey, siteInfo.SiteUrl, notifyChannelCache.GetHttpClient(notifyChannel.ID))
respBody = fmt.Sprintf("send_time: %s duration: %d ms %s", time.Now().Format("2006-01-02 15:04:05"), time.Since(start).Milliseconds(), respBody)
logger.Infof("pagerduty_sender notify_id: %d, channel_name: %v, event:%s, respBody: %v, err: %v", notifyRuleId, notifyChannel.Name, events[0].Hash, respBody, err)
sender.NotifyRecord(ctx, events, notifyRuleId, notifyChannel.Name, "", respBody, err)
}
case "http":
if e.notifyChannelCache.HttpConcurrencyAdd(notifyChannel.ID) {
defer e.notifyChannelCache.HttpConcurrencyDone(notifyChannel.ID)
}
if notifyChannel.RequestConfig == nil {
logger.Warningf("notify_id: %d, channel_name: %v, event:%+v, request config not found", notifyRuleId, notifyChannel.Name, events[0])
// 使用队列模式处理 http 通知
// 创建通知任务
task := &memsto.NotifyTask{
Events: events,
NotifyRuleId: notifyRuleId,
NotifyChannel: notifyChannel,
TplContent: tplContent,
CustomParams: customParams,
Sendtos: sendtos,
}
if notifyChannel.RequestConfig.HTTPRequestConfig == nil {
logger.Warningf("notify_id: %d, channel_name: %v, event:%+v, http request config not found", notifyRuleId, notifyChannel.Name, events[0])
}
if NeedBatchContacts(notifyChannel.RequestConfig.HTTPRequestConfig) || len(sendtos) == 0 {
resp, err := notifyChannel.SendHTTP(events, tplContent, customParams, sendtos, e.notifyChannelCache.GetHttpClient(notifyChannel.ID))
logger.Infof("notify_id: %d, channel_name: %v, event:%+v, tplContent:%s, customParams:%v, userInfo:%+v, respBody: %v, err: %v", notifyRuleId, notifyChannel.Name, events[0], tplContent, customParams, sendtos, resp, err)
sender.NotifyRecord(e.ctx, events, notifyRuleId, notifyChannel.Name, getSendTarget(customParams, sendtos), resp, err)
} else {
for i := range sendtos {
resp, err := notifyChannel.SendHTTP(events, tplContent, customParams, []string{sendtos[i]}, e.notifyChannelCache.GetHttpClient(notifyChannel.ID))
logger.Infof("notify_id: %d, channel_name: %v, event:%+v, tplContent:%s, customParams:%v, userInfo:%+v, respBody: %v, err: %v", notifyRuleId, notifyChannel.Name, events[0], tplContent, customParams, sendtos[i], resp, err)
sender.NotifyRecord(e.ctx, events, notifyRuleId, notifyChannel.Name, getSendTarget(customParams, []string{sendtos[i]}), resp, err)
}
// 将任务加入队列
success := notifyChannelCache.EnqueueNotifyTask(task)
if !success {
logger.Errorf("failed to enqueue notify task for channel %d, notify_id: %d", notifyChannel.ID, notifyRuleId)
// 如果入队失败,记录错误通知
sender.NotifyRecord(ctx, events, notifyRuleId, notifyChannel.Name, getSendTarget(customParams, sendtos), "", errors.New("failed to enqueue notify task, queue is full"))
}
case "smtp":
notifyChannel.SendEmail(notifyRuleId, events, tplContent, sendtos, e.notifyChannelCache.GetSmtpClient(notifyChannel.ID))
notifyChannel.SendEmail(notifyRuleId, events, tplContent, sendtos, notifyChannelCache.GetSmtpClient(notifyChannel.ID))
case "script":
start := time.Now()
target, res, err := notifyChannel.SendScript(events, tplContent, customParams, sendtos)
logger.Infof("notify_id: %d, channel_name: %v, event:%+v, tplContent:%s, customParams:%v, target:%s, res:%s, err:%v", notifyRuleId, notifyChannel.Name, events[0], tplContent, customParams, target, res, err)
sender.NotifyRecord(e.ctx, events, notifyRuleId, notifyChannel.Name, target, res, err)
res = fmt.Sprintf("send_time: %s duration: %d ms %s", time.Now().Format("2006-01-02 15:04:05"), time.Since(start).Milliseconds(), res)
logger.Infof("script_sender notify_id: %d, channel_name: %v, event:%s, tplContent:%s, customParams:%v, target:%s, res:%s, err:%v", notifyRuleId, notifyChannel.Name, events[0].Hash, tplContent, customParams, target, res, err)
sender.NotifyRecord(ctx, events, notifyRuleId, notifyChannel.Name, target, res, err)
default:
logger.Warningf("notify_id: %d, channel_name: %v, event:%+v send type not found", notifyRuleId, notifyChannel.Name, events[0])
logger.Warningf("notify_id: %d, channel_name: %v, event:%s send type not found", notifyRuleId, notifyChannel.Name, events[0].Hash)
}
}
@@ -498,6 +603,11 @@ func NeedBatchContacts(requestConfig *models.HTTPRequestConfig) bool {
// event: 告警/恢复事件
// isSubscribe: 告警事件是否由subscribe的配置产生
func (e *Dispatch) HandleEventNotify(event *models.AlertCurEvent, isSubscribe bool) {
go e.HandleEventWithNotifyRule(event)
if event.IsRecovered && event.NotifyRecovered == 0 {
return
}
rule := e.alertRuleCache.Get(event.RuleId)
if rule == nil {
return
@@ -530,7 +640,6 @@ func (e *Dispatch) HandleEventNotify(event *models.AlertCurEvent, isSubscribe bo
notifyTarget.AndMerge(handler(rule, event, notifyTarget, e))
}
go e.HandleEventWithNotifyRule(event)
go e.Send(rule, event, notifyTarget, isSubscribe)
// 如果是不是订阅规则出现的event, 则需要处理订阅规则的event
@@ -570,6 +679,10 @@ func (e *Dispatch) handleSub(sub *models.AlertSubscribe, event models.AlertCurEv
return
}
if !sub.MatchCate(event.Cate) {
return
}
if !common.MatchTags(event.TagsMap, sub.ITags) {
return
}
@@ -621,7 +734,7 @@ func (e *Dispatch) Send(rule *models.AlertRule, event *models.AlertCurEvent, not
event = msgCtx.Events[0]
}
logger.Debugf("send to channel:%s event:%+v users:%+v", channel, event, msgCtx.Users)
logger.Debugf("send to channel:%s event:%s users:%+v", channel, event.Hash, msgCtx.Users)
s.Send(msgCtx)
}
}
@@ -720,12 +833,12 @@ func (e *Dispatch) HandleIbex(rule *models.AlertRule, event *models.AlertCurEven
if len(t.Host) == 0 {
sender.CallIbex(e.ctx, t.TplId, event.TargetIdent,
e.taskTplsCache, e.targetCache, e.userCache, event)
e.taskTplsCache, e.targetCache, e.userCache, event, "")
continue
}
for _, host := range t.Host {
sender.CallIbex(e.ctx, t.TplId, host,
e.taskTplsCache, e.targetCache, e.userCache, event)
e.taskTplsCache, e.targetCache, e.userCache, event, "")
}
}
}

View File

@@ -18,17 +18,18 @@ func LogEvent(event *models.AlertCurEvent, location string, err ...error) {
}
logger.Infof(
"event(%s %s) %s: rule_id=%d sub_id:%d notify_rule_ids:%v cluster:%s %v%s@%d %s",
"alert_eval_%d event(%s %s) %s: sub_id:%d notify_rule_ids:%v cluster:%s %v%s@%d last_eval_time:%d %s",
event.RuleId,
event.Hash,
status,
location,
event.RuleId,
event.SubRuleId,
event.NotifyRuleIds,
event.Cluster,
event.TagsJSON,
event.TriggerValue,
event.TriggerTime,
event.LastEvalTime,
message,
)
}

View File

@@ -101,17 +101,17 @@ func (s *Scheduler) syncAlertRules() {
}
ds := s.datasourceCache.GetById(dsId)
if ds == nil {
logger.Debugf("datasource %d not found", dsId)
logger.Debugf("alert_eval_%d datasource %d not found", rule.Id, dsId)
continue
}
if ds.PluginType != ruleType {
logger.Debugf("datasource %d category is %s not %s", dsId, ds.PluginType, ruleType)
logger.Debugf("alert_eval_%d datasource %d category is %s not %s", rule.Id, dsId, ds.PluginType, ruleType)
continue
}
if ds.Status != "enabled" {
logger.Debugf("datasource %d status is %s", dsId, ds.Status)
logger.Debugf("alert_eval_%d datasource %d status is %s", rule.Id, dsId, ds.Status)
continue
}
processor := process.NewProcessor(s.aconf.Heartbeat.EngineName, rule, dsId, s.alertRuleCache, s.targetCache, s.targetsOfAlertRuleCache, s.busiGroupCache, s.alertMuteCache, s.datasourceCache, s.ctx, s.stats)
@@ -134,12 +134,12 @@ func (s *Scheduler) syncAlertRules() {
for _, dsId := range dsIds {
ds := s.datasourceCache.GetById(dsId)
if ds == nil {
logger.Debugf("datasource %d not found", dsId)
logger.Debugf("alert_eval_%d datasource %d not found", rule.Id, dsId)
continue
}
if ds.Status != "enabled" {
logger.Debugf("datasource %d status is %s", dsId, ds.Status)
logger.Debugf("alert_eval_%d datasource %d status is %s", rule.Id, dsId, ds.Status)
continue
}
processor := process.NewProcessor(s.aconf.Heartbeat.EngineName, rule, dsId, s.alertRuleCache, s.targetCache, s.targetsOfAlertRuleCache, s.busiGroupCache, s.alertMuteCache, s.datasourceCache, s.ctx, s.stats)

View File

@@ -11,6 +11,7 @@ import (
"strconv"
"strings"
"sync"
"text/template"
"time"
"github.com/ccfos/nightingale/v6/alert/astats"
@@ -24,6 +25,7 @@ import (
"github.com/ccfos/nightingale/v6/pkg/poster"
promsdk "github.com/ccfos/nightingale/v6/pkg/prom"
promql2 "github.com/ccfos/nightingale/v6/pkg/promql"
"github.com/ccfos/nightingale/v6/pkg/tplx"
"github.com/ccfos/nightingale/v6/pkg/unit"
"github.com/ccfos/nightingale/v6/prom"
"github.com/prometheus/common/model"
@@ -60,6 +62,7 @@ const (
CHECK_QUERY = "check_query_config"
GET_CLIENT = "get_client"
QUERY_DATA = "query_data"
EXEC_TEMPLATE = "exec_template"
)
const (
@@ -106,7 +109,7 @@ func NewAlertRuleWorker(rule *models.AlertRule, datasourceId int64, Processor *p
})
if err != nil {
logger.Errorf("alert rule %s add cron pattern error: %v", arw.Key(), err)
logger.Errorf("alert_eval_%d datasource_%d add cron pattern error: %v", arw.Rule.Id, arw.DatasourceId, err)
}
Processor.ScheduleEntry = arw.Scheduler.Entry(entryID)
@@ -144,14 +147,24 @@ func (arw *AlertRuleWorker) Start() {
}
func (arw *AlertRuleWorker) Eval() {
logger.Infof("eval:%s started", arw.Key())
begin := time.Now()
var message string
defer func() {
if len(message) == 0 {
logger.Infof("alert_eval_%d datasource_%d finished, duration:%v", arw.Rule.Id, arw.DatasourceId, time.Since(begin))
} else {
logger.Warningf("alert_eval_%d datasource_%d finished, duration:%v, message:%s", arw.Rule.Id, arw.DatasourceId, time.Since(begin), message)
}
}()
if arw.Processor.PromEvalInterval == 0 {
arw.Processor.PromEvalInterval = getPromEvalInterval(arw.Processor.ScheduleEntry.Schedule)
}
cachedRule := arw.Rule
if cachedRule == nil {
// logger.Errorf("rule_eval:%s Rule not found", arw.Key())
message = "rule not found"
return
}
arw.Processor.Stats.CounterRuleEval.WithLabelValues().Inc()
@@ -176,12 +189,12 @@ func (arw *AlertRuleWorker) Eval() {
}
if err != nil {
logger.Errorf("rule_eval:%s get anomaly point err:%s", arw.Key(), err.Error())
message = fmt.Sprintf("failed to get anomaly points: %v", err)
return
}
if arw.Processor == nil {
logger.Warningf("rule_eval:%s Processor is nil", arw.Key())
message = "processor is nil"
return
}
@@ -223,7 +236,7 @@ func (arw *AlertRuleWorker) Eval() {
}
func (arw *AlertRuleWorker) Stop() {
logger.Infof("rule_eval %s stopped", arw.Key())
logger.Infof("alert_eval_%d datasource_%d stopped", arw.Rule.Id, arw.DatasourceId)
close(arw.Quit)
c := arw.Scheduler.Stop()
<-c.Done()
@@ -239,7 +252,7 @@ func (arw *AlertRuleWorker) GetPromAnomalyPoint(ruleConfig string) ([]models.Ano
var rule *models.PromRuleConfig
if err := json.Unmarshal([]byte(ruleConfig), &rule); err != nil {
logger.Errorf("rule_eval:%s rule_config:%s, error:%v", arw.Key(), ruleConfig, err)
logger.Errorf("alert_eval_%d datasource_%d rule_config:%s, error:%v", arw.Rule.Id, arw.DatasourceId, ruleConfig, err)
arw.Processor.Stats.CounterRuleEvalErrorTotal.WithLabelValues(fmt.Sprintf("%v", arw.Processor.DatasourceId()), GET_RULE_CONFIG, arw.Processor.BusiGroupCache.GetNameByBusiGroupId(arw.Rule.GroupId), fmt.Sprintf("%v", arw.Rule.Id)).Inc()
arw.Processor.Stats.GaugeQuerySeriesCount.WithLabelValues(
fmt.Sprintf("%v", arw.Rule.Id),
@@ -250,7 +263,7 @@ func (arw *AlertRuleWorker) GetPromAnomalyPoint(ruleConfig string) ([]models.Ano
}
if rule == nil {
logger.Errorf("rule_eval:%s rule_config:%s, error:rule is nil", arw.Key(), ruleConfig)
logger.Errorf("alert_eval_%d datasource_%d rule_config:%s, error:rule is nil", arw.Rule.Id, arw.DatasourceId, ruleConfig)
arw.Processor.Stats.CounterRuleEvalErrorTotal.WithLabelValues(fmt.Sprintf("%v", arw.Processor.DatasourceId()), GET_RULE_CONFIG, arw.Processor.BusiGroupCache.GetNameByBusiGroupId(arw.Rule.GroupId), fmt.Sprintf("%v", arw.Rule.Id)).Inc()
arw.Processor.Stats.GaugeQuerySeriesCount.WithLabelValues(
fmt.Sprintf("%v", arw.Rule.Id),
@@ -265,7 +278,7 @@ func (arw *AlertRuleWorker) GetPromAnomalyPoint(ruleConfig string) ([]models.Ano
readerClient := arw.PromClients.GetCli(arw.DatasourceId)
if readerClient == nil {
logger.Warningf("rule_eval:%s error reader client is nil", arw.Key())
logger.Warningf("alert_eval_%d datasource_%d error reader client is nil", arw.Rule.Id, arw.DatasourceId)
arw.Processor.Stats.CounterRuleEvalErrorTotal.WithLabelValues(fmt.Sprintf("%v", arw.Processor.DatasourceId()), GET_CLIENT, arw.Processor.BusiGroupCache.GetNameByBusiGroupId(arw.Rule.GroupId), fmt.Sprintf("%v", arw.Rule.Id)).Inc()
arw.Processor.Stats.GaugeQuerySeriesCount.WithLabelValues(
fmt.Sprintf("%v", arw.Rule.Id),
@@ -275,7 +288,7 @@ func (arw *AlertRuleWorker) GetPromAnomalyPoint(ruleConfig string) ([]models.Ano
continue
}
if query.VarEnabled {
if query.VarEnabled && strings.Contains(query.PromQl, "$") {
var anomalyPoints []models.AnomalyPoint
if hasLabelLossAggregator(query) || notExactMatch(query) {
// 若有聚合函数或非精确匹配则需要先填充变量然后查询,这个方式效率较低
@@ -301,13 +314,13 @@ func (arw *AlertRuleWorker) GetPromAnomalyPoint(ruleConfig string) ([]models.Ano
// 无变量
promql := strings.TrimSpace(query.PromQl)
if promql == "" {
logger.Warningf("rule_eval:%s promql is blank", arw.Key())
logger.Warningf("alert_eval_%d datasource_%d promql is blank", arw.Rule.Id, arw.DatasourceId)
arw.Processor.Stats.CounterRuleEvalErrorTotal.WithLabelValues(fmt.Sprintf("%v", arw.Processor.DatasourceId()), CHECK_QUERY, arw.Processor.BusiGroupCache.GetNameByBusiGroupId(arw.Rule.GroupId), fmt.Sprintf("%v", arw.Rule.Id)).Inc()
continue
}
if arw.PromClients.IsNil(arw.DatasourceId) {
logger.Warningf("rule_eval:%s error reader client is nil", arw.Key())
logger.Warningf("alert_eval_%d datasource_%d error reader client is nil", arw.Rule.Id, arw.DatasourceId)
arw.Processor.Stats.CounterRuleEvalErrorTotal.WithLabelValues(fmt.Sprintf("%v", arw.Processor.DatasourceId()), GET_CLIENT, arw.Processor.BusiGroupCache.GetNameByBusiGroupId(arw.Rule.GroupId), fmt.Sprintf("%v", arw.Rule.Id)).Inc()
continue
}
@@ -316,7 +329,7 @@ func (arw *AlertRuleWorker) GetPromAnomalyPoint(ruleConfig string) ([]models.Ano
arw.Processor.Stats.CounterQueryDataTotal.WithLabelValues(fmt.Sprintf("%d", arw.DatasourceId), fmt.Sprintf("%d", arw.Rule.Id)).Inc()
value, warnings, err := readerClient.Query(context.Background(), promql, time.Now())
if err != nil {
logger.Errorf("rule_eval:%s promql:%s, error:%v", arw.Key(), promql, err)
logger.Errorf("alert_eval_%d datasource_%d promql:%s, error:%v", arw.Rule.Id, arw.DatasourceId, promql, err)
arw.Processor.Stats.CounterQueryDataErrorTotal.WithLabelValues(fmt.Sprintf("%d", arw.DatasourceId)).Inc()
arw.Processor.Stats.CounterRuleEvalErrorTotal.WithLabelValues(fmt.Sprintf("%v", arw.Processor.DatasourceId()), QUERY_DATA, arw.Processor.BusiGroupCache.GetNameByBusiGroupId(arw.Rule.GroupId), fmt.Sprintf("%v", arw.Rule.Id)).Inc()
arw.Processor.Stats.GaugeQuerySeriesCount.WithLabelValues(
@@ -328,12 +341,12 @@ func (arw *AlertRuleWorker) GetPromAnomalyPoint(ruleConfig string) ([]models.Ano
}
if len(warnings) > 0 {
logger.Errorf("rule_eval:%s promql:%s, warnings:%v", arw.Key(), promql, warnings)
logger.Errorf("alert_eval_%d datasource_%d promql:%s, warnings:%v", arw.Rule.Id, arw.DatasourceId, promql, warnings)
arw.Processor.Stats.CounterQueryDataErrorTotal.WithLabelValues(fmt.Sprintf("%d", arw.DatasourceId)).Inc()
arw.Processor.Stats.CounterRuleEvalErrorTotal.WithLabelValues(fmt.Sprintf("%v", arw.Processor.DatasourceId()), QUERY_DATA, arw.Processor.BusiGroupCache.GetNameByBusiGroupId(arw.Rule.GroupId), fmt.Sprintf("%v", arw.Rule.Id)).Inc()
}
logger.Infof("rule_eval:%s query:%+v, value:%v", arw.Key(), query, value)
logger.Infof("alert_eval_%d datasource_%d query:%+v, value:%v", arw.Rule.Id, arw.DatasourceId, query, value)
points := models.ConvertAnomalyPoints(value)
arw.Processor.Stats.GaugeQuerySeriesCount.WithLabelValues(
fmt.Sprintf("%v", arw.Rule.Id),
@@ -427,14 +440,14 @@ func (arw *AlertRuleWorker) VarFillingAfterQuery(query models.PromQuery, readerC
arw.Processor.Stats.CounterQueryDataTotal.WithLabelValues(fmt.Sprintf("%d", arw.DatasourceId), fmt.Sprintf("%d", arw.Rule.Id)).Inc()
value, _, err := readerClient.Query(context.Background(), curQuery, time.Now())
if err != nil {
logger.Errorf("rule_eval:%s, promql:%s, error:%v", arw.Key(), curQuery, err)
logger.Errorf("alert_eval_%d datasource_%d promql:%s, error:%v", arw.Rule.Id, arw.DatasourceId, curQuery, err)
continue
}
seqVals := getSamples(value)
// 得到参数变量的所有组合
paramPermutation, err := arw.getParamPermutation(param, ParamKeys, varToLabel, query.PromQl, readerClient)
if err != nil {
logger.Errorf("rule_eval:%s, paramPermutation error:%v", arw.Key(), err)
logger.Errorf("alert_eval_%d datasource_%d paramPermutation error:%v", arw.Rule.Id, arw.DatasourceId, err)
continue
}
// 判断哪些参数值符合条件
@@ -567,14 +580,14 @@ func (arw *AlertRuleWorker) getParamPermutation(paramVal map[string]models.Param
case "host":
hostIdents, err := arw.getHostIdents(paramQuery)
if err != nil {
logger.Errorf("rule_eval:%s, fail to get host idents, error:%v", arw.Key(), err)
logger.Errorf("alert_eval_%d datasource_%d fail to get host idents, error:%v", arw.Rule.Id, arw.DatasourceId, err)
break
}
params = hostIdents
case "device":
deviceIdents, err := arw.getDeviceIdents(paramQuery)
if err != nil {
logger.Errorf("rule_eval:%s, fail to get device idents, error:%v", arw.Key(), err)
logger.Errorf("alert_eval_%d datasource_%d fail to get device idents, error:%v", arw.Rule.Id, arw.DatasourceId, err)
break
}
params = deviceIdents
@@ -583,12 +596,12 @@ func (arw *AlertRuleWorker) getParamPermutation(paramVal map[string]models.Param
var query []string
err := json.Unmarshal(q, &query)
if err != nil {
logger.Errorf("query:%s fail to unmarshalling into string slice, error:%v", paramQuery.Query, err)
logger.Errorf("alert_eval_%d datasource_%d query:%s fail to unmarshalling into string slice, error:%v", arw.Rule.Id, arw.DatasourceId, paramQuery.Query, err)
}
if len(query) == 0 {
paramsKeyAllLabel, err := getParamKeyAllLabel(varToLabel[paramKey], originPromql, readerClient, arw.DatasourceId, arw.Rule.Id, arw.Processor.Stats)
if err != nil {
logger.Errorf("rule_eval:%s, fail to getParamKeyAllLabel, error:%v query:%s", arw.Key(), err, paramQuery.Query)
logger.Errorf("alert_eval_%d datasource_%d fail to getParamKeyAllLabel, error:%v query:%s", arw.Rule.Id, arw.DatasourceId, err, paramQuery.Query)
}
params = paramsKeyAllLabel
} else {
@@ -602,7 +615,7 @@ func (arw *AlertRuleWorker) getParamPermutation(paramVal map[string]models.Param
return nil, fmt.Errorf("param key: %s, params is empty", paramKey)
}
logger.Infof("rule_eval:%s paramKey: %s, params: %v", arw.Key(), paramKey, params)
logger.Infof("alert_eval_%d datasource_%d paramKey: %s, params: %v", arw.Rule.Id, arw.DatasourceId, paramKey, params)
paramMap[paramKey] = params
}
@@ -753,7 +766,7 @@ func (arw *AlertRuleWorker) GetHostAnomalyPoint(ruleConfig string) ([]models.Ano
var rule *models.HostRuleConfig
if err := json.Unmarshal([]byte(ruleConfig), &rule); err != nil {
logger.Errorf("rule_eval:%s rule_config:%s, error:%v", arw.Key(), ruleConfig, err)
logger.Errorf("alert_eval_%d datasource_%d rule_config:%s, error:%v", arw.Rule.Id, arw.DatasourceId, ruleConfig, err)
arw.Processor.Stats.CounterRuleEvalErrorTotal.WithLabelValues(fmt.Sprintf("%v", arw.Processor.DatasourceId()), GET_RULE_CONFIG, arw.Processor.BusiGroupCache.GetNameByBusiGroupId(arw.Rule.GroupId), fmt.Sprintf("%v", arw.Rule.Id)).Inc()
arw.Processor.Stats.GaugeQuerySeriesCount.WithLabelValues(
fmt.Sprintf("%v", arw.Rule.Id),
@@ -764,7 +777,7 @@ func (arw *AlertRuleWorker) GetHostAnomalyPoint(ruleConfig string) ([]models.Ano
}
if rule == nil {
logger.Errorf("rule_eval:%s rule_config:%s, error:rule is nil", arw.Key(), ruleConfig)
logger.Errorf("alert_eval_%d datasource_%d rule_config:%s, error:rule is nil", arw.Rule.Id, arw.DatasourceId, ruleConfig)
arw.Processor.Stats.CounterRuleEvalErrorTotal.WithLabelValues(fmt.Sprintf("%v", arw.Processor.DatasourceId()), GET_RULE_CONFIG, arw.Processor.BusiGroupCache.GetNameByBusiGroupId(arw.Rule.GroupId), fmt.Sprintf("%v", arw.Rule.Id)).Inc()
arw.Processor.Stats.GaugeQuerySeriesCount.WithLabelValues(
fmt.Sprintf("%v", arw.Rule.Id),
@@ -787,7 +800,7 @@ func (arw *AlertRuleWorker) GetHostAnomalyPoint(ruleConfig string) ([]models.Ano
// 如果是中心节点, 将不再上报数据的主机 engineName 为空的机器,也加入到 targets 中
missEngineIdents, exists = arw.Processor.TargetsOfAlertRuleCache.Get("", arw.Rule.Id)
if !exists {
logger.Debugf("rule_eval:%s targets not found engineName:%s", arw.Key(), arw.Processor.EngineName)
logger.Debugf("alert_eval_%d datasource_%d targets not found engineName:%s", arw.Rule.Id, arw.DatasourceId, arw.Processor.EngineName)
arw.Processor.Stats.CounterRuleEvalErrorTotal.WithLabelValues(fmt.Sprintf("%v", arw.Processor.DatasourceId()), QUERY_DATA, arw.Processor.BusiGroupCache.GetNameByBusiGroupId(arw.Rule.GroupId), fmt.Sprintf("%v", arw.Rule.Id)).Inc()
}
}
@@ -795,7 +808,7 @@ func (arw *AlertRuleWorker) GetHostAnomalyPoint(ruleConfig string) ([]models.Ano
engineIdents, exists = arw.Processor.TargetsOfAlertRuleCache.Get(arw.Processor.EngineName, arw.Rule.Id)
if !exists {
logger.Warningf("rule_eval:%s targets not found engineName:%s", arw.Key(), arw.Processor.EngineName)
logger.Warningf("alert_eval_%d datasource_%d targets not found engineName:%s", arw.Rule.Id, arw.DatasourceId, arw.Processor.EngineName)
arw.Processor.Stats.CounterRuleEvalErrorTotal.WithLabelValues(fmt.Sprintf("%v", arw.Processor.DatasourceId()), QUERY_DATA, arw.Processor.BusiGroupCache.GetNameByBusiGroupId(arw.Rule.GroupId), fmt.Sprintf("%v", arw.Rule.Id)).Inc()
}
idents = append(idents, engineIdents...)
@@ -822,7 +835,7 @@ func (arw *AlertRuleWorker) GetHostAnomalyPoint(ruleConfig string) ([]models.Ano
"",
).Set(float64(len(missTargets)))
logger.Debugf("rule_eval:%s missTargets:%v", arw.Key(), missTargets)
logger.Debugf("alert_eval_%d datasource_%d missTargets:%v", arw.Rule.Id, arw.DatasourceId, missTargets)
targets := arw.Processor.TargetCache.Gets(missTargets)
for _, target := range targets {
m := make(map[string]string)
@@ -831,7 +844,7 @@ func (arw *AlertRuleWorker) GetHostAnomalyPoint(ruleConfig string) ([]models.Ano
}
m["ident"] = target.Ident
lst = append(lst, models.NewAnomalyPoint(trigger.Type, m, now, float64(now-target.UpdateAt), trigger.Severity))
lst = append(lst, models.NewAnomalyPoint(trigger.Type, m, now, float64(now-target.BeatTime), trigger.Severity))
}
case "offset":
idents, exists := arw.Processor.TargetsOfAlertRuleCache.Get(arw.Processor.EngineName, arw.Rule.Id)
@@ -841,7 +854,7 @@ func (arw *AlertRuleWorker) GetHostAnomalyPoint(ruleConfig string) ([]models.Ano
fmt.Sprintf("%v", arw.Processor.DatasourceId()),
"",
).Set(0)
logger.Warningf("rule_eval:%s targets not found", arw.Key())
logger.Warningf("alert_eval_%d datasource_%d targets not found", arw.Rule.Id, arw.DatasourceId)
arw.Processor.Stats.CounterRuleEvalErrorTotal.WithLabelValues(fmt.Sprintf("%v", arw.Processor.DatasourceId()), QUERY_DATA, arw.Processor.BusiGroupCache.GetNameByBusiGroupId(arw.Rule.GroupId), fmt.Sprintf("%v", arw.Rule.Id)).Inc()
continue
}
@@ -860,7 +873,7 @@ func (arw *AlertRuleWorker) GetHostAnomalyPoint(ruleConfig string) ([]models.Ano
continue
}
if target, exists := targetMap[ident]; exists {
if now-target.UpdateAt > 120 {
if now-target.BeatTime > 120 {
// means this target is not a active host, do not check offset
continue
}
@@ -872,7 +885,7 @@ func (arw *AlertRuleWorker) GetHostAnomalyPoint(ruleConfig string) ([]models.Ano
}
}
logger.Debugf("rule_eval:%s offsetIdents:%v", arw.Key(), offsetIdents)
logger.Debugf("alert_eval_%d datasource_%d offsetIdents:%v", arw.Rule.Id, arw.DatasourceId, offsetIdents)
arw.Processor.Stats.GaugeQuerySeriesCount.WithLabelValues(
fmt.Sprintf("%v", arw.Rule.Id),
fmt.Sprintf("%v", arw.Processor.DatasourceId()),
@@ -899,7 +912,7 @@ func (arw *AlertRuleWorker) GetHostAnomalyPoint(ruleConfig string) ([]models.Ano
fmt.Sprintf("%v", arw.Processor.DatasourceId()),
"",
).Set(0)
logger.Warningf("rule_eval:%s targets not found", arw.Key())
logger.Warningf("alert_eval_%d datasource_%d targets not found", arw.Rule.Id, arw.DatasourceId)
arw.Processor.Stats.CounterRuleEvalErrorTotal.WithLabelValues(fmt.Sprintf("%v", arw.Processor.DatasourceId()), QUERY_DATA, arw.Processor.BusiGroupCache.GetNameByBusiGroupId(arw.Rule.GroupId), fmt.Sprintf("%v", arw.Rule.Id)).Inc()
continue
}
@@ -911,7 +924,7 @@ func (arw *AlertRuleWorker) GetHostAnomalyPoint(ruleConfig string) ([]models.Ano
missTargets = append(missTargets, ident)
}
}
logger.Debugf("rule_eval:%s missTargets:%v", arw.Key(), missTargets)
logger.Debugf("alert_eval_%d datasource_%d missTargets:%v", arw.Rule.Id, arw.DatasourceId, missTargets)
arw.Processor.Stats.GaugeQuerySeriesCount.WithLabelValues(
fmt.Sprintf("%v", arw.Rule.Id),
fmt.Sprintf("%v", arw.Processor.DatasourceId()),
@@ -1066,15 +1079,15 @@ func exclude(reHashTagIndex1 map[uint64][][]uint64, reHashTagIndex2 map[uint64][
func MakeSeriesMap(series []models.DataResp, seriesTagIndex map[uint64][]uint64, seriesStore map[uint64]models.DataResp) {
for i := 0; i < len(series); i++ {
serieHash := hash.GetHash(series[i].Metric, series[i].Ref)
seriesHash := hash.GetHash(series[i].Metric, series[i].Ref)
tagHash := hash.GetTagHash(series[i].Metric)
seriesStore[serieHash] = series[i]
seriesStore[seriesHash] = series[i]
// 将曲线按照相同的 tag 分组
if _, exists := seriesTagIndex[tagHash]; !exists {
seriesTagIndex[tagHash] = make([]uint64, 0)
}
seriesTagIndex[tagHash] = append(seriesTagIndex[tagHash], serieHash)
seriesTagIndex[tagHash] = append(seriesTagIndex[tagHash], seriesHash)
}
}
@@ -1107,7 +1120,7 @@ func ProcessJoins(ruleId int64, trigger models.Trigger, seriesTagIndexes map[str
// 有 join 条件,按条件依次合并
if len(seriesTagIndexes) < len(trigger.Joins)+1 {
logger.Errorf("rule_eval rid:%d queries' count: %d not match join condition's count: %d", ruleId, len(seriesTagIndexes), len(trigger.Joins))
logger.Errorf("alert_eval_%d queries' count: %d not match join condition's count: %d", ruleId, len(seriesTagIndexes), len(trigger.Joins))
return nil
}
@@ -1143,7 +1156,7 @@ func ProcessJoins(ruleId int64, trigger models.Trigger, seriesTagIndexes map[str
lastRehashed = exclude(curRehashed, lastRehashed)
last = flatten(lastRehashed)
default:
logger.Warningf("rule_eval rid:%d join type:%s not support", ruleId, trigger.Joins[i].JoinType)
logger.Warningf("alert_eval_%d join type:%s not support", ruleId, trigger.Joins[i].JoinType)
}
}
return last
@@ -1263,7 +1276,7 @@ func (arw *AlertRuleWorker) VarFillingBeforeQuery(query models.PromQuery, reader
// 得到参数变量的所有组合
paramPermutation, err := arw.getParamPermutation(param, ParamKeys, varToLabel, query.PromQl, readerClient)
if err != nil {
logger.Errorf("rule_eval:%s, paramPermutation error:%v", arw.Key(), err)
logger.Errorf("alert_eval_%d datasource_%d paramPermutation error:%v", arw.Rule.Id, arw.DatasourceId, err)
continue
}
@@ -1291,10 +1304,10 @@ func (arw *AlertRuleWorker) VarFillingBeforeQuery(query models.PromQuery, reader
arw.Processor.Stats.CounterQueryDataTotal.WithLabelValues(fmt.Sprintf("%d", arw.DatasourceId), fmt.Sprintf("%d", arw.Rule.Id)).Inc()
value, _, err := readerClient.Query(context.Background(), promql, time.Now())
if err != nil {
logger.Errorf("rule_eval:%s, promql:%s, error:%v", arw.Key(), promql, err)
logger.Errorf("alert_eval_%d datasource_%d promql:%s, error:%v", arw.Rule.Id, arw.DatasourceId, promql, err)
return
}
logger.Infof("rule_eval:%s, promql:%s, value:%+v", arw.Key(), promql, value)
logger.Infof("alert_eval_%d datasource_%d promql:%s, value:%+v", arw.Rule.Id, arw.DatasourceId, promql, value)
points := models.ConvertAnomalyPoints(value)
if len(points) == 0 {
@@ -1433,7 +1446,7 @@ func (arw *AlertRuleWorker) GetAnomalyPoint(rule *models.AlertRule, dsId int64)
recoverPoints := []models.AnomalyPoint{}
ruleConfig := strings.TrimSpace(rule.RuleConfig)
if ruleConfig == "" {
logger.Warningf("rule_eval:%d ruleConfig is blank", rule.Id)
logger.Warningf("alert_eval_%d datasource_%d ruleConfig is blank", rule.Id, dsId)
arw.Processor.Stats.CounterRuleEvalErrorTotal.WithLabelValues(fmt.Sprintf("%v", arw.Processor.DatasourceId()), GET_RULE_CONFIG, arw.Processor.BusiGroupCache.GetNameByBusiGroupId(arw.Rule.GroupId), fmt.Sprintf("%v", arw.Rule.Id)).Inc()
arw.Processor.Stats.GaugeQuerySeriesCount.WithLabelValues(
fmt.Sprintf("%v", arw.Rule.Id),
@@ -1441,15 +1454,15 @@ func (arw *AlertRuleWorker) GetAnomalyPoint(rule *models.AlertRule, dsId int64)
"",
).Set(0)
return points, recoverPoints, fmt.Errorf("rule_eval:%d ruleConfig is blank", rule.Id)
return points, recoverPoints, fmt.Errorf("alert_eval_%d datasource_%d ruleConfig is blank", rule.Id, dsId)
}
var ruleQuery models.RuleQuery
err := json.Unmarshal([]byte(ruleConfig), &ruleQuery)
if err != nil {
logger.Warningf("rule_eval:%d promql parse error:%s", rule.Id, err.Error())
logger.Warningf("alert_eval_%d datasource_%d promql parse error:%s", rule.Id, dsId, err.Error())
arw.Processor.Stats.CounterRuleEvalErrorTotal.WithLabelValues(fmt.Sprintf("%v", arw.Processor.DatasourceId()), GET_RULE_CONFIG, arw.Processor.BusiGroupCache.GetNameByBusiGroupId(arw.Rule.GroupId), fmt.Sprintf("%v", arw.Rule.Id)).Inc()
return points, recoverPoints, fmt.Errorf("rule_eval:%d promql parse error:%s", rule.Id, err.Error())
return points, recoverPoints, fmt.Errorf("alert_eval_%d datasource_%d promql parse error:%s", rule.Id, dsId, err.Error())
}
arw.Inhibit = ruleQuery.Inhibit
@@ -1461,7 +1474,7 @@ func (arw *AlertRuleWorker) GetAnomalyPoint(rule *models.AlertRule, dsId int64)
plug, exists := dscache.DsCache.Get(rule.Cate, dsId)
if !exists {
logger.Warningf("rule_eval rid:%d datasource:%d not exists", rule.Id, dsId)
logger.Warningf("alert_eval_%d datasource_%d not exists", rule.Id, dsId)
arw.Processor.Stats.CounterRuleEvalErrorTotal.WithLabelValues(fmt.Sprintf("%v", arw.Processor.DatasourceId()), GET_CLIENT, arw.Processor.BusiGroupCache.GetNameByBusiGroupId(arw.Rule.GroupId), fmt.Sprintf("%v", arw.Rule.Id)).Inc()
arw.Processor.Stats.GaugeQuerySeriesCount.WithLabelValues(
@@ -1470,14 +1483,24 @@ func (arw *AlertRuleWorker) GetAnomalyPoint(rule *models.AlertRule, dsId int64)
fmt.Sprintf("%v", i),
).Set(-2)
return points, recoverPoints, fmt.Errorf("rule_eval:%d datasource:%d not exists", rule.Id, dsId)
return points, recoverPoints, fmt.Errorf("alert_eval_%d datasource_%d not exists", rule.Id, dsId)
}
if err = ExecuteQueryTemplate(rule.Cate, query, nil); err != nil {
logger.Warningf("alert_eval_%d datasource_%d execute query template error: %v", rule.Id, dsId, err)
arw.Processor.Stats.CounterRuleEvalErrorTotal.WithLabelValues(fmt.Sprintf("%v", arw.Processor.DatasourceId()), EXEC_TEMPLATE, arw.Processor.BusiGroupCache.GetNameByBusiGroupId(arw.Rule.GroupId), fmt.Sprintf("%v", arw.Rule.Id)).Inc()
arw.Processor.Stats.GaugeQuerySeriesCount.WithLabelValues(
fmt.Sprintf("%v", arw.Rule.Id),
fmt.Sprintf("%v", arw.Processor.DatasourceId()),
fmt.Sprintf("%v", i),
).Set(-3)
}
ctx := context.WithValue(context.Background(), "delay", int64(rule.Delay))
series, err := plug.QueryData(ctx, query)
arw.Processor.Stats.CounterQueryDataTotal.WithLabelValues(fmt.Sprintf("%d", arw.DatasourceId), fmt.Sprintf("%d", rule.Id)).Inc()
if err != nil {
logger.Warningf("rule_eval rid:%d query data error: %v", rule.Id, err)
logger.Warningf("alert_eval_%d datasource_%d query data error: %v", rule.Id, dsId, err)
arw.Processor.Stats.CounterRuleEvalErrorTotal.WithLabelValues(fmt.Sprintf("%v", arw.Processor.DatasourceId()), GET_CLIENT, arw.Processor.BusiGroupCache.GetNameByBusiGroupId(arw.Rule.GroupId), fmt.Sprintf("%v", arw.Rule.Id)).Inc()
arw.Processor.Stats.GaugeQuerySeriesCount.WithLabelValues(
fmt.Sprintf("%v", arw.Rule.Id),
@@ -1485,7 +1508,7 @@ func (arw *AlertRuleWorker) GetAnomalyPoint(rule *models.AlertRule, dsId int64)
fmt.Sprintf("%v", i),
).Set(-1)
return points, recoverPoints, fmt.Errorf("rule_eval:%d query data error: %v", rule.Id, err)
return points, recoverPoints, fmt.Errorf("alert_eval_%d datasource_%d query data error: %v", rule.Id, dsId, err)
}
arw.Processor.Stats.GaugeQuerySeriesCount.WithLabelValues(
@@ -1495,21 +1518,21 @@ func (arw *AlertRuleWorker) GetAnomalyPoint(rule *models.AlertRule, dsId int64)
).Set(float64(len(series)))
// 此条日志很重要,是告警判断的现场值
logger.Infof("rule_eval rid:%d req:%+v resp:%v", rule.Id, query, series)
logger.Infof("alert_eval_%d datasource_%d req:%+v resp:%v", rule.Id, dsId, query, series)
for i := 0; i < len(series); i++ {
serieHash := hash.GetHash(series[i].Metric, series[i].Ref)
seriesHash := hash.GetHash(series[i].Metric, series[i].Ref)
tagHash := hash.GetTagHash(series[i].Metric)
seriesStore[serieHash] = series[i]
seriesStore[seriesHash] = series[i]
// 将曲线按照相同的 tag 分组
if _, exists := seriesTagIndex[tagHash]; !exists {
seriesTagIndex[tagHash] = make([]uint64, 0)
}
seriesTagIndex[tagHash] = append(seriesTagIndex[tagHash], serieHash)
seriesTagIndex[tagHash] = append(seriesTagIndex[tagHash], seriesHash)
}
ref, err := GetQueryRef(query)
if err != nil {
logger.Warningf("rule_eval rid:%d query:%+v get ref error:%s", rule.Id, query, err.Error())
logger.Warningf("alert_eval_%d datasource_%d query:%+v get ref error:%s", rule.Id, dsId, query, err.Error())
continue
}
seriesTagIndexes[ref] = seriesTagIndex
@@ -1519,7 +1542,7 @@ func (arw *AlertRuleWorker) GetAnomalyPoint(rule *models.AlertRule, dsId int64)
for _, query := range ruleQuery.Queries {
ref, unit, err := GetQueryRefAndUnit(query)
if err != nil {
logger.Warningf("rule_eval rid:%d query:%+v get ref and unit error:%s", rule.Id, query, err.Error())
logger.Warningf("alert_eval_%d datasource_%d query:%+v get ref and unit error:%s", rule.Id, dsId, query, err.Error())
continue
}
unitMap[ref] = unit
@@ -1539,15 +1562,15 @@ func (arw *AlertRuleWorker) GetAnomalyPoint(rule *models.AlertRule, dsId int64)
var ts int64
var sample models.DataResp
var value float64
for _, serieHash := range seriesHash {
series, exists := seriesStore[serieHash]
for _, seriesHash := range seriesHash {
series, exists := seriesStore[seriesHash]
if !exists {
logger.Warningf("rule_eval rid:%d series:%+v not found", rule.Id, series)
logger.Warningf("alert_eval_%d datasource_%d series:%+v not found", rule.Id, dsId, series)
continue
}
t, v, exists := series.Last()
if !exists {
logger.Warningf("rule_eval rid:%d series:%+v value not found", rule.Id, series)
logger.Warningf("alert_eval_%d datasource_%d series:%+v value not found", rule.Id, dsId, series)
continue
}
@@ -1578,12 +1601,12 @@ func (arw *AlertRuleWorker) GetAnomalyPoint(rule *models.AlertRule, dsId int64)
ts = int64(t)
sample = series
value = v
logger.Infof("rule_eval rid:%d origin series labels:%+v", rule.Id, series.Metric)
logger.Infof("alert_eval_%d datasource_%d origin series labels:%+v", rule.Id, dsId, series.Metric)
}
isTriggered := parser.CalcWithRid(trigger.Exp, m, rule.Id)
// 此条日志很重要,是告警判断的现场值
logger.Infof("rule_eval rid:%d trigger:%+v exp:%s res:%v m:%v", rule.Id, trigger, trigger.Exp, isTriggered, m)
logger.Infof("alert_eval_%d datasource_%d trigger:%+v exp:%s res:%v m:%v", rule.Id, dsId, trigger, trigger.Exp, isTriggered, m)
var values string
for k, v := range m {
@@ -1591,11 +1614,15 @@ func (arw *AlertRuleWorker) GetAnomalyPoint(rule *models.AlertRule, dsId int64)
continue
}
switch v.(type) {
case float64:
values += fmt.Sprintf("%s:%.3f ", k, v)
case string:
values += fmt.Sprintf("%s:%s ", k, v)
if u, exists := valuesUnitMap[k]; exists { // 配置了单位,优先用配置了单位的值
values += fmt.Sprintf("%s:%s ", k, u.Text)
} else {
switch v.(type) {
case float64:
values += fmt.Sprintf("%s:%.3f ", k, v)
case string:
values += fmt.Sprintf("%s:%s ", k, v)
}
}
}
@@ -1652,7 +1679,7 @@ func (arw *AlertRuleWorker) GetAnomalyPoint(rule *models.AlertRule, dsId int64)
// 检查是否超过 resolve_after 时间
if now-int64(lastTs) > int64(ruleQuery.NodataTrigger.ResolveAfter) {
logger.Infof("rule_eval rid:%d series:%+v resolve after %d seconds now:%d lastTs:%d", rule.Id, lastSeries, ruleQuery.NodataTrigger.ResolveAfter, now, int64(lastTs))
logger.Infof("alert_eval_%d datasource_%d series:%+v resolve after %d seconds now:%d lastTs:%d", rule.Id, dsId, lastSeries, ruleQuery.NodataTrigger.ResolveAfter, now, int64(lastTs))
delete(arw.LastSeriesStore, hash)
continue
}
@@ -1673,7 +1700,7 @@ func (arw *AlertRuleWorker) GetAnomalyPoint(rule *models.AlertRule, dsId int64)
TriggerType: models.TriggerTypeNodata,
}
points = append(points, point)
logger.Infof("rule_eval rid:%d nodata point:%+v", rule.Id, point)
logger.Infof("alert_eval_%d datasource_%d nodata point:%+v", rule.Id, dsId, point)
}
}
@@ -1688,3 +1715,61 @@ func (arw *AlertRuleWorker) GetAnomalyPoint(rule *models.AlertRule, dsId int64)
return points, recoverPoints, nil
}
// ExecuteQueryTemplate 根据数据源类型对 Query 进行模板渲染处理
// cate: 数据源类别,如 "mysql", "pgsql" 等
// query: 查询对象,如果是数据库类型的数据源,会处理其中的 sql 字段
// data: 模板数据对象,如果为 nil 则使用空结构体(不支持变量渲染),如果不为 nil 则使用传入的数据(支持变量渲染)
func ExecuteQueryTemplate(cate string, query interface{}, data interface{}) error {
// 检查 query 是否是 map且包含 sql 字段
queryMap, ok := query.(map[string]interface{})
if !ok {
return nil
}
sqlVal, exists := queryMap["sql"]
if !exists {
return nil
}
sqlStr, ok := sqlVal.(string)
if !ok {
return nil
}
// 调用 ExecuteSqlTemplate 处理 sql 字段
processedSQL, err := ExecuteSqlTemplate(sqlStr, data)
if err != nil {
return fmt.Errorf("execute sql template error: %w", err)
}
// 更新 query 中的 sql 字段
queryMap["sql"] = processedSQL
return nil
}
// ExecuteSqlTemplate 执行 query 中的 golang 模板语法函数
// query: 要处理的 query 字符串
// data: 模板数据对象,如果为 nil 则使用空结构体(不支持变量渲染),如果不为 nil 则使用传入的数据(支持变量渲染)
func ExecuteSqlTemplate(query string, data interface{}) (string, error) {
if !strings.Contains(query, "{{") || !strings.Contains(query, "}}") {
return query, nil
}
tmpl, err := template.New("query").Funcs(tplx.TemplateFuncMap).Parse(query)
if err != nil {
return "", fmt.Errorf("query tmpl parse error: %w", err)
}
var buf strings.Builder
templateData := data
if templateData == nil {
templateData = struct{}{}
}
if err := tmpl.Execute(&buf, templateData); err != nil {
return "", fmt.Errorf("query tmpl execute error: %w", err)
}
return buf.String(), nil
}

View File

@@ -1,6 +1,7 @@
package mute
import (
"slices"
"strconv"
"strings"
"time"
@@ -9,6 +10,7 @@ import (
"github.com/ccfos/nightingale/v6/memsto"
"github.com/ccfos/nightingale/v6/models"
"github.com/pkg/errors"
"github.com/toolkits/pkg/logger"
)
@@ -39,8 +41,28 @@ func IsMuted(rule *models.AlertRule, event *models.AlertCurEvent, targetCache *m
// TimeSpanMuteStrategy 根据规则配置的告警生效时间段过滤,如果产生的告警不在规则配置的告警生效时间段内,则不告警,即被mute
// 时间范围左闭右开默认范围00:00-24:00
// 如果规则配置了时区,则在该时区下进行时间判断;如果时区为空,则使用系统时区
func TimeSpanMuteStrategy(rule *models.AlertRule, event *models.AlertCurEvent) bool {
tm := time.Unix(event.TriggerTime, 0)
// 确定使用的时区
var targetLoc *time.Location
var err error
timezone := rule.TimeZone
if timezone == "" {
// 如果时区为空,使用系统时区(保持原有逻辑)
targetLoc = time.Local
} else {
// 加载规则配置的时区
targetLoc, err = time.LoadLocation(timezone)
if err != nil {
// 如果时区加载失败,记录错误并使用系统时区
logger.Warningf("Failed to load timezone %s for rule %d, using system timezone: %v", timezone, rule.Id, err)
targetLoc = time.Local
}
}
// 将触发时间转换到目标时区
tm := time.Unix(event.TriggerTime, 0).In(targetLoc)
triggerTime := tm.Format("15:04")
triggerWeek := strconv.Itoa(int(tm.Weekday()))
@@ -100,7 +122,7 @@ func IdentNotExistsMuteStrategy(rule *models.AlertRule, event *models.AlertCurEv
// 如果是target_up的告警,且ident已经不存在了,直接过滤掉
// 这里的判断有点太粗暴了,但是目前没有更好的办法
if !exists && strings.Contains(rule.PromQl, "target_up") {
logger.Debugf("[%s] mute: rule_eval:%d cluster:%s ident:%s", "IdentNotExistsMuteStrategy", rule.Id, event.Cluster, ident)
logger.Debugf("alert_eval_%d [IdentNotExistsMuteStrategy] mute: cluster:%s ident:%s", rule.Id, event.Cluster, ident)
return true
}
return false
@@ -122,7 +144,7 @@ func BgNotMatchMuteStrategy(rule *models.AlertRule, event *models.AlertCurEvent,
// 对于包含ident的告警事件check一下ident所属bg和rule所属bg是否相同
// 如果告警规则选择了只在本BG生效那其他BG的机器就不能因此规则产生告警
if exists && !target.MatchGroupId(rule.GroupId) {
logger.Debugf("[%s] mute: rule_eval:%d cluster:%s", "BgNotMatchMuteStrategy", rule.Id, event.Cluster)
logger.Debugf("alert_eval_%d [BgNotMatchMuteStrategy] mute: cluster:%s", rule.Id, event.Cluster)
return true
}
return false
@@ -135,7 +157,8 @@ func EventMuteStrategy(event *models.AlertCurEvent, alertMuteCache *memsto.Alert
}
for i := 0; i < len(mutes); i++ {
if MatchMute(event, mutes[i]) {
matched, _ := MatchMute(event, mutes[i])
if matched {
return true, mutes[i].Id
}
}
@@ -144,27 +167,21 @@ func EventMuteStrategy(event *models.AlertCurEvent, alertMuteCache *memsto.Alert
}
// MatchMute 如果传入了clock这个可选参数就表示使用这个clock表示的时间否则就从event的字段中取TriggerTime
func MatchMute(event *models.AlertCurEvent, mute *models.AlertMute, clock ...int64) bool {
func MatchMute(event *models.AlertCurEvent, mute *models.AlertMute, clock ...int64) (bool, error) {
if mute.Disabled == 1 {
return false
return false, errors.New("mute is disabled")
}
// 如果不是全局的,判断 匹配的 datasource id
if len(mute.DatasourceIdsJson) != 0 && mute.DatasourceIdsJson[0] != 0 && event.DatasourceId != 0 {
idm := make(map[int64]struct{}, len(mute.DatasourceIdsJson))
for i := 0; i < len(mute.DatasourceIdsJson); i++ {
idm[mute.DatasourceIdsJson[i]] = struct{}{}
}
// 判断 event.datasourceId 是否包含在 idm 中
if _, has := idm[event.DatasourceId]; !has {
return false
if !slices.Contains(mute.DatasourceIdsJson, event.DatasourceId) {
return false, errors.New("datasource id not match")
}
}
if mute.MuteTimeType == models.TimeRange {
if !mute.IsWithinTimeRange(event.TriggerTime) {
return false
return false, errors.New("event trigger time not within mute time range")
}
} else if mute.MuteTimeType == models.Periodic {
ts := event.TriggerTime
@@ -173,11 +190,11 @@ func MatchMute(event *models.AlertCurEvent, mute *models.AlertMute, clock ...int
}
if !mute.IsWithinPeriodicMute(ts) {
return false
return false, errors.New("event trigger time not within periodic mute range")
}
} else {
logger.Warningf("mute time type invalid, %d", mute.MuteTimeType)
return false
return false, errors.New("mute time type invalid")
}
var matchSeverity bool
@@ -193,12 +210,14 @@ func MatchMute(event *models.AlertCurEvent, mute *models.AlertMute, clock ...int
}
if !matchSeverity {
return false
return false, errors.New("event severity not match mute severity")
}
if mute.ITags == nil || len(mute.ITags) == 0 {
return true
if len(mute.ITags) == 0 {
return true, nil
}
return common.MatchTags(event.TagsMap, mute.ITags)
if !common.MatchTags(event.TagsMap, mute.ITags) {
return false, errors.New("event tags not match mute tags")
}
return true, nil
}

View File

@@ -115,7 +115,7 @@ func (n *Naming) heartbeat() error {
newDatasource[datasourceIds[i]] = struct{}{}
servers, err := n.ActiveServers(datasourceIds[i])
if err != nil {
logger.Warningf("hearbeat %d get active server err:%v", datasourceIds[i], err)
logger.Warningf("heartbeat %d get active server err:%v", datasourceIds[i], err)
n.astats.CounterHeartbeatErrorTotal.WithLabelValues().Inc()
continue
}
@@ -148,7 +148,7 @@ func (n *Naming) heartbeat() error {
servers, err := n.ActiveServersByEngineName()
if err != nil {
logger.Warningf("hearbeat %d get active server err:%v", HostDatasource, err)
logger.Warningf("heartbeat %d get active server err:%v", HostDatasource, err)
n.astats.CounterHeartbeatErrorTotal.WithLabelValues().Inc()
return nil
}

View File

@@ -0,0 +1,380 @@
package engine
import (
"fmt"
"time"
"github.com/ccfos/nightingale/v6/models"
"github.com/ccfos/nightingale/v6/pkg/ctx"
"github.com/google/uuid"
"github.com/toolkits/pkg/logger"
)
type WorkflowEngine struct {
ctx *ctx.Context
}
func NewWorkflowEngine(c *ctx.Context) *WorkflowEngine {
return &WorkflowEngine{ctx: c}
}
func (e *WorkflowEngine) Execute(pipeline *models.EventPipeline, event *models.AlertCurEvent, triggerCtx *models.WorkflowTriggerContext) (*models.AlertCurEvent, *models.WorkflowResult, error) {
startTime := time.Now()
wfCtx := e.initWorkflowContext(pipeline, event, triggerCtx)
nodes := pipeline.GetWorkflowNodes()
connections := pipeline.GetWorkflowConnections()
if len(nodes) == 0 {
return event, &models.WorkflowResult{
Event: event,
Status: models.ExecutionStatusSuccess,
Message: "no nodes to execute",
}, nil
}
nodeMap := make(map[string]*models.WorkflowNode)
for i := range nodes {
if nodes[i].RetryInterval == 0 {
nodes[i].RetryInterval = 1
}
if nodes[i].MaxRetries == 0 {
nodes[i].MaxRetries = 1
}
nodeMap[nodes[i].ID] = &nodes[i]
}
result := e.executeDAG(nodeMap, connections, wfCtx)
result.Event = wfCtx.Event
duration := time.Since(startTime).Milliseconds()
if triggerCtx != nil && triggerCtx.Mode != "" {
e.saveExecutionRecord(pipeline, wfCtx, result, triggerCtx, startTime.Unix(), duration)
}
return wfCtx.Event, result, nil
}
func (e *WorkflowEngine) initWorkflowContext(pipeline *models.EventPipeline, event *models.AlertCurEvent, triggerCtx *models.WorkflowTriggerContext) *models.WorkflowContext {
// 合并输入参数
inputs := pipeline.GetInputsMap()
if triggerCtx != nil && triggerCtx.InputsOverrides != nil {
for k, v := range triggerCtx.InputsOverrides {
inputs[k] = v
}
}
metadata := map[string]string{
"start_time": fmt.Sprintf("%d", time.Now().Unix()),
"pipeline_id": fmt.Sprintf("%d", pipeline.ID),
}
// 是否启用流式输出
stream := false
if triggerCtx != nil {
metadata["request_id"] = triggerCtx.RequestID
metadata["trigger_mode"] = triggerCtx.Mode
metadata["trigger_by"] = triggerCtx.TriggerBy
stream = triggerCtx.Stream
}
return &models.WorkflowContext{
Event: event,
Inputs: inputs,
Vars: make(map[string]interface{}), // 初始化空的 Vars供节点间传递数据
Metadata: metadata,
Stream: stream,
}
}
// executeDAG 使用 Kahn 算法执行 DAG
func (e *WorkflowEngine) executeDAG(nodeMap map[string]*models.WorkflowNode, connections models.Connections, wfCtx *models.WorkflowContext) *models.WorkflowResult {
result := &models.WorkflowResult{
Status: models.ExecutionStatusSuccess,
NodeResults: make([]*models.NodeExecutionResult, 0),
Stream: wfCtx.Stream, // 从上下文继承流式输出设置
}
// 计算每个节点的入度
inDegree := make(map[string]int)
for nodeID := range nodeMap {
inDegree[nodeID] = 0
}
// 遍历连接,计算入度
for _, nodeConns := range connections {
for _, targets := range nodeConns.Main {
for _, target := range targets {
inDegree[target.Node]++
}
}
}
// 找到所有入度为 0 的节点(起始节点)
queue := make([]string, 0)
for nodeID, degree := range inDegree {
if degree == 0 {
queue = append(queue, nodeID)
}
}
// 如果没有起始节点,说明存在循环依赖
if len(queue) == 0 && len(nodeMap) > 0 {
result.Status = models.ExecutionStatusFailed
result.Message = "workflow has circular dependency"
return result
}
// 记录已执行的节点
executed := make(map[string]bool)
// 记录节点的分支选择结果
branchResults := make(map[string]*int)
for len(queue) > 0 {
// 取出队首节点
nodeID := queue[0]
queue = queue[1:]
// 检查是否已执行
if executed[nodeID] {
continue
}
node, exists := nodeMap[nodeID]
if !exists {
continue
}
// 执行节点
nodeResult, nodeOutput := e.executeNode(node, wfCtx)
result.NodeResults = append(result.NodeResults, nodeResult)
if nodeOutput != nil && nodeOutput.Stream && nodeOutput.StreamChan != nil {
// 流式输出节点通常是最后一个节点
// 直接传递 StreamChan 给 WorkflowResult不阻塞等待
result.Stream = true
result.StreamChan = nodeOutput.StreamChan
result.Event = wfCtx.Event
result.Status = "streaming"
result.Message = fmt.Sprintf("streaming output from node: %s", node.Name)
// 更新节点状态为 streaming
nodeResult.Status = "streaming"
nodeResult.Message = "streaming in progress"
// 立即返回,让 API 层处理流式响应
return result
}
executed[nodeID] = true
// 保存分支结果
if nodeResult.BranchIndex != nil {
branchResults[nodeID] = nodeResult.BranchIndex
}
// 检查执行状态
if nodeResult.Status == "failed" {
if !node.ContinueOnFail {
result.Status = models.ExecutionStatusFailed
result.ErrorNode = nodeID
result.Message = fmt.Sprintf("node %s failed: %s", node.Name, nodeResult.Error)
}
}
// 检查是否终止
if nodeResult.Status == "terminated" {
result.Message = fmt.Sprintf("workflow terminated at node %s", node.Name)
return result
}
// 更新后继节点的入度
if nodeConns, ok := connections[nodeID]; ok {
for outputIndex, targets := range nodeConns.Main {
// 检查是否应该走这个分支
if !e.shouldFollowBranch(nodeID, outputIndex, branchResults) {
continue
}
for _, target := range targets {
inDegree[target.Node]--
if inDegree[target.Node] == 0 {
queue = append(queue, target.Node)
}
}
}
}
}
return result
}
// executeNode 执行单个节点
// 返回:节点执行结果、节点输出(用于流式输出检测)
func (e *WorkflowEngine) executeNode(node *models.WorkflowNode, wfCtx *models.WorkflowContext) (*models.NodeExecutionResult, *models.NodeOutput) {
startTime := time.Now()
nodeResult := &models.NodeExecutionResult{
NodeID: node.ID,
NodeName: node.Name,
NodeType: node.Type,
StartedAt: startTime.Unix(),
}
var nodeOutput *models.NodeOutput
// 跳过禁用的节点
if node.Disabled {
nodeResult.Status = "skipped"
nodeResult.Message = "node is disabled"
nodeResult.FinishedAt = time.Now().Unix()
nodeResult.DurationMs = time.Since(startTime).Milliseconds()
return nodeResult, nil
}
// 获取处理器
processor, err := models.GetProcessorByType(node.Type, node.Config)
if err != nil {
nodeResult.Status = "failed"
nodeResult.Error = fmt.Sprintf("failed to get processor: %v", err)
nodeResult.FinishedAt = time.Now().Unix()
nodeResult.DurationMs = time.Since(startTime).Milliseconds()
return nodeResult, nil
}
// 执行处理器(带重试)
var retries int
maxRetries := node.MaxRetries
if !node.RetryOnFail {
maxRetries = 0
}
for retries <= maxRetries {
// 检查是否为分支处理器
if branchProcessor, ok := processor.(models.BranchProcessor); ok {
output, err := branchProcessor.ProcessWithBranch(e.ctx, wfCtx)
if err != nil {
if retries < maxRetries {
retries++
time.Sleep(time.Duration(node.RetryInterval) * time.Second)
continue
}
nodeResult.Status = "failed"
nodeResult.Error = err.Error()
} else {
nodeResult.Status = "success"
if output != nil {
nodeOutput = output
if output.WfCtx != nil {
wfCtx = output.WfCtx
}
nodeResult.Message = output.Message
nodeResult.BranchIndex = output.BranchIndex
if output.Terminate {
nodeResult.Status = "terminated"
}
}
}
break
}
// 普通处理器
newWfCtx, msg, err := processor.Process(e.ctx, wfCtx)
if err != nil {
if retries < maxRetries {
retries++
time.Sleep(time.Duration(node.RetryInterval) * time.Second)
continue
}
nodeResult.Status = "failed"
nodeResult.Error = err.Error()
} else {
nodeResult.Status = "success"
nodeResult.Message = msg
if newWfCtx != nil {
wfCtx = newWfCtx
// 检测流式输出标记
if newWfCtx.Stream && newWfCtx.StreamChan != nil {
nodeOutput = &models.NodeOutput{
WfCtx: newWfCtx,
Message: msg,
Stream: true,
StreamChan: newWfCtx.StreamChan,
}
}
}
// 如果事件被 drop返回 nil 或 Event 为 nil标记为终止
if newWfCtx == nil || newWfCtx.Event == nil {
nodeResult.Status = "terminated"
nodeResult.Message = msg
}
}
break
}
nodeResult.FinishedAt = time.Now().Unix()
nodeResult.DurationMs = time.Since(startTime).Milliseconds()
logger.Infof("workflow: executed node %s (type=%s) status=%s msg=%s duration=%dms",
node.Name, node.Type, nodeResult.Status, nodeResult.Message, nodeResult.DurationMs)
return nodeResult, nodeOutput
}
// shouldFollowBranch 判断是否应该走某个分支
func (e *WorkflowEngine) shouldFollowBranch(nodeID string, outputIndex int, branchResults map[string]*int) bool {
branchIndex, hasBranch := branchResults[nodeID]
if !hasBranch {
// 没有分支结果,说明不是分支节点,只走第一个输出
return outputIndex == 0
}
if branchIndex == nil {
// branchIndex 为 nil走默认分支通常是最后一个
return true
}
// 只走选中的分支
return outputIndex == *branchIndex
}
func (e *WorkflowEngine) saveExecutionRecord(pipeline *models.EventPipeline, wfCtx *models.WorkflowContext, result *models.WorkflowResult, triggerCtx *models.WorkflowTriggerContext, startTime int64, duration int64) {
executionID := triggerCtx.RequestID
if executionID == "" {
executionID = uuid.New().String()
}
execution := &models.EventPipelineExecution{
ID: executionID,
PipelineID: pipeline.ID,
PipelineName: pipeline.Name,
Mode: triggerCtx.Mode,
Status: result.Status,
ErrorMessage: result.Message,
ErrorNode: result.ErrorNode,
CreatedAt: startTime,
FinishedAt: time.Now().Unix(),
DurationMs: duration,
TriggerBy: triggerCtx.TriggerBy,
}
if wfCtx.Event != nil {
execution.EventID = wfCtx.Event.Id
}
if err := execution.SetNodeResults(result.NodeResults); err != nil {
logger.Errorf("workflow: failed to set node results: pipeline_id=%d, error=%v", pipeline.ID, err)
}
if err := execution.SetInputsSnapshot(wfCtx.Inputs); err != nil {
logger.Errorf("workflow: failed to set inputs snapshot: pipeline_id=%d, error=%v", pipeline.ID, err)
}
if err := models.CreateEventPipelineExecution(e.ctx, execution); err != nil {
logger.Errorf("workflow: failed to save execution record: pipeline_id=%d, error=%v", pipeline.ID, err)
}
}

View File

@@ -5,6 +5,7 @@ import (
_ "github.com/ccfos/nightingale/v6/alert/pipeline/processor/callback"
_ "github.com/ccfos/nightingale/v6/alert/pipeline/processor/eventdrop"
_ "github.com/ccfos/nightingale/v6/alert/pipeline/processor/eventupdate"
_ "github.com/ccfos/nightingale/v6/alert/pipeline/processor/logic"
_ "github.com/ccfos/nightingale/v6/alert/pipeline/processor/relabel"
)

View File

@@ -8,6 +8,7 @@ import (
"io"
"net/http"
"net/url"
"strconv"
"strings"
"text/template"
"time"
@@ -17,7 +18,6 @@ import (
"github.com/ccfos/nightingale/v6/models"
"github.com/ccfos/nightingale/v6/pkg/ctx"
"github.com/ccfos/nightingale/v6/pkg/tplx"
"github.com/toolkits/pkg/logger"
)
const (
@@ -55,26 +55,24 @@ func (c *AISummaryConfig) Init(settings interface{}) (models.Processor, error) {
return result, err
}
func (c *AISummaryConfig) Process(ctx *ctx.Context, event *models.AlertCurEvent) *models.AlertCurEvent {
func (c *AISummaryConfig) Process(ctx *ctx.Context, wfCtx *models.WorkflowContext) (*models.WorkflowContext, string, error) {
event := wfCtx.Event
if c.Client == nil {
if err := c.initHTTPClient(); err != nil {
logger.Errorf("failed to initialize HTTP client: %v", err)
return event
return wfCtx, "", fmt.Errorf("failed to initialize HTTP client: %v processor: %v", err, c)
}
}
// 准备告警事件信息
eventInfo, err := c.prepareEventInfo(event)
eventInfo, err := c.prepareEventInfo(wfCtx)
if err != nil {
logger.Errorf("failed to prepare event info: %v", err)
return event
return wfCtx, "", fmt.Errorf("failed to prepare event info: %v processor: %v", err, c)
}
// 调用AI模型生成总结
summary, err := c.generateAISummary(eventInfo)
if err != nil {
logger.Errorf("failed to generate AI summary: %v", err)
return event
return wfCtx, "", fmt.Errorf("failed to generate AI summary: %v processor: %v", err, c)
}
// 将总结添加到annotations字段
@@ -86,12 +84,11 @@ func (c *AISummaryConfig) Process(ctx *ctx.Context, event *models.AlertCurEvent)
// 更新Annotations字段
b, err := json.Marshal(event.AnnotationsJSON)
if err != nil {
logger.Errorf("failed to marshal annotations: %v", err)
return event
return wfCtx, "", fmt.Errorf("failed to marshal annotations: %v processor: %v", err, c)
}
event.Annotations = string(b)
return event
return wfCtx, "", nil
}
func (c *AISummaryConfig) initHTTPClient() error {
@@ -114,9 +111,10 @@ func (c *AISummaryConfig) initHTTPClient() error {
return nil
}
func (c *AISummaryConfig) prepareEventInfo(event *models.AlertCurEvent) (string, error) {
func (c *AISummaryConfig) prepareEventInfo(wfCtx *models.WorkflowContext) (string, error) {
var defs = []string{
"{{$event := .}}",
"{{$event := .Event}}",
"{{$inputs := .Inputs}}",
}
text := strings.Join(append(defs, c.PromptTemplate), "")
@@ -126,7 +124,7 @@ func (c *AISummaryConfig) prepareEventInfo(event *models.AlertCurEvent) (string,
}
var body bytes.Buffer
err = t.Execute(&body, event)
err = t.Execute(&body, wfCtx)
if err != nil {
return "", fmt.Errorf("failed to execute prompt template: %v", err)
}
@@ -137,7 +135,7 @@ func (c *AISummaryConfig) prepareEventInfo(event *models.AlertCurEvent) (string,
func (c *AISummaryConfig) generateAISummary(eventInfo string) (string, error) {
// 构建基础请求参数
reqParams := map[string]interface{}{
"model": c.ModelName,
"model": c.ModelName,
"messages": []Message{
{
Role: "user",
@@ -148,7 +146,11 @@ func (c *AISummaryConfig) generateAISummary(eventInfo string) (string, error) {
// 合并自定义参数
for k, v := range c.CustomParams {
reqParams[k] = v
converted, err := convertCustomParam(v)
if err != nil {
return "", fmt.Errorf("failed to convert custom param %s: %v", k, err)
}
reqParams[k] = converted
}
// 序列化请求体
@@ -201,3 +203,44 @@ func (c *AISummaryConfig) generateAISummary(eventInfo string) (string, error) {
return chatResp.Choices[0].Message.Content, nil
}
// convertCustomParam 将前端传入的参数转换为正确的类型
func convertCustomParam(value interface{}) (interface{}, error) {
if value == nil {
return nil, nil
}
// 如果是字符串,尝试转换为其他类型
if str, ok := value.(string); ok {
// 尝试转换为数字
if f, err := strconv.ParseFloat(str, 64); err == nil {
// 检查是否为整数
if f == float64(int64(f)) {
return int64(f), nil
}
return f, nil
}
// 尝试转换为布尔值
if b, err := strconv.ParseBool(str); err == nil {
return b, nil
}
// 尝试解析为JSON数组
if strings.HasPrefix(strings.TrimSpace(str), "[") {
var arr []interface{}
if err := json.Unmarshal([]byte(str), &arr); err == nil {
return arr, nil
}
}
// 尝试解析为JSON对象
if strings.HasPrefix(strings.TrimSpace(str), "{") {
var obj map[string]interface{}
if err := json.Unmarshal([]byte(str), &obj); err == nil {
return obj, nil
}
}
}
return value, nil
}

View File

@@ -42,8 +42,14 @@ func TestAISummaryConfig_Process(t *testing.T) {
},
}
// 创建 WorkflowContext
wfCtx := &models.WorkflowContext{
Event: event,
Inputs: map[string]string{},
}
// 测试模板处理
eventInfo, err := config.prepareEventInfo(event)
eventInfo, err := config.prepareEventInfo(wfCtx)
assert.NoError(t, err)
assert.Contains(t, eventInfo, "Test Rule")
assert.Contains(t, eventInfo, "1")
@@ -54,15 +60,86 @@ func TestAISummaryConfig_Process(t *testing.T) {
assert.NotNil(t, processor)
// 测试处理函数
result := processor.Process(&ctx.Context{}, event)
result, _, err := processor.Process(&ctx.Context{}, wfCtx)
assert.NoError(t, err)
assert.NotNil(t, result)
assert.NotEmpty(t, result.AnnotationsJSON["ai_summary"])
assert.NotEmpty(t, result.Event.AnnotationsJSON["ai_summary"])
// 展示处理结果
t.Log("\n=== 处理结果 ===")
t.Logf("告警规则: %s", result.RuleName)
t.Logf("严重程度: %d", result.Severity)
t.Logf("标签: %v", result.TagsMap)
t.Logf("原始注释: %v", result.AnnotationsJSON["description"])
t.Logf("AI总结: %s", result.AnnotationsJSON["ai_summary"])
t.Logf("告警规则: %s", result.Event.RuleName)
t.Logf("严重程度: %d", result.Event.Severity)
t.Logf("标签: %v", result.Event.TagsMap)
t.Logf("原始注释: %v", result.Event.AnnotationsJSON["description"])
t.Logf("AI总结: %s", result.Event.AnnotationsJSON["ai_summary"])
}
func TestConvertCustomParam(t *testing.T) {
tests := []struct {
name string
input interface{}
expected interface{}
hasError bool
}{
{
name: "nil value",
input: nil,
expected: nil,
hasError: false,
},
{
name: "string number to int64",
input: "123",
expected: int64(123),
hasError: false,
},
{
name: "string float to float64",
input: "123.45",
expected: 123.45,
hasError: false,
},
{
name: "string boolean to bool",
input: "true",
expected: true,
hasError: false,
},
{
name: "string false to bool",
input: "false",
expected: false,
hasError: false,
},
{
name: "JSON array string to slice",
input: `["a", "b", "c"]`,
expected: []interface{}{"a", "b", "c"},
hasError: false,
},
{
name: "JSON object string to map",
input: `{"key": "value", "num": 123}`,
expected: map[string]interface{}{"key": "value", "num": float64(123)},
hasError: false,
},
{
name: "plain string remains string",
input: "hello world",
expected: "hello world",
hasError: false,
},
}
for _, test := range tests {
t.Run(test.name, func(t *testing.T) {
converted, err := convertCustomParam(test.input)
if test.hasError {
assert.Error(t, err)
return
}
assert.NoError(t, err)
assert.Equal(t, test.expected, converted)
})
}
}

View File

@@ -3,6 +3,7 @@ package callback
import (
"crypto/tls"
"encoding/json"
"fmt"
"io"
"net/http"
"net/url"
@@ -10,6 +11,7 @@ import (
"time"
"github.com/ccfos/nightingale/v6/alert/pipeline/processor/common"
"github.com/ccfos/nightingale/v6/alert/pipeline/processor/utils"
"github.com/ccfos/nightingale/v6/models"
"github.com/ccfos/nightingale/v6/pkg/ctx"
"github.com/toolkits/pkg/logger"
@@ -42,7 +44,8 @@ func (c *CallbackConfig) Init(settings interface{}) (models.Processor, error) {
return result, err
}
func (c *CallbackConfig) Process(ctx *ctx.Context, event *models.AlertCurEvent) *models.AlertCurEvent {
func (c *CallbackConfig) Process(ctx *ctx.Context, wfCtx *models.WorkflowContext) (*models.WorkflowContext, string, error) {
event := wfCtx.Event
if c.Client == nil {
transport := &http.Transport{
TLSClientConfig: &tls.Config{InsecureSkipVerify: c.SkipSSLVerify},
@@ -51,7 +54,7 @@ func (c *CallbackConfig) Process(ctx *ctx.Context, event *models.AlertCurEvent)
if c.Proxy != "" {
proxyURL, err := url.Parse(c.Proxy)
if err != nil {
logger.Errorf("failed to parse proxy url: %v", err)
return wfCtx, "", fmt.Errorf("failed to parse proxy url: %v processor: %v", err, c)
} else {
transport.Proxy = http.ProxyURL(proxyURL)
}
@@ -69,16 +72,19 @@ func (c *CallbackConfig) Process(ctx *ctx.Context, event *models.AlertCurEvent)
headers[k] = v
}
body, err := json.Marshal(event)
url, err := utils.TplRender(wfCtx, c.URL)
if err != nil {
logger.Errorf("failed to marshal event: %v", err)
return event
return wfCtx, "", fmt.Errorf("failed to render url template: %v processor: %v", err, c)
}
req, err := http.NewRequest("POST", c.URL, strings.NewReader(string(body)))
body, err := json.Marshal(event)
if err != nil {
logger.Errorf("failed to create request: %v event: %v", err, event)
return event
return wfCtx, "", fmt.Errorf("failed to marshal event: %v processor: %v", err, c)
}
req, err := http.NewRequest("POST", url, strings.NewReader(string(body)))
if err != nil {
return wfCtx, "", fmt.Errorf("failed to create request: %v processor: %v", err, c)
}
for k, v := range headers {
@@ -91,16 +97,14 @@ func (c *CallbackConfig) Process(ctx *ctx.Context, event *models.AlertCurEvent)
resp, err := c.Client.Do(req)
if err != nil {
logger.Errorf("failed to send request: %v event: %v", err, event)
return event
return wfCtx, "", fmt.Errorf("failed to send request: %v processor: %v", err, c)
}
b, err := io.ReadAll(resp.Body)
if err != nil {
logger.Errorf("failed to read response body: %v event: %v", err, event)
return event
return wfCtx, "", fmt.Errorf("failed to read response body: %v processor: %v", err, c)
}
logger.Infof("response body: %s", string(b))
return event
logger.Debugf("callback processor response body: %s", string(b))
return wfCtx, "callback success", nil
}

View File

@@ -2,6 +2,7 @@ package eventdrop
import (
"bytes"
"fmt"
"strings"
texttemplate "text/template"
@@ -25,37 +26,38 @@ func (c *EventDropConfig) Init(settings interface{}) (models.Processor, error) {
return result, err
}
func (c *EventDropConfig) Process(ctx *ctx.Context, event *models.AlertCurEvent) *models.AlertCurEvent {
func (c *EventDropConfig) Process(ctx *ctx.Context, wfCtx *models.WorkflowContext) (*models.WorkflowContext, string, error) {
// 使用背景是可以根据此处理器,实现对事件进行更加灵活的过滤的逻辑
// 在标签过滤和属性过滤都不满足需求时可以使用
// 如果模板执行结果为 true则删除该事件
event := wfCtx.Event
var defs = []string{
"{{ $event := . }}",
"{{ $labels := .TagsMap }}",
"{{ $value := .TriggerValue }}",
"{{ $event := .Event }}",
"{{ $labels := .Event.TagsMap }}",
"{{ $value := .Event.TriggerValue }}",
"{{ $inputs := .Inputs }}",
}
text := strings.Join(append(defs, c.Content), "")
tpl, err := texttemplate.New("eventdrop").Funcs(tplx.TemplateFuncMap).Parse(text)
if err != nil {
logger.Errorf("processor failed to parse template: %v event: %v", err, event)
return event
return wfCtx, "", fmt.Errorf("processor failed to parse template: %v processor: %v", err, c)
}
var body bytes.Buffer
if err = tpl.Execute(&body, event); err != nil {
logger.Errorf("processor failed to execute template: %v event: %v", err, event)
return event
if err = tpl.Execute(&body, wfCtx); err != nil {
return wfCtx, "", fmt.Errorf("processor failed to execute template: %v processor: %v", err, c)
}
result := strings.TrimSpace(body.String())
logger.Infof("processor eventdrop result: %v", result)
if result == "true" {
logger.Infof("processor eventdrop drop event: %v", event)
return nil
wfCtx.Event = nil
logger.Infof("processor eventdrop drop event: %s", event.Hash)
return wfCtx, "drop event success", nil
}
return event
return wfCtx, "drop event failed", nil
}

View File

@@ -3,6 +3,7 @@ package eventupdate
import (
"crypto/tls"
"encoding/json"
"fmt"
"io"
"net/http"
"net/url"
@@ -30,7 +31,8 @@ func (c *EventUpdateConfig) Init(settings interface{}) (models.Processor, error)
return result, err
}
func (c *EventUpdateConfig) Process(ctx *ctx.Context, event *models.AlertCurEvent) *models.AlertCurEvent {
func (c *EventUpdateConfig) Process(ctx *ctx.Context, wfCtx *models.WorkflowContext) (*models.WorkflowContext, string, error) {
event := wfCtx.Event
if c.Client == nil {
transport := &http.Transport{
TLSClientConfig: &tls.Config{InsecureSkipVerify: c.SkipSSLVerify},
@@ -39,7 +41,7 @@ func (c *EventUpdateConfig) Process(ctx *ctx.Context, event *models.AlertCurEven
if c.Proxy != "" {
proxyURL, err := url.Parse(c.Proxy)
if err != nil {
logger.Errorf("failed to parse proxy url: %v", err)
return wfCtx, "", fmt.Errorf("failed to parse proxy url: %v processor: %v", err, c)
} else {
transport.Proxy = http.ProxyURL(proxyURL)
}
@@ -59,14 +61,12 @@ func (c *EventUpdateConfig) Process(ctx *ctx.Context, event *models.AlertCurEven
body, err := json.Marshal(event)
if err != nil {
logger.Errorf("failed to marshal event: %v", err)
return event
return wfCtx, "", fmt.Errorf("failed to marshal event: %v processor: %v", err, c)
}
req, err := http.NewRequest("POST", c.URL, strings.NewReader(string(body)))
if err != nil {
logger.Errorf("failed to create request: %v event: %v", err, event)
return event
return wfCtx, "", fmt.Errorf("failed to create request: %v processor: %v", err, c)
}
for k, v := range headers {
@@ -79,22 +79,19 @@ func (c *EventUpdateConfig) Process(ctx *ctx.Context, event *models.AlertCurEven
resp, err := c.Client.Do(req)
if err != nil {
logger.Errorf("failed to send request: %v event: %v", err, event)
return event
return wfCtx, "", fmt.Errorf("failed to send request: %v processor: %v", err, c)
}
b, err := io.ReadAll(resp.Body)
if err != nil {
logger.Errorf("failed to read response body: %v event: %v", err, event)
return event
return nil, "", fmt.Errorf("failed to read response body: %v processor: %v", err, c)
}
logger.Infof("response body: %s", string(b))
logger.Debugf("event update processor response body: %s", string(b))
err = json.Unmarshal(b, &event)
if err != nil {
logger.Errorf("failed to unmarshal response body: %v event: %v", err, event)
return event
return wfCtx, "", fmt.Errorf("failed to unmarshal response body: %v processor: %v", err, c)
}
return event
return wfCtx, "", nil
}

View File

@@ -0,0 +1,197 @@
package logic
import (
"bytes"
"fmt"
"strings"
"text/template"
alertCommon "github.com/ccfos/nightingale/v6/alert/common"
"github.com/ccfos/nightingale/v6/alert/pipeline/processor/common"
"github.com/ccfos/nightingale/v6/models"
"github.com/ccfos/nightingale/v6/pkg/ctx"
"github.com/ccfos/nightingale/v6/pkg/tplx"
)
// 判断模式常量
const (
ConditionModeExpression = "expression" // 表达式模式(默认)
ConditionModeTags = "tags" // 标签/属性模式
)
// IfConfig If 条件处理器配置
type IfConfig struct {
// 判断模式expression表达式或 tags标签/属性)
Mode string `json:"mode,omitempty"`
// 表达式模式配置
// 条件表达式(支持 Go 模板语法)
// 例如:{{ if eq .Severity 1 }}true{{ end }}
Condition string `json:"condition,omitempty"`
// 标签/属性模式配置
LabelKeys []models.TagFilter `json:"label_keys,omitempty"` // 适用标签
Attributes []models.TagFilter `json:"attributes,omitempty"` // 适用属性
// 内部使用,解析后的过滤器
parsedLabelKeys []models.TagFilter `json:"-"`
parsedAttributes []models.TagFilter `json:"-"`
}
func init() {
models.RegisterProcessor("logic.if", &IfConfig{})
}
func (c *IfConfig) Init(settings interface{}) (models.Processor, error) {
result, err := common.InitProcessor[*IfConfig](settings)
if err != nil {
return nil, err
}
// 解析标签过滤器
if len(result.LabelKeys) > 0 {
// Deep copy to avoid concurrent map writes on cached objects
labelKeysCopy := make([]models.TagFilter, len(result.LabelKeys))
copy(labelKeysCopy, result.LabelKeys)
for i := range labelKeysCopy {
if labelKeysCopy[i].Func == "" {
labelKeysCopy[i].Func = labelKeysCopy[i].Op
}
}
result.parsedLabelKeys, err = models.ParseTagFilter(labelKeysCopy)
if err != nil {
return nil, fmt.Errorf("failed to parse label_keys: %v", err)
}
}
// 解析属性过滤器
if len(result.Attributes) > 0 {
// Deep copy to avoid concurrent map writes on cached objects
attributesCopy := make([]models.TagFilter, len(result.Attributes))
copy(attributesCopy, result.Attributes)
for i := range attributesCopy {
if attributesCopy[i].Func == "" {
attributesCopy[i].Func = attributesCopy[i].Op
}
}
result.parsedAttributes, err = models.ParseTagFilter(attributesCopy)
if err != nil {
return nil, fmt.Errorf("failed to parse attributes: %v", err)
}
}
return result, nil
}
// Process 实现 Processor 接口(兼容旧模式)
func (c *IfConfig) Process(ctx *ctx.Context, wfCtx *models.WorkflowContext) (*models.WorkflowContext, string, error) {
result, err := c.evaluateCondition(wfCtx)
if err != nil {
return wfCtx, "", fmt.Errorf("if processor: failed to evaluate condition: %v", err)
}
if result {
return wfCtx, "condition matched (true branch)", nil
}
return wfCtx, "condition not matched (false branch)", nil
}
// ProcessWithBranch 实现 BranchProcessor 接口
func (c *IfConfig) ProcessWithBranch(ctx *ctx.Context, wfCtx *models.WorkflowContext) (*models.NodeOutput, error) {
result, err := c.evaluateCondition(wfCtx)
if err != nil {
return nil, fmt.Errorf("if processor: failed to evaluate condition: %v", err)
}
output := &models.NodeOutput{
WfCtx: wfCtx,
}
if result {
// 条件为 true走输出 0true 分支)
branchIndex := 0
output.BranchIndex = &branchIndex
output.Message = "condition matched (true branch)"
} else {
// 条件为 false走输出 1false 分支)
branchIndex := 1
output.BranchIndex = &branchIndex
output.Message = "condition not matched (false branch)"
}
return output, nil
}
// evaluateCondition 评估条件
func (c *IfConfig) evaluateCondition(wfCtx *models.WorkflowContext) (bool, error) {
mode := c.Mode
if mode == "" {
mode = ConditionModeExpression // 默认表达式模式
}
switch mode {
case ConditionModeTags:
return c.evaluateTagsCondition(wfCtx.Event)
default:
return c.evaluateExpressionCondition(wfCtx)
}
}
// evaluateExpressionCondition 评估表达式条件
func (c *IfConfig) evaluateExpressionCondition(wfCtx *models.WorkflowContext) (bool, error) {
if c.Condition == "" {
return true, nil
}
// 构建模板数据
var defs = []string{
"{{ $event := .Event }}",
"{{ $labels := .Event.TagsMap }}",
"{{ $value := .Event.TriggerValue }}",
"{{ $inputs := .Inputs }}",
}
text := strings.Join(append(defs, c.Condition), "")
tpl, err := template.New("if_condition").Funcs(tplx.TemplateFuncMap).Parse(text)
if err != nil {
return false, err
}
var buf bytes.Buffer
if err = tpl.Execute(&buf, wfCtx); err != nil {
return false, err
}
result := strings.TrimSpace(strings.ToLower(buf.String()))
return result == "true" || result == "1", nil
}
// evaluateTagsCondition 评估标签/属性条件
func (c *IfConfig) evaluateTagsCondition(event *models.AlertCurEvent) (bool, error) {
// 如果没有配置任何过滤条件,默认返回 true
if len(c.parsedLabelKeys) == 0 && len(c.parsedAttributes) == 0 {
return true, nil
}
// 匹配标签 (TagsMap)
if len(c.parsedLabelKeys) > 0 {
tagsMap := event.TagsMap
if tagsMap == nil {
tagsMap = make(map[string]string)
}
if !alertCommon.MatchTags(tagsMap, c.parsedLabelKeys) {
return false, nil
}
}
// 匹配属性 (JsonTagsAndValue - 所有 JSON 字段)
if len(c.parsedAttributes) > 0 {
attributesMap := event.JsonTagsAndValue()
if !alertCommon.MatchTags(attributesMap, c.parsedAttributes) {
return false, nil
}
}
return true, nil
}

View File

@@ -0,0 +1,224 @@
package logic
import (
"bytes"
"fmt"
"strings"
"text/template"
alertCommon "github.com/ccfos/nightingale/v6/alert/common"
"github.com/ccfos/nightingale/v6/alert/pipeline/processor/common"
"github.com/ccfos/nightingale/v6/models"
"github.com/ccfos/nightingale/v6/pkg/ctx"
"github.com/ccfos/nightingale/v6/pkg/tplx"
)
// SwitchCase Switch 分支定义
type SwitchCase struct {
// 判断模式expression表达式或 tags标签/属性)
Mode string `json:"mode,omitempty"`
// 表达式模式配置
// 条件表达式(支持 Go 模板语法)
Condition string `json:"condition,omitempty"`
// 标签/属性模式配置
LabelKeys []models.TagFilter `json:"label_keys,omitempty"` // 适用标签
Attributes []models.TagFilter `json:"attributes,omitempty"` // 适用属性
// 分支名称(可选,用于日志)
Name string `json:"name,omitempty"`
// 内部使用,解析后的过滤器
parsedLabelKeys []models.TagFilter `json:"-"`
parsedAttributes []models.TagFilter `json:"-"`
}
// SwitchConfig Switch 多分支处理器配置
type SwitchConfig struct {
// 分支条件列表
// 按顺序匹配,第一个为 true 的分支将被选中
Cases []SwitchCase `json:"cases"`
// 是否允许多个分支同时匹配(默认 false只走第一个匹配的
AllowMultiple bool `json:"allow_multiple,omitempty"`
}
func init() {
models.RegisterProcessor("logic.switch", &SwitchConfig{})
}
func (c *SwitchConfig) Init(settings interface{}) (models.Processor, error) {
result, err := common.InitProcessor[*SwitchConfig](settings)
if err != nil {
return nil, err
}
// 解析每个 case 的标签和属性过滤器
for i := range result.Cases {
if len(result.Cases[i].LabelKeys) > 0 {
// Deep copy to avoid concurrent map writes on cached objects
labelKeysCopy := make([]models.TagFilter, len(result.Cases[i].LabelKeys))
copy(labelKeysCopy, result.Cases[i].LabelKeys)
for j := range labelKeysCopy {
if labelKeysCopy[j].Func == "" {
labelKeysCopy[j].Func = labelKeysCopy[j].Op
}
}
result.Cases[i].parsedLabelKeys, err = models.ParseTagFilter(labelKeysCopy)
if err != nil {
return nil, fmt.Errorf("failed to parse label_keys for case[%d]: %v", i, err)
}
}
if len(result.Cases[i].Attributes) > 0 {
// Deep copy to avoid concurrent map writes on cached objects
attributesCopy := make([]models.TagFilter, len(result.Cases[i].Attributes))
copy(attributesCopy, result.Cases[i].Attributes)
for j := range attributesCopy {
if attributesCopy[j].Func == "" {
attributesCopy[j].Func = attributesCopy[j].Op
}
}
result.Cases[i].parsedAttributes, err = models.ParseTagFilter(attributesCopy)
if err != nil {
return nil, fmt.Errorf("failed to parse attributes for case[%d]: %v", i, err)
}
}
}
return result, nil
}
// Process 实现 Processor 接口(兼容旧模式)
func (c *SwitchConfig) Process(ctx *ctx.Context, wfCtx *models.WorkflowContext) (*models.WorkflowContext, string, error) {
index, caseName, err := c.evaluateCases(wfCtx)
if err != nil {
return wfCtx, "", fmt.Errorf("switch processor: failed to evaluate cases: %v", err)
}
if index >= 0 {
if caseName != "" {
return wfCtx, fmt.Sprintf("matched case[%d]: %s", index, caseName), nil
}
return wfCtx, fmt.Sprintf("matched case[%d]", index), nil
}
// 走默认分支(最后一个输出)
return wfCtx, "no case matched, using default branch", nil
}
// ProcessWithBranch 实现 BranchProcessor 接口
func (c *SwitchConfig) ProcessWithBranch(ctx *ctx.Context, wfCtx *models.WorkflowContext) (*models.NodeOutput, error) {
index, caseName, err := c.evaluateCases(wfCtx)
if err != nil {
return nil, fmt.Errorf("switch processor: failed to evaluate cases: %v", err)
}
output := &models.NodeOutput{
WfCtx: wfCtx,
}
if index >= 0 {
output.BranchIndex = &index
if caseName != "" {
output.Message = fmt.Sprintf("matched case[%d]: %s", index, caseName)
} else {
output.Message = fmt.Sprintf("matched case[%d]", index)
}
} else {
// 默认分支的索引是 cases 数量(即最后一个输出端口)
defaultIndex := len(c.Cases)
output.BranchIndex = &defaultIndex
output.Message = "no case matched, using default branch"
}
return output, nil
}
// evaluateCases 评估所有分支条件
// 返回匹配的分支索引和分支名称,如果没有匹配返回 -1
func (c *SwitchConfig) evaluateCases(wfCtx *models.WorkflowContext) (int, string, error) {
for i := range c.Cases {
matched, err := c.evaluateCaseCondition(&c.Cases[i], wfCtx)
if err != nil {
return -1, "", fmt.Errorf("case[%d] evaluation error: %v", i, err)
}
if matched {
return i, c.Cases[i].Name, nil
}
}
return -1, "", nil
}
// evaluateCaseCondition 评估单个分支条件
func (c *SwitchConfig) evaluateCaseCondition(caseItem *SwitchCase, wfCtx *models.WorkflowContext) (bool, error) {
mode := caseItem.Mode
if mode == "" {
mode = ConditionModeExpression // 默认表达式模式
}
switch mode {
case ConditionModeTags:
return c.evaluateTagsCondition(caseItem, wfCtx.Event)
default:
return c.evaluateExpressionCondition(caseItem.Condition, wfCtx)
}
}
// evaluateExpressionCondition 评估表达式条件
func (c *SwitchConfig) evaluateExpressionCondition(condition string, wfCtx *models.WorkflowContext) (bool, error) {
if condition == "" {
return false, nil
}
var defs = []string{
"{{ $event := .Event }}",
"{{ $labels := .Event.TagsMap }}",
"{{ $value := .Event.TriggerValue }}",
"{{ $inputs := .Inputs }}",
}
text := strings.Join(append(defs, condition), "")
tpl, err := template.New("switch_condition").Funcs(tplx.TemplateFuncMap).Parse(text)
if err != nil {
return false, err
}
var buf bytes.Buffer
if err = tpl.Execute(&buf, wfCtx); err != nil {
return false, err
}
result := strings.TrimSpace(strings.ToLower(buf.String()))
return result == "true" || result == "1", nil
}
// evaluateTagsCondition 评估标签/属性条件
func (c *SwitchConfig) evaluateTagsCondition(caseItem *SwitchCase, event *models.AlertCurEvent) (bool, error) {
// 如果没有配置任何过滤条件,默认返回 false不匹配
if len(caseItem.parsedLabelKeys) == 0 && len(caseItem.parsedAttributes) == 0 {
return false, nil
}
// 匹配标签 (TagsMap)
if len(caseItem.parsedLabelKeys) > 0 {
tagsMap := event.TagsMap
if tagsMap == nil {
tagsMap = make(map[string]string)
}
if !alertCommon.MatchTags(tagsMap, caseItem.parsedLabelKeys) {
return false, nil
}
}
// 匹配属性 (JsonTagsAndValue - 所有 JSON 字段)
if len(caseItem.parsedAttributes) > 0 {
attributesMap := event.JsonTagsAndValue()
if !alertCommon.MatchTags(attributesMap, caseItem.parsedAttributes) {
return false, nil
}
}
return true, nil
}

View File

@@ -42,7 +42,7 @@ func (r *RelabelConfig) Init(settings interface{}) (models.Processor, error) {
return result, err
}
func (r *RelabelConfig) Process(ctx *ctx.Context, event *models.AlertCurEvent) *models.AlertCurEvent {
func (r *RelabelConfig) Process(ctx *ctx.Context, wfCtx *models.WorkflowContext) (*models.WorkflowContext, string, error) {
sourceLabels := make([]model.LabelName, len(r.SourceLabels))
for i := range r.SourceLabels {
sourceLabels[i] = model.LabelName(strings.ReplaceAll(r.SourceLabels[i], ".", REPLACE_DOT))
@@ -63,8 +63,8 @@ func (r *RelabelConfig) Process(ctx *ctx.Context, event *models.AlertCurEvent) *
},
}
EventRelabel(event, relabelConfigs)
return event
EventRelabel(wfCtx.Event, relabelConfigs)
return wfCtx, "", nil
}
func EventRelabel(event *models.AlertCurEvent, relabelConfigs []*pconf.RelabelConfig) {

View File

@@ -0,0 +1,32 @@
package utils
import (
"bytes"
"fmt"
"strings"
"text/template"
"github.com/ccfos/nightingale/v6/models"
"github.com/ccfos/nightingale/v6/pkg/tplx"
)
func TplRender(wfCtx *models.WorkflowContext, content string) (string, error) {
var defs = []string{
"{{ $event := .Event }}",
"{{ $labels := .Event.TagsMap }}",
"{{ $value := .Event.TriggerValue }}",
"{{ $inputs := .Inputs }}",
}
text := strings.Join(append(defs, content), "")
tpl, err := template.New("tpl").Funcs(tplx.TemplateFuncMap).Parse(text)
if err != nil {
return "", fmt.Errorf("failed to parse template: %v", err)
}
var body bytes.Buffer
if err = tpl.Execute(&body, wfCtx); err != nil {
return "", fmt.Errorf("failed to execute template: %v", err)
}
return strings.TrimSpace(body.String()), nil
}

View File

@@ -26,8 +26,6 @@ import (
"github.com/toolkits/pkg/str"
)
type EventMuteHookFunc func(event *models.AlertCurEvent) bool
type ExternalProcessorsType struct {
ExternalLock sync.RWMutex
Processors map[string]*Processor
@@ -76,7 +74,6 @@ type Processor struct {
HandleFireEventHook HandleEventFunc
HandleRecoverEventHook HandleEventFunc
EventMuteHook EventMuteHookFunc
ScheduleEntry cron.Entry
PromEvalInterval int
@@ -121,7 +118,6 @@ func NewProcessor(engineName string, rule *models.AlertRule, datasourceId int64,
HandleFireEventHook: func(event *models.AlertCurEvent) {},
HandleRecoverEventHook: func(event *models.AlertCurEvent) {},
EventMuteHook: func(event *models.AlertCurEvent) bool { return false },
}
p.mayHandleGroup()
@@ -135,7 +131,7 @@ func (p *Processor) Handle(anomalyPoints []models.AnomalyPoint, from string, inh
p.inhibit = inhibit
cachedRule := p.alertRuleCache.Get(p.rule.Id)
if cachedRule == nil {
logger.Errorf("rule not found %+v", anomalyPoints)
logger.Warningf("alert_eval_%d datasource_%d handle error: rule not found, maybe rule has been deleted, anomalyPoints:%+v", p.rule.Id, p.datasourceId, anomalyPoints)
p.Stats.CounterRuleEvalErrorTotal.WithLabelValues(fmt.Sprintf("%v", p.DatasourceId()), "handle_event", p.BusiGroupCache.GetNameByBusiGroupId(p.rule.GroupId), fmt.Sprintf("%v", p.rule.Id)).Inc()
return
}
@@ -155,9 +151,19 @@ func (p *Processor) Handle(anomalyPoints []models.AnomalyPoint, from string, inh
// 如果 event 被 mute 了,本质也是 fire 的状态,这里无论如何都添加到 alertingKeys 中,防止 fire 的事件自动恢复了
hash := event.Hash
alertingKeys[hash] = struct{}{}
// event processor
eventCopy := event.DeepCopy()
event = dispatch.HandleEventPipeline(cachedRule.PipelineConfigs, eventCopy, event, dispatch.EventProcessorCache, p.ctx, cachedRule.Id, "alert_rule")
if event == nil {
logger.Infof("alert_eval_%d datasource_%d is muted drop by pipeline event:%s", p.rule.Id, p.datasourceId, eventCopy.Hash)
continue
}
// event mute
isMuted, detail, muteId := mute.IsMuted(cachedRule, event, p.TargetCache, p.alertMuteCache)
if isMuted {
logger.Debugf("rule_eval:%s event:%v is muted, detail:%s", p.Key(), event, detail)
logger.Infof("alert_eval_%d datasource_%d is muted, detail:%s event:%s", p.rule.Id, p.datasourceId, detail, event.Hash)
p.Stats.CounterMuteTotal.WithLabelValues(
fmt.Sprintf("%v", event.GroupName),
fmt.Sprintf("%v", p.rule.Id),
@@ -167,8 +173,8 @@ func (p *Processor) Handle(anomalyPoints []models.AnomalyPoint, from string, inh
continue
}
if p.EventMuteHook(event) {
logger.Debugf("rule_eval:%s event:%v is muted by hook", p.Key(), event)
if dispatch.EventMuteHook(event) {
logger.Infof("alert_eval_%d datasource_%d is muted by hook event:%s", p.rule.Id, p.datasourceId, event.Hash)
p.Stats.CounterMuteTotal.WithLabelValues(
fmt.Sprintf("%v", event.GroupName),
fmt.Sprintf("%v", p.rule.Id),
@@ -241,7 +247,7 @@ func (p *Processor) BuildEvent(anomalyPoint models.AnomalyPoint, from string, no
if err := json.Unmarshal([]byte(p.rule.Annotations), &event.AnnotationsJSON); err != nil {
event.AnnotationsJSON = make(map[string]string) // 解析失败时使用空 map
logger.Warningf("unmarshal annotations json failed: %v, rule: %d", err, p.rule.Id)
logger.Warningf("alert_eval_%d datasource_%d unmarshal annotations json failed: %v", p.rule.Id, p.datasourceId, err)
}
if event.TriggerValues != "" && strings.Count(event.TriggerValues, "$") > 1 {
@@ -266,7 +272,7 @@ func (p *Processor) BuildEvent(anomalyPoint models.AnomalyPoint, from string, no
pt.GroupNames = p.BusiGroupCache.GetNamesByBusiGroupIds(pt.GroupIds)
event.Target = pt
} else {
logger.Infof("fill event target error, ident: %s doesn't exist in cache.", event.TargetIdent)
logger.Infof("alert_eval_%d datasource_%d fill event target error, ident: %s doesn't exist in cache.", p.rule.Id, p.datasourceId, event.TargetIdent)
}
}
@@ -365,19 +371,19 @@ func (p *Processor) RecoverSingle(byRecover bool, hash string, now int64, value
lastPendingEvent, has := p.pendingsUseByRecover.Get(hash)
if !has {
// 说明没有产生过异常点,就不需要恢复了
logger.Debugf("rule_eval:%s event:%v do not has pending event, not recover", p.Key(), event)
logger.Debugf("alert_eval_%d datasource_%d event:%s do not has pending event, not recover", p.rule.Id, p.datasourceId, event.Hash)
return
}
if now-lastPendingEvent.LastEvalTime < cachedRule.RecoverDuration {
logger.Debugf("rule_eval:%s event:%v not recover", p.Key(), event)
logger.Debugf("alert_eval_%d datasource_%d event:%s not recover", p.rule.Id, p.datasourceId, event.Hash)
return
}
}
// 如果设置了恢复条件,则不能在此处恢复,必须依靠 recoverPoint 来恢复
if event.RecoverConfig.JudgeType != models.Origin && !byRecover {
logger.Debugf("rule_eval:%s event:%v not recover", p.Key(), event)
logger.Debugf("alert_eval_%d datasource_%d event:%s not recover", p.rule.Id, p.datasourceId, event.Hash)
return
}
@@ -428,17 +434,18 @@ func (p *Processor) handleEvent(events []*models.AlertCurEvent) {
continue
}
var preTriggerTime int64 // 第一个 pending event 的触发时间
var preEvalTime int64 // 第一个 pending event 的检测时间
preEvent, has := p.pendings.Get(event.Hash)
if has {
p.pendings.UpdateLastEvalTime(event.Hash, event.LastEvalTime)
preTriggerTime = preEvent.TriggerTime
preEvalTime = preEvent.FirstEvalTime
} else {
event.FirstEvalTime = event.LastEvalTime
p.pendings.Set(event.Hash, event)
preTriggerTime = event.TriggerTime
preEvalTime = event.FirstEvalTime
}
if event.LastEvalTime-preTriggerTime+int64(event.PromEvalInterval) >= int64(p.rule.PromForDuration) {
if event.LastEvalTime-preEvalTime+int64(event.PromEvalInterval) >= int64(p.rule.PromForDuration) {
fireEvents = append(fireEvents, event)
if severity > event.Severity {
severity = event.Severity
@@ -453,7 +460,7 @@ func (p *Processor) handleEvent(events []*models.AlertCurEvent) {
func (p *Processor) inhibitEvent(events []*models.AlertCurEvent, highSeverity int) {
for _, event := range events {
if p.inhibit && event.Severity > highSeverity {
logger.Debugf("rule_eval:%s event:%+v inhibit highSeverity:%d", p.Key(), event, highSeverity)
logger.Debugf("alert_eval_%d datasource_%d event:%s inhibit highSeverity:%d", p.rule.Id, p.datasourceId, event.Hash, highSeverity)
continue
}
p.fireEvent(event)
@@ -467,16 +474,18 @@ func (p *Processor) fireEvent(event *models.AlertCurEvent) {
return
}
logger.Debugf("rule_eval:%s event:%+v fire", p.Key(), event)
message := "unknown"
defer func() {
logger.Infof("alert_eval_%d datasource_%d event-hash-%s %s", p.rule.Id, p.datasourceId, event.Hash, message)
}()
if fired, has := p.fires.Get(event.Hash); has {
p.fires.UpdateLastEvalTime(event.Hash, event.LastEvalTime)
event.FirstTriggerTime = fired.FirstTriggerTime
p.HandleFireEventHook(event)
if cachedRule.NotifyRepeatStep == 0 {
logger.Debugf("rule_eval:%s event:%+v repeat is zero nothing to do", p.Key(), event)
// 说明不想重复通知那就直接返回了nothing to do
// do not need to send alert again
message = "stalled, rule.notify_repeat_step is 0, no need to repeat notify"
return
}
@@ -485,21 +494,26 @@ func (p *Processor) fireEvent(event *models.AlertCurEvent) {
if cachedRule.NotifyMaxNumber == 0 {
// 最大可以发送次数如果是0表示不想限制最大发送次数一直发即可
event.NotifyCurNumber = fired.NotifyCurNumber + 1
message = fmt.Sprintf("fired, notify_repeat_step_matched(%d >= %d + %d * 60) notify_max_number_ignore(#%d / %d)", event.LastEvalTime, fired.LastSentTime, cachedRule.NotifyRepeatStep, event.NotifyCurNumber, cachedRule.NotifyMaxNumber)
p.pushEventToQueue(event)
} else {
// 有最大发送次数的限制,就要看已经发了几次了,是否达到了最大发送次数
if fired.NotifyCurNumber >= cachedRule.NotifyMaxNumber {
logger.Debugf("rule_eval:%s event:%+v reach max number", p.Key(), event)
message = fmt.Sprintf("stalled, notify_repeat_step_matched(%d >= %d + %d * 60) notify_max_number_not_matched(#%d / %d)", event.LastEvalTime, fired.LastSentTime, cachedRule.NotifyRepeatStep, fired.NotifyCurNumber, cachedRule.NotifyMaxNumber)
return
} else {
event.NotifyCurNumber = fired.NotifyCurNumber + 1
message = fmt.Sprintf("fired, notify_repeat_step_matched(%d >= %d + %d * 60) notify_max_number_matched(#%d / %d)", event.LastEvalTime, fired.LastSentTime, cachedRule.NotifyRepeatStep, event.NotifyCurNumber, cachedRule.NotifyMaxNumber)
p.pushEventToQueue(event)
}
}
} else {
message = fmt.Sprintf("stalled, notify_repeat_step_not_matched(%d < %d + %d * 60)", event.LastEvalTime, fired.LastSentTime, cachedRule.NotifyRepeatStep)
}
} else {
event.NotifyCurNumber = 1
event.FirstTriggerTime = event.TriggerTime
message = fmt.Sprintf("fired, first_trigger_time: %d", event.FirstTriggerTime)
p.HandleFireEventHook(event)
p.pushEventToQueue(event)
}
@@ -513,7 +527,7 @@ func (p *Processor) pushEventToQueue(e *models.AlertCurEvent) {
dispatch.LogEvent(e, "push_queue")
if !queue.EventQueue.PushFront(e) {
logger.Warningf("event_push_queue: queue is full, event:%+v", e)
logger.Warningf("alert_eval_%d datasource_%d event_push_queue: queue is full, event:%s", p.rule.Id, p.datasourceId, e.Hash)
p.Stats.CounterRuleEvalErrorTotal.WithLabelValues(fmt.Sprintf("%v", p.DatasourceId()), "push_event_queue", p.BusiGroupCache.GetNameByBusiGroupId(p.rule.GroupId), fmt.Sprintf("%v", p.rule.Id)).Inc()
}
}
@@ -524,7 +538,7 @@ func (p *Processor) RecoverAlertCurEventFromDb() {
curEvents, err := models.AlertCurEventGetByRuleIdAndDsId(p.ctx, p.rule.Id, p.datasourceId)
if err != nil {
logger.Errorf("recover event from db for rule:%s failed, err:%s", p.Key(), err)
logger.Errorf("alert_eval_%d datasource_%d recover event from db failed, err:%s", p.rule.Id, p.datasourceId, err)
p.Stats.CounterRuleEvalErrorTotal.WithLabelValues(fmt.Sprintf("%v", p.DatasourceId()), "get_recover_event", p.BusiGroupCache.GetNameByBusiGroupId(p.rule.GroupId), fmt.Sprintf("%v", p.rule.Id)).Inc()
p.fires = NewAlertCurEventMap(nil)
return
@@ -577,7 +591,9 @@ func (p *Processor) fillTags(anomalyPoint models.AnomalyPoint) {
}
// handle rule tags
for _, tag := range p.rule.AppendTagsJSON {
tags := p.rule.AppendTagsJSON
tags = append(tags, "rulename="+p.rule.Name)
for _, tag := range tags {
arr := strings.SplitN(tag, "=", 2)
var defs = []string{
@@ -603,8 +619,6 @@ func (p *Processor) fillTags(anomalyPoint models.AnomalyPoint) {
tagsMap[arr[0]] = body.String()
}
tagsMap["rulename"] = p.rule.Name
p.tagsMap = tagsMap
// handle tagsArr

View File

@@ -22,10 +22,11 @@ type Router struct {
AlertStats *astats.Stats
Ctx *ctx.Context
ExternalProcessors *process.ExternalProcessorsType
LogDir string
}
func New(httpConfig httpx.Config, alert aconf.Alert, amc *memsto.AlertMuteCacheType, tc *memsto.TargetCacheType, bgc *memsto.BusiGroupCacheType,
astats *astats.Stats, ctx *ctx.Context, externalProcessors *process.ExternalProcessorsType) *Router {
astats *astats.Stats, ctx *ctx.Context, externalProcessors *process.ExternalProcessorsType, logDir string) *Router {
return &Router{
HTTP: httpConfig,
Alert: alert,
@@ -35,6 +36,7 @@ func New(httpConfig httpx.Config, alert aconf.Alert, amc *memsto.AlertMuteCacheT
AlertStats: astats,
Ctx: ctx,
ExternalProcessors: externalProcessors,
LogDir: logDir,
}
}
@@ -50,6 +52,9 @@ func (rt *Router) Config(r *gin.Engine) {
service.POST("/event", rt.pushEventToQueue)
service.POST("/event-persist", rt.eventPersist)
service.POST("/make-event", rt.makeEvent)
service.GET("/event-detail/:hash", rt.eventDetail)
service.GET("/alert-eval-detail/:id", rt.alertEvalDetail)
service.GET("/trace-logs/:traceid", rt.traceLogs)
}
func Render(c *gin.Context, data, msg interface{}) {

View File

@@ -0,0 +1,28 @@
package router
import (
"fmt"
"github.com/ccfos/nightingale/v6/pkg/loggrep"
"github.com/ccfos/nightingale/v6/pkg/ginx"
"github.com/gin-gonic/gin"
)
func (rt *Router) alertEvalDetail(c *gin.Context) {
id := ginx.UrlParamStr(c, "id")
if !loggrep.IsValidRuleID(id) {
ginx.Bomb(200, "invalid rule id format")
}
instance := fmt.Sprintf("%s:%d", rt.Alert.Heartbeat.IP, rt.HTTP.Port)
keyword := fmt.Sprintf("alert_eval_%s", id)
logs, err := loggrep.GrepLogDir(rt.LogDir, keyword)
ginx.Dangerous(err)
ginx.NewRender(c).Data(loggrep.EventDetailResp{
Logs: logs,
Instance: instance,
}, nil)
}

View File

@@ -13,9 +13,9 @@ import (
"github.com/ccfos/nightingale/v6/alert/queue"
"github.com/ccfos/nightingale/v6/models"
"github.com/ccfos/nightingale/v6/pkg/poster"
"github.com/ccfos/nightingale/v6/pkg/ginx"
"github.com/gin-gonic/gin"
"github.com/toolkits/pkg/ginx"
"github.com/toolkits/pkg/logger"
)
@@ -25,6 +25,7 @@ func (rt *Router) pushEventToQueue(c *gin.Context) {
if event.RuleId == 0 {
ginx.Bomb(200, "event is illegal")
}
event.FE2DB()
event.TagsMap = make(map[string]string)
for i := 0; i < len(event.TagsJSON); i++ {
@@ -40,7 +41,7 @@ func (rt *Router) pushEventToQueue(c *gin.Context) {
event.TagsMap[arr[0]] = arr[1]
}
hit, _ := mute.EventMuteStrategy(event, rt.AlertMuteCache)
hit, _ := mute.EventMuteStrategy(event, rt.AlertMuteCache)
if hit {
logger.Infof("event_muted: rule_id=%d %s", event.RuleId, event.Hash)
ginx.NewRender(c).Message(nil)
@@ -74,7 +75,7 @@ func (rt *Router) pushEventToQueue(c *gin.Context) {
dispatch.LogEvent(event, "http_push_queue")
if !queue.EventQueue.PushFront(event) {
msg := fmt.Sprintf("event:%+v push_queue err: queue is full", event)
msg := fmt.Sprintf("event:%s push_queue err: queue is full", event.Hash)
ginx.Bomb(200, msg)
logger.Warningf(msg)
}
@@ -104,21 +105,21 @@ func (rt *Router) makeEvent(c *gin.Context) {
for i := 0; i < len(events); i++ {
node, err := naming.DatasourceHashRing.GetNode(strconv.FormatInt(events[i].DatasourceId, 10), fmt.Sprintf("%d", events[i].RuleId))
if err != nil {
logger.Warningf("event:%+v get node err:%v", events[i], err)
logger.Warningf("event(rule_id=%d ds_id=%d) get node err:%v", events[i].RuleId, events[i].DatasourceId, err)
ginx.Bomb(200, "event node not exists")
}
if node != rt.Alert.Heartbeat.Endpoint {
err := forwardEvent(events[i], node)
if err != nil {
logger.Warningf("event:%+v forward err:%v", events[i], err)
logger.Warningf("event(rule_id=%d ds_id=%d) forward err:%v", events[i].RuleId, events[i].DatasourceId, err)
ginx.Bomb(200, "event forward error")
}
continue
}
ruleWorker, exists := rt.ExternalProcessors.GetExternalAlertRule(events[i].DatasourceId, events[i].RuleId)
logger.Debugf("handle event:%+v exists:%v", events[i], exists)
logger.Debugf("handle event(rule_id=%d ds_id=%d) exists:%v", events[i].RuleId, events[i].DatasourceId, exists)
if !exists {
ginx.Bomb(200, "rule not exists")
}
@@ -142,6 +143,6 @@ func forwardEvent(event *eventForm, instance string) error {
if err != nil {
return err
}
logger.Infof("forward event: result=succ url=%s code=%d event:%v response=%s", ur, code, event, string(res))
logger.Infof("forward event: result=succ url=%s code=%d rule_id=%d response=%s", ur, code, event.RuleId, string(res))
return nil
}

View File

@@ -0,0 +1,27 @@
package router
import (
"fmt"
"github.com/ccfos/nightingale/v6/pkg/loggrep"
"github.com/ccfos/nightingale/v6/pkg/ginx"
"github.com/gin-gonic/gin"
)
func (rt *Router) eventDetail(c *gin.Context) {
hash := ginx.UrlParamStr(c, "hash")
if !loggrep.IsValidHash(hash) {
ginx.Bomb(200, "invalid hash format")
}
instance := fmt.Sprintf("%s:%d", rt.Alert.Heartbeat.IP, rt.HTTP.Port)
logs, err := loggrep.GrepLogDir(rt.LogDir, hash)
ginx.Dangerous(err)
ginx.NewRender(c).Data(loggrep.EventDetailResp{
Logs: logs,
Instance: instance,
}, nil)
}

View File

@@ -0,0 +1,28 @@
package router
import (
"fmt"
"github.com/ccfos/nightingale/v6/pkg/ginx"
"github.com/ccfos/nightingale/v6/pkg/loggrep"
"github.com/gin-gonic/gin"
)
func (rt *Router) traceLogs(c *gin.Context) {
traceId := ginx.UrlParamStr(c, "traceid")
if !loggrep.IsValidTraceID(traceId) {
ginx.Bomb(200, "invalid trace id format")
}
instance := fmt.Sprintf("%s:%d", rt.Alert.Heartbeat.IP, rt.HTTP.Port)
keyword := "trace_id=" + traceId
logs, err := loggrep.GrepLatestLogFiles(rt.LogDir, keyword)
ginx.Dangerous(err)
ginx.NewRender(c).Data(loggrep.EventDetailResp{
Logs: logs,
Instance: instance,
}, nil)
}

View File

@@ -1,6 +1,7 @@
package sender
import (
"fmt"
"html/template"
"net/url"
"strings"
@@ -140,7 +141,7 @@ func doSendAndRecord(ctx *ctx.Context, url, token string, body interface{}, chan
func NotifyRecord(ctx *ctx.Context, evts []*models.AlertCurEvent, notifyRuleID int64, channel, target, res string, err error) {
// 一个通知可能对应多个 event都需要记录
notis := make([]*models.NotificaitonRecord, 0, len(evts))
notis := make([]*models.NotificationRecord, 0, len(evts))
for _, evt := range evts {
noti := models.NewNotificationRecord(evt, notifyRuleID, channel, target)
if err != nil {
@@ -166,11 +167,13 @@ func NotifyRecord(ctx *ctx.Context, evts []*models.AlertCurEvent, notifyRuleID i
func doSend(url string, body interface{}, channel string, stats *astats.Stats) (string, error) {
stats.AlertNotifyTotal.WithLabelValues(channel).Inc()
start := time.Now()
res, code, err := poster.PostJSON(url, time.Second*5, body, 3)
res = []byte(fmt.Sprintf("duration: %d ms status_code:%d, response:%s", time.Since(start).Milliseconds(), code, string(res)))
if err != nil {
logger.Errorf("%s_sender: result=fail url=%s code=%d error=%v req:%v response=%s", channel, url, code, err, body, string(res))
stats.AlertNotifyErrorTotal.WithLabelValues(channel).Inc()
return "", err
return string(res), err
}
logger.Infof("%s_sender: result=succ url=%s code=%d req:%v response=%s", channel, url, code, body, string(res))
@@ -202,6 +205,6 @@ func PushCallbackEvent(ctx *ctx.Context, webhook *models.Webhook, event *models.
succ := queue.eventQueue.Push(event)
if !succ {
logger.Warningf("Write channel(%s) full, current channel size: %d event:%v", webhook.Url, queue.eventQueue.Len(), event)
logger.Warningf("Write channel(%s) full, current channel size: %d event:%s", webhook.Url, queue.eventQueue.Len(), event.Hash)
}
}

View File

@@ -141,7 +141,7 @@ func updateSmtp(ctx *ctx.Context, ncc *memsto.NotifyConfigCacheType) {
func startEmailSender(ctx *ctx.Context, smtp aconf.SMTPConfig) {
conf := smtp
if conf.Host == "" || conf.Port == 0 {
logger.Warning("SMTP configurations invalid")
logger.Debug("SMTP configurations invalid")
<-mailQuit
return
}

View File

@@ -30,14 +30,14 @@ type IbexCallBacker struct {
func (c *IbexCallBacker) CallBack(ctx CallBackContext) {
if len(ctx.CallBackURL) == 0 || len(ctx.Events) == 0 {
logger.Warningf("event_callback_ibex: url or events is empty, url: %s, events: %+v", ctx.CallBackURL, ctx.Events)
logger.Warningf("event_callback_ibex: url or events is empty, url: %s", ctx.CallBackURL)
return
}
event := ctx.Events[0]
if event.IsRecovered {
logger.Infof("event_callback_ibex: event is recovered, event: %+v", event)
logger.Infof("event_callback_ibex: event is recovered, event: %s", event.Hash)
return
}
@@ -45,9 +45,9 @@ func (c *IbexCallBacker) CallBack(ctx CallBackContext) {
}
func (c *IbexCallBacker) handleIbex(ctx *ctx.Context, url string, event *models.AlertCurEvent) {
logger.Infof("event_callback_ibex: url: %s, event: %+v", url, event)
logger.Infof("event_callback_ibex: url: %s, event: %s", url, event.Hash)
if imodels.DB() == nil && ctx.IsCenter {
logger.Warningf("event_callback_ibex: db is nil, event: %+v", event)
logger.Warningf("event_callback_ibex: db is nil, event: %s", event.Hash)
return
}
@@ -66,7 +66,7 @@ func (c *IbexCallBacker) handleIbex(ctx *ctx.Context, url string, event *models.
id, err := strconv.ParseInt(idstr, 10, 64)
if err != nil {
logger.Errorf("event_callback_ibex: failed to parse url: %s event: %+v", url, event)
logger.Errorf("event_callback_ibex: failed to parse url: %s event: %s", url, event.Hash)
return
}
@@ -82,34 +82,37 @@ func (c *IbexCallBacker) handleIbex(ctx *ctx.Context, url string, event *models.
}
if host == "" {
logger.Errorf("event_callback_ibex: failed to get host, id: %d, event: %+v", id, event)
logger.Errorf("event_callback_ibex: failed to get host, id: %d, event: %s", id, event.Hash)
return
}
CallIbex(ctx, id, host, c.taskTplCache, c.targetCache, c.userCache, event)
CallIbex(ctx, id, host, c.taskTplCache, c.targetCache, c.userCache, event, "")
}
func CallIbex(ctx *ctx.Context, id int64, host string,
taskTplCache *memsto.TaskTplCache, targetCache *memsto.TargetCacheType,
userCache *memsto.UserCacheType, event *models.AlertCurEvent) {
logger.Infof("event_callback_ibex: id: %d, host: %s, event: %+v", id, host, event)
userCache *memsto.UserCacheType, event *models.AlertCurEvent, args string) (int64, error) {
logger.Infof("event_callback_ibex: id: %d, host: %s, args: %s, event: %s", id, host, args, event.Hash)
tpl := taskTplCache.Get(id)
if tpl == nil {
logger.Errorf("event_callback_ibex: no such tpl(%d), event: %+v", id, event)
return
err := fmt.Errorf("event_callback_ibex: no such tpl(%d), event: %s", id, event.Hash)
logger.Errorf("%s", err)
return 0, err
}
// check perm
// tpl.GroupId - host - account 三元组校验权限
can, err := canDoIbex(tpl.UpdateBy, tpl, host, targetCache, userCache)
can, err := CanDoIbex(tpl.UpdateBy, tpl, host, targetCache, userCache)
if err != nil {
logger.Errorf("event_callback_ibex: check perm fail: %v, event: %+v", err, event)
return
err = fmt.Errorf("event_callback_ibex: check perm fail: %v, event: %s", err, event.Hash)
logger.Errorf("%s", err)
return 0, err
}
if !can {
logger.Errorf("event_callback_ibex: user(%s) no permission, event: %+v", tpl.UpdateBy, event)
return
err = fmt.Errorf("event_callback_ibex: user(%s) no permission, event: %s", tpl.UpdateBy, event.Hash)
logger.Errorf("%s", err)
return 0, err
}
tagsMap := make(map[string]string)
@@ -133,11 +136,16 @@ func CallIbex(ctx *ctx.Context, id int64, host string,
tags, err := json.Marshal(tagsMap)
if err != nil {
logger.Errorf("event_callback_ibex: failed to marshal tags to json: %v, event: %+v", tagsMap, event)
return
err = fmt.Errorf("event_callback_ibex: failed to marshal tags to json: %v, event: %s", tagsMap, event.Hash)
logger.Errorf("%s", err)
return 0, err
}
// call ibex
taskArgs := tpl.Args
if args != "" {
taskArgs = args
}
in := models.TaskForm{
Title: tpl.Title + " FH: " + host,
Account: tpl.Account,
@@ -146,7 +154,7 @@ func CallIbex(ctx *ctx.Context, id int64, host string,
Timeout: tpl.Timeout,
Pause: tpl.Pause,
Script: tpl.Script,
Args: tpl.Args,
Args: taskArgs,
Stdin: string(tags),
Action: "start",
Creator: tpl.UpdateBy,
@@ -156,8 +164,9 @@ func CallIbex(ctx *ctx.Context, id int64, host string,
id, err = TaskAdd(in, tpl.UpdateBy, ctx.IsCenter)
if err != nil {
logger.Errorf("event_callback_ibex: call ibex fail: %v, event: %+v", err, event)
return
err = fmt.Errorf("event_callback_ibex: call ibex fail: %v, event: %s", err, event.Hash)
logger.Errorf("%s", err)
return 0, err
}
// write db
@@ -178,11 +187,14 @@ func CallIbex(ctx *ctx.Context, id int64, host string,
}
if err = record.Add(ctx); err != nil {
logger.Errorf("event_callback_ibex: persist task_record fail: %v, event: %+v", err, event)
err = fmt.Errorf("event_callback_ibex: persist task_record fail: %v, event: %s", err, event.Hash)
logger.Errorf("%s", err)
return id, err
}
return id, nil
}
func canDoIbex(username string, tpl *models.TaskTpl, host string, targetCache *memsto.TargetCacheType, userCache *memsto.UserCacheType) (bool, error) {
func CanDoIbex(username string, tpl *models.TaskTpl, host string, targetCache *memsto.TargetCacheType, userCache *memsto.UserCacheType) (bool, error) {
user := userCache.GetByUsername(username)
if user != nil && user.IsAdmin() {
return true, nil

View File

@@ -24,7 +24,7 @@ func ReportNotifyRecordQueueSize(stats *astats.Stats) {
// 推送通知记录到队列
// 若队列满 则返回 error
func PushNotifyRecords(records []*models.NotificaitonRecord) error {
func PushNotifyRecords(records []*models.NotificationRecord) error {
for _, record := range records {
if ok := NotifyRecordQueue.PushFront(record); !ok {
logger.Warningf("notify record queue is full, record: %+v", record)
@@ -59,16 +59,16 @@ func (c *NotifyRecordConsumer) LoopConsume() {
}
// 类型转换,不然 CreateInBatches 会报错
notis := make([]*models.NotificaitonRecord, 0, len(inotis))
notis := make([]*models.NotificationRecord, 0, len(inotis))
for _, inoti := range inotis {
notis = append(notis, inoti.(*models.NotificaitonRecord))
notis = append(notis, inoti.(*models.NotificationRecord))
}
c.consume(notis)
}
}
func (c *NotifyRecordConsumer) consume(notis []*models.NotificaitonRecord) {
func (c *NotifyRecordConsumer) consume(notis []*models.NotificationRecord) {
if err := models.DB(c.ctx).CreateInBatches(notis, 100).Error; err != nil {
logger.Errorf("add notis:%v failed, err: %v", notis, err)
}

View File

@@ -35,7 +35,7 @@ func alertingCallScript(ctx *ctx.Context, stdinBytes []byte, notifyScript models
channel := "script"
stats.AlertNotifyTotal.WithLabelValues(channel).Inc()
fpath := ".notify_scriptt"
fpath := ".notify_script"
if config.Type == 1 {
fpath = config.Content
} else {
@@ -79,6 +79,7 @@ func alertingCallScript(ctx *ctx.Context, stdinBytes []byte, notifyScript models
cmd.Stdout = &buf
cmd.Stderr = &buf
start := time.Now()
err := startCmd(cmd)
if err != nil {
logger.Errorf("event_script_notify_fail: run cmd err: %v", err)
@@ -88,6 +89,7 @@ func alertingCallScript(ctx *ctx.Context, stdinBytes []byte, notifyScript models
err, isTimeout := sys.WrapTimeout(cmd, time.Duration(config.Timeout)*time.Second)
res := buf.String()
res = fmt.Sprintf("send_time: %s duration: %d ms %s", time.Now().Format("2006-01-02 15:04:05"), time.Since(start).Milliseconds(), res)
// 截断超出长度的输出
if len(res) > 512 {

View File

@@ -13,10 +13,53 @@ import (
"github.com/ccfos/nightingale/v6/alert/astats"
"github.com/ccfos/nightingale/v6/models"
"github.com/ccfos/nightingale/v6/pkg/ctx"
"github.com/ccfos/nightingale/v6/pkg/poster"
"github.com/toolkits/pkg/logger"
)
// webhookClientCache 缓存 http.Client避免每次请求都创建新的 Client 导致连接泄露
var webhookClientCache sync.Map // key: clientKey (string), value: *http.Client
// 相同配置的 webhook 会复用同一个 Client
func getWebhookClient(webhook *models.Webhook) *http.Client {
clientKey := webhook.Hash()
if client, ok := webhookClientCache.Load(clientKey); ok {
return client.(*http.Client)
}
// 创建新的 Client
transport := &http.Transport{
TLSClientConfig: &tls.Config{InsecureSkipVerify: webhook.SkipVerify},
MaxIdleConns: 100,
MaxIdleConnsPerHost: 10,
IdleConnTimeout: 90 * time.Second,
}
if poster.UseProxy(webhook.Url) {
transport.Proxy = http.ProxyFromEnvironment
}
timeout := webhook.Timeout
if timeout <= 0 {
timeout = 10
}
newClient := &http.Client{
Timeout: time.Duration(timeout) * time.Second,
Transport: transport,
}
// 使用 LoadOrStore 确保并发安全,避免重复创建
actual, loaded := webhookClientCache.LoadOrStore(clientKey, newClient)
if loaded {
return actual.(*http.Client)
}
return newClient
}
func sendWebhook(webhook *models.Webhook, event interface{}, stats *astats.Stats) (bool, string, error) {
channel := "webhook"
if webhook.Type == models.RuleCallback {
@@ -29,7 +72,7 @@ func sendWebhook(webhook *models.Webhook, event interface{}, stats *astats.Stats
}
bs, err := json.Marshal(event)
if err != nil {
logger.Errorf("%s alertingWebhook failed to marshal event:%+v err:%v", channel, event, err)
logger.Errorf("%s alertingWebhook failed to marshal event err:%v", channel, err)
return false, "", err
}
@@ -37,7 +80,7 @@ func sendWebhook(webhook *models.Webhook, event interface{}, stats *astats.Stats
req, err := http.NewRequest("POST", conf.Url, bf)
if err != nil {
logger.Warningf("%s alertingWebhook failed to new reques event:%s err:%v", channel, string(bs), err)
logger.Warningf("%s alertingWebhook failed to new request event:%s err:%v", channel, string(bs), err)
return true, "", err
}
@@ -55,25 +98,13 @@ func sendWebhook(webhook *models.Webhook, event interface{}, stats *astats.Stats
req.Header.Set(conf.Headers[i], conf.Headers[i+1])
}
}
insecureSkipVerify := false
if webhook != nil {
insecureSkipVerify = webhook.SkipVerify
}
if conf.Client == nil {
logger.Warningf("event_%s, event:%s, url: [%s], error: [%s]", channel, string(bs), conf.Url, "client is nil")
conf.Client = &http.Client{
Timeout: time.Duration(conf.Timeout) * time.Second,
Transport: &http.Transport{
TLSClientConfig: &tls.Config{InsecureSkipVerify: insecureSkipVerify},
},
}
}
// 使用全局 Client 缓存,避免每次请求都创建新的 Client 导致连接泄露
client := getWebhookClient(conf)
stats.AlertNotifyTotal.WithLabelValues(channel).Inc()
var resp *http.Response
var body []byte
resp, err = conf.Client.Do(req)
resp, err = client.Do(req)
if err != nil {
stats.AlertNotifyErrorTotal.WithLabelValues(channel).Inc()
@@ -88,18 +119,20 @@ func sendWebhook(webhook *models.Webhook, event interface{}, stats *astats.Stats
if resp.StatusCode == 429 {
logger.Errorf("event_%s_fail, url: %s, response code: %d, body: %s event:%s", channel, conf.Url, resp.StatusCode, string(body), string(bs))
return true, string(body), fmt.Errorf("status code is 429")
return true, fmt.Sprintf("status_code:%d, response:%s", resp.StatusCode, string(body)), fmt.Errorf("status code is 429")
}
logger.Debugf("event_%s_succ, url: %s, response code: %d, body: %s event:%s", channel, conf.Url, resp.StatusCode, string(body), string(bs))
return false, string(body), nil
return false, fmt.Sprintf("status_code:%d, response:%s", resp.StatusCode, string(body)), nil
}
func SingleSendWebhooks(ctx *ctx.Context, webhooks map[string]*models.Webhook, event *models.AlertCurEvent, stats *astats.Stats) {
for _, conf := range webhooks {
retryCount := 0
for retryCount < 3 {
start := time.Now()
needRetry, res, err := sendWebhook(conf, event, stats)
res = fmt.Sprintf("send_time: %s duration: %d ms %s", time.Now().Format("2006-01-02 15:04:05"), time.Since(start).Milliseconds(), res)
NotifyRecord(ctx, []*models.AlertCurEvent{event}, 0, "webhook", conf.Url, res, err)
if !needRetry {
break
@@ -112,7 +145,7 @@ func SingleSendWebhooks(ctx *ctx.Context, webhooks map[string]*models.Webhook, e
func BatchSendWebhooks(ctx *ctx.Context, webhooks map[string]*models.Webhook, event *models.AlertCurEvent, stats *astats.Stats) {
for _, conf := range webhooks {
logger.Infof("push event:%+v to queue:%v", event, conf)
logger.Infof("push event:%s to queue:%v", event.Hash, conf)
PushEvent(ctx, conf, event, stats)
}
}
@@ -150,7 +183,7 @@ func PushEvent(ctx *ctx.Context, webhook *models.Webhook, event *models.AlertCur
succ := queue.eventQueue.Push(event)
if !succ {
stats.AlertNotifyErrorTotal.WithLabelValues("push_event_queue").Inc()
logger.Warningf("Write channel(%s) full, current channel size: %d event:%v", webhook.Url, queue.eventQueue.Len(), event)
logger.Warningf("Write channel(%s) full, current channel size: %d event:%s", webhook.Url, queue.eventQueue.Len(), event.Hash)
}
}
@@ -169,7 +202,9 @@ func StartConsumer(ctx *ctx.Context, queue *WebhookQueue, popSize int, webhook *
retryCount := 0
for retryCount < webhook.RetryCount {
start := time.Now()
needRetry, res, err := sendWebhook(webhook, events, stats)
res = fmt.Sprintf("send_time: %s duration: %d ms %s", time.Now().Format("2006-01-02 15:04:05"), time.Since(start).Milliseconds(), res)
go NotifyRecord(ctx, events, 0, "webhook", webhook.Url, res, err)
if !needRetry {
break

View File

@@ -1,20 +1,26 @@
package cconf
import "time"
import (
"time"
"github.com/ccfos/nightingale/v6/pkg/httpx"
)
type Center struct {
Plugins []Plugin
MetricsYamlFile string
OpsYamlFile string
BuiltinIntegrationsDir string
I18NHeaderKey string
MetricDesc MetricDescType
AnonymousAccess AnonymousAccess
UseFileAssets bool
FlashDuty FlashDuty
EventHistoryGroupView bool
CleanNotifyRecordDay int
MigrateBusiGroupLabel bool
Plugins []Plugin
MetricsYamlFile string
OpsYamlFile string
BuiltinIntegrationsDir string
I18NHeaderKey string
MetricDesc MetricDescType
AnonymousAccess AnonymousAccess
UseFileAssets bool
FlashDuty FlashDuty
EventHistoryGroupView bool
CleanNotifyRecordDay int
CleanPipelineExecutionDay int
MigrateBusiGroupLabel bool
RSA httpx.RSAConfig
}
type Plugin struct {

View File

@@ -43,4 +43,22 @@ var Plugins = []Plugin{
Type: "pgsql",
TypeName: "PostgreSQL",
},
{
Id: 8,
Category: "logging",
Type: "doris",
TypeName: "Doris",
},
{
Id: 9,
Category: "logging",
Type: "opensearch",
TypeName: "OpenSearch",
},
{
Id: 10,
Category: "logging",
Type: "victorialogs",
TypeName: "VictoriaLogs",
},
}

View File

@@ -2,10 +2,13 @@ package center
import (
"context"
"encoding/json"
"fmt"
"github.com/ccfos/nightingale/v6/dscache"
"github.com/toolkits/pkg/logger"
"github.com/ccfos/nightingale/v6/alert"
"github.com/ccfos/nightingale/v6/alert/astats"
"github.com/ccfos/nightingale/v6/alert/dispatch"
@@ -96,6 +99,9 @@ func Initialize(configDir string, cryptoKey string) (func(), error) {
models.MigrateEP(ctx)
}
// 初始化 siteUrl如果为空则设置默认值
InitSiteUrl(ctx, config.Alert.Heartbeat.IP, config.HTTP.Port)
configCache := memsto.NewConfigCache(ctx, syncStats, config.HTTP.RSA.RSAPrivateKey, config.HTTP.RSA.RSAPassWord)
busiGroupCache := memsto.NewBusiGroupCache(ctx, syncStats)
targetCache := memsto.NewTargetCache(ctx, syncStats, redis)
@@ -121,18 +127,19 @@ func Initialize(configDir string, cryptoKey string) (func(), error) {
macros.RegisterMacro(macros.MacroInVain)
dscache.Init(ctx, false)
alert.Start(config.Alert, config.Pushgw, syncStats, alertStats, externalProcessors, targetCache, busiGroupCache, alertMuteCache, alertRuleCache, notifyConfigCache, taskTplCache, dsCache, ctx, promClients, userCache, userGroupCache, notifyRuleCache, notifyChannelCache, messageTemplateCache)
alert.Start(config.Alert, config.Pushgw, syncStats, alertStats, externalProcessors, targetCache, busiGroupCache, alertMuteCache, alertRuleCache, notifyConfigCache, taskTplCache, dsCache, ctx, promClients, userCache, userGroupCache, notifyRuleCache, notifyChannelCache, messageTemplateCache, configCvalCache)
writers := writer.NewWriters(config.Pushgw)
go version.GetGithubVersion()
go cron.CleanNotifyRecord(ctx, config.Center.CleanNotifyRecordDay)
go cron.CleanPipelineExecution(ctx, config.Center.CleanPipelineExecutionDay)
alertrtRouter := alertrt.New(config.HTTP, config.Alert, alertMuteCache, targetCache, busiGroupCache, alertStats, ctx, externalProcessors)
alertrtRouter := alertrt.New(config.HTTP, config.Alert, alertMuteCache, targetCache, busiGroupCache, alertStats, ctx, externalProcessors, config.Log.Dir)
centerRouter := centerrt.New(config.HTTP, config.Center, config.Alert, config.Ibex,
cconf.Operations, dsCache, notifyConfigCache, promClients,
redis, sso, ctx, metas, idents, targetCache, userCache, userGroupCache, userTokenCache)
redis, sso, ctx, metas, idents, targetCache, userCache, userGroupCache, userTokenCache, config.Log.Dir)
pushgwRouter := pushgwrt.New(config.HTTP, config.Pushgw, config.Alert, targetCache, busiGroupCache, idents, metas, writers, ctx)
r := httpx.GinEngine(config.Global.RunMode, config.HTTP, configCvalCache.PrintBodyPaths, configCvalCache.PrintAccessLog)
@@ -159,3 +166,67 @@ func Initialize(configDir string, cryptoKey string) (func(), error) {
httpClean()
}, nil
}
// initSiteUrl 初始化 site_info 中的 site_url如果为空则使用服务器IP和端口设置默认值
func InitSiteUrl(ctx *ctx.Context, serverIP string, serverPort int) {
// 构造默认的 SiteUrl
defaultSiteUrl := fmt.Sprintf("http://%s:%d", serverIP, serverPort)
// 获取现有的 site_info 配置
siteInfoStr, err := models.ConfigsGet(ctx, "site_info")
if err != nil {
logger.Errorf("failed to get site_info config: %v", err)
return
}
// 如果 site_info 不存在,创建新的
if siteInfoStr == "" {
newSiteInfo := memsto.SiteInfo{
SiteUrl: defaultSiteUrl,
}
siteInfoBytes, err := json.Marshal(newSiteInfo)
if err != nil {
logger.Errorf("failed to marshal site_info: %v", err)
return
}
err = models.ConfigsSet(ctx, "site_info", string(siteInfoBytes))
if err != nil {
logger.Errorf("failed to set site_info: %v", err)
return
}
logger.Infof("initialized site_url with default value: %s", defaultSiteUrl)
return
}
// 检查现有的 site_info 中的 site_url 字段
var existingSiteInfo memsto.SiteInfo
err = json.Unmarshal([]byte(siteInfoStr), &existingSiteInfo)
if err != nil {
logger.Errorf("failed to unmarshal site_info: %v", err)
return
}
// 如果 site_url 已经有值,则不需要初始化
if existingSiteInfo.SiteUrl != "" {
return
}
// 设置 site_url
existingSiteInfo.SiteUrl = defaultSiteUrl
siteInfoBytes, err := json.Marshal(existingSiteInfo)
if err != nil {
logger.Errorf("failed to marshal updated site_info: %v", err)
return
}
err = models.ConfigsSet(ctx, "site_info", string(siteInfoBytes))
if err != nil {
logger.Errorf("failed to update site_info: %v", err)
return
}
logger.Infof("initialized site_url with default value: %s", defaultSiteUrl)
}

View File

@@ -3,11 +3,15 @@ package integration
import (
"encoding/json"
"path"
"sort"
"strings"
"time"
"github.com/ccfos/nightingale/v6/models"
"github.com/ccfos/nightingale/v6/pkg/ctx"
"github.com/pkg/errors"
"github.com/toolkits/pkg/container/set"
"github.com/toolkits/pkg/file"
"github.com/toolkits/pkg/logger"
"github.com/toolkits/pkg/runner"
@@ -15,7 +19,18 @@ import (
const SYSTEM = "system"
var BuiltinPayloadInFile *BuiltinPayloadInFileType
type BuiltinPayloadInFileType struct {
Data map[uint64]map[string]map[string][]*models.BuiltinPayload // map[component_id]map[type]map[cate][]*models.BuiltinPayload
IndexData map[int64]*models.BuiltinPayload // map[uuid]payload
BuiltinMetrics map[string]*models.BuiltinMetric
}
func Init(ctx *ctx.Context, builtinIntegrationsDir string) {
BuiltinPayloadInFile = NewBuiltinPayloadInFileType()
err := models.InitBuiltinPayloads(ctx)
if err != nil {
logger.Warning("init old builtinPayloads fail ", err)
@@ -109,13 +124,13 @@ func Init(ctx *ctx.Context, builtinIntegrationsDir string) {
component.ID = old.ID
}
// delete uuid is emtpy
// delete uuid is empty
err = models.DB(ctx).Exec("delete from builtin_payloads where uuid = 0 and type != 'collect' and (updated_by = 'system' or updated_by = '')").Error
if err != nil {
logger.Warning("delete builtin payloads fail ", err)
}
// delete builtin metrics uuid is emtpy
// delete builtin metrics uuid is empty
err = models.DB(ctx).Exec("delete from builtin_metrics where uuid = 0 and (updated_by = 'system' or updated_by = '')").Error
if err != nil {
logger.Warning("delete builtin metrics fail ", err)
@@ -146,11 +161,10 @@ func Init(ctx *ctx.Context, builtinIntegrationsDir string) {
}
newAlerts := []models.AlertRule{}
writeAlertFileFlag := false
for _, alert := range alerts {
if alert.UUID == 0 {
writeAlertFileFlag = true
alert.UUID = time.Now().UnixNano()
time.Sleep(time.Microsecond)
alert.UUID = time.Now().UnixMicro()
}
newAlerts = append(newAlerts, alert)
@@ -169,47 +183,13 @@ func Init(ctx *ctx.Context, builtinIntegrationsDir string) {
Tags: alert.AppendTags,
Content: string(content),
UUID: alert.UUID,
ID: alert.UUID,
CreatedBy: SYSTEM,
UpdatedBy: SYSTEM,
}
BuiltinPayloadInFile.AddBuiltinPayload(&builtinAlert)
old, err := models.BuiltinPayloadGet(ctx, "uuid = ?", alert.UUID)
if err != nil {
logger.Warning("get builtin alert fail ", builtinAlert, err)
continue
}
if old == nil {
err := builtinAlert.Add(ctx, SYSTEM)
if err != nil {
logger.Warning("add builtin alert fail ", builtinAlert, err)
}
continue
}
if old.UpdatedBy == SYSTEM {
old.ComponentID = component.ID
old.Content = string(content)
old.Name = alert.Name
old.Tags = alert.AppendTags
err = models.DB(ctx).Model(old).Select("*").Updates(old).Error
if err != nil {
logger.Warningf("update builtin alert:%+v fail %v", builtinAlert, err)
}
}
}
if writeAlertFileFlag {
bs, err = json.MarshalIndent(newAlerts, "", " ")
if err != nil {
logger.Warning("marshal builtin alerts fail ", newAlerts, err)
continue
}
_, err = file.WriteBytes(fp, bs)
if err != nil {
logger.Warning("write builtin alerts file fail ", f, err)
}
}
}
}
@@ -259,34 +239,14 @@ func Init(ctx *ctx.Context, builtinIntegrationsDir string) {
Cate: "",
Name: dashboard.Name,
Tags: dashboard.Tags,
Note: dashboard.Note,
Content: string(content),
UUID: dashboard.UUID,
ID: dashboard.UUID,
CreatedBy: SYSTEM,
UpdatedBy: SYSTEM,
}
old, err := models.BuiltinPayloadGet(ctx, "uuid = ?", dashboard.UUID)
if err != nil {
logger.Warning("get builtin alert fail ", builtinDashboard, err)
continue
}
if old == nil {
err := builtinDashboard.Add(ctx, SYSTEM)
if err != nil {
logger.Warning("add builtin alert fail ", builtinDashboard, err)
}
continue
}
if old.UpdatedBy == SYSTEM {
old.ComponentID = component.ID
old.Content = string(content)
old.Name = dashboard.Name
old.Tags = dashboard.Tags
err = models.DB(ctx).Model(old).Select("*").Updates(old).Error
if err != nil {
logger.Warningf("update builtin alert:%+v fail %v", builtinDashboard, err)
}
}
BuiltinPayloadInFile.AddBuiltinPayload(&builtinDashboard)
}
} else if err != nil {
logger.Warningf("read builtin component dash dir fail %s %v", component.Ident, err)
@@ -304,64 +264,21 @@ func Init(ctx *ctx.Context, builtinIntegrationsDir string) {
}
metrics := []models.BuiltinMetric{}
newMetrics := []models.BuiltinMetric{}
err = json.Unmarshal(bs, &metrics)
if err != nil {
logger.Warning("parse builtin component metrics file fail", f, err)
continue
}
writeMetricFileFlag := false
for _, metric := range metrics {
if metric.UUID == 0 {
writeMetricFileFlag = true
metric.UUID = time.Now().UnixNano()
}
newMetrics = append(newMetrics, metric)
time.Sleep(time.Microsecond)
metric.UUID = time.Now().UnixMicro()
metric.ID = metric.UUID
metric.CreatedBy = SYSTEM
metric.UpdatedBy = SYSTEM
old, err := models.BuiltinMetricGet(ctx, "uuid = ?", metric.UUID)
if err != nil {
logger.Warning("get builtin metrics fail ", metric, err)
continue
}
if old == nil {
err := metric.Add(ctx, SYSTEM)
if err != nil {
logger.Warning("add builtin metrics fail ", metric, err)
}
continue
}
if old.UpdatedBy == SYSTEM {
old.Collector = metric.Collector
old.Typ = metric.Typ
old.Name = metric.Name
old.Unit = metric.Unit
old.Note = metric.Note
old.Lang = metric.Lang
old.Expression = metric.Expression
err = models.DB(ctx).Model(old).Select("*").Updates(old).Error
if err != nil {
logger.Warningf("update builtin metric:%+v fail %v", metric, err)
}
}
BuiltinPayloadInFile.BuiltinMetrics[metric.Expression] = &metric
}
if writeMetricFileFlag {
bs, err = json.MarshalIndent(newMetrics, "", " ")
if err != nil {
logger.Warning("marshal builtin metrics fail ", newMetrics, err)
continue
}
_, err = file.WriteBytes(fp, bs)
if err != nil {
logger.Warning("write builtin metrics file fail ", f, err)
}
}
}
} else if err != nil {
logger.Warningf("read builtin component metrics dir fail %s %v", component.Ident, err)
@@ -375,6 +292,7 @@ type BuiltinBoard struct {
Name string `json:"name"`
Ident string `json:"ident"`
Tags string `json:"tags"`
Note string `json:"note"`
CreateAt int64 `json:"create_at"`
CreateBy string `json:"create_by"`
UpdateAt int64 `json:"update_at"`
@@ -387,3 +305,346 @@ type BuiltinBoard struct {
Hide int `json:"hide"` // 0: false, 1: true
UUID int64 `json:"uuid"`
}
func NewBuiltinPayloadInFileType() *BuiltinPayloadInFileType {
return &BuiltinPayloadInFileType{
Data: make(map[uint64]map[string]map[string][]*models.BuiltinPayload),
IndexData: make(map[int64]*models.BuiltinPayload),
BuiltinMetrics: make(map[string]*models.BuiltinMetric),
}
}
func (b *BuiltinPayloadInFileType) AddBuiltinPayload(bp *models.BuiltinPayload) {
if _, exists := b.Data[bp.ComponentID]; !exists {
b.Data[bp.ComponentID] = make(map[string]map[string][]*models.BuiltinPayload)
}
bpInType := b.Data[bp.ComponentID]
if _, exists := bpInType[bp.Type]; !exists {
bpInType[bp.Type] = make(map[string][]*models.BuiltinPayload)
}
bpInCate := bpInType[bp.Type]
if _, exists := bpInCate[bp.Cate]; !exists {
bpInCate[bp.Cate] = make([]*models.BuiltinPayload, 0)
}
bpInCate[bp.Cate] = append(bpInCate[bp.Cate], bp)
b.IndexData[bp.UUID] = bp
}
func (b *BuiltinPayloadInFileType) GetComponentIdentByCate(typ, cate string) string {
for _, source := range b.Data {
if source == nil {
continue
}
typeMap, exists := source[typ]
if !exists {
continue
}
payloads, exists := typeMap[cate]
if !exists {
continue
}
if len(payloads) > 0 {
return payloads[0].Component
}
}
return ""
}
func (b *BuiltinPayloadInFileType) GetBuiltinPayload(typ, cate, query string, componentId uint64) ([]*models.BuiltinPayload, error) {
var result []*models.BuiltinPayload
source := b.Data[componentId]
if source == nil {
return nil, nil
}
typeMap, exists := source[typ]
if !exists {
return nil, nil
}
if cate != "" {
payloads, exists := typeMap[cate]
if !exists {
return nil, nil
}
result = append(result, filterByQuery(payloads, query)...)
} else {
for _, payloads := range typeMap {
result = append(result, filterByQuery(payloads, query)...)
}
}
if len(result) > 0 {
sort.Slice(result, func(i, j int) bool {
return result[i].Name < result[j].Name
})
}
return result, nil
}
func (b *BuiltinPayloadInFileType) GetBuiltinPayloadCates(typ string, componentId uint64) ([]string, error) {
var result []string
source := b.Data[componentId]
if source == nil {
return result, nil
}
typeData := source[typ]
if typeData == nil {
return result, nil
}
for cate := range typeData {
result = append(result, cate)
}
sort.Strings(result)
return result, nil
}
func filterByQuery(payloads []*models.BuiltinPayload, query string) []*models.BuiltinPayload {
if query == "" {
return payloads
}
queryLower := strings.ToLower(query)
var filtered []*models.BuiltinPayload
for _, p := range payloads {
if strings.Contains(strings.ToLower(p.Name), queryLower) || strings.Contains(strings.ToLower(p.Tags), queryLower) {
filtered = append(filtered, p)
}
}
return filtered
}
func (b *BuiltinPayloadInFileType) BuiltinMetricGets(metricsInDB []*models.BuiltinMetric, lang, collector, typ, query, unit string, limit, offset int) ([]*models.BuiltinMetric, int, error) {
var filteredMetrics []*models.BuiltinMetric
expressionSet := set.NewStringSet()
builtinMetricsByDB := convertBuiltinMetricByDB(metricsInDB)
builtinMetricsMap := make(map[string]*models.BuiltinMetric)
for expression, metric := range builtinMetricsByDB {
builtinMetricsMap[expression] = metric
}
for expression, metric := range b.BuiltinMetrics {
builtinMetricsMap[expression] = metric
}
for _, metric := range builtinMetricsMap {
if !applyFilter(metric, collector, typ, query, unit) {
continue
}
// Skip if expression is already in db cache
// NOTE: 忽略重复的expression特别的在旧版本中用户可能已经创建了重复的metrics需要覆盖掉ByFile中相同的Metrics
// NOTE: Ignore duplicate expressions, especially in the old version, users may have created duplicate metrics,
if expressionSet.Exists(metric.Expression) {
continue
}
// Add db expression in set.
expressionSet.Add(metric.Expression)
// Apply language
trans, err := getTranslationWithLanguage(metric, lang)
if err != nil {
logger.Errorf("Error getting translation for metric %s: %v", metric.Name, err)
continue // Skip if translation not found
}
metric.Name = trans.Name
metric.Note = trans.Note
filteredMetrics = append(filteredMetrics, metric)
}
// Sort metrics
sort.Slice(filteredMetrics, func(i, j int) bool {
if filteredMetrics[i].Collector != filteredMetrics[j].Collector {
return filteredMetrics[i].Collector < filteredMetrics[j].Collector
}
if filteredMetrics[i].Typ != filteredMetrics[j].Typ {
return filteredMetrics[i].Typ < filteredMetrics[j].Typ
}
return filteredMetrics[i].Expression < filteredMetrics[j].Expression
})
totalCount := len(filteredMetrics)
// Validate parameters
if offset < 0 {
offset = 0
}
if limit < 0 {
limit = 0
}
// Handle edge cases
if offset >= totalCount || limit == 0 {
return []*models.BuiltinMetric{}, totalCount, nil
}
// Apply pagination
end := offset + limit
if end > totalCount {
end = totalCount
}
return filteredMetrics[offset:end], totalCount, nil
}
func (b *BuiltinPayloadInFileType) BuiltinMetricTypes(lang, collector, query string) []string {
typeSet := set.NewStringSet()
for _, metric := range b.BuiltinMetrics {
if !applyFilter(metric, collector, "", query, "") {
continue
}
typeSet.Add(metric.Typ)
}
return typeSet.ToSlice()
}
func (b *BuiltinPayloadInFileType) BuiltinMetricCollectors(lang, typ, query string) []string {
collectorSet := set.NewStringSet()
for _, metric := range b.BuiltinMetrics {
if !applyFilter(metric, "", typ, query, "") {
continue
}
collectorSet.Add(metric.Collector)
}
return collectorSet.ToSlice()
}
func applyFilter(metric *models.BuiltinMetric, collector, typ, query, unit string) bool {
if collector != "" && collector != metric.Collector {
return false
}
if typ != "" && typ != metric.Typ {
return false
}
if unit != "" && !containsUnit(unit, metric.Unit) {
return false
}
if query != "" && !applyQueryFilter(metric, query) {
return false
}
return true
}
func containsUnit(unit, metricUnit string) bool {
us := strings.Split(unit, ",")
for _, u := range us {
if u == metricUnit {
return true
}
}
return false
}
func applyQueryFilter(metric *models.BuiltinMetric, query string) bool {
qs := strings.Split(query, " ")
for _, q := range qs {
if strings.HasPrefix(q, "-") {
q = strings.TrimPrefix(q, "-")
if strings.Contains(metric.Name, q) || strings.Contains(metric.Note, q) || strings.Contains(metric.Expression, q) {
return false
}
} else {
if !strings.Contains(metric.Name, q) && !strings.Contains(metric.Note, q) && !strings.Contains(metric.Expression, q) {
return false
}
}
}
return true
}
func getTranslationWithLanguage(bm *models.BuiltinMetric, lang string) (*models.Translation, error) {
var defaultTranslation *models.Translation
for _, t := range bm.Translation {
if t.Lang == lang {
return &t, nil
}
if t.Lang == "en_US" {
defaultTranslation = &t
}
}
if defaultTranslation != nil {
return defaultTranslation, nil
}
return nil, errors.Errorf("translation not found for metric %s", bm.Name)
}
func convertBuiltinMetricByDB(metricsInDB []*models.BuiltinMetric) map[string]*models.BuiltinMetric {
builtinMetricsByDB := make(map[string]*models.BuiltinMetric)
builtinMetricsByDBList := make(map[string][]*models.BuiltinMetric)
for _, metric := range metricsInDB {
builtinMetrics, ok := builtinMetricsByDBList[metric.Expression]
if !ok {
builtinMetrics = []*models.BuiltinMetric{}
}
builtinMetrics = append(builtinMetrics, metric)
builtinMetricsByDBList[metric.Expression] = builtinMetrics
}
for expression, builtinMetrics := range builtinMetricsByDBList {
if len(builtinMetrics) == 0 {
continue
}
// NOTE: 为兼容旧版本用户已经创建的 metrics同时将修改 metrics 收敛到同一个记录上,
// 我们选择使用 expression 相同但是 id 最小的 metric 记录作为主要的 Metric。
sort.Slice(builtinMetrics, func(i, j int) bool {
return builtinMetrics[i].ID < builtinMetrics[j].ID
})
currentBuiltinMetric := builtinMetrics[0]
// User has no customized translation, so we can merge it
if len(currentBuiltinMetric.Translation) == 0 {
translationMap := make(map[string]models.Translation)
for _, bm := range builtinMetrics {
for _, t := range getDefaultTranslation(bm) {
translationMap[t.Lang] = t
}
}
currentBuiltinMetric.Translation = make([]models.Translation, 0, len(translationMap))
for _, t := range translationMap {
currentBuiltinMetric.Translation = append(currentBuiltinMetric.Translation, t)
}
}
builtinMetricsByDB[expression] = currentBuiltinMetric
}
return builtinMetricsByDB
}
func getDefaultTranslation(bm *models.BuiltinMetric) []models.Translation {
if len(bm.Translation) != 0 {
return bm.Translation
}
return []models.Translation{{
Lang: bm.Lang,
Name: bm.Name,
Note: bm.Note,
}}
}

View File

@@ -118,7 +118,7 @@ func (s *Set) updateTargets(m map[string]models.HostMeta) error {
}
start := time.Now()
err := storage.MSet(context.Background(), s.redis, newMap)
err := storage.MSet(context.Background(), s.redis, newMap, 7*24*time.Hour)
if err != nil {
cstats.RedisOperationLatency.WithLabelValues("mset_target_meta", "fail").Observe(time.Since(start).Seconds())
return err
@@ -127,7 +127,7 @@ func (s *Set) updateTargets(m map[string]models.HostMeta) error {
}
if len(extendMap) > 0 {
err = storage.MSet(context.Background(), s.redis, extendMap)
err = storage.MSet(context.Background(), s.redis, extendMap, 7*24*time.Hour)
if err != nil {
cstats.RedisOperationLatency.WithLabelValues("mset_target_extend", "fail").Observe(time.Since(start).Seconds())
return err

View File

@@ -24,11 +24,11 @@ import (
"github.com/ccfos/nightingale/v6/prom"
"github.com/ccfos/nightingale/v6/pushgw/idents"
"github.com/ccfos/nightingale/v6/storage"
"github.com/ccfos/nightingale/v6/pkg/ginx"
"gorm.io/gorm"
"github.com/gin-gonic/gin"
"github.com/rakyll/statik/fs"
"github.com/toolkits/pkg/ginx"
"github.com/toolkits/pkg/logger"
"github.com/toolkits/pkg/runner"
)
@@ -51,6 +51,7 @@ type Router struct {
UserGroupCache *memsto.UserGroupCacheType
UserTokenCache *memsto.UserTokenCacheType
Ctx *ctx.Context
LogDir string
HeartbeatHook HeartbeatHookFunc
TargetDeleteHook models.TargetDeleteHookFunc
@@ -61,7 +62,7 @@ func New(httpConfig httpx.Config, center cconf.Center, alert aconf.Alert, ibex c
operations cconf.Operation, ds *memsto.DatasourceCacheType, ncc *memsto.NotifyConfigCacheType,
pc *prom.PromClientMap, redis storage.Redis,
sso *sso.SsoClient, ctx *ctx.Context, metaSet *metas.Set, idents *idents.Set,
tc *memsto.TargetCacheType, uc *memsto.UserCacheType, ugc *memsto.UserGroupCacheType, utc *memsto.UserTokenCacheType) *Router {
tc *memsto.TargetCacheType, uc *memsto.UserCacheType, ugc *memsto.UserGroupCacheType, utc *memsto.UserTokenCacheType, logDir string) *Router {
return &Router{
HTTP: httpConfig,
Center: center,
@@ -80,6 +81,7 @@ func New(httpConfig httpx.Config, center cconf.Center, alert aconf.Alert, ibex c
UserGroupCache: ugc,
UserTokenCache: utc,
Ctx: ctx,
LogDir: logDir,
HeartbeatHook: func(ident string) map[string]interface{} { return nil },
TargetDeleteHook: func(tx *gorm.DB, idents []string) error { return nil },
AlertRuleModifyHook: func(ar *models.AlertRule) {},
@@ -177,6 +179,7 @@ func (rt *Router) Config(r *gin.Engine) {
pages := r.Group(pagesPrefix)
{
pages.DELETE("/datasource/series", rt.auth(), rt.admin(), rt.deleteDatasourceSeries)
if rt.Center.AnonymousAccess.PromQuerier {
pages.Any("/proxy/:id/*url", rt.dsProxy)
pages.POST("/query-range-batch", rt.promBatchQueryRange)
@@ -210,8 +213,8 @@ func (rt *Router) Config(r *gin.Engine) {
pages.GET("/datasource/brief", rt.auth(), rt.user(), rt.datasourceBriefs)
pages.POST("/datasource/query", rt.auth(), rt.user(), rt.datasourceQuery)
pages.POST("/ds-query", rt.auth(), rt.QueryData)
pages.POST("/logs-query", rt.auth(), rt.QueryLogV2)
pages.POST("/ds-query", rt.auth(), rt.user(), rt.QueryData)
pages.POST("/logs-query", rt.auth(), rt.user(), rt.QueryLogV2)
pages.POST("/tdengine-databases", rt.auth(), rt.tdengineDatabases)
pages.POST("/tdengine-tables", rt.auth(), rt.tdengineTables)
@@ -231,6 +234,11 @@ func (rt *Router) Config(r *gin.Engine) {
pages.POST("/log-query", rt.QueryLog)
}
// OpenSearch 专用接口
pages.POST("/os-indices", rt.QueryOSIndices)
pages.POST("/os-variable", rt.QueryOSVariable)
pages.POST("/os-fields", rt.QueryOSFields)
pages.GET("/sql-template", rt.QuerySqlTemplate)
pages.POST("/auth/login", rt.jwtMock(), rt.loginPost)
pages.POST("/auth/logout", rt.jwtMock(), rt.auth(), rt.user(), rt.logoutPost)
@@ -244,9 +252,13 @@ func (rt *Router) Config(r *gin.Engine) {
pages.GET("/auth/redirect", rt.loginRedirect)
pages.GET("/auth/redirect/cas", rt.loginRedirectCas)
pages.GET("/auth/redirect/oauth", rt.loginRedirectOAuth)
pages.GET("/auth/redirect/dingtalk", rt.loginRedirectDingTalk)
pages.GET("/auth/redirect/feishu", rt.loginRedirectFeiShu)
pages.GET("/auth/callback", rt.loginCallback)
pages.GET("/auth/callback/cas", rt.loginCallbackCas)
pages.GET("/auth/callback/oauth", rt.loginCallbackOAuth)
pages.GET("/auth/callback/dingtalk", rt.loginCallbackDingTalk)
pages.GET("/auth/callback/feishu", rt.loginCallbackFeiShu)
pages.GET("/auth/perms", rt.allPerms)
pages.GET("/metrics/desc", rt.metricsDescGetFile)
@@ -254,6 +266,7 @@ func (rt *Router) Config(r *gin.Engine) {
pages.GET("/notify-channels", rt.notifyChannelsGets)
pages.GET("/contact-keys", rt.contactKeysGets)
pages.GET("/install-date", rt.installDateGet)
pages.GET("/self/perms", rt.auth(), rt.user(), rt.permsGets)
pages.GET("/self/profile", rt.auth(), rt.user(), rt.selfProfileGet)
@@ -309,6 +322,7 @@ func (rt *Router) Config(r *gin.Engine) {
pages.GET("/busi-groups/tags", rt.auth(), rt.user(), rt.busiGroupsGetTags)
pages.GET("/targets", rt.auth(), rt.user(), rt.targetGets)
pages.POST("/target-update", rt.auth(), rt.targetUpdate)
pages.GET("/target/extra-meta", rt.auth(), rt.user(), rt.targetExtendInfoByIdent)
pages.POST("/target/list", rt.auth(), rt.user(), rt.targetGetsByHostFilter)
pages.DELETE("/targets", rt.auth(), rt.user(), rt.perm("/targets/del"), rt.targetDel)
@@ -356,6 +370,7 @@ func (rt *Router) Config(r *gin.Engine) {
// pages.GET("/alert-rules/builtin/alerts-cates", rt.auth(), rt.user(), rt.builtinAlertCateGets)
// pages.GET("/alert-rules/builtin/list", rt.auth(), rt.user(), rt.builtinAlertRules)
pages.GET("/alert-rules/callbacks", rt.auth(), rt.user(), rt.alertRuleCallbacks)
pages.GET("/timezones", rt.auth(), rt.user(), rt.timezonesGet)
pages.GET("/busi-groups/alert-rules", rt.auth(), rt.user(), rt.perm("/alert-rules"), rt.alertRuleGetsByGids)
pages.GET("/busi-group/:id/alert-rules", rt.auth(), rt.user(), rt.perm("/alert-rules"), rt.alertRuleGets)
@@ -372,13 +387,15 @@ func (rt *Router) Config(r *gin.Engine) {
pages.POST("/relabel-test", rt.auth(), rt.user(), rt.relabelTest)
pages.POST("/busi-group/:id/alert-rules/clone", rt.auth(), rt.user(), rt.perm("/alert-rules/add"), rt.bgrw(), rt.cloneToMachine)
pages.POST("/busi-groups/alert-rules/clones", rt.auth(), rt.user(), rt.perm("/alert-rules/add"), rt.batchAlertRuleClone)
pages.POST("/busi-group/alert-rules/notify-tryrun", rt.auth(), rt.user(), rt.perm("/alert-rules/add"), rt.alertRuleNotifyTryRun)
pages.POST("/busi-group/alert-rules/enable-tryrun", rt.auth(), rt.user(), rt.perm("/alert-rules/add"), rt.alertRuleEnableTryRun)
pages.GET("/busi-groups/recording-rules", rt.auth(), rt.user(), rt.perm("/recording-rules"), rt.recordingRuleGetsByGids)
pages.GET("/busi-group/:id/recording-rules", rt.auth(), rt.user(), rt.perm("/recording-rules"), rt.recordingRuleGets)
pages.POST("/busi-group/:id/recording-rules", rt.auth(), rt.user(), rt.perm("/recording-rules/add"), rt.bgrw(), rt.recordingRuleAddByFE)
pages.DELETE("/busi-group/:id/recording-rules", rt.auth(), rt.user(), rt.perm("/recording-rules/del"), rt.bgrw(), rt.recordingRuleDel)
pages.PUT("/busi-group/:id/recording-rule/:rrid", rt.auth(), rt.user(), rt.perm("/recording-rules/put"), rt.bgrw(), rt.recordingRulePutByFE)
pages.GET("/recording-rule/:rrid", rt.auth(), rt.user(), rt.perm("/recording-rules"), rt.recordingRuleGet)
pages.PUT("/recording-rule/:rrid", rt.auth(), rt.user(), rt.perm("/recording-rules"), rt.recordingRulePutByFE)
pages.PUT("/busi-group/:id/recording-rules/fields", rt.auth(), rt.user(), rt.perm("/recording-rules/put"), rt.recordingRulePutFields)
pages.GET("/busi-groups/alert-mutes", rt.auth(), rt.user(), rt.perm("/alert-mutes"), rt.alertMuteGetsByGids)
@@ -397,10 +414,14 @@ func (rt *Router) Config(r *gin.Engine) {
pages.POST("/busi-group/:id/alert-subscribes", rt.auth(), rt.user(), rt.perm("/alert-subscribes/add"), rt.bgrw(), rt.alertSubscribeAdd)
pages.PUT("/busi-group/:id/alert-subscribes", rt.auth(), rt.user(), rt.perm("/alert-subscribes/put"), rt.bgrw(), rt.alertSubscribePut)
pages.DELETE("/busi-group/:id/alert-subscribes", rt.auth(), rt.user(), rt.perm("/alert-subscribes/del"), rt.bgrw(), rt.alertSubscribeDel)
pages.POST("/alert-subscribe/alert-subscribes-tryrun", rt.auth(), rt.user(), rt.perm("/alert-subscribes/add"), rt.alertSubscribeTryRun)
pages.GET("/alert-cur-event/:eid", rt.alertCurEventGet)
pages.GET("/alert-his-event/:eid", rt.alertHisEventGet)
pages.GET("/event-notify-records/:eid", rt.notificationRecordList)
pages.GET("/event-detail/:hash", rt.eventDetailPage)
pages.GET("/alert-eval-detail/:id", rt.alertEvalDetailPage)
pages.GET("/trace-logs/:traceid", rt.traceLogsPage)
// card logic
pages.GET("/alert-cur-events/list", rt.auth(), rt.user(), rt.alertCurEventsList)
@@ -439,7 +460,7 @@ func (rt *Router) Config(r *gin.Engine) {
pages.POST("/datasource/status/update", rt.auth(), rt.admin(), rt.datasourceUpdataStatus)
pages.DELETE("/datasource/", rt.auth(), rt.admin(), rt.datasourceDel)
pages.GET("/roles", rt.auth(), rt.user(), rt.perm("/roles"), rt.roleGets)
pages.GET("/roles", rt.auth(), rt.user(), rt.roleGets)
pages.POST("/roles", rt.auth(), rt.user(), rt.perm("/roles/add"), rt.roleAdd)
pages.PUT("/roles", rt.auth(), rt.user(), rt.perm("/roles/put"), rt.rolePut)
pages.DELETE("/role/:id", rt.auth(), rt.user(), rt.perm("/roles/del"), rt.roleDel)
@@ -513,10 +534,9 @@ func (rt *Router) Config(r *gin.Engine) {
pages.GET("/builtin-payloads", rt.auth(), rt.user(), rt.builtinPayloadsGets)
pages.GET("/builtin-payloads/cates", rt.auth(), rt.user(), rt.builtinPayloadcatesGet)
pages.POST("/builtin-payloads", rt.auth(), rt.user(), rt.perm("/components/add"), rt.builtinPayloadsAdd)
pages.GET("/builtin-payload/:id", rt.auth(), rt.user(), rt.perm("/components"), rt.builtinPayloadGet)
pages.PUT("/builtin-payloads", rt.auth(), rt.user(), rt.perm("/components/put"), rt.builtinPayloadsPut)
pages.DELETE("/builtin-payloads", rt.auth(), rt.user(), rt.perm("/components/del"), rt.builtinPayloadsDel)
pages.GET("/builtin-payload", rt.auth(), rt.user(), rt.builtinPayloadsGetByUUIDOrID)
pages.GET("/builtin-payload", rt.auth(), rt.user(), rt.builtinPayloadsGetByUUID)
pages.POST("/message-templates", rt.auth(), rt.user(), rt.perm("/notification-templates/add"), rt.messageTemplatesAdd)
pages.DELETE("/message-templates", rt.auth(), rt.user(), rt.perm("/notification-templates/del"), rt.messageTemplatesDel)
@@ -534,6 +554,9 @@ func (rt *Router) Config(r *gin.Engine) {
pages.GET("/notify-rule/custom-params", rt.auth(), rt.user(), rt.perm("/notification-rules"), rt.notifyRuleCustomParamsGet)
pages.POST("/notify-rule/event-pipelines-tryrun", rt.auth(), rt.user(), rt.perm("/notification-rules/add"), rt.tryRunEventProcessorByNotifyRule)
pages.GET("/event-tagkeys", rt.auth(), rt.user(), rt.eventTagKeys)
pages.GET("/event-tagvalues", rt.auth(), rt.user(), rt.eventTagValues)
// 事件Pipeline相关路由
pages.GET("/event-pipelines", rt.auth(), rt.user(), rt.perm("/event-pipelines"), rt.eventPipelinesList)
pages.POST("/event-pipeline", rt.auth(), rt.user(), rt.perm("/event-pipelines/add"), rt.addEventPipeline)
@@ -543,6 +566,19 @@ func (rt *Router) Config(r *gin.Engine) {
pages.POST("/event-pipeline-tryrun", rt.auth(), rt.user(), rt.perm("/event-pipelines"), rt.tryRunEventPipeline)
pages.POST("/event-processor-tryrun", rt.auth(), rt.user(), rt.perm("/event-pipelines"), rt.tryRunEventProcessor)
// API 触发工作流
pages.POST("/event-pipeline/:id/trigger", rt.auth(), rt.user(), rt.perm("/event-pipelines"), rt.triggerEventPipelineByAPI)
// SSE 流式执行工作流
pages.POST("/event-pipeline/:id/stream", rt.auth(), rt.user(), rt.perm("/event-pipelines"), rt.streamEventPipeline)
// 事件Pipeline执行记录路由
pages.GET("/event-pipeline-executions", rt.auth(), rt.user(), rt.perm("/event-pipelines"), rt.listAllEventPipelineExecutions)
pages.GET("/event-pipeline/:id/executions", rt.auth(), rt.user(), rt.perm("/event-pipelines"), rt.listEventPipelineExecutions)
pages.GET("/event-pipeline/:id/execution/:exec_id", rt.auth(), rt.user(), rt.perm("/event-pipelines"), rt.getEventPipelineExecution)
pages.GET("/event-pipeline-execution/:exec_id", rt.auth(), rt.user(), rt.perm("/event-pipelines"), rt.getEventPipelineExecution)
pages.GET("/event-pipeline/:id/execution-stats", rt.auth(), rt.user(), rt.perm("/event-pipelines"), rt.getEventPipelineExecutionStats)
pages.POST("/event-pipeline-executions/clean", rt.auth(), rt.user(), rt.admin(), rt.cleanEventPipelineExecutions)
pages.POST("/notify-channel-configs", rt.auth(), rt.user(), rt.perm("/notification-channels/add"), rt.notifyChannelsAdd)
pages.DELETE("/notify-channel-configs", rt.auth(), rt.user(), rt.perm("/notification-channels/del"), rt.notifyChannelsDel)
pages.PUT("/notify-channel-config/:id", rt.auth(), rt.user(), rt.perm("/notification-channels/put"), rt.notifyChannelPut)
@@ -550,8 +586,18 @@ func (rt *Router) Config(r *gin.Engine) {
pages.GET("/notify-channel-configs", rt.auth(), rt.user(), rt.perm("/notification-channels"), rt.notifyChannelsGet)
pages.GET("/simplified-notify-channel-configs", rt.notifyChannelsGetForNormalUser)
pages.GET("/flashduty-channel-list/:id", rt.auth(), rt.user(), rt.flashDutyNotifyChannelsGet)
pages.GET("/pagerduty-integration-key/:id/:service_id/:integration_id", rt.auth(), rt.user(), rt.pagerDutyIntegrationKeyGet)
pages.GET("/pagerduty-service-list/:id", rt.auth(), rt.user(), rt.pagerDutyNotifyServicesGet)
pages.GET("/notify-channel-config", rt.auth(), rt.user(), rt.notifyChannelGetBy)
pages.GET("/notify-channel-config/idents", rt.notifyChannelIdentsGet)
// saved view 查询条件保存相关路由
pages.GET("/saved-views", rt.auth(), rt.user(), rt.savedViewGets)
pages.POST("/saved-views", rt.auth(), rt.user(), rt.savedViewAdd)
pages.PUT("/saved-view/:id", rt.auth(), rt.user(), rt.savedViewPut)
pages.DELETE("/saved-view/:id", rt.auth(), rt.user(), rt.savedViewDel)
pages.POST("/saved-view/:id/favorite", rt.auth(), rt.user(), rt.savedViewFavoriteAdd)
pages.DELETE("/saved-view/:id/favorite", rt.auth(), rt.user(), rt.savedViewFavoriteDel)
}
r.GET("/api/n9e/versions", func(c *gin.Context) {
@@ -608,6 +654,7 @@ func (rt *Router) Config(r *gin.Engine) {
service.GET("/busi-groups", rt.busiGroupGetsByService)
service.GET("/datasources", rt.datasourceGetsByService)
service.GET("/datasource-rsa-config", rt.datasourceRsaConfigGet)
service.GET("/datasource-ids", rt.getDatasourceIds)
service.POST("/server-heartbeat", rt.serverHeartbeat)
service.GET("/servers-active", rt.serversActive)
@@ -615,6 +662,7 @@ func (rt *Router) Config(r *gin.Engine) {
service.GET("/recording-rules", rt.recordingRuleGetsByService)
service.GET("/alert-mutes", rt.alertMuteGets)
service.GET("/active-alert-mutes", rt.activeAlertMuteGets)
service.POST("/alert-mutes", rt.alertMuteAddByService)
service.DELETE("/alert-mutes", rt.alertMuteDel)
@@ -663,6 +711,17 @@ func (rt *Router) Config(r *gin.Engine) {
service.GET("/message-templates", rt.messageTemplateGets)
service.GET("/event-pipelines", rt.eventPipelinesListByService)
service.POST("/event-pipeline/:id/trigger", rt.triggerEventPipelineByService)
service.POST("/event-pipeline/:id/stream", rt.streamEventPipelineByService)
service.POST("/event-pipeline-execution", rt.eventPipelineExecutionAdd)
// 手机号加密存储配置接口
service.POST("/users/phone/encrypt", rt.usersPhoneEncrypt)
service.POST("/users/phone/decrypt", rt.usersPhoneDecrypt)
service.POST("/users/phone/refresh-encryption-config", rt.usersPhoneDecryptRefresh)
service.GET("/builtin-components", rt.builtinComponentsGets)
service.GET("/builtin-payloads", rt.builtinPayloadsGets)
}
}

View File

@@ -4,9 +4,9 @@ import (
"net/http"
"github.com/ccfos/nightingale/v6/models"
"github.com/ccfos/nightingale/v6/pkg/ginx"
"github.com/gin-gonic/gin"
"github.com/toolkits/pkg/ginx"
)
// no param

View File

@@ -10,9 +10,10 @@ import (
"github.com/ccfos/nightingale/v6/models"
"github.com/ccfos/nightingale/v6/pkg/ctx"
"github.com/ccfos/nightingale/v6/pkg/strx"
"github.com/ccfos/nightingale/v6/pkg/ginx"
"github.com/gin-gonic/gin"
"github.com/toolkits/pkg/ginx"
"github.com/toolkits/pkg/logger"
)
func getUserGroupIds(ctx *gin.Context, rt *Router, myGroups bool) ([]int64, error) {
@@ -263,11 +264,11 @@ func GetCurEventDetail(ctx *ctx.Context, eid int64) (*models.AlertCurEvent, erro
event.NotifyVersion, err = GetEventNotifyVersion(ctx, event.RuleId, event.NotifyRuleIds)
ginx.Dangerous(err)
event.NotifyRules, err = GetEventNorifyRuleNames(ctx, event.NotifyRuleIds)
event.NotifyRules, err = GetEventNotifyRuleNames(ctx, event.NotifyRuleIds)
return event, err
}
func GetEventNorifyRuleNames(ctx *ctx.Context, notifyRuleIds []int64) ([]*models.EventNotifyRule, error) {
func GetEventNotifyRuleNames(ctx *ctx.Context, notifyRuleIds []int64) ([]*models.EventNotifyRule, error) {
notifyRuleNames := make([]*models.EventNotifyRule, 0)
notifyRules, err := models.NotifyRulesGet(ctx, "id in ?", notifyRuleIds)
if err != nil {
@@ -305,3 +306,123 @@ func (rt *Router) alertCurEventDelByHash(c *gin.Context) {
hash := ginx.QueryStr(c, "hash")
ginx.NewRender(c).Message(models.AlertCurEventDelByHash(rt.Ctx, hash))
}
func (rt *Router) eventTagKeys(c *gin.Context) {
// 获取最近1天的活跃告警事件
now := time.Now().Unix()
stime := now - 24*3600
etime := now
// 获取用户可见的业务组ID列表
bgids, err := GetBusinessGroupIds(c, rt.Ctx, rt.Center.EventHistoryGroupView, false)
if err != nil {
logger.Warningf("failed to get business group ids: %v", err)
ginx.NewRender(c).Data([]string{"ident", "app", "service", "instance"}, nil)
return
}
// 查询活跃告警事件,限制数量以提高性能
events, err := models.AlertCurEventsGet(rt.Ctx, []string{}, bgids, stime, etime, []int64{}, []int64{}, []string{}, 0, "", 200, 0, []int64{})
if err != nil {
logger.Warningf("failed to get current alert events: %v", err)
ginx.NewRender(c).Data([]string{"ident", "app", "service", "instance"}, nil)
return
}
// 如果没有查到事件,返回默认标签
if len(events) == 0 {
ginx.NewRender(c).Data([]string{"ident", "app", "service", "instance"}, nil)
return
}
// 收集所有标签键并去重
tagKeys := make(map[string]struct{})
for _, event := range events {
for key := range event.TagsMap {
tagKeys[key] = struct{}{}
}
}
// 转换为字符串切片
var result []string
for key := range tagKeys {
result = append(result, key)
}
// 如果没有收集到任何标签键,返回默认值
if len(result) == 0 {
result = []string{"ident", "app", "service", "instance"}
}
ginx.NewRender(c).Data(result, nil)
}
func (rt *Router) eventTagValues(c *gin.Context) {
// 获取标签key
tagKey := ginx.QueryStr(c, "key")
// 获取最近1天的活跃告警事件
now := time.Now().Unix()
stime := now - 24*3600
etime := now
// 获取用户可见的业务组ID列表
bgids, err := GetBusinessGroupIds(c, rt.Ctx, rt.Center.EventHistoryGroupView, false)
if err != nil {
logger.Warningf("failed to get business group ids: %v", err)
ginx.NewRender(c).Data([]string{}, nil)
return
}
// 查询活跃告警事件,获取更多数据以保证统计准确性
events, err := models.AlertCurEventsGet(rt.Ctx, []string{}, bgids, stime, etime, []int64{}, []int64{}, []string{}, 0, "", 1000, 0, []int64{})
if err != nil {
logger.Warningf("failed to get current alert events: %v", err)
ginx.NewRender(c).Data([]string{}, nil)
return
}
// 如果没有查到事件,返回空数组
if len(events) == 0 {
ginx.NewRender(c).Data([]string{}, nil)
return
}
// 统计标签值出现次数
valueCount := make(map[string]int)
for _, event := range events {
// TagsMap已经在AlertCurEventsGet中处理直接使用
if value, exists := event.TagsMap[tagKey]; exists && value != "" {
valueCount[value]++
}
}
// 转换为切片并按出现次数降序排序
type tagValue struct {
value string
count int
}
tagValues := make([]tagValue, 0, len(valueCount))
for value, count := range valueCount {
tagValues = append(tagValues, tagValue{value, count})
}
// 按出现次数降序排序
sort.Slice(tagValues, func(i, j int) bool {
return tagValues[i].count > tagValues[j].count
})
// 只取Top20并转换为字符串数组
limit := 20
if len(tagValues) < limit {
limit = len(tagValues)
}
result := make([]string, 0, limit)
for i := 0; i < limit; i++ {
result = append(result, tagValues[i].value)
}
ginx.NewRender(c).Data(result, nil)
}

View File

@@ -0,0 +1,168 @@
package router
import (
"encoding/json"
"fmt"
"io"
"net/http"
"sort"
"strconv"
"strings"
"time"
"github.com/ccfos/nightingale/v6/models"
"github.com/ccfos/nightingale/v6/pkg/loggrep"
"github.com/ccfos/nightingale/v6/pkg/ginx"
"github.com/gin-gonic/gin"
)
// alertEvalDetailPage renders an HTML log viewer page for alert rule evaluation logs.
func (rt *Router) alertEvalDetailPage(c *gin.Context) {
id := ginx.UrlParamStr(c, "id")
if !loggrep.IsValidRuleID(id) {
c.String(http.StatusBadRequest, "invalid rule id format")
return
}
logs, instance, err := rt.getAlertEvalLogs(id)
if err != nil {
c.String(http.StatusInternalServerError, "Error: %v", err)
return
}
c.Header("Content-Type", "text/html; charset=utf-8")
err = loggrep.RenderAlertEvalHTML(c.Writer, loggrep.AlertEvalPageData{
RuleID: id,
Instance: instance,
Logs: logs,
Total: len(logs),
})
if err != nil {
c.String(http.StatusInternalServerError, "render error: %v", err)
}
}
// alertEvalDetailJSON returns JSON for alert rule evaluation logs.
func (rt *Router) alertEvalDetailJSON(c *gin.Context) {
id := ginx.UrlParamStr(c, "id")
if !loggrep.IsValidRuleID(id) {
ginx.Bomb(200, "invalid rule id format")
}
logs, instance, err := rt.getAlertEvalLogs(id)
ginx.Dangerous(err)
ginx.NewRender(c).Data(loggrep.EventDetailResp{
Logs: logs,
Instance: instance,
}, nil)
}
// getAlertEvalLogs resolves the target instance(s) and retrieves alert eval logs.
func (rt *Router) getAlertEvalLogs(id string) ([]string, string, error) {
ruleId, _ := strconv.ParseInt(id, 10, 64)
rule, err := models.AlertRuleGetById(rt.Ctx, ruleId)
if err != nil {
return nil, "", err
}
if rule == nil {
return nil, "", fmt.Errorf("no such alert rule")
}
instance := fmt.Sprintf("%s:%d", rt.Alert.Heartbeat.IP, rt.HTTP.Port)
keyword := fmt.Sprintf("alert_eval_%s", id)
// Get datasource IDs for this rule
dsIds := rt.DatasourceCache.GetIDsByDsCateAndQueries(rule.Cate, rule.DatasourceQueries)
if len(dsIds) == 0 {
// No datasources found (e.g. host rule), try local grep
logs, err := loggrep.GrepLogDir(rt.LogDir, keyword)
return logs, instance, err
}
// Find unique target nodes via hash ring, with DB fallback
nodeSet := make(map[string]struct{})
for _, dsId := range dsIds {
node, err := rt.getNodeForDatasource(dsId, id)
if err != nil {
continue
}
nodeSet[node] = struct{}{}
}
if len(nodeSet) == 0 {
// Hash ring not ready, grep locally
logs, err := loggrep.GrepLogDir(rt.LogDir, keyword)
return logs, instance, err
}
// Collect logs from all target nodes
var allLogs []string
var instances []string
for node := range nodeSet {
if node == instance {
logs, err := loggrep.GrepLogDir(rt.LogDir, keyword)
if err == nil {
allLogs = append(allLogs, logs...)
instances = append(instances, node)
}
} else {
logs, nodeAddr, err := rt.forwardAlertEvalDetail(node, id)
if err == nil {
allLogs = append(allLogs, logs...)
instances = append(instances, nodeAddr)
}
}
}
// Sort logs by timestamp descending
sort.Slice(allLogs, func(i, j int) bool {
return allLogs[i] > allLogs[j]
})
if len(allLogs) > loggrep.MaxLogLines {
allLogs = allLogs[:loggrep.MaxLogLines]
}
return allLogs, strings.Join(instances, ", "), nil
}
func (rt *Router) forwardAlertEvalDetail(node, id string) ([]string, string, error) {
url := fmt.Sprintf("http://%s/v1/n9e/alert-eval-detail/%s", node, id)
req, err := http.NewRequest("GET", url, nil)
if err != nil {
return nil, node, err
}
for user, pass := range rt.HTTP.APIForService.BasicAuth {
req.SetBasicAuth(user, pass)
break
}
client := &http.Client{Timeout: 15 * time.Second}
resp, err := client.Do(req)
if err != nil {
return nil, node, fmt.Errorf("forward to %s failed: %v", node, err)
}
defer resp.Body.Close()
body, err := io.ReadAll(io.LimitReader(resp.Body, 10*1024*1024)) // 10MB limit
if err != nil {
return nil, node, err
}
var result struct {
Dat loggrep.EventDetailResp `json:"dat"`
Err string `json:"err"`
}
if err := json.Unmarshal(body, &result); err != nil {
return nil, node, err
}
if result.Err != "" {
return nil, node, fmt.Errorf("%s", result.Err)
}
return result.Dat.Logs, result.Dat.Instance, nil
}

View File

@@ -8,9 +8,9 @@ import (
"github.com/ccfos/nightingale/v6/models"
"github.com/ccfos/nightingale/v6/pkg/ctx"
"github.com/ccfos/nightingale/v6/pkg/ginx"
"github.com/gin-gonic/gin"
"github.com/toolkits/pkg/ginx"
"github.com/toolkits/pkg/logger"
"golang.org/x/exp/slices"
)
@@ -62,11 +62,11 @@ func (rt *Router) alertHisEventsList(c *gin.Context) {
ginx.Dangerous(err)
total, err := models.AlertHisEventTotal(rt.Ctx, prods, bgids, stime, etime, severity,
recovered, dsIds, cates, ruleId, query)
recovered, dsIds, cates, ruleId, query, []int64{})
ginx.Dangerous(err)
list, err := models.AlertHisEventGets(rt.Ctx, prods, bgids, stime, etime, severity, recovered,
dsIds, cates, ruleId, query, limit, ginx.Offset(c, limit))
dsIds, cates, ruleId, query, limit, ginx.Offset(c, limit), []int64{})
ginx.Dangerous(err)
cache := make(map[int64]*models.UserGroup)
@@ -115,7 +115,18 @@ func (rt *Router) alertHisEventsDelete(c *gin.Context) {
time.Sleep(100 * time.Millisecond) // 防止锁表
}
}()
ginx.NewRender(c).Message("Alert history events deletion started")
ginx.NewRender(c).Data("Alert history events deletion started", nil)
}
var TransferEventToCur func(*ctx.Context, *models.AlertHisEvent) *models.AlertCurEvent
func init() {
TransferEventToCur = transferEventToCur
}
func transferEventToCur(ctx *ctx.Context, event *models.AlertHisEvent) *models.AlertCurEvent {
cur := event.ToCur()
return cur
}
func (rt *Router) alertHisEventGet(c *gin.Context) {
@@ -141,8 +152,8 @@ func (rt *Router) alertHisEventGet(c *gin.Context) {
event.NotifyVersion, err = GetEventNotifyVersion(rt.Ctx, event.RuleId, event.NotifyRuleIds)
ginx.Dangerous(err)
event.NotifyRules, err = GetEventNorifyRuleNames(rt.Ctx, event.NotifyRuleIds)
ginx.NewRender(c).Data(event, err)
event.NotifyRules, err = GetEventNotifyRuleNames(rt.Ctx, event.NotifyRuleIds)
ginx.NewRender(c).Data(TransferEventToCur(rt.Ctx, event), err)
}
func GetBusinessGroupIds(c *gin.Context, ctx *ctx.Context, onlySelfGroupView bool, myGroups bool) ([]int64, error) {

View File

@@ -11,15 +11,17 @@ import (
"gopkg.in/yaml.v2"
"github.com/ccfos/nightingale/v6/alert/mute"
"github.com/ccfos/nightingale/v6/models"
"github.com/ccfos/nightingale/v6/pkg/strx"
"github.com/ccfos/nightingale/v6/pushgw/pconf"
"github.com/ccfos/nightingale/v6/pushgw/writer"
"github.com/ccfos/nightingale/v6/pkg/ginx"
"github.com/gin-gonic/gin"
"github.com/jinzhu/copier"
"github.com/pkg/errors"
"github.com/prometheus/prometheus/prompb"
"github.com/toolkits/pkg/ginx"
"github.com/toolkits/pkg/i18n"
)
@@ -33,13 +35,13 @@ func (rt *Router) alertRuleGets(c *gin.Context) {
cache := make(map[int64]*models.UserGroup)
for i := 0; i < len(ars); i++ {
ars[i].FillNotifyGroups(rt.Ctx, cache)
ars[i].FillSeverities()
}
models.FillUpdateByNicknames(rt.Ctx, ars)
}
ginx.NewRender(c).Data(ars, err)
}
func getAlertCueEventTimeRange(c *gin.Context) (stime, etime int64) {
func GetAlertCueEventTimeRange(c *gin.Context) (stime, etime int64) {
stime = ginx.QueryInt64(c, "stime", 0)
etime = ginx.QueryInt64(c, "etime", 0)
if etime == 0 {
@@ -75,20 +77,17 @@ func (rt *Router) alertRuleGetsByGids(c *gin.Context) {
if err == nil {
cache := make(map[int64]*models.UserGroup)
rids := make([]int64, 0, len(ars))
names := make([]string, 0, len(ars))
for i := 0; i < len(ars); i++ {
ars[i].FillNotifyGroups(rt.Ctx, cache)
ars[i].FillSeverities()
if len(ars[i].DatasourceQueries) != 0 {
ars[i].DatasourceIdsJson = rt.DatasourceCache.GetIDsByDsCateAndQueries(ars[i].Cate, ars[i].DatasourceQueries)
}
rids = append(rids, ars[i].Id)
names = append(names, ars[i].UpdateBy)
}
stime, etime := getAlertCueEventTimeRange(c)
stime, etime := GetAlertCueEventTimeRange(c)
cnt := models.AlertCurEventCountByRuleId(rt.Ctx, rids, stime, etime)
if cnt != nil {
for i := 0; i < len(ars); i++ {
@@ -96,14 +95,7 @@ func (rt *Router) alertRuleGetsByGids(c *gin.Context) {
}
}
users := models.UserMapGet(rt.Ctx, "username in (?)", names)
if users != nil {
for i := 0; i < len(ars); i++ {
if user, exist := users[ars[i].UpdateBy]; exist {
ars[i].UpdateByNickname = user.Nickname
}
}
}
models.FillUpdateByNicknames(rt.Ctx, ars)
}
ginx.NewRender(c).Data(ars, err)
}
@@ -135,6 +127,7 @@ func (rt *Router) alertRulesGetByService(c *gin.Context) {
ars[i].DatasourceIdsJson = rt.DatasourceCache.GetIDsByDsCateAndQueries(ars[i].Cate, ars[i].DatasourceQueries)
}
}
models.FillUpdateByNicknames(rt.Ctx, ars)
}
ginx.NewRender(c).Data(ars, err)
}
@@ -157,6 +150,120 @@ func (rt *Router) alertRuleAddByFE(c *gin.Context) {
ginx.NewRender(c).Data(reterr, nil)
}
type AlertRuleTryRunForm struct {
EventId int64 `json:"event_id" binding:"required"`
AlertRuleConfig models.AlertRule `json:"config" binding:"required"`
}
func (rt *Router) alertRuleNotifyTryRun(c *gin.Context) {
// check notify channels of old version
var f AlertRuleTryRunForm
ginx.BindJSON(c, &f)
hisEvent, err := models.AlertHisEventGetById(rt.Ctx, f.EventId)
ginx.Dangerous(err)
if hisEvent == nil {
ginx.Bomb(http.StatusNotFound, "event not found")
}
curEvent := *hisEvent.ToCur()
curEvent.SetTagsMap()
if f.AlertRuleConfig.NotifyVersion == 1 {
for _, id := range f.AlertRuleConfig.NotifyRuleIds {
notifyRule, err := models.GetNotifyRule(rt.Ctx, id)
ginx.Dangerous(err)
for _, notifyConfig := range notifyRule.NotifyConfigs {
_, err = SendNotifyChannelMessage(rt.Ctx, rt.UserCache, rt.UserGroupCache, notifyConfig, []*models.AlertCurEvent{&curEvent})
ginx.Dangerous(err)
}
}
ginx.NewRender(c).Data("notification test ok", nil)
return
}
if len(f.AlertRuleConfig.NotifyChannelsJSON) == 0 {
ginx.Bomb(http.StatusOK, "no notify channels selected")
}
if len(f.AlertRuleConfig.NotifyGroupsJSON) == 0 {
ginx.Bomb(http.StatusOK, "no notify groups selected")
}
ancs := make([]string, 0, len(curEvent.NotifyChannelsJSON))
ugids := f.AlertRuleConfig.NotifyGroupsJSON
ngids := make([]int64, 0)
for i := 0; i < len(ugids); i++ {
if gid, err := strconv.ParseInt(ugids[i], 10, 64); err == nil {
ngids = append(ngids, gid)
}
}
userGroups := rt.UserGroupCache.GetByUserGroupIds(ngids)
uids := make([]int64, 0)
for i := range userGroups {
uids = append(uids, userGroups[i].UserIds...)
}
users := rt.UserCache.GetByUserIds(uids)
for _, NotifyChannels := range curEvent.NotifyChannelsJSON {
flag := true
// ignore non-default channels
switch NotifyChannels {
case models.Dingtalk, models.Wecom, models.Feishu, models.Mm,
models.Telegram, models.Email, models.FeishuCard:
// do nothing
default:
continue
}
// default channels
for ui := range users {
if _, b := users[ui].ExtractToken(NotifyChannels); b {
flag = false
break
}
}
if flag {
ancs = append(ancs, NotifyChannels)
}
}
if len(ancs) > 0 {
ginx.Dangerous(errors.New(fmt.Sprintf("All users are missing notify channel configurations. Please check for missing tokens (each channel should be configured with at least one user). %v", ancs)))
}
ginx.NewRender(c).Data("notification test ok", nil)
}
func (rt *Router) alertRuleEnableTryRun(c *gin.Context) {
// check notify channels of old version
var f AlertRuleTryRunForm
ginx.BindJSON(c, &f)
hisEvent, err := models.AlertHisEventGetById(rt.Ctx, f.EventId)
ginx.Dangerous(err)
if hisEvent == nil {
ginx.Bomb(http.StatusNotFound, "event not found")
}
curEvent := *hisEvent.ToCur()
curEvent.SetTagsMap()
if f.AlertRuleConfig.Disabled == 1 {
ginx.Bomb(http.StatusOK, "rule is disabled")
}
if mute.TimeSpanMuteStrategy(&f.AlertRuleConfig, &curEvent) {
ginx.Bomb(http.StatusOK, "event is not match for period of time")
}
if mute.BgNotMatchMuteStrategy(&f.AlertRuleConfig, &curEvent, rt.TargetCache) {
ginx.Bomb(http.StatusOK, "event target busi group not match rule busi group")
}
ginx.NewRender(c).Data("event is effective", nil)
}
func (rt *Router) alertRuleAddByImport(c *gin.Context) {
username := c.MustGet("username").(string)
@@ -174,6 +281,15 @@ func (rt *Router) alertRuleAddByImport(c *gin.Context) {
models.DataSourceQueryAll,
}
}
// 将导入的规则统一转为新版本的通知规则配置
lst[i].NotifyVersion = 1
lst[i].NotifyChannelsJSON = []string{}
lst[i].NotifyGroupsJSON = []string{}
lst[i].NotifyChannels = ""
lst[i].NotifyGroups = ""
lst[i].Callbacks = ""
lst[i].CallbacksJSON = []string{}
}
bgid := ginx.UrlParamInt64(c, "id")
@@ -192,19 +308,52 @@ func (rt *Router) alertRuleAddByImportPromRule(c *gin.Context) {
var f promRuleForm
ginx.Dangerous(c.BindJSON(&f))
// 首先尝试解析带 groups 的格式
var pr struct {
Groups []models.PromRuleGroup `yaml:"groups"`
}
err := yaml.Unmarshal([]byte(f.Payload), &pr)
if err != nil {
ginx.Bomb(http.StatusBadRequest, "invalid yaml format, please use the example format. err: %v", err)
var groups []models.PromRuleGroup
if err != nil || len(pr.Groups) == 0 {
// 如果解析失败或没有 groups尝试解析规则数组格式
var rules []models.PromRule
err = yaml.Unmarshal([]byte(f.Payload), &rules)
if err != nil {
// 最后尝试解析单个规则格式
var singleRule models.PromRule
err = yaml.Unmarshal([]byte(f.Payload), &singleRule)
if err != nil {
ginx.Bomb(http.StatusBadRequest, "invalid yaml format. err: %v", err)
}
// 验证单个规则是否有效
if singleRule.Alert == "" && singleRule.Record == "" {
ginx.Bomb(http.StatusBadRequest, "input yaml is empty or invalid")
}
rules = []models.PromRule{singleRule}
}
// 验证规则数组是否为空
if len(rules) == 0 {
ginx.Bomb(http.StatusBadRequest, "input yaml contains no rules")
}
// 将规则数组包装成 group
groups = []models.PromRuleGroup{
{
Name: "imported_rules",
Rules: rules,
},
}
} else {
// 使用已解析的 groups
groups = pr.Groups
}
if len(pr.Groups) == 0 {
ginx.Bomb(http.StatusBadRequest, "input yaml is empty")
}
lst := models.DealPromGroup(pr.Groups, f.DatasourceQueries, f.Disabled)
lst := models.DealPromGroup(groups, f.DatasourceQueries, f.Disabled)
username := c.MustGet("username").(string)
bgid := ginx.UrlParamInt64(c, "id")
ginx.NewRender(c).Data(rt.alertRuleAdd(lst, username, bgid, c.GetHeader("X-Language")), nil)
@@ -349,8 +498,8 @@ func (rt *Router) alertRulePutFields(c *gin.Context) {
ginx.Bomb(http.StatusBadRequest, "fields empty")
}
f.Fields["update_by"] = c.MustGet("username").(string)
f.Fields["update_at"] = time.Now().Unix()
updateBy := c.MustGet("username").(string)
updateAt := time.Now().Unix()
for i := 0; i < len(f.Ids); i++ {
ar, err := models.AlertRuleGetById(rt.Ctx, f.Ids[i])
@@ -367,7 +516,6 @@ func (rt *Router) alertRulePutFields(c *gin.Context) {
b, err := json.Marshal(originRule)
ginx.Dangerous(err)
ginx.Dangerous(ar.UpdateFieldsMap(rt.Ctx, map[string]interface{}{"rule_config": string(b)}))
continue
}
}
@@ -380,7 +528,6 @@ func (rt *Router) alertRulePutFields(c *gin.Context) {
b, err := json.Marshal(ar.AnnotationsJSON)
ginx.Dangerous(err)
ginx.Dangerous(ar.UpdateFieldsMap(rt.Ctx, map[string]interface{}{"annotations": string(b)}))
continue
}
}
@@ -393,7 +540,6 @@ func (rt *Router) alertRulePutFields(c *gin.Context) {
b, err := json.Marshal(ar.AnnotationsJSON)
ginx.Dangerous(err)
ginx.Dangerous(ar.UpdateFieldsMap(rt.Ctx, map[string]interface{}{"annotations": string(b)}))
continue
}
}
@@ -403,7 +549,6 @@ func (rt *Router) alertRulePutFields(c *gin.Context) {
callback := callbacks.(string)
if !strings.Contains(ar.Callbacks, callback) {
ginx.Dangerous(ar.UpdateFieldsMap(rt.Ctx, map[string]interface{}{"callbacks": ar.Callbacks + " " + callback}))
continue
}
}
}
@@ -413,7 +558,6 @@ func (rt *Router) alertRulePutFields(c *gin.Context) {
if callbacks, has := f.Fields["callbacks"]; has {
callback := callbacks.(string)
ginx.Dangerous(ar.UpdateFieldsMap(rt.Ctx, map[string]interface{}{"callbacks": strings.ReplaceAll(ar.Callbacks, callback, "")}))
continue
}
}
@@ -423,7 +567,6 @@ func (rt *Router) alertRulePutFields(c *gin.Context) {
bytes, err := json.Marshal(datasourceQueries)
ginx.Dangerous(err)
ginx.Dangerous(ar.UpdateFieldsMap(rt.Ctx, map[string]interface{}{"datasource_queries": bytes}))
continue
}
}
@@ -439,6 +582,12 @@ func (rt *Router) alertRulePutFields(c *gin.Context) {
ginx.Dangerous(ar.UpdateColumn(rt.Ctx, k, v))
}
}
// 统一更新更新时间和更新人,只有更新时间变了,告警规则才会被引擎拉取
ginx.Dangerous(ar.UpdateFieldsMap(rt.Ctx, map[string]interface{}{
"update_by": updateBy,
"update_at": updateAt,
}))
}
ginx.NewRender(c).Message(nil)
@@ -733,3 +882,27 @@ func (rt *Router) batchAlertRuleClone(c *gin.Context) {
ginx.NewRender(c).Data(reterr, nil)
}
func (rt *Router) timezonesGet(c *gin.Context) {
// 返回常用时区列表(按时差去重,每个时差只保留一个代表性时区)
timezones := []string{
"UTC",
"Asia/Shanghai", // UTC+8 (代表 Asia/Hong_Kong, Asia/Singapore 等)
"Asia/Tokyo", // UTC+9 (代表 Asia/Seoul 等)
"Asia/Dubai", // UTC+4
"Asia/Kolkata", // UTC+5:30
"Asia/Bangkok", // UTC+7 (代表 Asia/Jakarta 等)
"Europe/London", // UTC+0 (代表 UTC)
"Europe/Paris", // UTC+1 (代表 Europe/Berlin, Europe/Rome, Europe/Madrid 等)
"Europe/Moscow", // UTC+3
"America/New_York", // UTC-5 (代表 America/Toronto 等)
"America/Chicago", // UTC-6 (代表 America/Mexico_City 等)
"America/Denver", // UTC-7
"America/Los_Angeles", // UTC-8
"America/Sao_Paulo", // UTC-3
"Australia/Sydney", // UTC+10 (代表 Australia/Melbourne 等)
"Pacific/Auckland", // UTC+12
}
ginx.NewRender(c).Data(timezones, nil)
}

View File

@@ -2,13 +2,17 @@ package router
import (
"net/http"
"strconv"
"strings"
"time"
"github.com/ccfos/nightingale/v6/alert/common"
"github.com/ccfos/nightingale/v6/models"
"github.com/ccfos/nightingale/v6/pkg/strx"
"github.com/ccfos/nightingale/v6/pkg/ginx"
"github.com/gin-gonic/gin"
"github.com/toolkits/pkg/ginx"
"github.com/toolkits/pkg/i18n"
)
// Return all, front-end search and paging
@@ -26,6 +30,7 @@ func (rt *Router) alertSubscribeGets(c *gin.Context) {
ginx.Dangerous(lst[i].FillDatasourceIds(rt.Ctx))
ginx.Dangerous(lst[i].DB2FE())
}
models.FillUpdateByNicknames(rt.Ctx, lst)
ginx.NewRender(c).Data(lst, err)
}
@@ -62,6 +67,7 @@ func (rt *Router) alertSubscribeGetsByGids(c *gin.Context) {
ginx.Dangerous(lst[i].FillDatasourceIds(rt.Ctx))
ginx.Dangerous(lst[i].DB2FE())
}
models.FillUpdateByNicknames(rt.Ctx, lst)
ginx.NewRender(c).Data(lst, err)
}
@@ -104,6 +110,148 @@ func (rt *Router) alertSubscribeAdd(c *gin.Context) {
ginx.NewRender(c).Message(f.Add(rt.Ctx))
}
type SubscribeTryRunForm struct {
EventId int64 `json:"event_id" binding:"required"`
SubscribeConfig models.AlertSubscribe `json:"config" binding:"required"`
}
func (rt *Router) alertSubscribeTryRun(c *gin.Context) {
var f SubscribeTryRunForm
ginx.BindJSON(c, &f)
ginx.Dangerous(f.SubscribeConfig.Verify())
hisEvent, err := models.AlertHisEventGetById(rt.Ctx, f.EventId)
ginx.Dangerous(err)
if hisEvent == nil {
ginx.Bomb(http.StatusNotFound, "event not found")
}
curEvent := *hisEvent.ToCur()
curEvent.SetTagsMap()
lang := c.GetHeader("X-Language")
// 先判断匹配条件
if !f.SubscribeConfig.MatchCluster(curEvent.DatasourceId) {
ginx.Bomb(http.StatusBadRequest, i18n.Sprintf(lang, "event datasource not match"))
}
if len(f.SubscribeConfig.RuleIds) != 0 {
match := false
for _, rid := range f.SubscribeConfig.RuleIds {
if rid == curEvent.RuleId {
match = true
break
}
}
if !match {
ginx.Bomb(http.StatusBadRequest, i18n.Sprintf(lang, "event rule id not match"))
}
}
// 匹配 tag
f.SubscribeConfig.Parse()
if !common.MatchTags(curEvent.TagsMap, f.SubscribeConfig.ITags) {
ginx.Bomb(http.StatusBadRequest, i18n.Sprintf(lang, "event tags not match"))
}
// 匹配group name
if !common.MatchGroupsName(curEvent.GroupName, f.SubscribeConfig.IBusiGroups) {
ginx.Bomb(http.StatusBadRequest, i18n.Sprintf(lang, "event group name not match"))
}
// 检查严重级别Severity匹配
if len(f.SubscribeConfig.SeveritiesJson) != 0 {
match := false
for _, s := range f.SubscribeConfig.SeveritiesJson {
if s == curEvent.Severity || s == 0 {
match = true
break
}
}
if !match {
ginx.Bomb(http.StatusBadRequest, i18n.Sprintf(lang, "event severity not match"))
}
}
// 新版本通知规则
if f.SubscribeConfig.NotifyVersion == 1 {
if len(f.SubscribeConfig.NotifyRuleIds) == 0 {
ginx.Bomb(http.StatusBadRequest, i18n.Sprintf(lang, "no notify rules selected"))
}
for _, id := range f.SubscribeConfig.NotifyRuleIds {
notifyRule, err := models.GetNotifyRule(rt.Ctx, id)
if err != nil {
ginx.Bomb(http.StatusNotFound, i18n.Sprintf(lang, "subscribe notify rule not found: %v", err))
}
for _, notifyConfig := range notifyRule.NotifyConfigs {
_, err = SendNotifyChannelMessage(rt.Ctx, rt.UserCache, rt.UserGroupCache, notifyConfig, []*models.AlertCurEvent{&curEvent})
if err != nil {
ginx.Bomb(http.StatusBadRequest, i18n.Sprintf(lang, "notify rule send error: %v", err))
}
}
}
ginx.NewRender(c).Data(i18n.Sprintf(lang, "event match subscribe and notification test ok"), nil)
return
}
// 旧版通知方式
f.SubscribeConfig.ModifyEvent(&curEvent)
if len(curEvent.NotifyChannelsJSON) == 0 {
ginx.Bomb(http.StatusBadRequest, i18n.Sprintf(lang, "no notify channels selected"))
}
if len(curEvent.NotifyGroupsJSON) == 0 {
ginx.Bomb(http.StatusOK, i18n.Sprintf(lang, "no notify groups selected"))
}
ancs := make([]string, 0, len(curEvent.NotifyChannelsJSON))
ugids := strings.Fields(f.SubscribeConfig.UserGroupIds)
ngids := make([]int64, 0)
for i := 0; i < len(ugids); i++ {
if gid, err := strconv.ParseInt(ugids[i], 10, 64); err == nil {
ngids = append(ngids, gid)
}
}
userGroups := rt.UserGroupCache.GetByUserGroupIds(ngids)
uids := make([]int64, 0)
for i := range userGroups {
uids = append(uids, userGroups[i].UserIds...)
}
users := rt.UserCache.GetByUserIds(uids)
for _, NotifyChannels := range curEvent.NotifyChannelsJSON {
flag := true
// ignore non-default channels
switch NotifyChannels {
case models.Dingtalk, models.Wecom, models.Feishu, models.Mm,
models.Telegram, models.Email, models.FeishuCard:
// do nothing
default:
continue
}
// default channels
for ui := range users {
if _, b := users[ui].ExtractToken(NotifyChannels); b {
flag = false
break
}
}
if flag {
ancs = append(ancs, NotifyChannels)
}
}
if len(ancs) > 0 {
ginx.Bomb(http.StatusBadRequest, i18n.Sprintf(lang, "all users missing notify channel configurations: %v", ancs))
}
ginx.NewRender(c).Data(i18n.Sprintf(lang, "event match subscribe and notify settings ok"), nil)
}
func (rt *Router) alertSubscribePut(c *gin.Context) {
var fs []models.AlertSubscribe
ginx.BindJSON(c, &fs)
@@ -142,6 +290,7 @@ func (rt *Router) alertSubscribePut(c *gin.Context) {
"busi_groups",
"note",
"notify_rule_ids",
"notify_version",
))
}

View File

@@ -7,9 +7,9 @@ import (
"github.com/ccfos/nightingale/v6/models"
"github.com/ccfos/nightingale/v6/pkg/strx"
"github.com/ccfos/nightingale/v6/pkg/ginx"
"github.com/gin-gonic/gin"
"github.com/toolkits/pkg/ginx"
"github.com/toolkits/pkg/i18n"
)
@@ -17,6 +17,7 @@ type boardForm struct {
Name string `json:"name"`
Ident string `json:"ident"`
Tags string `json:"tags"`
Note string `json:"note"`
Configs string `json:"configs"`
Public int `json:"public"`
PublicCate int `json:"public_cate"`
@@ -34,6 +35,7 @@ func (rt *Router) boardAdd(c *gin.Context) {
Name: f.Name,
Ident: f.Ident,
Tags: f.Tags,
Note: f.Note,
Configs: f.Configs,
CreateBy: me.Username,
UpdateBy: me.Username,
@@ -115,6 +117,10 @@ func (rt *Router) boardPureGet(c *gin.Context) {
ginx.Bomb(http.StatusNotFound, "No such dashboard")
}
// 清除创建者和更新者信息
board.CreateBy = ""
board.UpdateBy = ""
ginx.NewRender(c).Data(board, nil)
}
@@ -180,10 +186,11 @@ func (rt *Router) boardPut(c *gin.Context) {
bo.Name = f.Name
bo.Ident = f.Ident
bo.Tags = f.Tags
bo.Note = f.Note
bo.UpdateBy = me.Username
bo.UpdateAt = time.Now().Unix()
err = bo.Update(rt.Ctx, "name", "ident", "tags", "update_by", "update_at")
err = bo.Update(rt.Ctx, "name", "ident", "tags", "note", "update_by", "update_at")
ginx.NewRender(c).Data(bo, err)
}
@@ -253,6 +260,9 @@ func (rt *Router) boardGets(c *gin.Context) {
query := ginx.QueryStr(c, "query", "")
boards, err := models.BoardGetsByGroupId(rt.Ctx, bgid, query)
if err == nil {
models.FillUpdateByNicknames(rt.Ctx, boards)
}
ginx.NewRender(c).Data(boards, err)
}
@@ -266,6 +276,9 @@ func (rt *Router) publicBoardGets(c *gin.Context) {
ginx.Dangerous(err)
boards, err := models.BoardGets(rt.Ctx, "", "public=1 and (public_cate in (?) or id in (?))", []int64{0, 1}, boardIds)
if err == nil {
models.FillUpdateByNicknames(rt.Ctx, boards)
}
ginx.NewRender(c).Data(boards, err)
}
@@ -305,6 +318,7 @@ func (rt *Router) boardGetsByGids(c *gin.Context) {
boards[i].Bgids = ids
}
}
models.FillUpdateByNicknames(rt.Ctx, boards)
ginx.NewRender(c).Data(boards, err)
}

View File

@@ -8,10 +8,10 @@ import (
"strings"
"github.com/ccfos/nightingale/v6/models"
"github.com/ccfos/nightingale/v6/pkg/ginx"
"github.com/gin-gonic/gin"
"github.com/toolkits/pkg/file"
"github.com/toolkits/pkg/ginx"
"github.com/toolkits/pkg/logger"
"github.com/toolkits/pkg/runner"
)

View File

@@ -5,9 +5,9 @@ import (
"github.com/ccfos/nightingale/v6/models"
"github.com/ccfos/nightingale/v6/pkg/ctx"
"github.com/ccfos/nightingale/v6/pkg/ginx"
"github.com/gin-gonic/gin"
"github.com/toolkits/pkg/ginx"
"gorm.io/gorm"
)

View File

@@ -3,8 +3,8 @@ package router
import (
"github.com/ccfos/nightingale/v6/models"
"github.com/ccfos/nightingale/v6/pkg/prom"
"github.com/ccfos/nightingale/v6/pkg/ginx"
"github.com/gin-gonic/gin"
"github.com/toolkits/pkg/ginx"
)
func (rt *Router) metricFilterGets(c *gin.Context) {
@@ -27,6 +27,8 @@ func (rt *Router) metricFilterGets(c *gin.Context) {
}
}
models.FillUpdateByNicknames(rt.Ctx, arr)
ginx.NewRender(c).Data(arr, err)
}

View File

@@ -2,12 +2,14 @@ package router
import (
"net/http"
"sort"
"time"
"github.com/ccfos/nightingale/v6/center/integration"
"github.com/ccfos/nightingale/v6/models"
"github.com/ccfos/nightingale/v6/pkg/ginx"
"github.com/gin-gonic/gin"
"github.com/toolkits/pkg/ginx"
"github.com/toolkits/pkg/i18n"
)
@@ -29,7 +31,7 @@ func (rt *Router) builtinMetricsAdd(c *gin.Context) {
reterr := make(map[string]string)
for i := 0; i < count; i++ {
lst[i].Lang = lang
lst[i].UUID = time.Now().UnixNano()
lst[i].UUID = time.Now().UnixMicro()
if err := lst[i].Add(rt.Ctx, username); err != nil {
reterr[lst[i].Name] = i18n.Sprintf(c.GetHeader("X-Language"), err.Error())
}
@@ -48,11 +50,12 @@ func (rt *Router) builtinMetricsGets(c *gin.Context) {
lang = "zh_CN"
}
bm, err := models.BuiltinMetricGets(rt.Ctx, lang, collector, typ, query, unit, limit, ginx.Offset(c, limit))
bmInDB, err := models.BuiltinMetricGets(rt.Ctx, "", collector, typ, query, unit)
ginx.Dangerous(err)
total, err := models.BuiltinMetricCount(rt.Ctx, lang, collector, typ, query, unit)
bm, total, err := integration.BuiltinPayloadInFile.BuiltinMetricGets(bmInDB, lang, collector, typ, query, unit, limit, ginx.Offset(c, limit))
ginx.Dangerous(err)
ginx.NewRender(c).Data(gin.H{
"list": bm,
"total": total,
@@ -100,8 +103,26 @@ func (rt *Router) builtinMetricsTypes(c *gin.Context) {
query := ginx.QueryStr(c, "query", "")
lang := c.GetHeader("X-Language")
metricTypeList, err := models.BuiltinMetricTypes(rt.Ctx, lang, collector, query)
ginx.NewRender(c).Data(metricTypeList, err)
metricTypeListInDB, err := models.BuiltinMetricTypes(rt.Ctx, lang, collector, query)
ginx.Dangerous(err)
metricTypeListInFile := integration.BuiltinPayloadInFile.BuiltinMetricTypes(lang, collector, query)
typeMap := make(map[string]struct{})
for _, metricType := range metricTypeListInDB {
typeMap[metricType] = struct{}{}
}
for _, metricType := range metricTypeListInFile {
typeMap[metricType] = struct{}{}
}
metricTypeList := make([]string, 0, len(typeMap))
for metricType := range typeMap {
metricTypeList = append(metricTypeList, metricType)
}
sort.Strings(metricTypeList)
ginx.NewRender(c).Data(metricTypeList, nil)
}
func (rt *Router) builtinMetricsCollectors(c *gin.Context) {
@@ -109,5 +130,24 @@ func (rt *Router) builtinMetricsCollectors(c *gin.Context) {
query := ginx.QueryStr(c, "query", "")
lang := c.GetHeader("X-Language")
ginx.NewRender(c).Data(models.BuiltinMetricCollectors(rt.Ctx, lang, typ, query))
collectorListInDB, err := models.BuiltinMetricCollectors(rt.Ctx, lang, typ, query)
ginx.Dangerous(err)
collectorListInFile := integration.BuiltinPayloadInFile.BuiltinMetricCollectors(lang, typ, query)
collectorMap := make(map[string]struct{})
for _, collector := range collectorListInDB {
collectorMap[collector] = struct{}{}
}
for _, collector := range collectorListInFile {
collectorMap[collector] = struct{}{}
}
collectorList := make([]string, 0, len(collectorMap))
for collector := range collectorMap {
collectorList = append(collectorList, collector)
}
sort.Strings(collectorList)
ginx.NewRender(c).Data(collectorList, nil)
}

View File

@@ -7,9 +7,10 @@ import (
"time"
"github.com/BurntSushi/toml"
"github.com/ccfos/nightingale/v6/center/integration"
"github.com/ccfos/nightingale/v6/models"
"github.com/ccfos/nightingale/v6/pkg/ginx"
"github.com/gin-gonic/gin"
"github.com/toolkits/pkg/ginx"
"github.com/toolkits/pkg/i18n"
)
@@ -18,6 +19,7 @@ type Board struct {
Tags string `json:"tags"`
Configs interface{} `json:"configs"`
UUID int64 `json:"uuid"`
Note string `json:"note"`
}
func (rt *Router) builtinPayloadsAdd(c *gin.Context) {
@@ -128,6 +130,7 @@ func (rt *Router) builtinPayloadsAdd(c *gin.Context) {
Name: dashboard.Name,
Tags: dashboard.Tags,
UUID: dashboard.UUID,
Note: dashboard.Note,
Content: string(contentBytes),
CreatedBy: username,
UpdatedBy: username,
@@ -163,6 +166,7 @@ func (rt *Router) builtinPayloadsAdd(c *gin.Context) {
Name: dashboard.Name,
Tags: dashboard.Tags,
UUID: dashboard.UUID,
Note: dashboard.Note,
Content: string(contentBytes),
CreatedBy: username,
UpdatedBy: username,
@@ -192,13 +196,26 @@ func (rt *Router) builtinPayloadsAdd(c *gin.Context) {
func (rt *Router) builtinPayloadsGets(c *gin.Context) {
typ := ginx.QueryStr(c, "type", "")
if typ == "" {
ginx.Bomb(http.StatusBadRequest, "type is required")
return
}
ComponentID := ginx.QueryInt64(c, "component_id", 0)
cate := ginx.QueryStr(c, "cate", "")
query := ginx.QueryStr(c, "query", "")
lst, err := models.BuiltinPayloadGets(rt.Ctx, uint64(ComponentID), typ, cate, query)
ginx.NewRender(c).Data(lst, err)
ginx.Dangerous(err)
lstInFile, err := integration.BuiltinPayloadInFile.GetBuiltinPayload(typ, cate, query, uint64(ComponentID))
ginx.Dangerous(err)
if len(lstInFile) > 0 {
lst = append(lst, lstInFile...)
}
ginx.NewRender(c).Data(lst, nil)
}
func (rt *Router) builtinPayloadcatesGet(c *gin.Context) {
@@ -206,21 +223,31 @@ func (rt *Router) builtinPayloadcatesGet(c *gin.Context) {
ComponentID := ginx.QueryInt64(c, "component_id", 0)
cates, err := models.BuiltinPayloadCates(rt.Ctx, typ, uint64(ComponentID))
ginx.NewRender(c).Data(cates, err)
}
ginx.Dangerous(err)
func (rt *Router) builtinPayloadGet(c *gin.Context) {
id := ginx.UrlParamInt64(c, "id")
catesInFile, err := integration.BuiltinPayloadInFile.GetBuiltinPayloadCates(typ, uint64(ComponentID))
ginx.Dangerous(err)
bp, err := models.BuiltinPayloadGet(rt.Ctx, "id = ?", id)
if err != nil {
ginx.Bomb(http.StatusInternalServerError, err.Error())
}
if bp == nil {
ginx.Bomb(http.StatusNotFound, "builtin payload not found")
// 使用 map 进行去重
cateMap := make(map[string]bool)
// 添加数据库中的分类
for _, cate := range cates {
cateMap[cate] = true
}
ginx.NewRender(c).Data(bp, nil)
// 添加文件中的分类
for _, cate := range catesInFile {
cateMap[cate] = true
}
// 将去重后的结果转换回切片
result := make([]string, 0, len(cateMap))
for cate := range cateMap {
result = append(result, cate)
}
ginx.NewRender(c).Data(result, nil)
}
func (rt *Router) builtinPayloadsPut(c *gin.Context) {
@@ -251,6 +278,7 @@ func (rt *Router) builtinPayloadsPut(c *gin.Context) {
req.Name = dashboard.Name
req.Tags = dashboard.Tags
req.Note = dashboard.Note
} else if req.Type == "collect" {
c := make(map[string]interface{})
if _, err := toml.Decode(req.Content, &c); err != nil {
@@ -273,14 +301,15 @@ func (rt *Router) builtinPayloadsDel(c *gin.Context) {
ginx.NewRender(c).Message(models.BuiltinPayloadDels(rt.Ctx, req.Ids))
}
func (rt *Router) builtinPayloadsGetByUUIDOrID(c *gin.Context) {
uuid := ginx.QueryInt64(c, "uuid", 0)
// 优先以 uuid 为准
if uuid != 0 {
ginx.NewRender(c).Data(models.BuiltinPayloadGet(rt.Ctx, "uuid = ?", uuid))
return
}
func (rt *Router) builtinPayloadsGetByUUID(c *gin.Context) {
uuid := ginx.QueryInt64(c, "uuid")
id := ginx.QueryInt64(c, "id", 0)
ginx.NewRender(c).Data(models.BuiltinPayloadGet(rt.Ctx, "id = ?", id))
bp, err := models.BuiltinPayloadGet(rt.Ctx, "uuid = ?", uuid)
ginx.Dangerous(err)
if bp != nil {
ginx.NewRender(c).Data(bp, nil)
} else {
ginx.NewRender(c).Data(integration.BuiltinPayloadInFile.IndexData[uuid], nil)
}
}

View File

@@ -5,9 +5,9 @@ import (
"github.com/ccfos/nightingale/v6/models"
"github.com/ccfos/nightingale/v6/pkg/strx"
"github.com/ccfos/nightingale/v6/pkg/ginx"
"github.com/gin-gonic/gin"
"github.com/toolkits/pkg/ginx"
"github.com/toolkits/pkg/logger"
)
@@ -119,6 +119,9 @@ func (rt *Router) busiGroupGets(c *gin.Context) {
if len(lst) == 0 {
lst = []models.BusiGroup{}
}
if err == nil {
models.FillUpdateByNicknames(rt.Ctx, lst)
}
ginx.NewRender(c).Data(lst, err)
}

View File

@@ -5,9 +5,9 @@ import (
"time"
"github.com/ccfos/nightingale/v6/storage"
"github.com/ccfos/nightingale/v6/pkg/ginx"
"github.com/gin-gonic/gin"
captcha "github.com/mojocn/base64Captcha"
"github.com/toolkits/pkg/ginx"
"github.com/toolkits/pkg/logger"
)

View File

@@ -5,9 +5,9 @@ import (
"github.com/ccfos/nightingale/v6/models"
"github.com/ccfos/nightingale/v6/pkg/strx"
"github.com/ccfos/nightingale/v6/pkg/ginx"
"github.com/gin-gonic/gin"
"github.com/toolkits/pkg/ginx"
)
func (rt *Router) chartShareGets(c *gin.Context) {

View File

@@ -4,9 +4,9 @@ import (
"encoding/json"
"github.com/ccfos/nightingale/v6/models"
"github.com/ccfos/nightingale/v6/pkg/ginx"
"github.com/gin-gonic/gin"
"github.com/toolkits/pkg/ginx"
)
func (rt *Router) notifyChannelsGets(c *gin.Context) {

View File

@@ -4,9 +4,9 @@ import (
"time"
"github.com/ccfos/nightingale/v6/models"
"github.com/ccfos/nightingale/v6/pkg/ginx"
"github.com/gin-gonic/gin"
"github.com/toolkits/pkg/ginx"
)
const EMBEDDEDDASHBOARD = "embedded-dashboards"
@@ -15,6 +15,9 @@ func (rt *Router) configsGet(c *gin.Context) {
prefix := ginx.QueryStr(c, "prefix", "")
limit := ginx.QueryInt(c, "limit", 10)
configs, err := models.ConfigsGets(rt.Ctx, prefix, limit, ginx.Offset(c, limit))
if err == nil {
models.FillUpdateByNicknames(rt.Ctx, configs)
}
ginx.NewRender(c).Data(configs, err)
}

View File

@@ -2,9 +2,9 @@ package router
import (
"github.com/ccfos/nightingale/v6/pkg/secu"
"github.com/ccfos/nightingale/v6/pkg/ginx"
"github.com/gin-gonic/gin"
"github.com/toolkits/pkg/ginx"
)
type confPropCrypto struct {

View File

@@ -7,9 +7,9 @@ import (
"github.com/ccfos/nightingale/v6/models"
"github.com/ccfos/nightingale/v6/pkg/ctx"
"github.com/ccfos/nightingale/v6/pkg/ginx"
"github.com/gin-gonic/gin"
"github.com/toolkits/pkg/ginx"
)
func checkAnnotationPermission(c *gin.Context, ctx *ctx.Context, dashboardId int64) {

View File

@@ -1,17 +1,23 @@
package router
import (
"context"
"crypto/tls"
"encoding/base64"
"encoding/json"
"fmt"
"io"
"net/http"
"net/url"
"strings"
"time"
"github.com/ccfos/nightingale/v6/datasource/opensearch"
"github.com/ccfos/nightingale/v6/dskit/clickhouse"
"github.com/ccfos/nightingale/v6/models"
"github.com/ccfos/nightingale/v6/pkg/ginx"
"github.com/gin-gonic/gin"
"github.com/toolkits/pkg/ginx"
"github.com/toolkits/pkg/i18n"
"github.com/toolkits/pkg/logger"
)
@@ -47,9 +53,41 @@ func (rt *Router) datasourceList(c *gin.Context) {
func (rt *Router) datasourceGetsByService(c *gin.Context) {
typ := ginx.QueryStr(c, "typ", "")
lst, err := models.GetDatasourcesGetsBy(rt.Ctx, typ, "", "", "")
openRsa := rt.Center.RSA.OpenRSA
for _, item := range lst {
if err := item.Encrypt(openRsa, rt.HTTP.RSA.RSAPublicKey); err != nil {
logger.Errorf("datasource %+v encrypt failed: %v", item, err)
continue
}
}
ginx.NewRender(c).Data(lst, err)
}
func (rt *Router) datasourceRsaConfigGet(c *gin.Context) {
if rt.Center.RSA.OpenRSA {
publicKey := ""
privateKey := ""
if len(rt.HTTP.RSA.RSAPublicKey) > 0 {
publicKey = base64.StdEncoding.EncodeToString(rt.HTTP.RSA.RSAPublicKey)
}
if len(rt.HTTP.RSA.RSAPrivateKey) > 0 {
privateKey = base64.StdEncoding.EncodeToString(rt.HTTP.RSA.RSAPrivateKey)
}
logger.Debugf("OpenRSA=%v", rt.Center.RSA.OpenRSA)
ginx.NewRender(c).Data(models.RsaConfig{
OpenRSA: rt.Center.RSA.OpenRSA,
RSAPublicKey: publicKey,
RSAPrivateKey: privateKey,
RSAPassWord: rt.HTTP.RSA.RSAPassWord,
}, nil)
} else {
ginx.NewRender(c).Data(models.RsaConfig{
OpenRSA: rt.Center.RSA.OpenRSA,
}, nil)
}
}
func (rt *Router) datasourceBriefs(c *gin.Context) {
var dss []*models.Datasource
list, err := models.GetDatasourcesGetsBy(rt.Ctx, "", "", "", "")
@@ -100,7 +138,7 @@ func (rt *Router) datasourceUpsert(c *gin.Context) {
if !req.ForceSave {
if req.PluginType == models.PROMETHEUS || req.PluginType == models.LOKI || req.PluginType == models.TDENGINE {
err = DatasourceCheck(req)
err = DatasourceCheck(c, req)
if err != nil {
Dangerous(c, err)
return
@@ -108,6 +146,121 @@ func (rt *Router) datasourceUpsert(c *gin.Context) {
}
}
for k, v := range req.SettingsJson {
if strings.Contains(k, "cluster_name") {
req.ClusterName = v.(string)
break
}
}
if req.PluginType == models.OPENSEARCH {
b, err := json.Marshal(req.SettingsJson)
if err != nil {
logger.Warningf("marshal settings fail: %v", err)
return
}
var os opensearch.OpenSearch
err = json.Unmarshal(b, &os)
if err != nil {
logger.Warningf("unmarshal settings fail: %v", err)
return
}
if len(os.Nodes) == 0 {
logger.Warningf("nodes empty, %+v", req)
return
}
req.HTTPJson = models.HTTP{
Timeout: os.Timeout,
Url: os.Nodes[0],
Headers: os.Headers,
TLS: models.TLS{
SkipTlsVerify: os.TLS.SkipTlsVerify,
},
}
req.AuthJson = models.Auth{
BasicAuth: os.Basic.Enable,
BasicAuthUser: os.Basic.Username,
BasicAuthPassword: os.Basic.Password,
}
}
if req.PluginType == models.CLICKHOUSE {
b, err := json.Marshal(req.SettingsJson)
if err != nil {
logger.Warningf("marshal clickhouse settings failed: %v", err)
Dangerous(c, err)
return
}
var ckConfig clickhouse.Clickhouse
err = json.Unmarshal(b, &ckConfig)
if err != nil {
logger.Warningf("unmarshal clickhouse settings failed: %v", err)
Dangerous(c, err)
return
}
// 检查ckconfig的nodes不应该以http://或https://开头
for _, addr := range ckConfig.Nodes {
if strings.HasPrefix(addr, "http://") || strings.HasPrefix(addr, "https://") {
err = fmt.Errorf("clickhouse node address should not start with http:// or https:// : %s", addr)
logger.Warningf("clickhouse node address invalid: %v", err)
Dangerous(c, err)
return
}
}
// InitCli 会自动检测并选择 HTTP 或 Native 协议
err = ckConfig.InitCli()
if err != nil {
logger.Warningf("clickhouse connection failed: %v", err)
Dangerous(c, err)
return
}
// 执行 SHOW DATABASES 测试连通性
_, err = ckConfig.ShowDatabases(context.Background())
if err != nil {
logger.Warningf("clickhouse test query failed: %v", err)
Dangerous(c, err)
return
}
}
if req.PluginType == models.ELASTICSEARCH {
skipAuto := false
// 若用户输入了versionversion字符串存在且不为空则不自动获取
if req.SettingsJson != nil {
if v, ok := req.SettingsJson["version"]; ok {
switch vv := v.(type) {
case string:
if strings.TrimSpace(vv) != "" {
skipAuto = true
}
default:
if strings.TrimSpace(fmt.Sprint(vv)) != "" {
skipAuto = true
}
}
}
}
if !skipAuto {
version, err := getElasticsearchVersion(req, 10*time.Second)
if err != nil {
logger.Warningf("failed to get elasticsearch version: %v", err)
} else {
if req.SettingsJson == nil {
req.SettingsJson = make(map[string]interface{})
}
req.SettingsJson["version"] = version
}
}
}
if req.Id == 0 {
req.CreatedBy = username
req.Status = "enabled"
@@ -123,13 +276,13 @@ func (rt *Router) datasourceUpsert(c *gin.Context) {
}
err = req.Add(rt.Ctx)
} else {
err = req.Update(rt.Ctx, "name", "identifier", "description", "cluster_name", "settings", "http", "auth", "updated_by", "updated_at", "is_default")
err = req.Update(rt.Ctx, "name", "identifier", "description", "cluster_name", "settings", "http", "auth", "updated_by", "updated_at", "is_default", "weight")
}
Render(c, nil, err)
}
func DatasourceCheck(ds models.Datasource) error {
func DatasourceCheck(c *gin.Context, ds models.Datasource) error {
if ds.PluginType == models.PROMETHEUS || ds.PluginType == models.LOKI || ds.PluginType == models.TDENGINE {
if ds.HTTPJson.Url == "" {
return fmt.Errorf("url is empty")
@@ -140,11 +293,15 @@ func DatasourceCheck(ds models.Datasource) error {
}
}
// 使用 TLS 配置(支持 mTLS
tlsConfig, err := ds.HTTPJson.TLS.TLSConfig()
if err != nil {
return fmt.Errorf("failed to create TLS config: %v", err)
}
client := &http.Client{
Transport: &http.Transport{
TLSClientConfig: &tls.Config{
InsecureSkipVerify: ds.HTTPJson.TLS.SkipTlsVerify,
},
TLSClientConfig: tlsConfig,
},
}
@@ -188,6 +345,10 @@ func DatasourceCheck(ds models.Datasource) error {
req, err = http.NewRequest("GET", fullURL, nil)
if err != nil {
logger.Errorf("Error creating request: %v", err)
if !strings.Contains(ds.HTTPJson.Url, "/loki") {
lang := c.GetHeader("X-Language")
return fmt.Errorf(i18n.Sprintf(lang, "/loki suffix is miss, please add /loki to the url: %s", ds.HTTPJson.Url+"/loki"))
}
return fmt.Errorf("request url:%s failed: %v", fullURL, err)
}
}
@@ -209,6 +370,10 @@ func DatasourceCheck(ds models.Datasource) error {
if resp.StatusCode != 200 {
logger.Errorf("Error making request: %v\n", resp.StatusCode)
if resp.StatusCode == 404 && ds.PluginType == models.LOKI && !strings.Contains(ds.HTTPJson.Url, "/loki") {
lang := c.GetHeader("X-Language")
return fmt.Errorf(i18n.Sprintf(lang, "/loki suffix is miss, please add /loki to the url: %s", ds.HTTPJson.Url+"/loki"))
}
body, _ := io.ReadAll(resp.Body)
return fmt.Errorf("request url:%s failed code:%d body:%s", fullURL, resp.StatusCode, string(body))
}
@@ -294,3 +459,82 @@ func (rt *Router) datasourceQuery(c *gin.Context) {
}
ginx.NewRender(c).Data(req, err)
}
// getElasticsearchVersion 该函数尝试从提供的Elasticsearch数据源中获取版本号遍历所有URL
// 直到成功获取版本号或所有URL均尝试失败为止。
func getElasticsearchVersion(ds models.Datasource, timeout time.Duration) (string, error) {
client := &http.Client{
Timeout: timeout,
Transport: &http.Transport{
TLSClientConfig: &tls.Config{
InsecureSkipVerify: ds.HTTPJson.TLS.SkipTlsVerify,
},
},
}
urls := make([]string, 0)
if len(ds.HTTPJson.Urls) > 0 {
urls = append(urls, ds.HTTPJson.Urls...)
}
if ds.HTTPJson.Url != "" {
urls = append(urls, ds.HTTPJson.Url)
}
if len(urls) == 0 {
return "", fmt.Errorf("no url provided")
}
var lastErr error
for _, raw := range urls {
baseURL := strings.TrimRight(raw, "/") + "/"
req, err := http.NewRequest("GET", baseURL, nil)
if err != nil {
lastErr = err
continue
}
if ds.AuthJson.BasicAuthUser != "" {
req.SetBasicAuth(ds.AuthJson.BasicAuthUser, ds.AuthJson.BasicAuthPassword)
}
for k, v := range ds.HTTPJson.Headers {
req.Header.Set(k, v)
}
resp, err := client.Do(req)
if err != nil {
lastErr = err
continue
}
body, err := io.ReadAll(resp.Body)
resp.Body.Close()
if err != nil {
lastErr = err
continue
}
if resp.StatusCode != 200 {
lastErr = fmt.Errorf("request to %s failed with status: %d body:%s", baseURL, resp.StatusCode, string(body))
continue
}
var result map[string]interface{}
if err := json.Unmarshal(body, &result); err != nil {
lastErr = err
continue
}
if version, ok := result["version"].(map[string]interface{}); ok {
if number, ok := version["number"].(string); ok && number != "" {
return number, nil
}
}
lastErr = fmt.Errorf("version not found in response from %s", baseURL)
}
if lastErr != nil {
return "", lastErr
}
return "", fmt.Errorf("failed to get elasticsearch version")
}

View File

@@ -6,10 +6,10 @@ import (
"github.com/ccfos/nightingale/v6/dscache"
"github.com/ccfos/nightingale/v6/dskit/types"
"github.com/ccfos/nightingale/v6/models"
"github.com/ccfos/nightingale/v6/pkg/ginx"
"github.com/ccfos/nightingale/v6/pkg/logx"
"github.com/gin-gonic/gin"
"github.com/toolkits/pkg/ginx"
"github.com/toolkits/pkg/logger"
)
func (rt *Router) ShowDatabases(c *gin.Context) {
@@ -18,7 +18,7 @@ func (rt *Router) ShowDatabases(c *gin.Context) {
plug, exists := dscache.DsCache.Get(f.Cate, f.DatasourceId)
if !exists {
logger.Warningf("cluster:%d not exists", f.DatasourceId)
logx.Warningf(c.Request.Context(), "cluster:%d not exists", f.DatasourceId)
ginx.Bomb(200, "cluster not exists")
}
@@ -48,7 +48,7 @@ func (rt *Router) ShowTables(c *gin.Context) {
plug, exists := dscache.DsCache.Get(f.Cate, f.DatasourceId)
if !exists {
logger.Warningf("cluster:%d not exists", f.DatasourceId)
logx.Warningf(c.Request.Context(), "cluster:%d not exists", f.DatasourceId)
ginx.Bomb(200, "cluster not exists")
}
@@ -60,8 +60,8 @@ func (rt *Router) ShowTables(c *gin.Context) {
}
switch plug.(type) {
case TableShower:
if len(f.Querys) > 0 {
database, ok := f.Querys[0].(string)
if len(f.Queries) > 0 {
database, ok := f.Queries[0].(string)
if ok {
tables, err = plug.(TableShower).ShowTables(c.Request.Context(), database)
}
@@ -78,7 +78,7 @@ func (rt *Router) DescribeTable(c *gin.Context) {
plug, exists := dscache.DsCache.Get(f.Cate, f.DatasourceId)
if !exists {
logger.Warningf("cluster:%d not exists", f.DatasourceId)
logx.Warningf(c.Request.Context(), "cluster:%d not exists", f.DatasourceId)
ginx.Bomb(200, "cluster not exists")
}
// 只接受一个入参
@@ -90,8 +90,8 @@ func (rt *Router) DescribeTable(c *gin.Context) {
switch plug.(type) {
case TableDescriber:
client := plug.(TableDescriber)
if len(f.Querys) > 0 {
columns, err = client.DescribeTable(c.Request.Context(), f.Querys[0])
if len(f.Queries) > 0 {
columns, err = client.DescribeTable(c.Request.Context(), f.Queries[0])
}
default:
ginx.Bomb(200, "datasource not exists")

View File

@@ -5,14 +5,15 @@ import (
"github.com/ccfos/nightingale/v6/models"
"github.com/ccfos/nightingale/v6/pkg/ctx"
"github.com/ccfos/nightingale/v6/pkg/ginx"
"github.com/gin-gonic/gin"
"github.com/toolkits/pkg/ginx"
)
func (rt *Router) embeddedProductGets(c *gin.Context) {
products, err := models.EmbeddedProductGets(rt.Ctx)
ginx.Dangerous(err)
models.FillUpdateByNicknames(rt.Ctx, products)
// 获取当前用户可访问的Group ID 列表
me := c.MustGet("user").(*models.User)

View File

@@ -3,10 +3,10 @@ package router
import (
"github.com/ccfos/nightingale/v6/datasource/es"
"github.com/ccfos/nightingale/v6/dscache"
"github.com/ccfos/nightingale/v6/pkg/ginx"
"github.com/ccfos/nightingale/v6/pkg/logx"
"github.com/gin-gonic/gin"
"github.com/toolkits/pkg/ginx"
"github.com/toolkits/pkg/logger"
)
type IndexReq struct {
@@ -34,7 +34,7 @@ func (rt *Router) QueryIndices(c *gin.Context) {
plug, exists := dscache.DsCache.Get(f.Cate, f.DatasourceId)
if !exists {
logger.Warningf("cluster:%d not exists", f.DatasourceId)
logx.Warningf(c.Request.Context(), "cluster:%d not exists", f.DatasourceId)
ginx.Bomb(200, "cluster not exists")
}
@@ -50,7 +50,7 @@ func (rt *Router) QueryFields(c *gin.Context) {
plug, exists := dscache.DsCache.Get(f.Cate, f.DatasourceId)
if !exists {
logger.Warningf("cluster:%d not exists", f.DatasourceId)
logx.Warningf(c.Request.Context(), "cluster:%d not exists", f.DatasourceId)
ginx.Bomb(200, "cluster not exists")
}
@@ -66,7 +66,7 @@ func (rt *Router) QueryESVariable(c *gin.Context) {
plug, exists := dscache.DsCache.Get(f.Cate, f.DatasourceId)
if !exists {
logger.Warningf("cluster:%d not exists", f.DatasourceId)
logx.Warningf(c.Request.Context(), "cluster:%d not exists", f.DatasourceId)
ginx.Bomb(200, "cluster not exists")
}

View File

@@ -5,8 +5,8 @@ import (
"time"
"github.com/ccfos/nightingale/v6/models"
"github.com/ccfos/nightingale/v6/pkg/ginx"
"github.com/gin-gonic/gin"
"github.com/toolkits/pkg/ginx"
)
// 创建 ES Index Pattern
@@ -69,6 +69,10 @@ func (rt *Router) esIndexPatternGetList(c *gin.Context) {
lst, err = models.EsIndexPatternGets(rt.Ctx, "")
}
if err == nil {
models.FillUpdateByNicknames(rt.Ctx, lst)
}
ginx.NewRender(c).Data(lst, err)
}

View File

@@ -0,0 +1,149 @@
package router
import (
"encoding/json"
"fmt"
"io"
"net/http"
"strconv"
"time"
"github.com/ccfos/nightingale/v6/alert/naming"
"github.com/ccfos/nightingale/v6/models"
"github.com/ccfos/nightingale/v6/pkg/loggrep"
"github.com/ccfos/nightingale/v6/pkg/ginx"
"github.com/gin-gonic/gin"
)
// eventDetailPage renders an HTML log viewer page (for pages group).
func (rt *Router) eventDetailPage(c *gin.Context) {
hash := ginx.UrlParamStr(c, "hash")
if !loggrep.IsValidHash(hash) {
c.String(http.StatusBadRequest, "invalid hash format")
return
}
logs, instance, err := rt.getEventLogs(hash)
if err != nil {
c.String(http.StatusInternalServerError, "Error: %v", err)
return
}
c.Header("Content-Type", "text/html; charset=utf-8")
err = loggrep.RenderHTML(c.Writer, loggrep.PageData{
Hash: hash,
Instance: instance,
Logs: logs,
Total: len(logs),
})
if err != nil {
c.String(http.StatusInternalServerError, "render error: %v", err)
}
}
// eventDetailJSON returns JSON (for service group).
func (rt *Router) eventDetailJSON(c *gin.Context) {
hash := ginx.UrlParamStr(c, "hash")
if !loggrep.IsValidHash(hash) {
ginx.Bomb(200, "invalid hash format")
}
logs, instance, err := rt.getEventLogs(hash)
ginx.Dangerous(err)
ginx.NewRender(c).Data(loggrep.EventDetailResp{
Logs: logs,
Instance: instance,
}, nil)
}
// getNodeForDatasource returns the alert engine instance responsible for the given
// datasource and primary key. It first checks the local hashring, and falls back
// to querying the database for active instances if the hashring is empty
// (e.g. when the datasource belongs to another engine cluster).
func (rt *Router) getNodeForDatasource(datasourceId int64, pk string) (string, error) {
dsIdStr := strconv.FormatInt(datasourceId, 10)
node, err := naming.DatasourceHashRing.GetNode(dsIdStr, pk)
if err == nil {
return node, nil
}
// Hashring is empty for this datasource (likely belongs to another engine cluster).
// Query the DB for active instances.
servers, dbErr := models.AlertingEngineGetsInstances(rt.Ctx,
"datasource_id = ? and clock > ?",
datasourceId, time.Now().Unix()-30)
if dbErr != nil {
return "", dbErr
}
if len(servers) == 0 {
return "", fmt.Errorf("no active instances for datasource %d", datasourceId)
}
ring := naming.NewConsistentHashRing(int32(naming.NodeReplicas), servers)
return ring.Get(pk)
}
// getEventLogs resolves the target instance and retrieves logs.
func (rt *Router) getEventLogs(hash string) ([]string, string, error) {
event, err := models.AlertHisEventGetByHash(rt.Ctx, hash)
if err != nil {
return nil, "", err
}
if event == nil {
return nil, "", fmt.Errorf("no such alert event")
}
ruleId := strconv.FormatInt(event.RuleId, 10)
instance := fmt.Sprintf("%s:%d", rt.Alert.Heartbeat.IP, rt.HTTP.Port)
node, err := rt.getNodeForDatasource(event.DatasourceId, ruleId)
if err != nil || node == instance {
// hashring not ready or target is self, handle locally
logs, err := loggrep.GrepLogDir(rt.LogDir, hash)
return logs, instance, err
}
// forward to the target alert instance
return rt.forwardEventDetail(node, hash)
}
func (rt *Router) forwardEventDetail(node, hash string) ([]string, string, error) {
url := fmt.Sprintf("http://%s/v1/n9e/event-detail/%s", node, hash)
req, err := http.NewRequest("GET", url, nil)
if err != nil {
return nil, node, err
}
for user, pass := range rt.HTTP.APIForService.BasicAuth {
req.SetBasicAuth(user, pass)
break
}
client := &http.Client{Timeout: 15 * time.Second}
resp, err := client.Do(req)
if err != nil {
return nil, node, fmt.Errorf("forward to %s failed: %v", node, err)
}
defer resp.Body.Close()
body, err := io.ReadAll(io.LimitReader(resp.Body, 10*1024*1024)) // 10MB limit
if err != nil {
return nil, node, err
}
var result struct {
Dat loggrep.EventDetailResp `json:"dat"`
Err string `json:"err"`
}
if err := json.Unmarshal(body, &result); err != nil {
return nil, node, err
}
if result.Err != "" {
return nil, node, fmt.Errorf("%s", result.Err)
}
return result.Dat.Logs, result.Dat.Instance, nil
}

View File

@@ -1,13 +1,18 @@
package router
import (
"encoding/json"
"fmt"
"net/http"
"time"
"github.com/ccfos/nightingale/v6/alert/pipeline/engine"
"github.com/ccfos/nightingale/v6/models"
"github.com/ccfos/nightingale/v6/pkg/ginx"
"github.com/gin-gonic/gin"
"github.com/toolkits/pkg/ginx"
"github.com/google/uuid"
"github.com/toolkits/pkg/i18n"
"github.com/toolkits/pkg/logger"
)
@@ -27,18 +32,38 @@ func (rt *Router) eventPipelinesList(c *gin.Context) {
for _, tid := range pipeline.TeamIds {
pipeline.TeamNames = append(pipeline.TeamNames, ugMap[tid])
}
// 兼容处理:自动填充工作流字段
pipeline.FillWorkflowFields()
}
models.FillUpdateByNicknames(rt.Ctx, pipelines)
gids, err := models.MyGroupIdsMap(rt.Ctx, me.Id)
ginx.Dangerous(err)
if me.IsAdmin() {
for _, pipeline := range pipelines {
if pipeline.TriggerMode == "" {
pipeline.TriggerMode = models.TriggerModeEvent
}
if pipeline.UseCase == "" {
pipeline.UseCase = models.UseCaseEventPipeline
}
}
ginx.NewRender(c).Data(pipelines, nil)
return
}
res := make([]*models.EventPipeline, 0)
for _, pipeline := range pipelines {
if pipeline.TriggerMode == "" {
pipeline.TriggerMode = models.TriggerModeEvent
}
if pipeline.UseCase == "" {
pipeline.UseCase = models.UseCaseEventPipeline
}
for _, tid := range pipeline.TeamIds {
if _, ok := gids[tid]; ok {
res = append(res, pipeline)
@@ -61,6 +86,15 @@ func (rt *Router) getEventPipeline(c *gin.Context) {
err = pipeline.FillTeamNames(rt.Ctx)
ginx.Dangerous(err)
// 兼容处理:自动填充工作流字段
pipeline.FillWorkflowFields()
if pipeline.TriggerMode == "" {
pipeline.TriggerMode = models.TriggerModeEvent
}
if pipeline.UseCase == "" {
pipeline.UseCase = models.UseCaseEventPipeline
}
ginx.NewRender(c).Data(pipeline, nil)
}
@@ -131,7 +165,9 @@ func (rt *Router) tryRunEventPipeline(c *gin.Context) {
var f struct {
EventId int64 `json:"event_id"`
PipelineConfig models.EventPipeline `json:"pipeline_config"`
InputVariables map[string]string `json:"input_variables,omitempty"`
}
ginx.BindJSON(c, &f)
hisEvent, err := models.AlertHisEventGetById(rt.Ctx, f.EventId)
@@ -140,18 +176,35 @@ func (rt *Router) tryRunEventPipeline(c *gin.Context) {
}
event := hisEvent.ToCur()
for _, p := range f.PipelineConfig.ProcessorConfigs {
processor, err := models.GetProcessorByType(p.Typ, p.Config)
if err != nil {
ginx.Bomb(http.StatusBadRequest, "get processor: %+v err: %+v", p, err)
}
event = processor.Process(rt.Ctx, event)
if event == nil {
ginx.Bomb(http.StatusBadRequest, "event is dropped")
}
lang := c.GetHeader("X-Language")
me := c.MustGet("user").(*models.User)
// 统一使用工作流引擎执行(兼容线性模式和工作流模式)
workflowEngine := engine.NewWorkflowEngine(rt.Ctx)
triggerCtx := &models.WorkflowTriggerContext{
Mode: models.TriggerModeAPI,
TriggerBy: me.Username,
InputsOverrides: f.InputVariables,
}
ginx.NewRender(c).Data(event, nil)
resultEvent, result, err := workflowEngine.Execute(&f.PipelineConfig, event, triggerCtx)
if err != nil {
ginx.Bomb(http.StatusBadRequest, "pipeline execute error: %v", err)
}
m := map[string]interface{}{
"event": resultEvent,
"result": i18n.Sprintf(lang, result.Message),
"status": result.Status,
"node_results": result.NodeResults,
}
if resultEvent == nil {
m["result"] = i18n.Sprintf(lang, "event is dropped")
}
ginx.NewRender(c).Data(m, nil)
}
// 测试事件处理器
@@ -170,15 +223,22 @@ func (rt *Router) tryRunEventProcessor(c *gin.Context) {
processor, err := models.GetProcessorByType(f.ProcessorConfig.Typ, f.ProcessorConfig.Config)
if err != nil {
ginx.Bomb(http.StatusBadRequest, "get processor err: %+v", err)
ginx.Bomb(200, "get processor err: %+v", err)
}
event = processor.Process(rt.Ctx, event)
logger.Infof("processor %+v result: %+v", f.ProcessorConfig, event)
if event == nil {
ginx.Bomb(http.StatusBadRequest, "event is dropped")
wfCtx := &models.WorkflowContext{
Event: event,
Vars: make(map[string]interface{}),
}
wfCtx, res, err := processor.Process(rt.Ctx, wfCtx)
if err != nil {
ginx.Bomb(200, "processor err: %+v", err)
}
ginx.NewRender(c).Data(event, nil)
lang := c.GetHeader("X-Language")
ginx.NewRender(c).Data(map[string]interface{}{
"event": wfCtx.Event,
"result": i18n.Sprintf(lang, res),
}, nil)
}
func (rt *Router) tryRunEventProcessorByNotifyRule(c *gin.Context) {
@@ -206,23 +266,374 @@ func (rt *Router) tryRunEventProcessorByNotifyRule(c *gin.Context) {
ginx.Bomb(http.StatusBadRequest, "processors not found")
}
wfCtx := &models.WorkflowContext{
Event: event,
Vars: make(map[string]interface{}),
}
for _, pl := range pipelines {
for _, p := range pl.ProcessorConfigs {
processor, err := models.GetProcessorByType(p.Typ, p.Config)
if err != nil {
ginx.Bomb(http.StatusBadRequest, "get processor: %+v err: %+v", p, err)
}
event = processor.Process(rt.Ctx, event)
if event == nil {
ginx.Bomb(http.StatusBadRequest, "event is dropped")
wfCtx, _, err = processor.Process(rt.Ctx, wfCtx)
if err != nil {
ginx.Bomb(http.StatusBadRequest, "processor: %+v err: %+v", p, err)
}
if wfCtx == nil || wfCtx.Event == nil {
lang := c.GetHeader("X-Language")
ginx.NewRender(c).Data(map[string]interface{}{
"event": nil,
"result": i18n.Sprintf(lang, "event is dropped"),
}, nil)
return
}
}
}
ginx.NewRender(c).Data(event, nil)
ginx.NewRender(c).Data(wfCtx.Event, nil)
}
func (rt *Router) eventPipelinesListByService(c *gin.Context) {
pipelines, err := models.ListEventPipelines(rt.Ctx)
ginx.NewRender(c).Data(pipelines, err)
}
type EventPipelineRequest struct {
// 事件数据(可选,如果不传则使用空事件)
Event *models.AlertCurEvent `json:"event,omitempty"`
// 输入参数覆盖
InputsOverrides map[string]string `json:"inputs_overrides,omitempty"`
Username string `json:"username,omitempty"`
}
// executePipelineTrigger 执行 Pipeline 触发的公共逻辑
func (rt *Router) executePipelineTrigger(pipeline *models.EventPipeline, req *EventPipelineRequest, triggerBy string) (string, error) {
// 准备事件数据
var event *models.AlertCurEvent
if req.Event != nil {
event = req.Event
} else {
// 创建空事件
event = &models.AlertCurEvent{
TriggerTime: time.Now().Unix(),
}
}
// 生成执行ID
executionID := uuid.New().String()
// 创建触发上下文
triggerCtx := &models.WorkflowTriggerContext{
Mode: models.TriggerModeAPI,
TriggerBy: triggerBy,
InputsOverrides: req.InputsOverrides,
RequestID: executionID,
}
// 异步执行工作流
go func() {
workflowEngine := engine.NewWorkflowEngine(rt.Ctx)
_, _, err := workflowEngine.Execute(pipeline, event, triggerCtx)
if err != nil {
logger.Errorf("async workflow execute error: pipeline_id=%d execution_id=%s err=%v",
pipeline.ID, executionID, err)
}
}()
return executionID, nil
}
// triggerEventPipelineByService Service 调用触发工作流执行
func (rt *Router) triggerEventPipelineByService(c *gin.Context) {
pipelineID := ginx.UrlParamInt64(c, "id")
var f EventPipelineRequest
ginx.BindJSON(c, &f)
// 获取 Pipeline
pipeline, err := models.GetEventPipeline(rt.Ctx, pipelineID)
if err != nil {
ginx.Bomb(http.StatusNotFound, "pipeline not found: %v", err)
}
executionID, err := rt.executePipelineTrigger(pipeline, &f, f.Username)
if err != nil {
ginx.Bomb(http.StatusBadRequest, "%v", err)
}
ginx.NewRender(c).Data(gin.H{
"execution_id": executionID,
"message": "workflow execution started",
}, nil)
}
// triggerEventPipelineByAPI API 触发工作流执行
func (rt *Router) triggerEventPipelineByAPI(c *gin.Context) {
pipelineID := ginx.UrlParamInt64(c, "id")
var f EventPipelineRequest
ginx.BindJSON(c, &f)
// 获取 Pipeline
pipeline, err := models.GetEventPipeline(rt.Ctx, pipelineID)
if err != nil {
ginx.Bomb(http.StatusNotFound, "pipeline not found: %v", err)
}
// 检查权限
me := c.MustGet("user").(*models.User)
ginx.Dangerous(me.CheckGroupPermission(rt.Ctx, pipeline.TeamIds))
executionID, err := rt.executePipelineTrigger(pipeline, &f, me.Username)
if err != nil {
ginx.Bomb(http.StatusBadRequest, err.Error())
}
ginx.NewRender(c).Data(gin.H{
"execution_id": executionID,
"message": "workflow execution started",
}, nil)
}
func (rt *Router) listAllEventPipelineExecutions(c *gin.Context) {
pipelineId := ginx.QueryInt64(c, "pipeline_id", 0)
pipelineName := ginx.QueryStr(c, "pipeline_name", "")
mode := ginx.QueryStr(c, "mode", "")
status := ginx.QueryStr(c, "status", "")
limit := ginx.QueryInt(c, "limit", 20)
offset := ginx.QueryInt(c, "p", 1)
if limit <= 0 || limit > 1000 {
limit = 20
}
if offset <= 0 {
offset = 1
}
executions, total, err := models.ListAllEventPipelineExecutions(rt.Ctx, pipelineId, pipelineName, mode, status, limit, (offset-1)*limit)
ginx.Dangerous(err)
ginx.NewRender(c).Data(gin.H{
"list": executions,
"total": total,
}, nil)
}
func (rt *Router) listEventPipelineExecutions(c *gin.Context) {
pipelineID := ginx.UrlParamInt64(c, "id")
mode := ginx.QueryStr(c, "mode", "")
status := ginx.QueryStr(c, "status", "")
limit := ginx.QueryInt(c, "limit", 20)
offset := ginx.QueryInt(c, "p", 1)
if limit <= 0 || limit > 1000 {
limit = 20
}
if offset <= 0 {
offset = 1
}
executions, total, err := models.ListEventPipelineExecutions(rt.Ctx, pipelineID, mode, status, limit, (offset-1)*limit)
ginx.Dangerous(err)
ginx.NewRender(c).Data(gin.H{
"list": executions,
"total": total,
}, nil)
}
func (rt *Router) getEventPipelineExecution(c *gin.Context) {
execID := ginx.UrlParamStr(c, "exec_id")
detail, err := models.GetEventPipelineExecutionDetail(rt.Ctx, execID)
if err != nil {
ginx.Bomb(http.StatusNotFound, "execution not found: %v", err)
}
ginx.NewRender(c).Data(detail, nil)
}
func (rt *Router) getEventPipelineExecutionStats(c *gin.Context) {
pipelineID := ginx.UrlParamInt64(c, "id")
stats, err := models.GetEventPipelineExecutionStatistics(rt.Ctx, pipelineID)
ginx.Dangerous(err)
ginx.NewRender(c).Data(stats, nil)
}
func (rt *Router) cleanEventPipelineExecutions(c *gin.Context) {
var f struct {
BeforeDays int `json:"before_days"`
}
ginx.BindJSON(c, &f)
if f.BeforeDays <= 0 {
f.BeforeDays = 30
}
beforeTime := time.Now().AddDate(0, 0, -f.BeforeDays).Unix()
affected, err := models.DeleteEventPipelineExecutions(rt.Ctx, beforeTime)
ginx.Dangerous(err)
ginx.NewRender(c).Data(gin.H{
"deleted": affected,
}, nil)
}
func (rt *Router) streamEventPipeline(c *gin.Context) {
pipelineID := ginx.UrlParamInt64(c, "id")
var f EventPipelineRequest
ginx.BindJSON(c, &f)
pipeline, err := models.GetEventPipeline(rt.Ctx, pipelineID)
if err != nil {
ginx.Bomb(http.StatusNotFound, "pipeline not found: %v", err)
}
me := c.MustGet("user").(*models.User)
ginx.Dangerous(me.CheckGroupPermission(rt.Ctx, pipeline.TeamIds))
var event *models.AlertCurEvent
if f.Event != nil {
event = f.Event
} else {
event = &models.AlertCurEvent{
TriggerTime: time.Now().Unix(),
}
}
triggerCtx := &models.WorkflowTriggerContext{
Mode: models.TriggerModeAPI,
TriggerBy: me.Username,
InputsOverrides: f.InputsOverrides,
RequestID: uuid.New().String(),
Stream: true, // 流式端点强制启用流式输出
}
workflowEngine := engine.NewWorkflowEngine(rt.Ctx)
_, result, err := workflowEngine.Execute(pipeline, event, triggerCtx)
if err != nil {
ginx.Bomb(http.StatusInternalServerError, "execute failed: %v", err)
}
if result.Stream && result.StreamChan != nil {
rt.handleStreamResponse(c, result, triggerCtx.RequestID)
return
}
ginx.NewRender(c).Data(result, nil)
}
func (rt *Router) handleStreamResponse(c *gin.Context, result *models.WorkflowResult, requestID string) {
// 设置 SSE 响应头
c.Header("Content-Type", "text/event-stream")
c.Header("Cache-Control", "no-cache")
c.Header("Connection", "keep-alive")
c.Header("X-Accel-Buffering", "no") // 禁用 nginx 缓冲
c.Header("X-Request-ID", requestID)
flusher, ok := c.Writer.(http.Flusher)
if !ok {
ginx.Bomb(http.StatusInternalServerError, "streaming not supported")
return
}
// 发送初始连接成功消息
initData := fmt.Sprintf(`{"type":"connected","request_id":"%s","timestamp":%d}`, requestID, time.Now().UnixMilli())
fmt.Fprintf(c.Writer, "data: %s\n\n", initData)
flusher.Flush()
// 从 channel 读取并发送 SSE
timeout := time.After(30 * time.Minute) // 最长流式输出时间
for {
select {
case chunk, ok := <-result.StreamChan:
if !ok {
// channel 关闭,发送结束标记
return
}
data, err := json.Marshal(chunk)
if err != nil {
logger.Errorf("stream: failed to marshal chunk: %v", err)
continue
}
fmt.Fprintf(c.Writer, "data: %s\n\n", data)
flusher.Flush()
if chunk.Done {
return
}
case <-c.Request.Context().Done():
// 客户端断开连接
logger.Infof("stream: client disconnected, request_id=%s", requestID)
return
case <-timeout:
logger.Errorf("stream: timeout, request_id=%s", requestID)
return
}
}
}
func (rt *Router) streamEventPipelineByService(c *gin.Context) {
pipelineID := ginx.UrlParamInt64(c, "id")
var f EventPipelineRequest
ginx.BindJSON(c, &f)
pipeline, err := models.GetEventPipeline(rt.Ctx, pipelineID)
if err != nil {
ginx.Bomb(http.StatusNotFound, "pipeline not found: %v", err)
}
var event *models.AlertCurEvent
if f.Event != nil {
event = f.Event
} else {
event = &models.AlertCurEvent{
TriggerTime: time.Now().Unix(),
}
}
triggerCtx := &models.WorkflowTriggerContext{
Mode: models.TriggerModeAPI,
TriggerBy: f.Username,
InputsOverrides: f.InputsOverrides,
RequestID: uuid.New().String(),
Stream: true, // 流式端点强制启用流式输出
}
workflowEngine := engine.NewWorkflowEngine(rt.Ctx)
_, result, err := workflowEngine.Execute(pipeline, event, triggerCtx)
if err != nil {
ginx.Bomb(http.StatusInternalServerError, "execute failed: %v", err)
}
// 检查是否是流式输出
if result.Stream && result.StreamChan != nil {
rt.handleStreamResponse(c, result, triggerCtx.RequestID)
return
}
ginx.NewRender(c).Data(result, nil)
}
// eventPipelineExecutionAdd 接收 edge 节点同步的 Pipeline 执行记录
func (rt *Router) eventPipelineExecutionAdd(c *gin.Context) {
var execution models.EventPipelineExecution
ginx.BindJSON(c, &execution)
if execution.ID == "" {
ginx.Bomb(http.StatusBadRequest, "id is required")
}
if execution.PipelineID <= 0 {
ginx.Bomb(http.StatusBadRequest, "pipeline_id is required")
}
ginx.NewRender(c).Message(models.DB(rt.Ctx).Create(&execution).Error)
}

View File

@@ -7,9 +7,9 @@ import (
"github.com/ccfos/nightingale/v6/models"
"github.com/ccfos/nightingale/v6/pkg/ctx"
"github.com/ccfos/nightingale/v6/pkg/ginx"
"github.com/gin-gonic/gin"
"github.com/toolkits/pkg/ginx"
)
const defaultLimit = 300
@@ -128,6 +128,12 @@ func UserGroup(ctx *ctx.Context, id int64) *models.UserGroup {
ginx.Bomb(http.StatusNotFound, "No such UserGroup")
}
bgids, err := models.BusiGroupIds(ctx, []int64{id})
ginx.Dangerous(err)
obj.BusiGroups, err = models.BusiGroupGetByIds(ctx, bgids)
ginx.Dangerous(err)
return obj
}

View File

@@ -15,9 +15,9 @@ import (
"github.com/ccfos/nightingale/v6/models"
"github.com/ccfos/nightingale/v6/pkg/ctx"
"github.com/ccfos/nightingale/v6/pushgw/idents"
"github.com/ccfos/nightingale/v6/pkg/ginx"
"github.com/gin-gonic/gin"
"github.com/toolkits/pkg/ginx"
"github.com/toolkits/pkg/logger"
)

View File

@@ -2,23 +2,29 @@ package router
import (
"encoding/base64"
"encoding/json"
"fmt"
"net/http"
"strconv"
"strings"
"time"
"github.com/ccfos/nightingale/v6/models"
"github.com/ccfos/nightingale/v6/pkg/cas"
"github.com/ccfos/nightingale/v6/pkg/dingtalk"
"github.com/ccfos/nightingale/v6/pkg/feishu"
"github.com/ccfos/nightingale/v6/pkg/ldapx"
"github.com/ccfos/nightingale/v6/pkg/logx"
"github.com/ccfos/nightingale/v6/pkg/oauth2x"
"github.com/ccfos/nightingale/v6/pkg/oidcx"
"github.com/ccfos/nightingale/v6/pkg/secu"
"github.com/ccfos/nightingale/v6/pkg/ginx"
"github.com/dgrijalva/jwt-go"
"github.com/gin-gonic/gin"
"github.com/pelletier/go-toml/v2"
"github.com/toolkits/pkg/ginx"
"github.com/toolkits/pkg/logger"
"github.com/pkg/errors"
"gorm.io/gorm"
)
type loginForm struct {
@@ -31,7 +37,9 @@ type loginForm struct {
func (rt *Router) loginPost(c *gin.Context) {
var f loginForm
ginx.BindJSON(c, &f)
logger.Infof("username:%s login from:%s", f.Username, c.ClientIP())
rctx := c.Request.Context()
logx.Infof(rctx, "username:%s login from:%s", f.Username, c.ClientIP())
if rt.HTTP.ShowCaptcha.Enable {
if !CaptchaVerify(f.Captchaid, f.Verifyvalue) {
@@ -44,23 +52,25 @@ func (rt *Router) loginPost(c *gin.Context) {
if rt.HTTP.RSA.OpenRSA {
decPassWord, err := secu.Decrypt(f.Password, rt.HTTP.RSA.RSAPrivateKey, rt.HTTP.RSA.RSAPassWord)
if err != nil {
logger.Errorf("RSA Decrypt failed: %v username: %s", err, f.Username)
logx.Errorf(rctx, "RSA Decrypt failed: %v username: %s", err, f.Username)
ginx.NewRender(c).Message(err)
return
}
authPassWord = decPassWord
}
reqCtx := rt.Ctx.WithContext(rctx)
var user *models.User
var err error
lc := rt.Sso.LDAP.Copy()
if lc.Enable {
user, err = ldapx.LdapLogin(rt.Ctx, f.Username, authPassWord, lc.DefaultRoles, lc.DefaultTeams, lc)
user, err = ldapx.LdapLogin(reqCtx, f.Username, authPassWord, lc.DefaultRoles, lc.DefaultTeams, lc)
if err != nil {
logger.Debugf("ldap login failed: %v username: %s", err, f.Username)
logx.Debugf(rctx, "ldap login failed: %v username: %s", err, f.Username)
var errLoginInN9e error
// to use n9e as the minimum guarantee for login
if user, errLoginInN9e = models.PassLogin(rt.Ctx, rt.Redis, f.Username, authPassWord); errLoginInN9e != nil {
if user, errLoginInN9e = models.PassLogin(reqCtx, rt.Redis, f.Username, authPassWord); errLoginInN9e != nil {
ginx.NewRender(c).Message("ldap login failed: %v; n9e login failed: %v", err, errLoginInN9e)
return
}
@@ -68,7 +78,7 @@ func (rt *Router) loginPost(c *gin.Context) {
user.RolesLst = strings.Fields(user.Roles)
}
} else {
user, err = models.PassLogin(rt.Ctx, rt.Redis, f.Username, authPassWord)
user, err = models.PassLogin(reqCtx, rt.Redis, f.Username, authPassWord)
ginx.Dangerous(err)
}
@@ -92,7 +102,8 @@ func (rt *Router) loginPost(c *gin.Context) {
}
func (rt *Router) logoutPost(c *gin.Context) {
logger.Infof("username:%s logout from:%s", c.GetString("username"), c.ClientIP())
rctx := c.Request.Context()
logx.Infof(rctx, "username:%s logout from:%s", c.GetString("username"), c.ClientIP())
metadata, err := rt.extractTokenMetadata(c.Request)
if err != nil {
ginx.NewRender(c, http.StatusBadRequest).Message("failed to parse jwt token")
@@ -107,9 +118,20 @@ func (rt *Router) logoutPost(c *gin.Context) {
var logoutAddr string
user := c.MustGet("user").(*models.User)
// 获取用户的 id_token
idToken, err := rt.fetchIdToken(c.Request.Context(), user.Id)
if err != nil {
logx.Debugf(rctx, "fetch id_token failed: %v, user_id: %d", err, user.Id)
idToken = "" // 如果获取失败,使用空字符串
}
// 删除 id_token
rt.deleteIdToken(c.Request.Context(), user.Id)
switch user.Belong {
case "oidc":
logoutAddr = rt.Sso.OIDC.GetSsoLogoutAddr()
logoutAddr = rt.Sso.OIDC.GetSsoLogoutAddr(idToken)
case "cas":
logoutAddr = rt.Sso.CAS.GetSsoLogoutAddr()
case "oauth2":
@@ -199,6 +221,14 @@ func (rt *Router) refreshPost(c *gin.Context) {
ginx.Dangerous(err)
ginx.Dangerous(rt.createAuth(c.Request.Context(), userIdentity, ts))
// 延长 id_token 的过期时间,使其与新的 refresh token 生命周期保持一致
// 注意:这里不会获取新的 id_token只是延长 Redis 中现有 id_token 的 TTL
if idToken, err := rt.fetchIdToken(c.Request.Context(), userid); err == nil && idToken != "" {
if err := rt.saveIdToken(c.Request.Context(), userid, idToken); err != nil {
logx.Debugf(c.Request.Context(), "refresh id_token ttl failed: %v, user_id: %d", err, userid)
}
}
ginx.NewRender(c).Data(gin.H{
"access_token": ts.AccessToken,
"refresh_token": ts.RefreshToken,
@@ -246,12 +276,13 @@ type CallbackOutput struct {
}
func (rt *Router) loginCallback(c *gin.Context) {
rctx := c.Request.Context()
code := ginx.QueryStr(c, "code", "")
state := ginx.QueryStr(c, "state", "")
ret, err := rt.Sso.OIDC.Callback(rt.Redis, c.Request.Context(), code, state)
ret, err := rt.Sso.OIDC.Callback(rt.Redis, rctx, code, state)
if err != nil {
logger.Errorf("sso_callback fail. code:%s, state:%s, get ret: %+v. error: %v", code, state, ret, err)
logx.Errorf(rctx, "sso_callback fail. code:%s, state:%s, get ret: %+v. error: %v", code, state, ret, err)
ginx.NewRender(c).Data(CallbackOutput{}, err)
return
}
@@ -274,7 +305,7 @@ func (rt *Router) loginCallback(c *gin.Context) {
for _, gid := range rt.Sso.OIDC.DefaultTeams {
err = models.UserGroupMemberAdd(rt.Ctx, gid, user.Id)
if err != nil {
logger.Errorf("user:%v UserGroupMemberAdd: %s", user, err)
logx.Errorf(rctx, "user:%v UserGroupMemberAdd: %s", user, err)
}
}
}
@@ -284,7 +315,14 @@ func (rt *Router) loginCallback(c *gin.Context) {
userIdentity := fmt.Sprintf("%d-%s", user.Id, user.Username)
ts, err := rt.createTokens(rt.HTTP.JWTAuth.SigningKey, userIdentity)
ginx.Dangerous(err)
ginx.Dangerous(rt.createAuth(c.Request.Context(), userIdentity, ts))
ginx.Dangerous(rt.createAuth(rctx, userIdentity, ts))
// 保存 id_token 到 Redis用于登出时使用
if ret.IdToken != "" {
if err := rt.saveIdToken(rctx, user.Id, ret.IdToken); err != nil {
logx.Errorf(rctx, "save id_token failed: %v, user_id: %d", err, user.Id)
}
}
redirect := "/"
if ret.Redirect != "/login" {
@@ -323,7 +361,7 @@ func (rt *Router) loginRedirectCas(c *gin.Context) {
}
if !rt.Sso.CAS.Enable {
logger.Error("cas is not enable")
logx.Errorf(c.Request.Context(), "cas is not enable")
ginx.NewRender(c).Data("", nil)
return
}
@@ -338,17 +376,18 @@ func (rt *Router) loginRedirectCas(c *gin.Context) {
}
func (rt *Router) loginCallbackCas(c *gin.Context) {
rctx := c.Request.Context()
ticket := ginx.QueryStr(c, "ticket", "")
state := ginx.QueryStr(c, "state", "")
ret, err := rt.Sso.CAS.ValidateServiceTicket(c.Request.Context(), ticket, state, rt.Redis)
ret, err := rt.Sso.CAS.ValidateServiceTicket(rctx, ticket, state, rt.Redis)
if err != nil {
logger.Errorf("ValidateServiceTicket: %s", err)
logx.Errorf(rctx, "ValidateServiceTicket: %s", err)
ginx.NewRender(c).Data("", err)
return
}
user, err := models.UserGet(rt.Ctx, "username=?", ret.Username)
if err != nil {
logger.Errorf("UserGet: %s", err)
logx.Errorf(rctx, "UserGet: %s", err)
}
ginx.Dangerous(err)
if user != nil {
@@ -367,10 +406,10 @@ func (rt *Router) loginCallbackCas(c *gin.Context) {
userIdentity := fmt.Sprintf("%d-%s", user.Id, user.Username)
ts, err := rt.createTokens(rt.HTTP.JWTAuth.SigningKey, userIdentity)
if err != nil {
logger.Errorf("createTokens: %s", err)
logx.Errorf(rctx, "createTokens: %s", err)
}
ginx.Dangerous(err)
ginx.Dangerous(rt.createAuth(c.Request.Context(), userIdentity, ts))
ginx.Dangerous(rt.createAuth(rctx, userIdentity, ts))
redirect := "/"
if ret.Redirect != "/login" {
@@ -413,13 +452,180 @@ func (rt *Router) loginRedirectOAuth(c *gin.Context) {
ginx.NewRender(c).Data(redirect, err)
}
func (rt *Router) loginCallbackOAuth(c *gin.Context) {
func (rt *Router) loginRedirectDingTalk(c *gin.Context) {
redirect := ginx.QueryStr(c, "redirect", "/")
v, exists := c.Get("userid")
if exists {
userid := v.(int64)
user, err := models.UserGetById(rt.Ctx, userid)
ginx.Dangerous(err)
if user == nil {
ginx.Bomb(200, "user not found")
}
if user.Username != "" { // already login
ginx.NewRender(c).Data(redirect, nil)
return
}
}
if !rt.Sso.DingTalk.Enable {
ginx.NewRender(c).Data("", nil)
return
}
redirect, err := rt.Sso.DingTalk.Authorize(rt.Redis, redirect)
ginx.Dangerous(err)
ginx.NewRender(c).Data(redirect, err)
}
func (rt *Router) loginCallbackDingTalk(c *gin.Context) {
rctx := c.Request.Context()
code := ginx.QueryStr(c, "code", "")
state := ginx.QueryStr(c, "state", "")
ret, err := rt.Sso.OAuth2.Callback(rt.Redis, c.Request.Context(), code, state)
ret, err := rt.Sso.DingTalk.Callback(rt.Redis, rctx, code, state)
if err != nil {
logger.Debugf("sso.callback() get ret %+v error %v", ret, err)
logx.Errorf(rctx, "sso_callback DingTalk fail. code:%s, state:%s, get ret: %+v. error: %v", code, state, ret, err)
ginx.NewRender(c).Data(CallbackOutput{}, err)
return
}
user, err := models.UserGet(rt.Ctx, "username=?", ret.Username)
ginx.Dangerous(err)
if user != nil {
if rt.Sso.DingTalk.DingTalkConfig.CoverAttributes {
updatedFields := user.UpdateSsoFields(dingtalk.SsoTypeName, ret.Nickname, ret.Phone, ret.Email)
ginx.Dangerous(user.Update(rt.Ctx, "update_at", updatedFields...))
}
} else {
user = new(models.User)
user.FullSsoFields(dingtalk.SsoTypeName, ret.Username, ret.Nickname, ret.Phone, ret.Email, rt.Sso.DingTalk.DingTalkConfig.DefaultRoles)
// create user from dingtalk
ginx.Dangerous(user.Add(rt.Ctx))
}
// set user login state
userIdentity := fmt.Sprintf("%d-%s", user.Id, user.Username)
ts, err := rt.createTokens(rt.HTTP.JWTAuth.SigningKey, userIdentity)
ginx.Dangerous(err)
ginx.Dangerous(rt.createAuth(c.Request.Context(), userIdentity, ts))
redirect := "/"
if ret.Redirect != "/login" {
redirect = ret.Redirect
}
ginx.NewRender(c).Data(CallbackOutput{
Redirect: redirect,
User: user,
AccessToken: ts.AccessToken,
RefreshToken: ts.RefreshToken,
}, nil)
}
func (rt *Router) loginRedirectFeiShu(c *gin.Context) {
redirect := ginx.QueryStr(c, "redirect", "/")
v, exists := c.Get("userid")
if exists {
userid := v.(int64)
user, err := models.UserGetById(rt.Ctx, userid)
ginx.Dangerous(err)
if user == nil {
ginx.Bomb(200, "user not found")
}
if user.Username != "" { // already login
ginx.NewRender(c).Data(redirect, nil)
return
}
}
if rt.Sso.FeiShu == nil || !rt.Sso.FeiShu.Enable {
ginx.NewRender(c).Data("", nil)
return
}
redirect, err := rt.Sso.FeiShu.Authorize(rt.Redis, redirect)
ginx.Dangerous(err)
ginx.NewRender(c).Data(redirect, err)
}
func (rt *Router) loginCallbackFeiShu(c *gin.Context) {
rctx := c.Request.Context()
code := ginx.QueryStr(c, "code", "")
state := ginx.QueryStr(c, "state", "")
ret, err := rt.Sso.FeiShu.Callback(rt.Redis, rctx, code, state)
if err != nil {
logx.Errorf(rctx, "sso_callback FeiShu fail. code:%s, state:%s, get ret: %+v. error: %v", code, state, ret, err)
ginx.NewRender(c).Data(CallbackOutput{}, err)
return
}
user, err := models.UserGet(rt.Ctx, "username=?", ret.Username)
ginx.Dangerous(err)
if user != nil {
if rt.Sso.FeiShu != nil && rt.Sso.FeiShu.FeiShuConfig != nil && rt.Sso.FeiShu.FeiShuConfig.CoverAttributes {
updatedFields := user.UpdateSsoFields(feishu.SsoTypeName, ret.Nickname, ret.Phone, ret.Email)
ginx.Dangerous(user.Update(rt.Ctx, "update_at", updatedFields...))
}
} else {
user = new(models.User)
defaultRoles := []string{}
defaultUserGroups := []int64{}
if rt.Sso.FeiShu != nil && rt.Sso.FeiShu.FeiShuConfig != nil {
defaultRoles = rt.Sso.FeiShu.FeiShuConfig.DefaultRoles
defaultUserGroups = rt.Sso.FeiShu.FeiShuConfig.DefaultUserGroups
}
user.FullSsoFields(feishu.SsoTypeName, ret.Username, ret.Nickname, ret.Phone, ret.Email, defaultRoles)
ginx.Dangerous(user.Add(rt.Ctx))
if len(defaultUserGroups) > 0 {
err = user.AddToUserGroups(rt.Ctx, defaultUserGroups)
if err != nil {
logx.Errorf(rctx, "sso feishu add user group error %v %v", ret, err)
}
}
}
// set user login state
userIdentity := fmt.Sprintf("%d-%s", user.Id, user.Username)
ts, err := rt.createTokens(rt.HTTP.JWTAuth.SigningKey, userIdentity)
ginx.Dangerous(err)
ginx.Dangerous(rt.createAuth(c.Request.Context(), userIdentity, ts))
redirect := "/"
if ret.Redirect != "/login" {
redirect = ret.Redirect
}
ginx.NewRender(c).Data(CallbackOutput{
Redirect: redirect,
User: user,
AccessToken: ts.AccessToken,
RefreshToken: ts.RefreshToken,
}, nil)
}
func (rt *Router) loginCallbackOAuth(c *gin.Context) {
rctx := c.Request.Context()
code := ginx.QueryStr(c, "code", "")
state := ginx.QueryStr(c, "state", "")
ret, err := rt.Sso.OAuth2.Callback(rt.Redis, rctx, code, state)
if err != nil {
logx.Debugf(rctx, "sso.callback() get ret %+v error %v", ret, err)
ginx.NewRender(c).Data(CallbackOutput{}, err)
return
}
@@ -459,13 +665,15 @@ func (rt *Router) loginCallbackOAuth(c *gin.Context) {
}
type SsoConfigOutput struct {
OidcDisplayName string `json:"oidcDisplayName"`
CasDisplayName string `json:"casDisplayName"`
OauthDisplayName string `json:"oauthDisplayName"`
OidcDisplayName string `json:"oidcDisplayName"`
CasDisplayName string `json:"casDisplayName"`
OauthDisplayName string `json:"oauthDisplayName"`
DingTalkDisplayName string `json:"dingTalkDisplayName"`
FeiShuDisplayName string `json:"feishuDisplayName"`
}
func (rt *Router) ssoConfigNameGet(c *gin.Context) {
var oidcDisplayName, casDisplayName, oauthDisplayName string
var oidcDisplayName, casDisplayName, oauthDisplayName, dingTalkDisplayName, feiShuDisplayName string
if rt.Sso.OIDC != nil {
oidcDisplayName = rt.Sso.OIDC.GetDisplayName()
}
@@ -478,23 +686,117 @@ func (rt *Router) ssoConfigNameGet(c *gin.Context) {
oauthDisplayName = rt.Sso.OAuth2.GetDisplayName()
}
if rt.Sso.DingTalk != nil {
dingTalkDisplayName = rt.Sso.DingTalk.GetDisplayName()
}
if rt.Sso.FeiShu != nil {
feiShuDisplayName = rt.Sso.FeiShu.GetDisplayName()
}
ginx.NewRender(c).Data(SsoConfigOutput{
OidcDisplayName: oidcDisplayName,
CasDisplayName: casDisplayName,
OauthDisplayName: oauthDisplayName,
OidcDisplayName: oidcDisplayName,
CasDisplayName: casDisplayName,
OauthDisplayName: oauthDisplayName,
DingTalkDisplayName: dingTalkDisplayName,
FeiShuDisplayName: feiShuDisplayName,
}, nil)
}
func (rt *Router) ssoConfigGets(c *gin.Context) {
ginx.NewRender(c).Data(models.SsoConfigGets(rt.Ctx))
var ssoConfigs []models.SsoConfig
lst, err := models.SsoConfigGets(rt.Ctx)
ginx.Dangerous(err)
if len(lst) == 0 {
ginx.NewRender(c).Data(ssoConfigs, nil)
return
}
// TODO: dingTalkExist 为了兼容当前前端配置, 后期单点登陆统一调整后不在预先设置默认内容
dingTalkExist := false
feiShuExist := false
for _, config := range lst {
var ssoReqConfig models.SsoConfig
ssoReqConfig.Id = config.Id
ssoReqConfig.Name = config.Name
ssoReqConfig.UpdateAt = config.UpdateAt
switch config.Name {
case dingtalk.SsoTypeName:
dingTalkExist = true
err := json.Unmarshal([]byte(config.Content), &ssoReqConfig.SettingJson)
ginx.Dangerous(err)
case feishu.SsoTypeName:
feiShuExist = true
err := json.Unmarshal([]byte(config.Content), &ssoReqConfig.SettingJson)
ginx.Dangerous(err)
default:
ssoReqConfig.Content = config.Content
}
ssoConfigs = append(ssoConfigs, ssoReqConfig)
}
// TODO: dingTalkExist 为了兼容当前前端配置, 后期单点登陆统一调整后不在预先设置默认内容
if !dingTalkExist {
var ssoConfig models.SsoConfig
ssoConfig.Name = dingtalk.SsoTypeName
ssoConfigs = append(ssoConfigs, ssoConfig)
}
if !feiShuExist {
var ssoConfig models.SsoConfig
ssoConfig.Name = feishu.SsoTypeName
ssoConfigs = append(ssoConfigs, ssoConfig)
}
ginx.NewRender(c).Data(ssoConfigs, nil)
}
func (rt *Router) ssoConfigUpdate(c *gin.Context) {
var f models.SsoConfig
ginx.BindJSON(c, &f)
var ssoConfig models.SsoConfig
ginx.BindJSON(c, &ssoConfig)
err := f.Update(rt.Ctx)
ginx.Dangerous(err)
switch ssoConfig.Name {
case dingtalk.SsoTypeName:
f.Name = ssoConfig.Name
setting, err := json.Marshal(ssoConfig.SettingJson)
ginx.Dangerous(err)
f.Content = string(setting)
f.UpdateAt = time.Now().Unix()
sso, err := f.Query(rt.Ctx)
if !errors.Is(err, gorm.ErrRecordNotFound) {
ginx.Dangerous(err)
}
if errors.Is(err, gorm.ErrRecordNotFound) {
err = f.Create(rt.Ctx)
} else {
f.Id = sso.Id
err = f.Update(rt.Ctx)
}
ginx.Dangerous(err)
case feishu.SsoTypeName:
f.Name = ssoConfig.Name
setting, err := json.Marshal(ssoConfig.SettingJson)
ginx.Dangerous(err)
f.Content = string(setting)
f.UpdateAt = time.Now().Unix()
sso, err := f.Query(rt.Ctx)
if !errors.Is(err, gorm.ErrRecordNotFound) {
ginx.Dangerous(err)
}
if errors.Is(err, gorm.ErrRecordNotFound) {
err = f.Create(rt.Ctx)
} else {
f.Id = sso.Id
err = f.Update(rt.Ctx)
}
ginx.Dangerous(err)
default:
f.Id = ssoConfig.Id
f.Name = ssoConfig.Name
f.Content = ssoConfig.Content
err := f.Update(rt.Ctx)
ginx.Dangerous(err)
}
switch f.Name {
case "LDAP":
@@ -518,6 +820,22 @@ func (rt *Router) ssoConfigUpdate(c *gin.Context) {
err := toml.Unmarshal([]byte(f.Content), &config)
ginx.Dangerous(err)
rt.Sso.OAuth2.Reload(config)
case dingtalk.SsoTypeName:
var config dingtalk.Config
err := json.Unmarshal([]byte(f.Content), &config)
ginx.Dangerous(err)
if rt.Sso.DingTalk == nil {
rt.Sso.DingTalk = dingtalk.New(config)
}
rt.Sso.DingTalk.Reload(config)
case feishu.SsoTypeName:
var config feishu.Config
err := json.Unmarshal([]byte(f.Content), &config)
ginx.Dangerous(err)
if rt.Sso.FeiShu == nil {
rt.Sso.FeiShu = feishu.New(config)
}
rt.Sso.FeiShu.Reload(config)
}
ginx.NewRender(c).Message(nil)

View File

@@ -12,10 +12,10 @@ import (
"github.com/ccfos/nightingale/v6/pkg/slice"
"github.com/ccfos/nightingale/v6/pkg/strx"
"github.com/ccfos/nightingale/v6/pkg/tplx"
"github.com/ccfos/nightingale/v6/pkg/ginx"
"github.com/gin-gonic/gin"
"github.com/google/uuid"
"github.com/toolkits/pkg/ginx"
)
func (rt *Router) messageTemplatesAdd(c *gin.Context) {
@@ -154,6 +154,7 @@ func (rt *Router) messageTemplatesGet(c *gin.Context) {
lst, err := models.MessageTemplatesGetBy(rt.Ctx, notifyChannelIdents)
ginx.Dangerous(err)
models.FillUpdateByNicknames(rt.Ctx, lst)
if me.IsAdmin() {
ginx.NewRender(c).Data(lst, nil)
@@ -193,10 +194,9 @@ func (rt *Router) eventsMessage(c *gin.Context) {
events[i] = he.ToCur()
}
var defs = []string{
"{{$events := .}}",
"{{$event := index . 0}}",
}
renderData := make(map[string]interface{})
renderData["events"] = events
defs := models.GetDefs(renderData)
ret := make(map[string]string, len(req.Tpl.Content))
for k, v := range req.Tpl.Content {
text := strings.Join(append(defs, v), "")
@@ -207,7 +207,7 @@ func (rt *Router) eventsMessage(c *gin.Context) {
}
var buf bytes.Buffer
err = tpl.Execute(&buf, events)
err = tpl.Execute(&buf, renderData)
if err != nil {
ret[k] = err.Error()
continue

View File

@@ -2,9 +2,9 @@ package router
import (
"github.com/ccfos/nightingale/v6/center/cconf"
"github.com/ccfos/nightingale/v6/pkg/ginx"
"github.com/gin-gonic/gin"
"github.com/toolkits/pkg/ginx"
)
func (rt *Router) metricsDescGetFile(c *gin.Context) {

View File

@@ -4,9 +4,9 @@ import (
"net/http"
"github.com/ccfos/nightingale/v6/models"
"github.com/ccfos/nightingale/v6/pkg/ginx"
"github.com/gin-gonic/gin"
"github.com/toolkits/pkg/ginx"
)
// no param

View File

@@ -1,7 +1,6 @@
package router
import (
"math"
"net/http"
"strings"
"time"
@@ -10,15 +9,22 @@ import (
"github.com/ccfos/nightingale/v6/alert/mute"
"github.com/ccfos/nightingale/v6/models"
"github.com/ccfos/nightingale/v6/pkg/strx"
"github.com/ccfos/nightingale/v6/pkg/ginx"
"github.com/gin-gonic/gin"
"github.com/toolkits/pkg/ginx"
"github.com/toolkits/pkg/i18n"
)
// Return all, front-end search and paging
func (rt *Router) alertMuteGetsByBG(c *gin.Context) {
bgid := ginx.UrlParamInt64(c, "id")
lst, err := models.AlertMuteGetsByBG(rt.Ctx, bgid)
prods := strings.Fields(ginx.QueryStr(c, "prods", ""))
query := ginx.QueryStr(c, "query", "")
expired := ginx.QueryInt(c, "expired", -1)
lst, err := models.AlertMuteGets(rt.Ctx, prods, bgid, -1, expired, query)
if err == nil {
models.FillUpdateByNicknames(rt.Ctx, lst)
}
ginx.NewRender(c).Data(lst, err)
}
@@ -44,6 +50,9 @@ func (rt *Router) alertMuteGetsByGids(c *gin.Context) {
}
lst, err := models.AlertMuteGetsByBGIds(rt.Ctx, gids)
if err == nil {
models.FillUpdateByNicknames(rt.Ctx, lst)
}
ginx.NewRender(c).Data(lst, err)
}
@@ -53,11 +62,20 @@ func (rt *Router) alertMuteGets(c *gin.Context) {
bgid := ginx.QueryInt64(c, "bgid", -1)
query := ginx.QueryStr(c, "query", "")
disabled := ginx.QueryInt(c, "disabled", -1)
lst, err := models.AlertMuteGets(rt.Ctx, prods, bgid, disabled, query)
expired := ginx.QueryInt(c, "expired", -1)
lst, err := models.AlertMuteGets(rt.Ctx, prods, bgid, disabled, expired, query)
if err == nil {
models.FillUpdateByNicknames(rt.Ctx, lst)
}
ginx.NewRender(c).Data(lst, err)
}
func (rt *Router) activeAlertMuteGets(c *gin.Context) {
lst, err := models.AlertMuteGetsAll(rt.Ctx)
ginx.NewRender(c).Data(lst, err)
}
func (rt *Router) alertMuteAdd(c *gin.Context) {
var f models.AlertMute
@@ -67,18 +85,21 @@ func (rt *Router) alertMuteAdd(c *gin.Context) {
f.CreateBy = username
f.UpdateBy = username
f.GroupId = ginx.UrlParamInt64(c, "id")
ginx.NewRender(c).Message(f.Add(rt.Ctx))
ginx.Dangerous(f.Add(rt.Ctx))
ginx.NewRender(c).Data(f.Id, nil)
}
type MuteTestForm struct {
EventId int64 `json:"event_id" binding:"required"`
AlertMute models.AlertMute `json:"mute_config" binding:"required"`
EventId int64 `json:"event_id" binding:"required"`
AlertMute models.AlertMute `json:"config" binding:"required"`
PassTimeCheck bool `json:"pass_time_check"`
}
func (rt *Router) alertMuteTryRun(c *gin.Context) {
var f MuteTestForm
ginx.BindJSON(c, &f)
ginx.Dangerous(f.AlertMute.Verify())
hisEvent, err := models.AlertHisEventGetById(rt.Ctx, f.EventId)
ginx.Dangerous(err)
@@ -90,18 +111,30 @@ func (rt *Router) alertMuteTryRun(c *gin.Context) {
curEvent := *hisEvent.ToCur()
curEvent.SetTagsMap()
// 绕过时间范围检查设置时间范围为全量0 到 int64 最大值),仅验证其他匹配条件(如标签、策略类型等)
f.AlertMute.MuteTimeType = models.TimeRange
f.AlertMute.Btime = 0 // 最小可能值(如 Unix 时间戳起点)
f.AlertMute.Etime = math.MaxInt64 // 最大可能值int64 上限)
if f.PassTimeCheck {
f.AlertMute.MuteTimeType = models.Periodic
f.AlertMute.PeriodicMutesJson = []models.PeriodicMute{
{
EnableDaysOfWeek: "0 1 2 3 4 5 6",
EnableStime: "00:00",
EnableEtime: "00:00",
},
}
}
if !mute.MatchMute(&curEvent, &f.AlertMute) {
ginx.NewRender(c).Data("not match", nil)
match, err := mute.MatchMute(&curEvent, &f.AlertMute)
if err != nil {
// 对错误信息进行 i18n 翻译
translatedErr := i18n.Sprintf(c.GetHeader("X-Language"), err.Error())
ginx.Bomb(http.StatusBadRequest, translatedErr)
}
if !match {
ginx.NewRender(c).Data("event not match mute", nil)
return
}
ginx.NewRender(c).Data("mute test match", nil)
ginx.NewRender(c).Data("event match mute", nil)
}
// Preview events (alert_cur_event) that match the mute strategy based on the following criteria:

View File

@@ -11,11 +11,11 @@ import (
"github.com/ccfos/nightingale/v6/center/cstats"
"github.com/ccfos/nightingale/v6/models"
"github.com/ccfos/nightingale/v6/pkg/ginx"
"github.com/gin-gonic/gin"
"github.com/golang-jwt/jwt"
"github.com/google/uuid"
"github.com/toolkits/pkg/ginx"
)
const (
@@ -453,6 +453,30 @@ func (rt *Router) wrapJwtKey(key string) string {
return rt.HTTP.JWTAuth.RedisKeyPrefix + key
}
func (rt *Router) wrapIdTokenKey(userId int64) string {
return fmt.Sprintf("n9e_id_token_%d", userId)
}
// saveIdToken 保存用户的 id_token 到 Redis
func (rt *Router) saveIdToken(ctx context.Context, userId int64, idToken string) error {
if idToken == "" {
return nil
}
// id_token 的过期时间应该与 RefreshToken 保持一致,确保在整个会话期间都可用于登出
expiration := time.Minute * time.Duration(rt.HTTP.JWTAuth.RefreshExpired)
return rt.Redis.Set(ctx, rt.wrapIdTokenKey(userId), idToken, expiration).Err()
}
// fetchIdToken 从 Redis 获取用户的 id_token
func (rt *Router) fetchIdToken(ctx context.Context, userId int64) (string, error) {
return rt.Redis.Get(ctx, rt.wrapIdTokenKey(userId)).Result()
}
// deleteIdToken 从 Redis 删除用户的 id_token
func (rt *Router) deleteIdToken(ctx context.Context, userId int64) error {
return rt.Redis.Del(ctx, rt.wrapIdTokenKey(userId)).Err()
}
type TokenDetails struct {
AccessToken string
RefreshToken string

View File

@@ -6,9 +6,9 @@ import (
"github.com/ccfos/nightingale/v6/alert/sender"
"github.com/ccfos/nightingale/v6/models"
"github.com/ccfos/nightingale/v6/pkg/ctx"
"github.com/ccfos/nightingale/v6/pkg/ginx"
"github.com/gin-gonic/gin"
"github.com/toolkits/pkg/ginx"
"github.com/toolkits/pkg/logger"
)
@@ -33,7 +33,7 @@ type Record struct {
// notificationRecordAdd
func (rt *Router) notificationRecordAdd(c *gin.Context) {
var req []*models.NotificaitonRecord
var req []*models.NotificationRecord
ginx.BindJSON(c, &req)
err := sender.PushNotifyRecords(req)
ginx.Dangerous(err, 429)
@@ -43,14 +43,14 @@ func (rt *Router) notificationRecordAdd(c *gin.Context) {
func (rt *Router) notificationRecordList(c *gin.Context) {
eid := ginx.UrlParamInt64(c, "eid")
lst, err := models.NotificaitonRecordsGetByEventId(rt.Ctx, eid)
lst, err := models.NotificationRecordsGetByEventId(rt.Ctx, eid)
ginx.Dangerous(err)
response := buildNotificationResponse(rt.Ctx, lst)
ginx.NewRender(c).Data(response, nil)
}
func buildNotificationResponse(ctx *ctx.Context, nl []*models.NotificaitonRecord) NotificationResponse {
func buildNotificationResponse(ctx *ctx.Context, nl []*models.NotificationRecord) NotificationResponse {
response := NotificationResponse{
SubRules: []SubRule{},
Notifies: make(map[string][]Record),

View File

@@ -11,8 +11,8 @@ import (
"time"
"github.com/ccfos/nightingale/v6/models"
"github.com/ccfos/nightingale/v6/pkg/ginx"
"github.com/gin-gonic/gin"
"github.com/toolkits/pkg/ginx"
)
func (rt *Router) notifyChannelsAdd(c *gin.Context) {
@@ -118,6 +118,9 @@ func (rt *Router) notifyChannelGetBy(c *gin.Context) {
func (rt *Router) notifyChannelsGet(c *gin.Context) {
lst, err := models.NotifyChannelsGet(rt.Ctx, "", nil)
if err == nil {
models.FillUpdateByNicknames(rt.Ctx, lst)
}
ginx.NewRender(c).Data(lst, err)
}
@@ -162,21 +165,6 @@ func (rt *Router) notifyChannelIdentsGet(c *gin.Context) {
ginx.NewRender(c).Data(lst, nil)
}
type flushDutyChannelsResponse struct {
Error struct {
Code string `json:"code"`
Message string `json:"message"`
} `json:"error"`
Data struct {
Items []struct {
ChannelID int `json:"channel_id"`
ChannelName string `json:"channel_name"`
Status string `json:"status"`
} `json:"items"`
Total int `json:"total"`
} `json:"data"`
}
func (rt *Router) flashDutyNotifyChannelsGet(c *gin.Context) {
cid := ginx.UrlParamInt64(c, "id")
nc, err := models.NotifyChannelGet(rt.Ctx, "id = ?", cid)
@@ -196,18 +184,31 @@ func (rt *Router) flashDutyNotifyChannelsGet(c *gin.Context) {
jsonData = []byte(fmt.Sprintf(`{"member_name":"%s","email":"%s","phone":"%s"}`, me.Username, me.Email, me.Phone))
}
items, err := getFlashDutyChannels(nc.RequestConfig.FlashDutyRequestConfig.IntegrationUrl, jsonData)
items, err := getFlashDutyChannels(nc.RequestConfig.FlashDutyRequestConfig.IntegrationUrl, jsonData, time.Duration(nc.RequestConfig.FlashDutyRequestConfig.Timeout)*time.Millisecond)
ginx.Dangerous(err)
ginx.NewRender(c).Data(items, nil)
}
// getFlashDutyChannels 从FlashDuty API获取频道列表
func getFlashDutyChannels(integrationUrl string, jsonData []byte) ([]struct {
type flushDutyChannelsResponse struct {
Error struct {
Code string `json:"code"`
Message string `json:"message"`
} `json:"error"`
Data struct {
Items []FlashDutyChannel `json:"items"`
Total int `json:"total"`
} `json:"data"`
}
type FlashDutyChannel struct {
ChannelID int `json:"channel_id"`
ChannelName string `json:"channel_name"`
Status string `json:"status"`
}, error) {
}
// getFlashDutyChannels 从FlashDuty API获取频道列表
func getFlashDutyChannels(integrationUrl string, jsonData []byte, timeout time.Duration) ([]FlashDutyChannel, error) {
// 解析URL提取baseUrl和参数
baseUrl, integrationKey, err := parseIntegrationUrl(integrationUrl)
if err != nil {
@@ -227,7 +228,9 @@ func getFlashDutyChannels(integrationUrl string, jsonData []byte) ([]struct {
}
req.Header.Set("Content-Type", "application/json")
httpResp, err := (&http.Client{}).Do(req)
httpResp, err := (&http.Client{
Timeout: timeout,
}).Do(req)
if err != nil {
return nil, err
}
@@ -266,3 +269,149 @@ func parseIntegrationUrl(urlStr string) (baseUrl string, integrationKey string,
return host, integrationKey, nil
}
func (rt *Router) pagerDutyNotifyServicesGet(c *gin.Context) {
cid := ginx.UrlParamInt64(c, "id")
nc, err := models.NotifyChannelGet(rt.Ctx, "id = ?", cid)
ginx.Dangerous(err)
if err != nil || nc == nil {
ginx.Bomb(http.StatusNotFound, "notify channel not found")
}
items, err := getPagerDutyServices(nc.RequestConfig.PagerDutyRequestConfig.ApiKey, time.Duration(nc.RequestConfig.PagerDutyRequestConfig.Timeout)*time.Millisecond)
if err != nil {
ginx.Bomb(http.StatusInternalServerError, fmt.Sprintf("failed to get pagerduty services: %v", err))
}
// 服务: []集成,扁平化为服务-集成
var flattenedItems []map[string]string
for _, svc := range items {
for _, integ := range svc.Integrations {
flattenedItems = append(flattenedItems, map[string]string{
"service_id": svc.ID,
"service_name": svc.Name,
"integration_summary": integ.Summary,
"integration_id": integ.ID,
"integration_url": integ.Self,
})
}
}
ginx.NewRender(c).Data(flattenedItems, nil)
}
func (rt *Router) pagerDutyIntegrationKeyGet(c *gin.Context) {
serviceId := ginx.UrlParamStr(c, "service_id")
integrationId := ginx.UrlParamStr(c, "integration_id")
cid := ginx.UrlParamInt64(c, "id")
nc, err := models.NotifyChannelGet(rt.Ctx, "id = ?", cid)
ginx.Dangerous(err)
if err != nil || nc == nil {
ginx.Bomb(http.StatusNotFound, "notify channel not found")
}
integrationUrl := fmt.Sprintf("https://api.pagerduty.com/services/%s/integrations/%s", serviceId, integrationId)
integrationKey, err := getPagerDutyIntegrationKey(integrationUrl, nc.RequestConfig.PagerDutyRequestConfig.ApiKey, time.Duration(nc.RequestConfig.PagerDutyRequestConfig.Timeout)*time.Millisecond)
if err != nil {
ginx.Bomb(http.StatusInternalServerError, fmt.Sprintf("failed to get pagerduty integration key: %v", err))
}
ginx.NewRender(c).Data(map[string]string{
"integration_key": integrationKey,
}, nil)
}
type PagerDutyIntegration struct {
ID string `json:"id"`
IntegrationKey string `json:"integration_key"`
Self string `json:"self"` // integration 的 API URL
Summary string `json:"summary"`
}
type PagerDutyService struct {
Name string `json:"name"`
ID string `json:"id"`
Integrations []PagerDutyIntegration `json:"integrations"`
}
// getPagerDutyServices 从 PagerDuty API 分页获取所有服务及其集成信息
func getPagerDutyServices(apiKey string, timeout time.Duration) ([]PagerDutyService, error) {
const limit = 100 // 每页最大数量
var offset uint // 分页偏移量
var allServices []PagerDutyService
for {
// 构建带分页参数的 URL
url := fmt.Sprintf("https://api.pagerduty.com/services?limit=%d&offset=%d", limit, offset)
req, err := http.NewRequest("GET", url, nil)
if err != nil {
return nil, err
}
req.Header.Set("Authorization", fmt.Sprintf("Token token=%s", apiKey))
req.Header.Set("Accept", "application/vnd.pagerduty+json;version=2")
httpResp, err := (&http.Client{Timeout: timeout}).Do(req)
if err != nil {
return nil, err
}
body, err := io.ReadAll(httpResp.Body)
httpResp.Body.Close()
if err != nil {
return nil, err
}
// 定义包含分页信息的响应结构
var serviceRes struct {
Services []PagerDutyService `json:"services"`
More bool `json:"more"` // 是否还有更多数据
Limit uint `json:"limit"`
Offset uint `json:"offset"`
}
if err := json.Unmarshal(body, &serviceRes); err != nil {
return nil, err
}
allServices = append(allServices, serviceRes.Services...)
// 判断是否还有更多数据
if !serviceRes.More || len(serviceRes.Services) < int(limit) {
break
}
offset += limit // 准备请求下一页
}
return allServices, nil
}
// getPagerDutyIntegrationKey 通过 integration 的 API URL 获取 integration key
func getPagerDutyIntegrationKey(integrationUrl, apiKey string, timeout time.Duration) (string, error) {
req, err := http.NewRequest("GET", integrationUrl, nil)
if err != nil {
return "", err
}
req.Header.Set("Authorization", fmt.Sprintf("Token token=%s", apiKey))
httpResp, err := (&http.Client{
Timeout: timeout,
}).Do(req)
if err != nil {
return "", err
}
defer httpResp.Body.Close()
body, err := io.ReadAll(httpResp.Body)
if err != nil {
return "", err
}
var integRes struct {
Integration struct {
IntegrationKey string `json:"integration_key"`
} `json:"integration"`
}
if err := json.Unmarshal(body, &integRes); err != nil {
return "", err
}
return integRes.Integration.IntegrationKey, nil
}

View File

@@ -11,7 +11,7 @@ func TestGetFlashDutyChannels(t *testing.T) {
jsonData := []byte(`{}`)
// 调用被测试的函数
channels, err := getFlashDutyChannels(integrationUrl, jsonData)
channels, err := getFlashDutyChannels(integrationUrl, jsonData, 5000)
fmt.Println(channels, err)
}

View File

@@ -10,10 +10,10 @@ import (
"github.com/ccfos/nightingale/v6/memsto"
"github.com/ccfos/nightingale/v6/models"
"github.com/ccfos/nightingale/v6/pkg/tplx"
"github.com/ccfos/nightingale/v6/pkg/ginx"
"github.com/gin-gonic/gin"
"github.com/pelletier/go-toml/v2"
"github.com/toolkits/pkg/ginx"
"github.com/toolkits/pkg/str"
)
@@ -162,7 +162,7 @@ func (rt *Router) notifyConfigPut(c *gin.Context) {
ginx.Bomb(200, "key %s can not modify", f.Ckey)
}
username := c.MustGet("username").(string)
//insert or update build-in config
//insert or update built-in config
ginx.Dangerous(models.ConfigsSetWithUname(rt.Ctx, f.Ckey, f.Cval, username))
if f.Ckey == models.SMTP {
// 重置邮件发送器
@@ -219,8 +219,8 @@ func (rt *Router) notifyChannelConfigGets(c *gin.Context) {
id := ginx.QueryInt64(c, "id", 0)
name := ginx.QueryStr(c, "name", "")
ident := ginx.QueryStr(c, "ident", "")
eabled := ginx.QueryInt(c, "eabled", -1)
enabled := ginx.QueryInt(c, "enabled", -1)
notifyChannels, err := models.NotifyChannelGets(rt.Ctx, id, name, ident, eabled)
notifyChannels, err := models.NotifyChannelGets(rt.Ctx, id, name, ident, enabled)
ginx.NewRender(c).Data(notifyChannels, err)
}

View File

@@ -6,12 +6,13 @@ import (
"time"
"github.com/ccfos/nightingale/v6/alert/dispatch"
"github.com/ccfos/nightingale/v6/memsto"
"github.com/ccfos/nightingale/v6/models"
"github.com/ccfos/nightingale/v6/pkg/ctx"
"github.com/ccfos/nightingale/v6/pkg/slice"
"github.com/ccfos/nightingale/v6/pkg/ginx"
"github.com/gin-gonic/gin"
"github.com/pkg/errors"
"github.com/toolkits/pkg/ginx"
"github.com/toolkits/pkg/logger"
)
@@ -117,6 +118,7 @@ func (rt *Router) notifyRulesGet(c *gin.Context) {
lst, err := models.NotifyRulesGet(rt.Ctx, "", nil)
ginx.Dangerous(err)
models.FillUpdateByNicknames(rt.Ctx, lst)
if me.IsAdmin() {
ginx.NewRender(c).Data(lst, nil)
return
@@ -152,103 +154,138 @@ func (rt *Router) notifyTest(c *gin.Context) {
for _, he := range hisEvents {
event := he.ToCur()
event.SetTagsMap()
if dispatch.NotifyRuleApplicable(&f.NotifyConfig, event) {
events = append(events, event)
if err := dispatch.NotifyRuleMatchCheck(&f.NotifyConfig, event); err != nil {
ginx.Bomb(http.StatusBadRequest, err.Error())
}
events = append(events, event)
}
if len(events) == 0 {
ginx.Bomb(http.StatusBadRequest, "not events applicable")
resp, err := SendNotifyChannelMessage(rt.Ctx, rt.UserCache, rt.UserGroupCache, f.NotifyConfig, events)
if resp == "" {
resp = "success"
}
ginx.NewRender(c).Data(resp, err)
}
func SendNotifyChannelMessage(ctx *ctx.Context, userCache *memsto.UserCacheType, userGroup *memsto.UserGroupCacheType, notifyConfig models.NotifyConfig, events []*models.AlertCurEvent) (string, error) {
notifyChannels, err := models.NotifyChannelGets(ctx, notifyConfig.ChannelID, "", "", -1)
if err != nil {
return "", fmt.Errorf("failed to get notify channels: %v", err)
}
notifyChannels, err := models.NotifyChannelGets(rt.Ctx, f.NotifyConfig.ChannelID, "", "", -1)
ginx.Dangerous(err)
if len(notifyChannels) == 0 {
ginx.Bomb(http.StatusBadRequest, "notify channel not found")
return "", fmt.Errorf("notify channel not found")
}
notifyChannel := notifyChannels[0]
if !notifyChannel.Enable {
ginx.Bomb(http.StatusBadRequest, "notify channel not enabled, please enable it first")
return "", fmt.Errorf("notify channel not enabled, please enable it first")
}
// 获取站点URL用于模板渲染
siteUrl, _ := models.ConfigsGetSiteUrl(ctx)
if siteUrl == "" {
siteUrl = "http://127.0.0.1:17000"
}
tplContent := make(map[string]interface{})
if notifyChannel.RequestType != "flashtudy" {
messageTemplates, err := models.MessageTemplateGets(rt.Ctx, f.NotifyConfig.TemplateID, "", "")
ginx.Dangerous(err)
if len(messageTemplates) == 0 {
ginx.Bomb(http.StatusBadRequest, "message template not found")
if notifyChannel.RequestType != "flashduty" {
messageTemplates, err := models.MessageTemplateGets(ctx, notifyConfig.TemplateID, "", "")
if err != nil {
return "", fmt.Errorf("failed to get message templates: %v", err)
}
tplContent = messageTemplates[0].RenderEvent(events)
}
if len(messageTemplates) == 0 {
return "", fmt.Errorf("message template not found")
}
tplContent = messageTemplates[0].RenderEvent(events, siteUrl)
}
var contactKey string
if notifyChannel.ParamConfig != nil && notifyChannel.ParamConfig.UserInfo != nil {
contactKey = notifyChannel.ParamConfig.UserInfo.ContactKey
}
sendtos, flashDutyChannelIDs, customParams := dispatch.GetNotifyConfigParams(&f.NotifyConfig, contactKey, rt.UserCache, rt.UserGroupCache)
sendtos, flashDutyChannelIDs, pagerDutyRoutingKeys, customParams := dispatch.GetNotifyConfigParams(&notifyConfig, contactKey, userCache, userGroup)
var resp string
switch notifyChannel.RequestType {
case "flashduty":
client, err := models.GetHTTPClient(notifyChannel)
ginx.Dangerous(err)
if err != nil {
return "", fmt.Errorf("failed to get http client: %v", err)
}
for i := range flashDutyChannelIDs {
resp, err = notifyChannel.SendFlashDuty(events, flashDutyChannelIDs[i], client)
if err != nil {
break
return "", fmt.Errorf("failed to send flashduty notify: %v", err)
}
}
logger.Infof("channel_name: %v, event:%+v, tplContent:%s, customParams:%v, respBody: %v, err: %v", notifyChannel.Name, events[0], tplContent, customParams, resp, err)
ginx.NewRender(c).Data(resp, err)
logger.Infof("channel_name: %v, event:%s, tplContent:%s, customParams:%v, respBody: %v, err: %v", notifyChannel.Name, events[0].Hash, tplContent, customParams, resp, err)
return resp, nil
case "pagerduty":
client, err := models.GetHTTPClient(notifyChannel)
if err != nil {
return "", fmt.Errorf("failed to get http client: %v", err)
}
for _, routingKey := range pagerDutyRoutingKeys {
resp, err = notifyChannel.SendPagerDuty(events, routingKey, siteUrl, client)
if err != nil {
return "", fmt.Errorf("failed to send pagerduty notify: %v", err)
}
}
logger.Infof("channel_name: %v, event:%s, tplContent:%s, customParams:%v, respBody: %v, err: %v", notifyChannel.Name, events[0].Hash, tplContent, customParams, resp, err)
return resp, nil
case "http":
client, err := models.GetHTTPClient(notifyChannel)
ginx.Dangerous(err)
if err != nil {
return "", fmt.Errorf("failed to get http client: %v", err)
}
if notifyChannel.RequestConfig == nil {
ginx.Bomb(http.StatusBadRequest, "request config not found")
return "", fmt.Errorf("request config is nil")
}
if notifyChannel.RequestConfig.HTTPRequestConfig == nil {
ginx.Bomb(http.StatusBadRequest, "http request config not found")
return "", fmt.Errorf("http request config is nil")
}
if dispatch.NeedBatchContacts(notifyChannel.RequestConfig.HTTPRequestConfig) || len(sendtos) == 0 {
resp, err = notifyChannel.SendHTTP(events, tplContent, customParams, sendtos, client)
logger.Infof("channel_name: %v, event:%+v, sendtos:%+v, tplContent:%s, customParams:%v, respBody: %v, err: %v", notifyChannel.Name, events[0], sendtos, tplContent, customParams, resp, err)
logger.Infof("channel_name: %v, event:%s, sendtos:%+v, tplContent:%s, customParams:%v, respBody: %v, err: %v", notifyChannel.Name, events[0].Hash, sendtos, tplContent, customParams, resp, err)
if err != nil {
logger.Errorf("failed to send http notify: %v", err)
return "", fmt.Errorf("failed to send http notify: %v", err)
}
ginx.NewRender(c).Data(resp, err)
return resp, nil
} else {
for i := range sendtos {
resp, err = notifyChannel.SendHTTP(events, tplContent, customParams, []string{sendtos[i]}, client)
logger.Infof("channel_name: %v, event:%+v, tplContent:%s, customParams:%v, sendto:%+v, respBody: %v, err: %v", notifyChannel.Name, events[0], tplContent, customParams, sendtos[i], resp, err)
logger.Infof("channel_name: %v, event:%s, tplContent:%s, customParams:%v, sendto:%+v, respBody: %v, err: %v", notifyChannel.Name, events[0].Hash, tplContent, customParams, sendtos[i], resp, err)
if err != nil {
logger.Errorf("failed to send http notify: %v", err)
ginx.NewRender(c).Message(err)
return
return "", fmt.Errorf("failed to send http notify: %v", err)
}
}
ginx.NewRender(c).Message(err)
return resp, nil
}
case "smtp":
if len(sendtos) == 0 {
ginx.Bomb(http.StatusBadRequest, "No valid email address in the user and team")
return "", fmt.Errorf("no valid email address in the user and team")
}
err := notifyChannel.SendEmailNow(events, tplContent, sendtos)
ginx.NewRender(c).Message(err)
if err != nil {
return "", fmt.Errorf("failed to send email notify: %v", err)
}
return resp, nil
case "script":
resp, _, err := notifyChannel.SendScript(events, tplContent, customParams, sendtos)
logger.Infof("channel_name: %v, event:%+v, tplContent:%s, customParams:%v, respBody: %v, err: %v", notifyChannel.Name, events[0], tplContent, customParams, resp, err)
ginx.NewRender(c).Data(resp, err)
logger.Infof("channel_name: %v, event:%s, tplContent:%s, customParams:%v, respBody: %v, err: %v", notifyChannel.Name, events[0].Hash, tplContent, customParams, resp, err)
return resp, err
default:
logger.Errorf("unsupported request type: %v", notifyChannel.RequestType)
ginx.NewRender(c).Message(errors.New("unsupported request type"))
return "", fmt.Errorf("unsupported request type")
}
}
@@ -302,8 +339,8 @@ func (rt *Router) notifyRuleCustomParamsGet(c *gin.Context) {
filterKey := ""
for key, value := range nc.Params {
// 找到在通知媒介中的自定义变量配置项,进行 cname 转换
cname, exsits := keyMap[key]
if exsits {
cname, exists := keyMap[key]
if exists {
list = append(list, paramList{
Name: key,
CName: cname,

View File

@@ -11,9 +11,9 @@ import (
"github.com/ccfos/nightingale/v6/center/cconf"
"github.com/ccfos/nightingale/v6/models"
"github.com/ccfos/nightingale/v6/pkg/tplx"
"github.com/ccfos/nightingale/v6/pkg/ginx"
"github.com/gin-gonic/gin"
"github.com/toolkits/pkg/ginx"
"github.com/toolkits/pkg/str"
)
@@ -25,11 +25,14 @@ func (rt *Router) notifyTplGets(c *gin.Context) {
m[models.EmailSubject] = struct{}{}
lst, err := models.NotifyTplGets(rt.Ctx)
ginx.Dangerous(err)
for i := 0; i < len(lst); i++ {
if _, exists := m[lst[i].Channel]; exists {
lst[i].BuiltIn = true
}
}
models.FillUpdateByNicknames(rt.Ctx, lst)
ginx.NewRender(c).Data(lst, err)
}
@@ -200,6 +203,9 @@ func (rt *Router) messageTemplateGets(c *gin.Context) {
ident := ginx.QueryStr(c, "ident", "")
tpls, err := models.MessageTemplateGets(rt.Ctx, id, name, ident)
if err == nil {
models.FillUpdateByNicknames(rt.Ctx, tpls)
}
ginx.NewRender(c).Data(tpls, err)
}

View File

@@ -0,0 +1,58 @@
package router
import (
"github.com/ccfos/nightingale/v6/datasource/opensearch"
"github.com/ccfos/nightingale/v6/dscache"
"github.com/ccfos/nightingale/v6/pkg/ginx"
"github.com/gin-gonic/gin"
"github.com/toolkits/pkg/logger"
)
func (rt *Router) QueryOSIndices(c *gin.Context) {
var f IndexReq
ginx.BindJSON(c, &f)
plug, exists := dscache.DsCache.Get(f.Cate, f.DatasourceId)
if !exists {
logger.Warningf("cluster:%d not exists", f.DatasourceId)
ginx.Bomb(200, "cluster not exists")
}
indices, err := plug.(*opensearch.OpenSearch).QueryIndices()
ginx.Dangerous(err)
ginx.NewRender(c).Data(indices, nil)
}
func (rt *Router) QueryOSFields(c *gin.Context) {
var f IndexReq
ginx.BindJSON(c, &f)
plug, exists := dscache.DsCache.Get(f.Cate, f.DatasourceId)
if !exists {
logger.Warningf("cluster:%d not exists", f.DatasourceId)
ginx.Bomb(200, "cluster not exists")
}
fields, err := plug.(*opensearch.OpenSearch).QueryFields([]string{f.Index})
ginx.Dangerous(err)
ginx.NewRender(c).Data(fields, nil)
}
func (rt *Router) QueryOSVariable(c *gin.Context) {
var f FieldValueReq
ginx.BindJSON(c, &f)
plug, exists := dscache.DsCache.Get(f.Cate, f.DatasourceId)
if !exists {
logger.Warningf("cluster:%d not exists", f.DatasourceId)
ginx.Bomb(200, "cluster not exists")
}
fields, err := plug.(*opensearch.OpenSearch).QueryFieldValue([]string{f.Index}, f.Query.Field, f.Query.Query)
ginx.Dangerous(err)
ginx.NewRender(c).Data(fields, nil)
}

View File

@@ -2,21 +2,25 @@ package router
import (
"context"
"crypto/tls"
"fmt"
"net"
"net/http"
"net/http/httputil"
"regexp"
"strconv"
"strings"
"sync"
"time"
"github.com/ccfos/nightingale/v6/pkg/logx"
"github.com/ccfos/nightingale/v6/pkg/poster"
pkgprom "github.com/ccfos/nightingale/v6/pkg/prom"
"github.com/ccfos/nightingale/v6/prom"
"github.com/ccfos/nightingale/v6/pkg/ginx"
"github.com/gin-gonic/gin"
"github.com/prometheus/common/model"
"github.com/toolkits/pkg/ginx"
"github.com/toolkits/pkg/logger"
"github.com/toolkits/pkg/net/httplib"
)
type QueryFormItem struct {
@@ -35,15 +39,16 @@ func (rt *Router) promBatchQueryRange(c *gin.Context) {
var f BatchQueryForm
ginx.Dangerous(c.BindJSON(&f))
lst, err := PromBatchQueryRange(rt.PromClients, f)
lst, err := PromBatchQueryRange(c.Request.Context(), rt.PromClients, f)
ginx.NewRender(c).Data(lst, err)
}
func PromBatchQueryRange(pc *prom.PromClientMap, f BatchQueryForm) ([]model.Value, error) {
func PromBatchQueryRange(ctx context.Context, pc *prom.PromClientMap, f BatchQueryForm) ([]model.Value, error) {
var lst []model.Value
cli := pc.GetCli(f.DatasourceId)
if cli == nil {
logx.Warningf(ctx, "no such datasource id: %d", f.DatasourceId)
return lst, fmt.Errorf("no such datasource id: %d", f.DatasourceId)
}
@@ -54,8 +59,9 @@ func PromBatchQueryRange(pc *prom.PromClientMap, f BatchQueryForm) ([]model.Valu
Step: time.Duration(item.Step) * time.Second,
}
resp, _, err := cli.QueryRange(context.Background(), item.Query, r)
resp, _, err := cli.QueryRange(ctx, item.Query, r)
if err != nil {
logx.Warningf(ctx, "query range error: query:%s err:%v", item.Query, err)
return lst, err
}
@@ -78,22 +84,23 @@ func (rt *Router) promBatchQueryInstant(c *gin.Context) {
var f BatchInstantForm
ginx.Dangerous(c.BindJSON(&f))
lst, err := PromBatchQueryInstant(rt.PromClients, f)
lst, err := PromBatchQueryInstant(c.Request.Context(), rt.PromClients, f)
ginx.NewRender(c).Data(lst, err)
}
func PromBatchQueryInstant(pc *prom.PromClientMap, f BatchInstantForm) ([]model.Value, error) {
func PromBatchQueryInstant(ctx context.Context, pc *prom.PromClientMap, f BatchInstantForm) ([]model.Value, error) {
var lst []model.Value
cli := pc.GetCli(f.DatasourceId)
if cli == nil {
logger.Warningf("no such datasource id: %d", f.DatasourceId)
logx.Warningf(ctx, "no such datasource id: %d", f.DatasourceId)
return lst, fmt.Errorf("no such datasource id: %d", f.DatasourceId)
}
for _, item := range f.Queries {
resp, _, err := cli.Query(context.Background(), item.Query, time.Unix(item.Time, 0))
resp, _, err := cli.Query(ctx, item.Query, time.Unix(item.Time, 0))
if err != nil {
logx.Warningf(ctx, "query instant error: query:%s err:%v", item.Query, err)
return lst, err
}
@@ -144,6 +151,8 @@ func (rt *Router) dsProxy(c *gin.Context) {
if ds.AuthJson.BasicAuthUser != "" {
req.SetBasicAuth(ds.AuthJson.BasicAuthUser, ds.AuthJson.BasicAuthPassword)
} else {
req.Header.Del("Authorization")
}
headerCount := len(ds.HTTPJson.Headers)
@@ -163,8 +172,15 @@ func (rt *Router) dsProxy(c *gin.Context) {
transport, has := transportGet(dsId, ds.UpdatedAt)
if !has {
// 使用 TLS 配置(支持 mTLS
tlsConfig, err := ds.HTTPJson.TLS.TLSConfig()
if err != nil {
c.String(http.StatusInternalServerError, "failed to create TLS config: %s", err.Error())
return
}
transport = &http.Transport{
TLSClientConfig: &tls.Config{InsecureSkipVerify: ds.HTTPJson.TLS.SkipTlsVerify},
TLSClientConfig: tlsConfig,
Proxy: http.ProxyFromEnvironment,
DialContext: (&net.Dialer{
Timeout: time.Duration(ds.HTTPJson.DialTimeout) * time.Millisecond,
@@ -177,7 +193,7 @@ func (rt *Router) dsProxy(c *gin.Context) {
modifyResponse := func(r *http.Response) error {
if r.StatusCode == http.StatusUnauthorized {
logger.Warningf("proxy path:%s unauthorized access ", c.Request.URL.Path)
logx.Warningf(c.Request.Context(), "proxy path:%s unauthorized access ", c.Request.URL.Path)
return fmt.Errorf("unauthorized access")
}
@@ -235,3 +251,94 @@ func transportPut(dsid, updatedat int64, tran http.RoundTripper) {
updatedAts[dsid] = updatedat
transportsLock.Unlock()
}
const (
DatasourceTypePrometheus = "Prometheus"
DatasourceTypeVictoriaMetrics = "VictoriaMetrics"
)
type deleteDatasourceSeriesForm struct {
DatasourceID int64 `json:"datasource_id"`
Match []string `json:"match"`
Start string `json:"start"`
End string `json:"end"`
}
func (rt *Router) deleteDatasourceSeries(c *gin.Context) {
var ddsf deleteDatasourceSeriesForm
ginx.BindJSON(c, &ddsf)
ds := rt.DatasourceCache.GetById(ddsf.DatasourceID)
if ds == nil {
ginx.Bomb(http.StatusBadRequest, "no such datasource")
return
}
// Get datasource type, now only support prometheus and victoriametrics
datasourceType, ok := ds.SettingsJson["prometheus.tsdb_type"]
if !ok {
ginx.Bomb(http.StatusBadRequest, "datasource type not found, please check your datasource settings")
return
}
target, err := ds.HTTPJson.ParseUrl()
if err != nil {
ginx.Bomb(http.StatusInternalServerError, "invalid urls: %s", ds.HTTPJson.GetUrls())
return
}
timeout := time.Duration(ds.HTTPJson.DialTimeout) * time.Millisecond
matchQueries := make([]string, 0)
for _, match := range ddsf.Match {
matchQueries = append(matchQueries, fmt.Sprintf("match[]=%s", match))
}
matchQuery := strings.Join(matchQueries, "&")
switch datasourceType {
case DatasourceTypePrometheus:
// Prometheus delete api need POST method
// https://prometheus.io/docs/prometheus/latest/querying/api/#delete-series
url := fmt.Sprintf("http://%s/api/v1/admin/tsdb/delete_series?%s&start=%s&end=%s", target.Host, matchQuery, ddsf.Start, ddsf.End)
go func() {
resp, _, err := poster.PostJSON(url, timeout, nil)
if err != nil {
logger.Errorf("delete series error datasource_id: %d, datasource_name: %s, match: %s, start: %s, end: %s, err: %v",
ddsf.DatasourceID, ds.Name, ddsf.Match, ddsf.Start, ddsf.End, err)
return
}
logger.Infof("delete datasource series datasource_id: %d, datasource_name: %s, match: %s, start: %s, end: %s, respBody: %s",
ddsf.DatasourceID, ds.Name, ddsf.Match, ddsf.Start, ddsf.End, string(resp))
}()
case DatasourceTypeVictoriaMetrics:
// Delete API doesnt support the deletion of specific time ranges.
// Refer: https://docs.victoriametrics.com/victoriametrics/single-server-victoriametrics/#how-to-delete-time-series
var url string
// Check VictoriaMetrics is single node or cluster
// Cluster will have /select/<accountID>/prometheus pattern
re := regexp.MustCompile(`/select/(\d+)/prometheus`)
matches := re.FindStringSubmatch(ds.HTTPJson.Url)
if len(matches) > 0 && matches[1] != "" {
accountID, err := strconv.Atoi(matches[1])
if err != nil {
ginx.Bomb(http.StatusInternalServerError, "invalid accountID: %s", matches[1])
}
url = fmt.Sprintf("http://%s/delete/%d/prometheus/api/v1/admin/tsdb/delete_series?%s", target.Host, accountID, matchQuery)
} else {
url = fmt.Sprintf("http://%s/api/v1/admin/tsdb/delete_series?%s", target.Host, matchQuery)
}
go func() {
resp, err := httplib.Get(url).SetTimeout(timeout).Response()
if err != nil {
logger.Errorf("delete series failed | datasource_id: %d, datasource_name: %s, match: %s, start: %s, end: %s, err: %v",
ddsf.DatasourceID, ds.Name, ddsf.Match, ddsf.Start, ddsf.End, err)
return
}
logger.Infof("sending delete series request | datasource_id: %d, datasource_name: %s, match: %s, start: %s, end: %s, respBody: %s",
ddsf.DatasourceID, ds.Name, ddsf.Match, ddsf.Start, ddsf.End, resp.Body)
}()
default:
ginx.Bomb(http.StatusBadRequest, "not support delete series yet")
}
ginx.NewRender(c).Data(nil, nil)
}

View File

@@ -5,14 +5,17 @@ import (
"sort"
"sync"
"github.com/ccfos/nightingale/v6/alert/eval"
"github.com/ccfos/nightingale/v6/dscache"
"github.com/ccfos/nightingale/v6/models"
"github.com/ccfos/nightingale/v6/pkg/logx"
"github.com/ccfos/nightingale/v6/pkg/ginx"
"github.com/gin-gonic/gin"
"github.com/toolkits/pkg/ginx"
"github.com/toolkits/pkg/logger"
)
func CheckDsPerm(c *gin.Context, dsId int64, cate string, q interface{}) bool {
type CheckDsPermFunc func(c *gin.Context, dsId int64, cate string, q interface{}) bool
var CheckDsPerm CheckDsPermFunc = func(c *gin.Context, dsId int64, cate string, q interface{}) bool {
// todo: 后续需要根据 cate 判断是否需要权限
return true
}
@@ -44,6 +47,7 @@ func QueryLogBatchConcurrently(anonymousAccess bool, ctx *gin.Context, f QueryFr
var mu sync.Mutex
var wg sync.WaitGroup
var errs []error
rctx := ctx.Request.Context()
for _, q := range f.Queries {
if !anonymousAccess && !CheckDsPerm(ctx, q.Did, q.DsCate, q) {
@@ -52,20 +56,27 @@ func QueryLogBatchConcurrently(anonymousAccess bool, ctx *gin.Context, f QueryFr
plug, exists := dscache.DsCache.Get(q.DsCate, q.Did)
if !exists {
logger.Warningf("cluster:%d not exists query:%+v", q.Did, q)
logx.Warningf(rctx, "cluster:%d not exists query:%+v", q.Did, q)
return LogResp{}, fmt.Errorf("cluster not exists")
}
// 根据数据源类型对 Query 进行模板渲染处理
err := eval.ExecuteQueryTemplate(q.DsCate, q.Query, nil)
if err != nil {
logx.Warningf(rctx, "query template execute error: %v", err)
return LogResp{}, fmt.Errorf("query template execute error: %v", err)
}
wg.Add(1)
go func(query Query) {
defer wg.Done()
data, total, err := plug.QueryLog(ctx.Request.Context(), query.Query)
data, total, err := plug.QueryLog(rctx, query.Query)
mu.Lock()
defer mu.Unlock()
if err != nil {
errMsg := fmt.Sprintf("query data error: %v query:%v\n ", err, query)
logger.Warningf(errMsg)
logx.Warningf(rctx, "%s", errMsg)
errs = append(errs, err)
return
}
@@ -111,15 +122,16 @@ func QueryDataConcurrently(anonymousAccess bool, ctx *gin.Context, f models.Quer
var mu sync.Mutex
var wg sync.WaitGroup
var errs []error
rctx := ctx.Request.Context()
for _, q := range f.Querys {
for _, q := range f.Queries {
if !anonymousAccess && !CheckDsPerm(ctx, f.DatasourceId, f.Cate, q) {
return nil, fmt.Errorf("forbidden")
}
plug, exists := dscache.DsCache.Get(f.Cate, f.DatasourceId)
if !exists {
logger.Warningf("cluster:%d not exists", f.DatasourceId)
logx.Warningf(rctx, "cluster:%d not exists", f.DatasourceId)
return nil, fmt.Errorf("cluster not exists")
}
@@ -127,18 +139,18 @@ func QueryDataConcurrently(anonymousAccess bool, ctx *gin.Context, f models.Quer
go func(query interface{}) {
defer wg.Done()
datas, err := plug.QueryData(ctx.Request.Context(), query)
data, err := plug.QueryData(rctx, query)
if err != nil {
logger.Warningf("query data error: req:%+v err:%v", query, err)
logx.Warningf(rctx, "query data error: req:%+v err:%v", query, err)
mu.Lock()
errs = append(errs, err)
mu.Unlock()
return
}
logger.Debugf("query data: req:%+v resp:%+v", query, datas)
logx.Debugf(rctx, "query data: req:%+v resp:%+v", query, data)
mu.Lock()
resp = append(resp, datas...)
resp = append(resp, data...)
mu.Unlock()
}(q)
}
@@ -182,15 +194,16 @@ func QueryLogConcurrently(anonymousAccess bool, ctx *gin.Context, f models.Query
var mu sync.Mutex
var wg sync.WaitGroup
var errs []error
rctx := ctx.Request.Context()
for _, q := range f.Querys {
for _, q := range f.Queries {
if !anonymousAccess && !CheckDsPerm(ctx, f.DatasourceId, f.Cate, q) {
return LogResp{}, fmt.Errorf("forbidden")
}
plug, exists := dscache.DsCache.Get(f.Cate, f.DatasourceId)
if !exists {
logger.Warningf("cluster:%d not exists query:%+v", f.DatasourceId, f)
logx.Warningf(rctx, "cluster:%d not exists query:%+v", f.DatasourceId, f)
return LogResp{}, fmt.Errorf("cluster not exists")
}
@@ -198,11 +211,11 @@ func QueryLogConcurrently(anonymousAccess bool, ctx *gin.Context, f models.Query
go func(query interface{}) {
defer wg.Done()
data, total, err := plug.QueryLog(ctx.Request.Context(), query)
logger.Debugf("query log: req:%+v resp:%+v", query, data)
data, total, err := plug.QueryLog(rctx, query)
logx.Debugf(rctx, "query log: req:%+v resp:%+v", query, data)
if err != nil {
errMsg := fmt.Sprintf("query data error: %v query:%v\n ", err, query)
logger.Warningf(errMsg)
logx.Warningf(rctx, "%s", errMsg)
mu.Lock()
errs = append(errs, err)
mu.Unlock()
@@ -240,22 +253,23 @@ func (rt *Router) QueryLogV2(c *gin.Context) {
func (rt *Router) QueryLog(c *gin.Context) {
var f models.QueryParam
ginx.BindJSON(c, &f)
rctx := c.Request.Context()
var resp []interface{}
for _, q := range f.Querys {
for _, q := range f.Queries {
if !rt.Center.AnonymousAccess.PromQuerier && !CheckDsPerm(c, f.DatasourceId, f.Cate, q) {
ginx.Bomb(200, "forbidden")
}
plug, exists := dscache.DsCache.Get("elasticsearch", f.DatasourceId)
if !exists {
logger.Warningf("cluster:%d not exists", f.DatasourceId)
logx.Warningf(rctx, "cluster:%d not exists", f.DatasourceId)
ginx.Bomb(200, "cluster not exists")
}
data, _, err := plug.QueryLog(c.Request.Context(), q)
data, _, err := plug.QueryLog(rctx, q)
if err != nil {
logger.Warningf("query data error: %v", err)
logx.Warningf(rctx, "query data error: %v", err)
ginx.Bomb(200, "err:%v", err)
continue
}

View File

@@ -7,14 +7,17 @@ import (
"github.com/ccfos/nightingale/v6/models"
"github.com/ccfos/nightingale/v6/pkg/strx"
"github.com/ccfos/nightingale/v6/pkg/ginx"
"github.com/gin-gonic/gin"
"github.com/toolkits/pkg/ginx"
)
func (rt *Router) recordingRuleGets(c *gin.Context) {
busiGroupId := ginx.UrlParamInt64(c, "id")
ars, err := models.RecordingRuleGets(rt.Ctx, busiGroupId)
if err == nil {
models.FillUpdateByNicknames(rt.Ctx, ars)
}
ginx.NewRender(c).Data(ars, err)
}
@@ -39,6 +42,9 @@ func (rt *Router) recordingRuleGetsByGids(c *gin.Context) {
}
ars, err := models.RecordingRuleGetsByBGIds(rt.Ctx, gids)
if err == nil {
models.FillUpdateByNicknames(rt.Ctx, ars)
}
ginx.NewRender(c).Data(ars, err)
}
@@ -112,6 +118,7 @@ func (rt *Router) recordingRulePutByFE(c *gin.Context) {
}
rt.bgrwCheck(c, ar.GroupId)
rt.bgroCheck(c, f.GroupId)
f.UpdateBy = c.MustGet("username").(string)
ginx.NewRender(c).Message(ar.Update(rt.Ctx, f))
@@ -149,6 +156,12 @@ func (rt *Router) recordingRulePutFields(c *gin.Context) {
f.Fields["datasource_queries"] = string(bytes)
}
if datasourceIds, ok := f.Fields["datasource_ids"]; ok {
bytes, err := json.Marshal(datasourceIds)
ginx.Dangerous(err)
f.Fields["datasource_ids"] = string(bytes)
}
for i := 0; i < len(f.Ids); i++ {
ar, err := models.RecordingRuleGetById(rt.Ctx, f.Ids[i])
ginx.Dangerous(err)

View File

@@ -6,9 +6,9 @@ import (
"github.com/ccfos/nightingale/v6/center/cconf"
"github.com/ccfos/nightingale/v6/models"
"github.com/ccfos/nightingale/v6/pkg/ginx"
"github.com/gin-gonic/gin"
"github.com/toolkits/pkg/ginx"
)
func (rt *Router) rolesGets(c *gin.Context) {

View File

@@ -5,8 +5,8 @@ import (
"github.com/ccfos/nightingale/v6/center/cconf"
"github.com/ccfos/nightingale/v6/models"
"github.com/ccfos/nightingale/v6/pkg/ginx"
"github.com/gin-gonic/gin"
"github.com/toolkits/pkg/ginx"
"github.com/toolkits/pkg/i18n"
)

View File

@@ -0,0 +1,145 @@
package router
import (
"net/http"
"github.com/ccfos/nightingale/v6/models"
"github.com/ccfos/nightingale/v6/pkg/slice"
"github.com/ccfos/nightingale/v6/pkg/ginx"
"github.com/gin-gonic/gin"
)
func (rt *Router) savedViewGets(c *gin.Context) {
page := ginx.QueryStr(c, "page", "")
me := c.MustGet("user").(*models.User)
lst, err := models.SavedViewGets(rt.Ctx, page)
if err != nil {
ginx.NewRender(c).Data(nil, err)
return
}
models.FillUpdateByNicknames(rt.Ctx, lst)
userGids, err := models.MyGroupIds(rt.Ctx, me.Id)
if err != nil {
ginx.NewRender(c).Data(nil, err)
return
}
favoriteMap, err := models.SavedViewFavoriteGetByUserId(rt.Ctx, me.Id)
if err != nil {
ginx.NewRender(c).Data(nil, err)
return
}
favoriteViews := make([]models.SavedView, 0)
normalViews := make([]models.SavedView, 0)
for _, view := range lst {
visible := view.CreateBy == me.Username ||
view.PublicCate == 2 ||
(view.PublicCate == 1 && slice.HaveIntersection[int64](userGids, view.Gids))
if !visible {
continue
}
view.IsFavorite = favoriteMap[view.Id]
// 收藏的排前面
if view.IsFavorite {
favoriteViews = append(favoriteViews, view)
} else {
normalViews = append(normalViews, view)
}
}
ginx.NewRender(c).Data(append(favoriteViews, normalViews...), nil)
}
func (rt *Router) savedViewAdd(c *gin.Context) {
var f models.SavedView
ginx.BindJSON(c, &f)
me := c.MustGet("user").(*models.User)
f.Id = 0
f.CreateBy = me.Username
f.UpdateBy = me.Username
err := models.SavedViewAdd(rt.Ctx, &f)
ginx.NewRender(c).Data(f.Id, err)
}
func (rt *Router) savedViewPut(c *gin.Context) {
id := ginx.UrlParamInt64(c, "id")
view, err := models.SavedViewGetById(rt.Ctx, id)
if err != nil {
ginx.NewRender(c).Data(nil, err)
return
}
if view == nil {
ginx.NewRender(c, http.StatusNotFound).Message("saved view not found")
return
}
me := c.MustGet("user").(*models.User)
// 只有创建者可以更新
if view.CreateBy != me.Username && !me.IsAdmin() {
ginx.NewRender(c, http.StatusForbidden).Message("forbidden")
return
}
var f models.SavedView
ginx.BindJSON(c, &f)
view.Name = f.Name
view.Filter = f.Filter
view.PublicCate = f.PublicCate
view.Gids = f.Gids
err = models.SavedViewUpdate(rt.Ctx, view, me.Username)
ginx.NewRender(c).Message(err)
}
func (rt *Router) savedViewDel(c *gin.Context) {
id := ginx.UrlParamInt64(c, "id")
view, err := models.SavedViewGetById(rt.Ctx, id)
if err != nil {
ginx.NewRender(c).Data(nil, err)
return
}
if view == nil {
ginx.NewRender(c, http.StatusNotFound).Message("saved view not found")
return
}
me := c.MustGet("user").(*models.User)
// 只有创建者或管理员可以删除
if view.CreateBy != me.Username && !me.IsAdmin() {
ginx.NewRender(c, http.StatusForbidden).Message("forbidden")
return
}
err = models.SavedViewDel(rt.Ctx, id)
ginx.NewRender(c).Message(err)
}
func (rt *Router) savedViewFavoriteAdd(c *gin.Context) {
id := ginx.UrlParamInt64(c, "id")
me := c.MustGet("user").(*models.User)
err := models.UserViewFavoriteAdd(rt.Ctx, id, me.Id)
ginx.NewRender(c).Message(err)
}
func (rt *Router) savedViewFavoriteDel(c *gin.Context) {
id := ginx.UrlParamInt64(c, "id")
me := c.MustGet("user").(*models.User)
err := models.UserViewFavoriteDel(rt.Ctx, id, me.Id)
ginx.NewRender(c).Message(err)
}

View File

@@ -5,10 +5,10 @@ import (
"github.com/ccfos/nightingale/v6/pkg/flashduty"
"github.com/ccfos/nightingale/v6/pkg/ormx"
"github.com/ccfos/nightingale/v6/pkg/secu"
"github.com/ccfos/nightingale/v6/pkg/ginx"
"github.com/google/uuid"
"github.com/gin-gonic/gin"
"github.com/toolkits/pkg/ginx"
"github.com/toolkits/pkg/logger"
)

View File

@@ -4,9 +4,9 @@ import (
"time"
"github.com/ccfos/nightingale/v6/models"
"github.com/ccfos/nightingale/v6/pkg/ginx"
"github.com/gin-gonic/gin"
"github.com/toolkits/pkg/ginx"
)
func (rt *Router) serversGet(c *gin.Context) {

View File

@@ -5,10 +5,10 @@ import (
"time"
"github.com/ccfos/nightingale/v6/models"
"github.com/ccfos/nightingale/v6/pkg/ginx"
"github.com/google/uuid"
"github.com/gin-gonic/gin"
"github.com/toolkits/pkg/ginx"
)
// sourceTokenAdd 生成新的源令牌

View File

@@ -11,11 +11,12 @@ import (
"github.com/ccfos/nightingale/v6/models"
"github.com/ccfos/nightingale/v6/pkg/ctx"
"github.com/ccfos/nightingale/v6/pkg/strx"
"github.com/ccfos/nightingale/v6/pushgw/idents"
"github.com/ccfos/nightingale/v6/storage"
"github.com/ccfos/nightingale/v6/pkg/ginx"
"github.com/gin-gonic/gin"
"github.com/prometheus/common/model"
"github.com/toolkits/pkg/ginx"
"github.com/toolkits/pkg/logger"
)
@@ -37,6 +38,16 @@ func (rt *Router) targetGetsByHostFilter(c *gin.Context) {
total, err := models.TargetCountByFilter(rt.Ctx, query)
ginx.Dangerous(err)
models.FillTargetsBeatTime(rt.Redis, hosts)
now := time.Now().Unix()
for i := 0; i < len(hosts); i++ {
if now-hosts[i].BeatTime < 60 {
hosts[i].TargetUp = 2
} else if now-hosts[i].BeatTime < 180 {
hosts[i].TargetUp = 1
}
}
ginx.NewRender(c).Data(gin.H{
"list": hosts,
"total": total,
@@ -80,9 +91,24 @@ func (rt *Router) targetGets(c *gin.Context) {
models.BuildTargetWhereWithBgids(bgids),
models.BuildTargetWhereWithDsIds(dsIds),
models.BuildTargetWhereWithQuery(query),
models.BuildTargetWhereWithDowntime(downtime),
models.BuildTargetWhereWithHosts(hosts),
}
// downtime 筛选:从缓存获取心跳时间,选择较小的集合用 IN 或 NOT IN 过滤
if downtime != 0 {
downtimeOpt, hasMatch := rt.downtimeFilter(downtime)
if !hasMatch {
ginx.NewRender(c).Data(gin.H{
"list": []*models.Target{},
"total": 0,
}, nil)
return
}
if downtimeOpt != nil {
options = append(options, downtimeOpt)
}
}
total, err := models.TargetTotal(rt.Ctx, options...)
ginx.Dangerous(err)
@@ -101,14 +127,17 @@ func (rt *Router) targetGets(c *gin.Context) {
now := time.Now()
cache := make(map[int64]*models.BusiGroup)
// 从 Redis 补全 BeatTime
models.FillTargetsBeatTime(rt.Redis, list)
var keys []string
for i := 0; i < len(list); i++ {
ginx.Dangerous(list[i].FillGroup(rt.Ctx, cache))
keys = append(keys, models.WrapIdent(list[i].Ident))
if now.Unix()-list[i].UpdateAt < 60 {
if now.Unix()-list[i].BeatTime < 60 {
list[i].TargetUp = 2
} else if now.Unix()-list[i].UpdateAt < 180 {
} else if now.Unix()-list[i].BeatTime < 180 {
list[i].TargetUp = 1
}
}
@@ -147,6 +176,43 @@ func (rt *Router) targetGets(c *gin.Context) {
}, nil)
}
// downtimeFilter 从缓存获取心跳时间,生成 downtime 筛选条件
// 选择匹配集和非匹配集中较小的一方,用 IN 或 NOT IN 来减少 SQL 参数量
// 返回值:
// - option: 筛选条件nil 表示所有 target 都符合条件(无需过滤)
// - hasMatch: 是否有符合条件的 targetfalse 表示无匹配应返回空结果
func (rt *Router) downtimeFilter(downtime int64) (option models.BuildTargetWhereOption, hasMatch bool) {
now := time.Now().Unix()
targets := rt.TargetCache.GetAll()
var matchIdents, nonMatchIdents []string
for _, target := range targets {
matched := false
if downtime > 0 {
matched = target.BeatTime < now-downtime
} else if downtime < 0 {
matched = target.BeatTime > now+downtime
}
if matched {
matchIdents = append(matchIdents, target.Ident)
} else {
nonMatchIdents = append(nonMatchIdents, target.Ident)
}
}
if len(matchIdents) == 0 {
return nil, false
}
if len(nonMatchIdents) == 0 {
return nil, true
}
if len(matchIdents) <= len(nonMatchIdents) {
return models.BuildTargetWhereWithIdents(matchIdents), true
}
return models.BuildTargetWhereExcludeIdents(nonMatchIdents), true
}
func (rt *Router) targetExtendInfoByIdent(c *gin.Context) {
ident := ginx.QueryStr(c, "ident", "")
key := models.WrapExtendIdent(ident)
@@ -601,3 +667,10 @@ func (rt *Router) targetsOfHostQuery(c *gin.Context) {
ginx.NewRender(c).Data(lst, nil)
}
func (rt *Router) targetUpdate(c *gin.Context) {
var f idents.TargetUpdate
ginx.BindJSON(c, &f)
ginx.NewRender(c).Message(rt.IdentSet.UpdateTargets(f.Lst, f.Now))
}

View File

@@ -7,9 +7,9 @@ import (
"github.com/ccfos/nightingale/v6/alert/sender"
"github.com/ccfos/nightingale/v6/models"
"github.com/ccfos/nightingale/v6/pkg/strx"
"github.com/ccfos/nightingale/v6/pkg/ginx"
"github.com/gin-gonic/gin"
"github.com/toolkits/pkg/ginx"
"github.com/toolkits/pkg/i18n"
)

View File

@@ -8,9 +8,9 @@ import (
"github.com/ccfos/nightingale/v6/models"
"github.com/ccfos/nightingale/v6/pkg/strx"
"github.com/ccfos/nightingale/v6/pkg/ginx"
"github.com/gin-gonic/gin"
"github.com/toolkits/pkg/ginx"
"github.com/toolkits/pkg/i18n"
"github.com/toolkits/pkg/str"
)
@@ -25,6 +25,7 @@ func (rt *Router) taskTplGets(c *gin.Context) {
list, err := models.TaskTplGets(rt.Ctx, []int64{groupId}, query, limit, ginx.Offset(c, limit))
ginx.Dangerous(err)
models.FillUpdateByNicknames(rt.Ctx, list)
ginx.NewRender(c).Data(gin.H{
"total": total,
@@ -60,6 +61,7 @@ func (rt *Router) taskTplGetsByGids(c *gin.Context) {
list, err := models.TaskTplGets(rt.Ctx, gids, query, limit, ginx.Offset(c, limit))
ginx.Dangerous(err)
models.FillUpdateByNicknames(rt.Ctx, list)
ginx.NewRender(c).Data(gin.H{
"total": total,

View File

@@ -2,13 +2,14 @@ package router
import (
"fmt"
"net/http"
"github.com/ccfos/nightingale/v6/center/cconf"
"github.com/ccfos/nightingale/v6/datasource/tdengine"
"github.com/ccfos/nightingale/v6/dscache"
"github.com/ccfos/nightingale/v6/models"
"github.com/ccfos/nightingale/v6/pkg/ginx"
"github.com/gin-gonic/gin"
"github.com/toolkits/pkg/ginx"
"net/http"
)
type databasesQueryForm struct {

View File

@@ -0,0 +1,136 @@
package router
import (
"encoding/json"
"fmt"
"io"
"net/http"
"time"
"github.com/ccfos/nightingale/v6/models"
"github.com/ccfos/nightingale/v6/pkg/ginx"
"github.com/ccfos/nightingale/v6/pkg/loggrep"
"github.com/toolkits/pkg/logger"
"github.com/gin-gonic/gin"
)
// traceLogsPage renders an HTML log viewer page for trace logs.
func (rt *Router) traceLogsPage(c *gin.Context) {
traceId := ginx.UrlParamStr(c, "traceid")
if !loggrep.IsValidTraceID(traceId) {
c.String(http.StatusBadRequest, "invalid trace id format")
return
}
logs, instance, err := rt.getTraceLogs(traceId)
if err != nil {
c.String(http.StatusInternalServerError, "Error: %v", err)
return
}
c.Header("Content-Type", "text/html; charset=utf-8")
err = loggrep.RenderTraceLogsHTML(c.Writer, loggrep.TraceLogsPageData{
TraceID: traceId,
Instance: instance,
Logs: logs,
Total: len(logs),
})
if err != nil {
c.String(http.StatusInternalServerError, "render error: %v", err)
}
}
// traceLogsJSON returns JSON for trace logs.
func (rt *Router) traceLogsJSON(c *gin.Context) {
traceId := ginx.UrlParamStr(c, "traceid")
if !loggrep.IsValidTraceID(traceId) {
ginx.Bomb(200, "invalid trace id format")
}
logs, instance, err := rt.getTraceLogs(traceId)
ginx.Dangerous(err)
ginx.NewRender(c).Data(loggrep.EventDetailResp{
Logs: logs,
Instance: instance,
}, nil)
}
// getTraceLogs finds the same-engine instances and queries each one
// until trace logs are found. Trace logs belong to a single instance.
func (rt *Router) getTraceLogs(traceId string) ([]string, string, error) {
keyword := "trace_id=" + traceId
instance := fmt.Sprintf("%s:%d", rt.Alert.Heartbeat.IP, rt.HTTP.Port)
engineName := rt.Alert.Heartbeat.EngineName
// try local first
logs, err := loggrep.GrepLatestLogFiles(rt.LogDir, keyword)
if err == nil && len(logs) > 0 {
return logs, instance, nil
}
// find all instances with the same engineName
servers, err := models.AlertingEngineGetsInstances(rt.Ctx,
"engine_cluster = ? and clock > ?",
engineName, time.Now().Unix()-30)
if err != nil {
return nil, "", err
}
// loop through remote instances until we find logs
for _, node := range servers {
if node == instance {
continue // already tried local
}
logs, nodeAddr, err := rt.forwardTraceLogs(node, traceId)
if err != nil {
logger.Errorf("forwardTraceLogs failed: %v", err)
continue
}
if len(logs) > 0 {
return logs, nodeAddr, nil
}
}
return nil, instance, nil
}
func (rt *Router) forwardTraceLogs(node, traceId string) ([]string, string, error) {
url := fmt.Sprintf("http://%s/v1/n9e/trace-logs/%s", node, traceId)
req, err := http.NewRequest("GET", url, nil)
if err != nil {
return nil, node, err
}
for user, pass := range rt.HTTP.APIForService.BasicAuth {
req.SetBasicAuth(user, pass)
break
}
client := &http.Client{Timeout: 15 * time.Second}
resp, err := client.Do(req)
if err != nil {
return nil, node, fmt.Errorf("forward to %s failed: %v", node, err)
}
defer resp.Body.Close()
body, err := io.ReadAll(io.LimitReader(resp.Body, 10*1024*1024))
if err != nil {
return nil, node, err
}
var result struct {
Dat loggrep.EventDetailResp `json:"dat"`
Err string `json:"err"`
}
if err := json.Unmarshal(body, &result); err != nil {
return nil, node, err
}
if result.Err != "" {
return nil, node, fmt.Errorf("%s", result.Err)
}
return result.Dat.Logs, result.Dat.Instance, nil
}

View File

@@ -1,6 +1,7 @@
package router
import (
"fmt"
"net/http"
"strings"
@@ -8,10 +9,11 @@ import (
"github.com/ccfos/nightingale/v6/pkg/flashduty"
"github.com/ccfos/nightingale/v6/pkg/ormx"
"github.com/ccfos/nightingale/v6/pkg/secu"
"github.com/ccfos/nightingale/v6/pkg/ginx"
"github.com/gin-gonic/gin"
"github.com/toolkits/pkg/ginx"
"github.com/toolkits/pkg/logger"
"gorm.io/gorm"
)
func (rt *Router) userBusiGroupsGets(c *gin.Context) {
@@ -233,5 +235,239 @@ func (rt *Router) userDel(c *gin.Context) {
return
}
// 如果要删除的用户是 admin 角色,检查是否是最后一个 admin
if target.IsAdmin() {
adminCount, err := models.CountAdminUsers(rt.Ctx)
ginx.Dangerous(err)
if adminCount <= 1 {
ginx.Bomb(http.StatusBadRequest, "Cannot delete the last admin user")
}
}
ginx.NewRender(c).Message(target.Del(rt.Ctx))
}
func (rt *Router) installDateGet(c *gin.Context) {
rootUser, err := models.UserGetByUsername(rt.Ctx, "root")
if err != nil {
logger.Errorf("get root user failed: %v", err)
ginx.NewRender(c).Data(0, nil)
return
}
if rootUser == nil {
logger.Errorf("root user not found")
ginx.NewRender(c).Data(0, nil)
return
}
ginx.NewRender(c).Data(rootUser.CreateAt, nil)
}
// usersPhoneEncrypt 统一手机号加密
func (rt *Router) usersPhoneEncrypt(c *gin.Context) {
users, err := models.UserGetAll(rt.Ctx)
if err != nil {
ginx.NewRender(c).Message(fmt.Errorf("get users failed: %v", err))
return
}
// 获取RSA密钥
_, publicKey, _, err := models.GetRSAKeys(rt.Ctx)
if err != nil {
ginx.NewRender(c).Message(fmt.Errorf("get RSA keys failed: %v", err))
return
}
// 先启用手机号加密功能
err = models.SetPhoneEncryptionEnabled(rt.Ctx, true)
if err != nil {
ginx.NewRender(c).Message(fmt.Errorf("enable phone encryption failed: %v", err))
return
}
// 刷新配置缓存
err = models.RefreshPhoneEncryptionCache(rt.Ctx)
if err != nil {
logger.Errorf("Failed to refresh phone encryption cache: %v", err)
// 回滚配置
models.SetPhoneEncryptionEnabled(rt.Ctx, false)
ginx.NewRender(c).Message(fmt.Errorf("refresh cache failed: %v", err))
return
}
successCount := 0
failCount := 0
var failedUsers []string
// 使用事务处理所有用户的手机号加密
err = models.DB(rt.Ctx).Transaction(func(tx *gorm.DB) error {
// 对每个用户的手机号进行加密
for _, user := range users {
if user.Phone == "" {
continue
}
if isPhoneEncrypted(user.Phone) {
continue
}
encryptedPhone, err := secu.EncryptValue(user.Phone, publicKey)
if err != nil {
logger.Errorf("Failed to encrypt phone for user %s: %v", user.Username, err)
failCount++
failedUsers = append(failedUsers, user.Username)
continue
}
err = tx.Model(&models.User{}).Where("id = ?", user.Id).Update("phone", encryptedPhone).Error
if err != nil {
logger.Errorf("Failed to update phone for user %s: %v", user.Username, err)
failCount++
failedUsers = append(failedUsers, user.Username)
continue
}
successCount++
logger.Debugf("Successfully encrypted phone for user %s", user.Username)
}
// 如果有失败的用户,回滚事务
if failCount > 0 {
return fmt.Errorf("encrypt failed users: %d, failed users: %v", failCount, failedUsers)
}
return nil
})
if err != nil {
// 加密失败,回滚配置
models.SetPhoneEncryptionEnabled(rt.Ctx, false)
models.RefreshPhoneEncryptionCache(rt.Ctx)
ginx.NewRender(c).Message(fmt.Errorf("encrypt phone failed: %v", err))
return
}
ginx.NewRender(c).Data(gin.H{
"success_count": successCount,
"fail_count": failCount,
}, nil)
}
func (rt *Router) usersPhoneDecryptRefresh(c *gin.Context) {
err := models.RefreshPhoneEncryptionCache(rt.Ctx)
if err != nil {
ginx.NewRender(c).Message(fmt.Errorf("refresh phone encryption cache failed: %v", err))
return
}
ginx.NewRender(c).Message(nil)
}
// usersPhoneDecrypt 统一手机号解密
func (rt *Router) usersPhoneDecrypt(c *gin.Context) {
// 先关闭手机号加密功能
err := models.SetPhoneEncryptionEnabled(rt.Ctx, false)
if err != nil {
ginx.NewRender(c).Message(fmt.Errorf("disable phone encryption failed: %v", err))
return
}
// 刷新配置缓存
err = models.RefreshPhoneEncryptionCache(rt.Ctx)
if err != nil {
logger.Errorf("Failed to refresh phone encryption cache: %v", err)
// 回滚配置
models.SetPhoneEncryptionEnabled(rt.Ctx, true)
ginx.NewRender(c).Message(fmt.Errorf("refresh cache failed: %v", err))
return
}
// 获取所有用户(此时加密开关已关闭,直接读取数据库原始数据)
var users []*models.User
err = models.DB(rt.Ctx).Find(&users).Error
if err != nil {
// 回滚配置
models.SetPhoneEncryptionEnabled(rt.Ctx, true)
models.RefreshPhoneEncryptionCache(rt.Ctx)
ginx.NewRender(c).Message(fmt.Errorf("get users failed: %v", err))
return
}
// 获取RSA密钥
privateKey, _, password, err := models.GetRSAKeys(rt.Ctx)
if err != nil {
// 回滚配置
models.SetPhoneEncryptionEnabled(rt.Ctx, true)
models.RefreshPhoneEncryptionCache(rt.Ctx)
ginx.NewRender(c).Message(fmt.Errorf("get RSA keys failed: %v", err))
return
}
successCount := 0
failCount := 0
var failedUsers []string
// 使用事务处理所有用户的手机号解密
err = models.DB(rt.Ctx).Transaction(func(tx *gorm.DB) error {
// 对每个用户的手机号进行解密
for _, user := range users {
if user.Phone == "" {
continue
}
// 检查是否是加密的手机号
if !isPhoneEncrypted(user.Phone) {
continue
}
// 对手机号进行解密
decryptedPhone, err := secu.Decrypt(user.Phone, privateKey, password)
if err != nil {
logger.Errorf("Failed to decrypt phone for user %s: %v", user.Username, err)
failCount++
failedUsers = append(failedUsers, user.Username)
continue
}
// 直接更新数据库中的手机号字段绕过GORM钩子
err = tx.Model(&models.User{}).Where("id = ?", user.Id).Update("phone", decryptedPhone).Error
if err != nil {
logger.Errorf("Failed to update phone for user %s: %v", user.Username, err)
failCount++
failedUsers = append(failedUsers, user.Username)
continue
}
successCount++
logger.Debugf("Successfully decrypted phone for user %s", user.Username)
}
// 如果有失败的用户,回滚事务
if failCount > 0 {
return fmt.Errorf("decrypt failed users: %d, failed users: %v", failCount, failedUsers)
}
return nil
})
if err != nil {
// 解密失败,回滚配置
models.SetPhoneEncryptionEnabled(rt.Ctx, true)
models.RefreshPhoneEncryptionCache(rt.Ctx)
ginx.NewRender(c).Message(fmt.Errorf("decrypt phone failed: %v", err))
return
}
ginx.NewRender(c).Data(gin.H{
"success_count": successCount,
"fail_count": failCount,
}, nil)
}
// isPhoneEncrypted 检查手机号是否已经加密
func isPhoneEncrypted(phone string) bool {
// 检查是否有 "enc:" 前缀标记
return len(phone) > 4 && phone[:4] == "enc:"
}

Some files were not shown because too many files have changed in this diff Show More