Compare commits

...

286 Commits

Author SHA1 Message Date
ning
6bad0fc8c1 code refactor 2025-10-13 12:02:53 +08:00
ning
3a94612792 code refactor 2025-10-13 12:00:29 +08:00
ning
643de04c10 fix sql 2025-10-13 10:36:50 +08:00
Yening Qin
16b3cb1abc feat: support encrypt user phone (#2906)
* feature: add a new configuration option to encrypt user phone numbers in the database (#2902)

Co-authored-by: yuanzaiping_dxm <yuanzaiping@duxiaoman.com>
---------

Co-authored-by: zaipingY <30775871+zaipingy@users.noreply.github.com>
Co-authored-by: yuanzaiping_dxm <yuanzaiping@duxiaoman.com>
2025-10-11 14:37:14 +08:00
ning
32995c1b2d fix: event pipeline insert by PostgreSQL 2025-09-23 16:40:27 +08:00
ning
b4fa36fa0e refactor: update message tpl 2025-09-22 18:18:19 +08:00
ning
f412f82eb8 fix: event value is inf 2025-09-19 19:32:40 +08:00
ning
9da1cd506b fix: datasource sync 2025-09-19 17:41:57 +08:00
ning
99ea838863 refactor: optimize query target 2025-09-19 15:00:07 +08:00
ning
7feb003b72 refactor: change feishucard body 2025-09-19 10:37:58 +08:00
ning
b0a053361f refactor: change log 2025-09-17 20:28:22 +08:00
ning
959f75394b refactor: alert rule api 2025-09-17 12:10:43 +08:00
ning
03e95973b2 refactor: message tpl api 2025-09-16 20:13:25 +08:00
ning
e890705167 refactor: event notify 2025-09-16 19:24:05 +08:00
ning
6716f1bdf1 refactor: update event notify 2025-09-15 20:19:18 +08:00
ning
739b9406a4 Merge branch 'main' of github.com:ccfos/nightingale 2025-09-15 17:37:38 +08:00
ning
77f280d1cc fix: event delete api 2025-09-15 17:37:25 +08:00
pioneerlfn
04fe1b9dd6 for slice, when marshal, return [] instead null (#2878) 2025-09-15 17:28:57 +08:00
ning
552758e0e1 refactor: pushgw writer support async 2025-09-14 15:00:40 +08:00
ning
68bc474c1b refactor: update message tpl 2025-09-11 20:38:17 +08:00
ning
f692035deb refactor: change datasource log 2025-09-11 16:14:59 +08:00
ning
eb441353c3 refactor: message tpl 2025-09-11 15:33:02 +08:00
ning
b606b22ae6 fix: opensearch alert 2025-09-10 22:24:50 +08:00
ning
1de0428860 docs: update init.sql 2025-09-09 18:51:23 +08:00
ning
3d0c288c9f refactor: event api 2025-09-09 16:36:40 +08:00
ning
343814a802 refactor: notify rule update 2025-09-09 11:05:58 +08:00
ning
12e2761467 fix: dingtalk message tpl 2025-09-09 10:18:08 +08:00
Yening Qin
0edd5ee772 refactor: event notify (#2869) 2025-09-08 15:04:33 +08:00
ning
5e430cedc7 fix: build, router_target miss idents 2025-09-03 14:17:15 +08:00
ning
a791a9901e refactor: push ts to kafka 2025-09-03 12:17:32 +08:00
totoro
222cdd76f0 refactor : es support search_after (#2859) 2025-09-03 12:12:07 +08:00
arch3754
ed4e3937e0 feat: add target update (#2853) 2025-09-03 12:05:38 +08:00
Yening Qin
60f9e1c48e refactor: dscache sync add datasource process hook (#2792) 2025-09-02 18:12:36 +08:00
Ulric Qin
276dfe7372 update linux dash 2025-09-02 08:57:36 +08:00
Ulric Qin
4a6dacbe30 add host table ng 2025-09-02 08:54:54 +08:00
Ulric Qin
48eebba11a update linux dashboard 2025-09-02 08:52:58 +08:00
ning
eca82e5ec2 change ops update 2025-09-01 14:24:17 +08:00
Yening Qin
21478fcf3d fix: send http notify retry (#2849) 2025-08-30 01:57:32 +08:00
ulricqin
a87c856299 fix: call flashduty to push event (#2848) 2025-08-30 00:06:53 +08:00
ning
ba035a446d refactor: change some log 2025-08-29 16:32:57 +08:00
ning
bf840e6bb2 docs: update dashboard tpl 2025-08-29 10:14:59 +08:00
ning
cd01092aed refactor: update alert rule import api 2025-08-28 19:44:05 +08:00
ning
e202fd50c8 refactor: datasource api 2025-08-28 16:48:04 +08:00
ning
f0e5062485 refactor: optimize edge ident update 2025-08-28 16:24:33 +08:00
ning
861fe96de5 add UpdateDBTargetTimestampDisable 2025-08-27 19:12:56 +08:00
Ulric Qin
5b66ada96d Merge branch 'main' of https://github.com/ccfos/nightingale 2025-08-27 10:06:19 +08:00
Ulric Qin
d5a98debff upgrade ibex 2025-08-27 10:06:11 +08:00
ning
4977052a67 Merge branch 'main' of github.com:ccfos/nightingale 2025-08-22 15:03:17 +08:00
ning
dcc461e587 refactor: push writer support sync 2025-08-22 15:00:49 +08:00
Ulric Qin
f5ce1733bb update minio dashboard 2025-08-21 18:58:47 +08:00
Ulric Qin
436cf25409 Merge branch 'main' of https://github.com/ccfos/nightingale 2025-08-21 10:34:56 +08:00
Ulric Qin
038f68b0b7 add minio dashboard for new version 2025-08-21 10:34:50 +08:00
ning
96ef1895b7 refactor: event_script_notify_result log add stdin 2025-08-20 14:24:28 +08:00
zjxpsetp
eeaa7b46f1 update es dashboard for categraf version bigger than 0.3.102 2025-08-19 23:43:54 +08:00
zjxpsetp
dc525352f1 Merge remote-tracking branch 'origin/main' 2025-08-19 17:36:12 +08:00
zjxpsetp
98a3fe9375 update jmx dashboard in kubernetes 2025-08-19 17:35:55 +08:00
ning
74b0f802ec Merge branch 'main' of github.com:ccfos/nightingale 2025-08-18 11:19:07 +08:00
ning
85bd3148d5 refactor: add update db metric 2025-08-18 11:18:52 +08:00
ning
0931fa9603 fix: target update ts 2025-08-18 11:18:39 +08:00
zjxpsetp
65cdb2da9e 更新 jmx 的仪表盘,新的jmx Exporter 指标和之前有一些差别 2025-08-17 17:23:39 +08:00
ning
9ad6514af6 refactor: ds query api 2025-08-13 11:50:01 +08:00
ning
302c6549e4 refactor: ds query api 2025-08-13 11:12:57 +08:00
ning
a3122270e6 refactor: ds query api 2025-08-13 11:02:00 +08:00
Yening Qin
1245c453bb refactor: send flashduty (#2824) 2025-08-12 20:55:23 +08:00
Ulric Qin
9c5ccf0c8f fix: update update_at when batch-updating-rules 2025-08-06 20:21:57 +08:00
Ulric Qin
cd468af250 refactor batch updating rules 2025-08-06 17:16:02 +08:00
Ulric Qin
2d3449c0ec code refactor for batch updating 2025-08-06 15:45:36 +08:00
ning
e15bdbce92 refactor: optimize import prom alert rule 2025-08-06 11:09:53 +08:00
ning
3890243d42 fix: new mysql db client 2025-08-04 18:40:04 +08:00
ning
37fb4ee867 add case-insensitive search for builtin payload filtering 2025-08-04 16:31:28 +08:00
ning
6db63eafc1 refactor: change import prom rule 2025-08-01 19:00:06 +08:00
ning
1e9cbfc316 fix: event query log 2025-08-01 16:47:14 +08:00
ning
4f95554fe3 refactor: update msg tpl 2025-07-31 18:08:12 +08:00
ning
8eba9aa92f refactor: update msg tpl 2025-07-31 15:31:29 +08:00
ning
6ba74b8e21 fix: pgsql cross database query 2025-07-31 11:51:27 +08:00
ning
8ea4632681 refactor: update duty user sync 2025-07-28 14:36:08 +08:00
ning
f958f27de1 fix: AlertRuleExists 2025-07-27 13:28:46 +08:00
ning
1bdfa3e032 refactor: update TargetDel 2025-07-27 12:46:22 +08:00
ning
143880cd46 Merge branch 'main' of github.com:ccfos/nightingale 2025-07-25 13:21:55 +08:00
ning
38f0b4f1bb refactor: modify add loki api resp 2025-07-25 13:01:27 +08:00
dependabot[bot]
2bccd5be99 build(deps): bump golang.org/x/oauth2 from 0.23.0 to 0.27.0 (#2793)
Bumps [golang.org/x/oauth2](https://github.com/golang/oauth2) from 0.23.0 to 0.27.0.
- [Commits](https://github.com/golang/oauth2/compare/v0.23.0...v0.27.0)

---
updated-dependencies:
- dependency-name: golang.org/x/oauth2
  dependency-version: 0.27.0
  dependency-type: direct:production
...

Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
2025-07-24 21:21:29 +08:00
ning
7b328b3eaa refactor: update prom rule import 2025-07-24 21:17:12 +08:00
Ulric Qin
8bd5b90e94 fix: support old rule format when importing 2025-07-23 09:36:28 +08:00
ning
96629e284f refactor: add email notify record 2025-07-17 11:45:58 +08:00
ning
67d2875690 fix: update record rule datasource ids api 2025-07-15 16:29:30 +08:00
ning
238895a1f8 refactor: init tpl 2025-07-15 15:52:38 +08:00
ning
fb341b645d refactor: sub alert add host filter 2025-07-15 14:46:09 +08:00
Haobo Zhang
2d84fd8cf3 fix: ai summary customize parameter parse from interface (#2788) 2025-07-14 14:54:15 +08:00
ning
2611f87c41 refactor: drop builtin_components idx_ident 2025-07-11 19:12:24 +08:00
ning
a5b7aa7a26 refactor: drop builtin_components idx_ident 2025-07-11 18:57:03 +08:00
ning
0714a0f8f1 refactor: change log level 2025-07-11 16:41:14 +08:00
ning
063cc750e1 refactor: update notify channel api 2025-07-11 12:25:08 +08:00
ning
b2a912d72f refactor: log level 2025-07-11 12:06:18 +08:00
ning
4ba745f442 fix: alert rule batch update notify rule 2025-07-11 11:38:09 +08:00
smx_Morgan
fa7d46ecad fix: compatible user_token table with postgresql (#2785) 2025-07-10 11:03:33 +08:00
pioneerlfn
a5a43df44f refactor: doris search sql (#2778)
* doris:support search sql with macro

* Update doris.go

---------

Co-authored-by: Yening Qin <710leo@gmail.com>
2025-07-09 21:33:17 +08:00
smx_Morgan
fbf1d68b84 fix: update postgresql init sql (#2784) 2025-07-09 20:53:56 +08:00
ulricqin
ca712f62a4 fix execution of notify script (#2769) 2025-07-06 08:40:13 +08:00
ulricqin
84ee14d21e add img (#2767) 2025-07-03 19:48:39 +08:00
ning
c9cf1cfdd2 refactor: change alert rule update api 2025-07-02 20:32:56 +08:00
ning
9d1c01107f refactor: builtin tpl 2025-07-02 20:12:43 +08:00
ning
7ea31b5c6d refactor: builtin tpl 2025-07-02 19:37:25 +08:00
ning
e8e1c67cc8 refactor: event notify filter 2025-07-02 15:44:02 +08:00
ning
8079bcd288 docs: enbale token auth 2025-07-01 18:38:28 +08:00
ning
33b178ce82 refactor: roles api 2025-07-01 15:37:42 +08:00
ning
28c9cd7b43 refactor: change eval for duration check 2025-06-30 15:10:20 +08:00
ning
b771e8a3e8 refactor: change eval for duration check 2025-06-30 12:06:48 +08:00
Yening Qin
4945e98200 refactor: builtin tpl gets api (#2760) 2025-06-27 19:45:28 +08:00
ning
a938ea3e56 docs: update migrate.sql 2025-06-26 18:44:55 +08:00
ning
25c339025b Merge branch 'main' of github.com:ccfos/nightingale 2025-06-25 18:05:46 +08:00
ning
bb0ee35275 refactor: optimize notify rule check api 2025-06-25 18:05:34 +08:00
Ulric Qin
0fc54ad173 add some icon 2025-06-25 16:57:59 +08:00
Ulric Qin
1f95e2df94 Merge branch 'main' of https://github.com/ccfos/nightingale 2025-06-25 16:52:00 +08:00
Ulric Qin
d2969f34ef update READE. set en version as default 2025-06-25 16:51:52 +08:00
Yening Qin
d9a34959dc feat: support doris and opensearch alert (#2758) 2025-06-25 16:34:08 +08:00
smx_Morgan
bc6ff7f4ba fix : add offset to es date_histogam (#2757) 2025-06-25 15:37:51 +08:00
Asklv
514913a97a feat: support deletion in datasource series. (#2753) 2025-06-23 19:20:50 +08:00
ning
affc610b7b refactor: change rulename tag handling in alert processing 2025-06-23 16:04:37 +08:00
ning
a098d5d39c refactor: update subscribe api 2025-06-20 18:01:01 +08:00
ning
05c3f1e0e4 refactor: update message tpl 2025-06-20 16:31:30 +08:00
ning
d5740164f2 refactor: update mute tryrun api 2025-06-20 14:42:10 +08:00
ning
8c2383c410 refactor: api add i18n 2025-06-20 14:31:03 +08:00
ning
9af024fb99 refactor: api add i18n 2025-06-20 14:23:03 +08:00
ning
12f3cc21e1 refactor: change rule test api 2025-06-19 16:18:30 +08:00
Asklv
0b3bb54eb4 feat: add time cost in alert history. (#2744)
Signed-off-by: Asklv <boironic@gmail.com>
2025-06-18 21:57:53 +08:00
Yening Qin
da813e2b0c refactor: optimize event notify (#2750) 2025-06-18 18:39:33 +08:00
Ulric Qin
50fa2499b7 add some alert_eval log 2025-06-18 15:15:56 +08:00
Ulric Qin
2c5ae5b3a9 delete some info log 2025-06-18 15:14:58 +08:00
ning
522932aeb4 refactor: api auth check 2025-06-17 20:57:32 +08:00
Yening Qin
35ac0ddea5 fix: api for agent auth (#2749) 2025-06-17 20:52:55 +08:00
ning
26fa750309 refactor: event process test api 2025-06-17 11:58:39 +08:00
710leo
1eba607aeb feat: add install date api 2025-06-16 22:46:47 +08:00
ning
6aadd159af fix: optimize api for agent auth 2025-06-16 20:22:24 +08:00
xtan
b6ad87523e feat: support redis password encryption (#2739) 2025-06-16 20:11:28 +08:00
Yening Qin
ea5b6845de refactor: optimize event processor (#2742) 2025-06-16 16:46:53 +08:00
Yening Qin
5ba5096da2 feat: add mute and sub rule tryrun api (#2737) 2025-06-13 18:08:46 +08:00
Yening Qin
85786d985d feat: add ai summary event processor (#2734)
Co-authored-by: Haobo Zhang <43698160+haobo8@users.noreply.github.com>
2025-06-12 11:33:59 +08:00
Yening Qin
cff211364a feat: support postgresql alert (#2732) 2025-06-11 17:43:34 +08:00
Ulric Qin
0190b2b432 Merge branch 'main' of https://github.com/ccfos/nightingale 2025-06-11 11:50:47 +08:00
Ulric Qin
d8081129f1 replace blank in append_tags 2025-06-11 11:50:37 +08:00
ning
66d4d0c494 refactor: event api perm check 2025-06-11 11:37:18 +08:00
ning
d936d57863 refactor: event api perm check 2025-06-11 11:30:27 +08:00
ning
d819691b78 refactor: change event processor api log 2025-06-10 16:54:52 +08:00
ning
6f0b415821 refactor: mysql set default maxQueryRows 2025-06-09 17:36:33 +08:00
ning
f482efd9ce refactor: add alert rule func 2025-06-09 10:13:14 +08:00
ning
b39d5a742e refactor: event pipline tryrun api 2025-06-08 23:03:03 +08:00
ning
59c3d62c6b refactor: mysql datasource param 2025-06-06 19:23:49 +08:00
ning
624ae125d5 Merge branch 'main' of github.com:ccfos/nightingale 2025-06-06 19:08:10 +08:00
ning
b9c822b220 refactor: mysql datasource param 2025-06-06 19:07:42 +08:00
smx_Morgan
c13baf3a9d refactor : add smtp notify test (#2723) 2025-06-06 18:07:49 +08:00
ning
bc46ff1912 fix: original_tags is nil 2025-06-06 17:38:27 +08:00
ning
2f7c76c275 refactor: message tpl add 2025-06-06 15:45:25 +08:00
Yening Qin
1edf305952 feat: support mysql alert (#2725) 2025-06-06 15:26:22 +08:00
Ulric Qin
c026a6d2b2 update README 2025-06-06 08:47:19 +08:00
smx_Morgan
1853e89f7c feat: add alert history events delete api (#2720) 2025-06-05 19:02:46 +08:00
zjxpsetp
a41a00fba3 Merge remote-tracking branch 'origin/main' 2025-06-05 00:00:36 +08:00
zjxpsetp
ceb9a1d7ff update JAVA for jvm dashboard by opentelementry 2025-06-04 23:58:26 +08:00
710leo
0b5223acdb docs: update postgres sql 2025-06-04 23:02:30 +08:00
710leo
4b63c6b4b1 refactor: change event_pipeline column type 2025-06-04 22:51:19 +08:00
zjxpsetp
edd024306a update JAVA for jvm dashboard by opentelementry 2025-06-03 23:43:35 +08:00
ning
cddf5e7d37 refactor: event list api 2025-06-03 18:59:25 +08:00
ning
f07baa276e docs: update sql 2025-06-03 18:54:04 +08:00
Ulric Qin
2c2d5004f4 Merge branch 'main' of https://github.com/ccfos/nightingale 2025-06-03 11:27:44 +08:00
Ulric Qin
9982666e44 update dashboard 2025-06-03 11:27:36 +08:00
ning
2b448f738c refactor: change role ops 2025-06-02 09:34:20 +08:00
ning
e4c258de8e refactor: change user ops 2025-06-02 09:24:37 +08:00
Ulric Qin
4f128a9b44 rename Null to Others in active events page 2025-05-30 12:19:09 +08:00
Ulric Qin
deb85b9c68 update README img 2025-05-30 08:51:00 +08:00
ning
1b84324147 refactor: rm blockEventNotify 2025-05-29 21:49:57 +08:00
ning
c73b66848e fix: cur event api 2025-05-29 20:30:39 +08:00
ning
cd74442819 refactor: add UpdateBy field assignment in alertMuteAdd function 2025-05-29 17:15:58 +08:00
ning
252a8284f9 refactor: update datasource 2025-05-29 11:02:47 +08:00
ning
7d2e998078 refactor: merge 2025-05-29 10:30:54 +08:00
Yening Qin
69582bacdf feat: add source token api 2025-05-29 10:05:49 +08:00
ning
1bede4eeb8 refactor: build event tags 2025-05-28 16:09:53 +08:00
ning
16ed81020a Merge branch 'main' of github.com:ccfos/nightingale 2025-05-28 14:15:37 +08:00
ning
7b020ae238 refactor: datasource init add recover 2025-05-28 14:15:21 +08:00
ning
05eabcf00d refactor: handle ibex 2025-05-28 14:15:07 +08:00
ning
e316842022 fix: ibex after event relabel 2025-05-28 14:14:51 +08:00
Ulric Qin
8b3c4749aa Merge branch 'main' of https://github.com/ccfos/nightingale 2025-05-27 20:17:37 +08:00
Ulric Qin
16be04c3e9 use s3 as default card severity 2025-05-27 20:17:30 +08:00
ning
ccbadba9ff refactor: update send duty 2025-05-27 20:07:28 +08:00
ning
ce5bf2e473 refactor: event processor 2025-05-27 19:44:31 +08:00
Ulric Qin
80cdf9d0bb if eventcard.Severity < 1: set it to 1 2025-05-27 19:34:03 +08:00
ning
7514086ae6 fix: different notify channel use notify script 2025-05-27 14:27:00 +08:00
ning
116f8b1590 Merge branch 'main' of github.com:ccfos/nightingale 2025-05-27 14:14:31 +08:00
ning
0fb4e4b723 refactor: add eval duration 2025-05-27 14:14:16 +08:00
710leo
07fb427eea refactor: update relabel processor 2025-05-26 23:40:27 +08:00
ulricqin
d8f8fed95f Update README.md 2025-05-26 10:21:33 +08:00
ulricqin
f2e0ec10f7 更新 README.md 2025-05-25 13:09:37 +08:00
ulricqin
db467a8811 更新 README.md 2025-05-25 13:05:03 +08:00
Ulric Qin
b839bd3e16 code refactor 2025-05-24 21:45:47 +08:00
Ulric Qin
8033ca590b update README 2025-05-24 21:20:06 +08:00
Ulric Qin
0974f33d16 update README 2025-05-23 19:05:32 +08:00
Ulric Qin
d52a19b1f7 update README 2025-05-23 18:59:18 +08:00
Ulric Qin
f11c4dc87d Merge branch 'main' of https://github.com/ccfos/nightingale 2025-05-23 18:57:22 +08:00
Ulric Qin
d7f3bc8841 update README 2025-05-23 18:57:05 +08:00
ning
2ae8c35a50 refactor: change notify rule list sort 2025-05-23 16:00:27 +08:00
ning
da0697c5ce refactor: event api and event processors 2025-05-23 15:45:09 +08:00
ning
2eff1159e5 refactor: event add notify rule ids 2025-05-23 13:19:48 +08:00
ning
6c19c0adf4 refactor: update AlertCurEvent 2025-05-22 19:28:35 +08:00
ning
5e5525ef57 refactor: update AlertCurEvent 2025-05-22 19:00:57 +08:00
ning
58c2a3cc71 update event db2fe 2025-05-22 17:11:11 +08:00
ning
cef6d5fe49 refactor: alert_aggr_view delete format 2025-05-22 16:32:15 +08:00
ulricqin
49cda8b58a modify alerting aggr verify rules (#2694) 2025-05-22 15:45:36 +08:00
ning
d6a585ccbd refactor: update cur event api 2025-05-21 20:29:50 +08:00
ning
764c254833 fix: AlertAggrView update 2025-05-21 20:11:13 +08:00
ning
c427abdfa3 fix: AlertAggrView update 2025-05-21 20:04:28 +08:00
shardingHe
3749f62adc docs: add config for ntp (#2690) 2025-05-21 16:25:21 +08:00
Yening Qin
f932f93a94 feat: add new processors (#2688) 2025-05-20 18:27:07 +08:00
smx_Morgan
5bbc432db0 feat : add event_Ids to alert-cur-events/list (#2681) 2025-05-20 15:55:45 +08:00
Yening Qin
0712baa6e1 refactor: change TimeSpanMuteStrategy (#2686) 2025-05-20 15:51:37 +08:00
ning
b4d595d5f5 docs: update ops 2025-05-19 17:40:56 +08:00
Yening Qin
95090055e0 refactor: change redis cli timeout (#2684) 2025-05-19 11:12:46 +08:00
smx_Morgan
880b92bf36 fix: telegram notify channel template (#2683) 2025-05-17 21:42:41 +08:00
Yening Qin
744eb44f19 feat: add event pipelines (#2682) 2025-05-16 14:50:13 +08:00
Ulric Qin
6ddc78ea11 refactor n9e-v8 dashboard 2025-05-15 09:56:47 +08:00
Ulric Qin
823568081b update n9e-v8 dashboard 2025-05-15 08:42:56 +08:00
Ulric Qin
2f8e63f821 add some metrics to observe redis operations 2025-05-15 08:27:39 +08:00
Ulric Qin
bdc9fa4638 update target's update_at one by one 2025-05-15 08:01:11 +08:00
Ulric Qin
9e1d69c8b0 refactor pushgw metrics 2025-05-15 07:52:39 +08:00
Ulric Qin
85d8607be8 add some panel for n9e-v8 dashboard 2025-05-15 07:31:27 +08:00
Ulric Qin
ec6a4f134a update target's timestamp in redis support batch 2025-05-15 06:21:29 +08:00
Ulric Qin
798f9e5536 Merge branch 'main' of https://github.com/ccfos/nightingale 2025-05-15 04:43:39 +08:00
Ulric Qin
92095ea89c fix categraf-detail-dashboard: add filter for promql 2025-05-15 04:43:33 +08:00
Yening Qin
eb85c9c78b feat: add alert mute test function 2025-05-14 21:08:26 +08:00
Ulric Qin
bd8bf1cf9e use topk in linux-overview dashboard 2025-05-14 15:53:09 +08:00
Ulric Qin
b27ddf45cf Merge branch 'main' of https://github.com/ccfos/nightingale 2025-05-14 15:17:10 +08:00
Ulric Qin
c8e004ba51 update n9e_v8 dashboard 2025-05-14 15:16:54 +08:00
Yening Qin
eb330f00b2 feat: embedded product api (#2671) 2025-05-14 14:55:50 +08:00
Yening Qin
49d61bbd5d refactor: merge to main (#2670) 2025-05-14 14:46:05 +08:00
Ulric Qin
407a1b61a5 refactor linux dashboard 2025-05-14 11:58:28 +08:00
Ulric Qin
bc8a6f61be refactor node-exporter dashboard 2025-05-14 11:48:23 +08:00
Ulric Qin
94cd9796bf rename some dashboards of Linux 2025-05-13 20:45:24 +08:00
Ulric Qin
c3ee0143b2 refactor os dashboards 2025-05-13 20:43:14 +08:00
Ulric Qin
10d4faae4e refactor os dashboard 2025-05-13 20:21:38 +08:00
Yening Qin
ffac81a2ef fix: alert rule verify (#2668) 2025-05-13 18:53:08 +08:00
Yening Qin
d8d1a454b3 fix: default ds id update (#2664) 2025-05-13 15:39:42 +08:00
Yening Qin
94f9818fd2 docs: update k8s dashboards and fix alert rule name check (#2663) 2025-05-13 14:59:38 +08:00
Asklv
a5d820ddb3 fix: api panic when gomail dial tcp failed. (#2661) 2025-05-12 20:08:37 +08:00
smx_Morgan
da0224d010 fix: Solved the problem of NaN value of prom not parsing json (#2652) 2025-05-12 18:28:57 +08:00
Yening Qin
4a399a23c0 refactor: change log query api 2025-05-12 15:42:18 +08:00
Ulric Qin
95ecc61834 refactor ops and i18n 2025-05-08 18:43:57 +08:00
Ulric Qin
f72e29677f refactor test case 2025-05-08 17:41:28 +08:00
Ulric Qin
f876eb02e2 fix multi role_operation 2025-04-28 17:20:58 +08:00
Ulric Qin
cdcadefb03 Merge branch 'main' of https://github.com/ccfos/nightingale 2025-04-28 17:19:17 +08:00
Ulric Qin
582a3981fb delete Admon role_operation 2025-04-28 17:19:03 +08:00
smx_Morgan
8081c48450 fix :record rule name change is not synchronized (#2636) 2025-04-28 15:14:37 +08:00
Yening Qin
5e7541215a refactor: es add offset query and add es-index-pattern ops 2025-04-27 18:54:36 +08:00
ning
e95b5428b2 Merge branch 'main' of github.com:ccfos/nightingale 2025-04-25 23:32:39 +08:00
ning
8a47088d97 refactor: update datasource api 2025-04-25 23:32:23 +08:00
Ulric Qin
05ba5caf8a code refactor 2025-04-25 19:21:57 +08:00
Ulric Qin
dc7752c2af code refactor 2025-04-25 19:18:39 +08:00
smx_Morgan
a828603406 Fix: Fixed the issue of group synchronization flashduty (#2628) 2025-04-25 15:34:08 +08:00
Yening Qin
c5c4e00ab8 refactor: change query api (#2626) 2025-04-24 14:20:36 +08:00
ning
770e15db39 refactor: datasource model add identifier 2025-04-23 16:13:36 +08:00
ning
5096117b45 refactor: update api for agent auth 2025-04-23 15:53:40 +08:00
Yening Qin
dd3b68e4ab refactor: change notify channel api auth 2025-04-23 15:47:20 +08:00
Yening Qin
85947c08a8 refactor: sync user info to duty (#2615) 2025-04-18 17:07:54 +08:00
Ulric Qin
3f3c815171 set QueueWaterMark to 0.1 2025-04-17 19:30:21 +08:00
smx_Morgan
08f82e899a feat: support user get by emails and phones (#2613) 2025-04-16 19:07:07 +08:00
bowen
043628d4eb feat: add usernames query param for /api/n9e/users 2025-04-16 12:10:39 +08:00
smx_Morgan
ba33512d22 fix: prevent incorrect board matches caused by implicit type casting 2025-04-15 13:21:58 +08:00
YR Chen
a7cf658c1d refactor: allow valid Go template as rule name (#2549)
* fix: allow valid Go template as rule name

* feat: add back `str.Dangerous` check for texts in template

* Update models/alert_rule.go

Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com>

---------

Co-authored-by: Yening Qin <710leo@gmail.com>
Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com>
2025-04-14 18:28:42 +08:00
smx_Morgan
b62e6fda04 feat: return value add "avtivate" when get alert-mute (#2599) 2025-04-14 18:09:32 +08:00
Yening Qin
6243f9a05c refactor: update es event index (#2602) 2025-04-14 14:46:15 +08:00
Yening Qin
e8962b5646 refactor: change query tpl func (#2597) 2025-04-11 16:10:25 +08:00
ning
97a4ee2764 es_index_pattern add note 2025-04-10 17:28:06 +08:00
rechardwang
2fdb80f314 docs: add chinese verion rabbitmq dashboard 2025-04-08 22:07:24 +08:00
710leo
c0ab672cf7 change createTokens 2025-04-03 00:19:33 +08:00
Yening Qin
7664c15121 refactor: change get ids (#2583) 2025-04-02 18:50:01 +08:00
Ulric Qin
4059a2022c add logic: SingleLogin 2025-04-02 15:10:31 +08:00
smx_Morgan
e7263680a8 refactor: targets get api 2025-04-01 20:56:40 +08:00
alingse
4a67f7a108 fix: call errors.WithMessage with a nil value error err after check another error decryptErr (#2572) 2025-04-01 17:33:25 +08:00
ning
04ca6c5fd5 fix: auto migrate add cross_cluster_enabled of es_index_pattern 2025-03-27 15:45:47 +08:00
Ulric Qin
747211c78f fix builtin node-exporter-alerting-rules 2025-03-27 08:35:56 +08:00
ning
bf54fac1e8 refactor: change send script notify log 2025-03-26 16:09:35 +08:00
ning
76117ae440 refactor: change send script notify log 2025-03-26 16:00:02 +08:00
Yening Qin
9ad02075c6 refactor: es query add IgnoreUnavailable(true) (#2566) 2025-03-26 11:05:45 +08:00
ning
6d27ff673f docs: add migrate sql 2025-03-25 15:15:53 +08:00
kugarocks
ee4e2b3f7d chore: delete rule worker severity var (#2539) 2025-03-21 18:54:26 +08:00
smx_Morgan
e6de301c65 feat: verify host before add task (#2544)
Co-authored-by: Yening Qin <710leo@gmail.com>
2025-03-21 18:48:06 +08:00
Ulric Qin
d4f5871fba Merge branch 'main' of github.com:ccfos/nightingale 2025-03-21 18:39:05 +08:00
Ulric Qin
c2e61f3741 delete no use configurations in docker directory 2025-03-21 18:38:46 +08:00
Yening Qin
d26df3b331 refactor: es query add log (#2560) 2025-03-21 18:37:45 +08:00
Yening Qin
391c674d21 refactor: add metrics and change crontab (#2559) 2025-03-21 16:29:08 +08:00
241 changed files with 35821 additions and 14051 deletions

4
.gitignore vendored
View File

@@ -58,6 +58,10 @@ _test
.idea
.index
.vscode
.issue
.issue/*
.cursor
.claude
.DS_Store
.cache-loader
.payload

100
README.md
View File

@@ -3,7 +3,7 @@
<img src="doc/img/Nightingale_L_V.png" alt="nightingale - cloud native monitoring" width="100" /></a>
</p>
<p align="center">
<b>开源告警管理专家 一体化的可观测平台</b>
<b>Open-Source Alerting Expert</b>
</p>
<p align="center">
@@ -25,85 +25,91 @@
[English](./README_en.md) | [中文](./README.md)
[English](./README.md) | [中文](./README_zh.md)
## 夜莺 Nightingale 是什么
## 🎯 What is Nightingale
> 夜莺 Nightingale 是什么,解决什么问题?以大家都很熟悉的 Grafana 做个类比Grafana 擅长对接各种各样的数据源,然后提供灵活、强大、好看的可视化面板。夜莺则擅长对接各种多样的数据源,提供灵活、强大、高效的监控告警管理能力。从发展路径和定位来说,夜莺和 Grafana 很像,可以总结为一句话:可视化就用 Grafana监控告警就找夜莺。
>
> 在可视化领域Grafana 是毫无争议的领导者Grafana 在影响力、装机量、用户群、开发者数量等各个维度的数字上相比夜莺都是追赶的榜样。巨无霸往往都是从一个切入点打开局面的Grafana Labs 有了在可视化领域 Grafana 这个王牌,逐步扩展到整个可观测性方向,比如 Logging 维度有 LokiTracing 维度有 TempoProfiling 维度有收购来的 PyroscopeOn-call 维度有同样是收购来的 Grafana-OnCall 项目,还有时序数据库 Mimir、eBPF 采集器 Beyla、OpenTelemetry 采集器 Alloy、前端监控 SDK Faro最终构成了一个完整的可观测性工具矩阵但整个飞轮都是从 Grafana 项目开始转动起来的。
>
>夜莺,则是从监控告警这个切入点打开局面,也逐步横向做了相应扩展,比如夜莺也自研了可视化面板,如果你想有一个 all-in-one 的监控告警+可视化的工具,那么用夜莺也是正确的选择;比如 OnCall 方向,夜莺可以和 [Flashduty SaaS](https://flashcat.cloud/product/flashcat-duty/) 服务无缝的集成;在采集器方向,夜莺有配套的 [Categraf](https://flashcat.cloud/product/categraf),可以一个采集器中管理所有的 exporter并同时支持指标和日志的采集极大减轻工程师维护的采集器数量和工作量这个点太痛了你可能也遇到过业务团队吐槽采集器数量比业务应用进程数量还多的窘况吧
Nightingale is an open-source monitoring project that focuses on alerting. Similar to Grafana, Nightingale also connects with various existing data sources. However, while Grafana emphasizes visualization, Nightingale places greater emphasis on the alerting engine, as well as the processing and distribution of alarms.
夜莺 Nightingale 作为一款开源云原生监控工具,最初由滴滴开发和开源,并于 2022 年 5 月 11 日捐赠予中国计算机学会开源发展委员会CCF ODC为 CCF ODC 成立后接受捐赠的第一个开源项目。在 GitHub 上有超过 10000 颗星,是广受关注和使用的开源监控工具。夜莺的核心研发团队,也是 Open-Falcon 项目原核心研发人员,从 2014 年Open-Falcon 是 2014 年开源)算起来,也有 10 年了,只为把监控做到极致。
> The Nightingale project was initially developed and open-sourced by DiDi.inc. On May 11, 2022, it was donated to the Open Source Development Committee of the China Computer Federation (CCF ODC).
![](https://n9e.github.io/img/global/arch-bg.png)
## 快速开始
- 👉 [文档中心](https://flashcat.cloud/docs/) | [下载中心](https://flashcat.cloud/download/nightingale/)
- ❤️ [报告 Bug](https://github.com/ccfos/nightingale/issues/new?assignees=&labels=&projects=&template=question.yml)
- 为了提供更快速的访问体验,上述文档和下载站点托管于 [FlashcatCloud](https://flashcat.cloud)
- 💡 前后端代码分离,前端代码仓库:[https://github.com/n9e/fe](https://github.com/n9e/fe)
## 💡 How Nightingale Works
## 功能特点
Many users have already collected metrics and log data. In this case, you can connect your storage repositories (such as VictoriaMetrics, ElasticSearch, etc.) as data sources in Nightingale. This allows you to configure alerting rules and notification rules within Nightingale, enabling the generation and distribution of alarms.
- 对接多种时序库,实现统一监控告警管理:支持对接的时序库包括 Prometheus、VictoriaMetrics、Thanos、Mimir、M3DB、TDengine 等。
- 对接日志库,实现针对日志的监控告警:支持对接的日志库包括 ElasticSearch、Loki 等。
- 专业告警能力:内置支持多种告警规则,可以扩展支持常见通知媒介,支持告警屏蔽/抑制/订阅/自愈、告警事件管理。
- 高性能可视化引擎:支持多种图表样式,内置众多 Dashboard 模版,也可导入 Grafana 模版,开箱即用,开源协议商业友好。
- 支持常见采集器:支持 [Categraf](https://flashcat.cloud/product/categraf)、Telegraf、Grafana-agent、Datadog-agent、各种 Exporter 作为采集器,没有什么数据是不能监控的。
- 👀无缝搭配 [Flashduty](https://flashcat.cloud/product/flashcat-duty/)实现告警聚合收敛、认领、升级、排班、IM集成确保告警处理不遗漏减少打扰高效协同。
![Nightingale Product Architecture](doc/img/readme/20240221152601.png)
Nightingale itself does not provide monitoring data collection capabilities. We recommend using [Categraf](https://github.com/flashcatcloud/categraf) as the collector, which integrates seamlessly with Nightingale.
## 截图演示
[Categraf](https://github.com/flashcatcloud/categraf) can collect monitoring data from operating systems, network devices, various middleware, and databases. It pushes this data to Nightingale via the `Prometheus Remote Write` protocol. Nightingale then stores the monitoring data in a time-series database (such as Prometheus, VictoriaMetrics, etc.) and provides alerting and visualization capabilities.
For certain edge data centers with poor network connectivity to the central Nightingale server, we offer a distributed deployment mode for the alerting engine. In this mode, even if the network is disconnected, the alerting functionality remains unaffected.
你可以在页面的右上角,切换语言和主题,目前我们支持英语、简体中文、繁体中文。
![Edge Deployment Mode](doc/img/readme/20240222102119.png)
![语言切换](doc/img/readme/n9e-switch-i18n.png)
> In the above diagram, Data Center A has a good network with the central data center, so it uses the Nightingale process in the central data center as the alerting engine. Data Center B has a poor network with the central data center, so it deploys `n9e-edge` as the alerting engine to handle alerting for its own data sources.
即时查询,类似 Prometheus 内置的查询分析页面,做 ad-hoc 查询,夜莺做了一些 UI 优化,同时提供了一些内置 promql 指标,让不太了解 promql 的用户也可以快速查询。
## 🔕 Alert Noise Reduction, Escalation, and Collaboration
![即时查询](doc/img/readme/20240513103305.png)
Nightingale focuses on being an alerting engine, responsible for generating alarms and flexibly distributing them based on rules. It supports 20 built-in notification medias (such as phone calls, SMS, email, DingTalk, Slack, etc.).
当然,也可以直接通过指标视图查看,有了指标视图,即时查询基本可以不用了,或者只有高端玩家使用即时查询,普通用户直接通过指标视图查询即可。
If you have more advanced requirements, such as:
- Want to consolidate events from multiple monitoring systems into one platform for unified noise reduction, response handling, and data analysis.
- Want to support personnel scheduling, practice on-call culture, and support alert escalation (to avoid missing alerts) and collaborative handling.
![指标视图](doc/img/readme/20240513103530.png)
Then Nightingale is not suitable. It is recommended that you choose on-call products such as PagerDuty and FlashDuty. These products are simple and easy to use.
夜莺内置了常用仪表盘,可以直接导入使用。也可以导入 Grafana 仪表盘,不过只能兼容 Grafana 基本图表,如果已经习惯了 Grafana 建议继续使用 Grafana 看图,把夜莺作为一个告警引擎使用。
## 🗨️ Communication Channels
![内置仪表盘](doc/img/readme/20240513103628.png)
- **Report Bugs:** It is highly recommended to submit issues via the [Nightingale GitHub Issue tracker](https://github.com/ccfos/nightingale/issues/new?assignees=&labels=kind%2Fbug&projects=&template=bug_report.yml).
- **Documentation:** For more information, we recommend thoroughly browsing the [Nightingale Documentation Site](https://n9e.github.io/).
除了内置的仪表盘,也内置了很多告警规则,开箱即用。
## 🔑 Key Features
![内置告警规则](doc/img/readme/20240513103825.png)
![Nightingale Alerting rules](doc/img/readme/2025-05-23_18-43-37.png)
- Nightingale supports alerting rules, mute rules, subscription rules, and notification rules. It natively supports 20 types of notification media and allows customization of message templates.
- It supports event pipelines for Pipeline processing of alarms, facilitating automated integration with in-house systems. For example, it can append metadata to alarms or perform relabeling on events.
- It introduces the concept of business groups and a permission system to manage various rules in a categorized manner.
- Many databases and middleware come with built-in alert rules that can be directly imported and used. It also supports direct import of Prometheus alerting rules.
- It supports alerting self-healing, which automatically triggers a script to execute predefined logic after an alarm is generated—such as cleaning up disk space or capturing the current system state.
![Nightingale Alarm Dashboard](doc/img/readme/2025-05-30_08-49-28.png)
## 产品架构
- Nightingale archives historical alarms and supports multi-dimensional query and statistics.
- It supports flexible aggregation grouping, allowing a clear view of the distribution of alarms across the company.
社区使用夜莺最多的场景就是使用夜莺做告警引擎,对接多套时序库,统一告警规则管理。绘图仍然使用 Grafana 居多。作为一个告警引擎,夜莺的产品架构如下:
![Nightingale Integration Center](doc/img/readme/2025-05-23_18-46-06.png)
![产品架构](doc/img/readme/20240221152601.png)
- Nightingale has built-in metric descriptions, dashboards, and alerting rules for common operating systems, middleware, and databases, which are contributed by the community with varying quality.
- It directly receives data via multiple protocols such as Remote Write, OpenTSDB, Datadog, and Falcon, integrates with various Agents.
- It supports data sources like Prometheus, ElasticSearch, Loki, ClickHouse, MySQL, Postgres, allowing alerting based on data from these sources.
- Nightingale can be easily embedded into internal enterprise systems (e.g. Grafana, CMDB), and even supports configuring menu visibility for these embedded systems.
对于个别边缘机房,如果和中心夜莺服务端网络链路不好,希望提升告警可用性,我们也提供边缘机房告警引擎下沉部署模式,这个模式下,即便网络割裂,告警功能也不受影响。
![Nightingale dashboards](doc/img/readme/2025-05-23_18-49-02.png)
![边缘部署模式](doc/img/readme/20240222102119.png)
- Nightingale supports dashboard functionality, including common chart types, and comes with pre-built dashboards. The image above is a screenshot of one of these dashboards.
- If you are already accustomed to Grafana, it is recommended to continue using Grafana for visualization, as Grafana has deeper expertise in this area.
- For machine-related monitoring data collected by Categraf, it is advisable to use Nightingale's built-in dashboards for viewing. This is because Categraf's metric naming follows Telegraf's convention, which differs from that of Node Exporter.
- Due to Nightingale's concept of business groups (where machines can belong to different groups), there may be scenarios where you only want to view machines within the current business group on the dashboard. Thus, Nightingale's dashboards can be linked with business groups for interactive filtering.
## 🌟 Stargazers over time
## 交流渠道
- 报告Bug优先推荐提交[夜莺GitHub Issue](https://github.com/ccfos/nightingale/issues/new?assignees=&labels=kind%2Fbug&projects=&template=bug_report.yml)
- 推荐完整浏览[夜莺文档站点](https://flashcat.cloud/docs/content/flashcat-monitor/nightingale-v7/introduction/),了解更多信息
- 加我微信:`picobyte`(我已关闭好友验证)拉入微信群,备注:`夜莺互助群`
## 广受关注
[![Stargazers over time](https://api.star-history.com/svg?repos=ccfos/nightingale&type=Date)](https://star-history.com/#ccfos/nightingale&Date)
## 社区共建
- ❇️ 请阅读浏览[夜莺开源项目和社区治理架构草案](./doc/community-governance.md),真诚欢迎每一位用户、开发者、公司以及组织,使用夜莺监控、积极反馈 Bug、提交功能需求、分享最佳实践共建专业、活跃的夜莺开源社区。
- ❤️ 夜莺贡献者
## 🔥 Users
![User Logos](doc/img/readme/logos.png)
## 🤝 Community Co-Building
- ❇️ Please read the [Nightingale Open Source Project and Community Governance Draft](./doc/community-governance.md). We sincerely welcome every user, developer, company, and organization to use Nightingale, actively report bugs, submit feature requests, share best practices, and help build a professional and active open-source community.
- ❤️ Nightingale Contributors
<a href="https://github.com/ccfos/nightingale/graphs/contributors">
<img src="https://contrib.rocks/image?repo=ccfos/nightingale" />
</a>
## License
## 📜 License
- [Apache License V2.0](https://github.com/didi/nightingale/blob/main/LICENSE)

View File

@@ -1,113 +0,0 @@
<p align="center">
<a href="https://github.com/ccfos/nightingale">
<img src="doc/img/Nightingale_L_V.png" alt="nightingale - cloud native monitoring" width="100" /></a>
</p>
<p align="center">
<b>Open-source Alert Management Expert, an Integrated Observability Platform</b>
</p>
<p align="center">
<a href="https://flashcat.cloud/docs/">
<img alt="Docs" src="https://img.shields.io/badge/docs-get%20started-brightgreen"/></a>
<a href="https://hub.docker.com/u/flashcatcloud">
<img alt="Docker pulls" src="https://img.shields.io/docker/pulls/flashcatcloud/nightingale"/></a>
<a href="https://github.com/ccfos/nightingale/graphs/contributors">
<img alt="GitHub contributors" src="https://img.shields.io/github/contributors-anon/ccfos/nightingale"/></a>
<img alt="GitHub Repo stars" src="https://img.shields.io/github/stars/ccfos/nightingale">
<img alt="GitHub forks" src="https://img.shields.io/github/forks/ccfos/nightingale">
<br/><img alt="GitHub Repo issues" src="https://img.shields.io/github/issues/ccfos/nightingale">
<img alt="GitHub Repo issues closed" src="https://img.shields.io/github/issues-closed/ccfos/nightingale">
<img alt="GitHub latest release" src="https://img.shields.io/github/v/release/ccfos/nightingale"/>
<img alt="License" src="https://img.shields.io/badge/license-Apache--2.0-blue"/>
<a href="https://n9e-talk.slack.com/">
<img alt="GitHub contributors" src="https://img.shields.io/badge/join%20slack-%23n9e-brightgreen.svg"/></a>
</p>
[English](./README_en.md) | [中文](./README.md)
## What is Nightingale
Nightingale is an open-source project focused on alerting. Similar to Grafana's data source integration approach, Nightingale also connects with various existing data sources. However, while Grafana focuses on visualization, Nightingale focuses on alerting engines.
Originally developed and open-sourced by Didi, Nightingale was donated to the China Computer Federation Open Source Development Committee (CCF ODC) on May 11, 2022, becoming the first open-source project accepted by the CCF ODC after its establishment.
## Quick Start
- 👉 [Documentation](https://flashcat.cloud/docs/) | [Download](https://flashcat.cloud/download/nightingale/)
- ❤️ [Report a Bug](https://github.com/ccfos/nightingale/issues/new?assignees=&labels=&projects=&template=question.yml)
- For faster access, the above documentation and download sites are hosted on [FlashcatCloud](https://flashcat.cloud).
## Features
- **Integration with Multiple Time-Series Databases:** Supports integration with various time-series databases such as Prometheus, VictoriaMetrics, Thanos, Mimir, M3DB, and TDengine, enabling unified alert management.
- **Advanced Alerting Capabilities:** Comes with built-in support for multiple alerting rules, extensible to common notification channels. It also supports alert suppression, silencing, subscription, self-healing, and alert event management.
- **High-Performance Visualization Engine:** Offers various chart styles with numerous built-in dashboard templates and the ability to import Grafana templates. Ready to use with a business-friendly open-source license.
- **Support for Common Collectors:** Compatible with [Categraf](https://flashcat.cloud/product/categraf), Telegraf, Grafana-agent, Datadog-agent, and various exporters as collectors—there's no data that can't be monitored.
- **Seamless Integration with [Flashduty](https://flashcat.cloud/product/flashcat-duty/):** Enables alert aggregation, acknowledgment, escalation, scheduling, and IM integration, ensuring no alerts are missed, reducing unnecessary interruptions, and enhancing efficient collaboration.
## Screenshots
You can switch languages and themes in the top right corner. We now support English, Simplified Chinese, and Traditional Chinese.
![18n switch](doc/img/readme/n9e-switch-i18n.png)
### Instant Query
Similar to the built-in query analysis page in Prometheus, Nightingale offers an ad-hoc query feature with UI enhancements. It also provides built-in PromQL metrics, allowing users unfamiliar with PromQL to quickly perform queries.
![Instant Query](doc/img/readme/20240513103305.png)
### Metric View
Alternatively, you can use the Metric View to access data. With this feature, Instant Query becomes less necessary, as it caters more to advanced users. Regular users can easily perform queries using the Metric View.
![Metric View](doc/img/readme/20240513103530.png)
### Built-in Dashboards
Nightingale includes commonly used dashboards that can be imported and used directly. You can also import Grafana dashboards, although compatibility is limited to basic Grafana charts. If youre accustomed to Grafana, its recommended to continue using it for visualization, with Nightingale serving as an alerting engine.
![Built-in Dashboards](doc/img/readme/20240513103628.png)
### Built-in Alert Rules
In addition to the built-in dashboards, Nightingale also comes with numerous alert rules that are ready to use out of the box.
![Built-in Alert Rules](doc/img/readme/20240513103825.png)
## Architecture
In most community scenarios, Nightingale is primarily used as an alert engine, integrating with multiple time-series databases to unify alert rule management. Grafana remains the preferred tool for visualization. As an alert engine, the product architecture of Nightingale is as follows:
![Product Architecture](doc/img/readme/20240221152601.png)
For certain edge data centers with poor network connectivity to the central Nightingale server, we offer a distributed deployment mode for the alert engine. In this mode, even if the network is disconnected, the alerting functionality remains unaffected.
![Edge Deployment Mode](doc/img/readme/20240222102119.png)
## Communication Channels
- **Report Bugs:** It is highly recommended to submit issues via the [Nightingale GitHub Issue tracker](https://github.com/ccfos/nightingale/issues/new?assignees=&labels=kind%2Fbug&projects=&template=bug_report.yml).
- **Documentation:** For more information, we recommend thoroughly browsing the [Nightingale Documentation Site](https://flashcat.cloud/docs/content/flashcat-monitor/nightingale-v7/introduction/).
## Stargazers over time
[![Stargazers over time](https://api.star-history.com/svg?repos=ccfos/nightingale&type=Date)](https://star-history.com/#ccfos/nightingale&Date)
## Community Co-Building
- ❇️ Please read the [Nightingale Open Source Project and Community Governance Draft](./doc/community-governance.md). We sincerely welcome every user, developer, company, and organization to use Nightingale, actively report bugs, submit feature requests, share best practices, and help build a professional and active open-source community.
- ❤️ Nightingale Contributors
<a href="https://github.com/ccfos/nightingale/graphs/contributors">
<img src="https://contrib.rocks/image?repo=ccfos/nightingale" />
</a>
## License
- [Apache License V2.0](https://github.com/didi/nightingale/blob/main/LICENSE)

120
README_zh.md Normal file
View File

@@ -0,0 +1,120 @@
<p align="center">
<a href="https://github.com/ccfos/nightingale">
<img src="doc/img/Nightingale_L_V.png" alt="nightingale - cloud native monitoring" width="100" /></a>
</p>
<p align="center">
<b>开源告警管理专家</b>
</p>
<p align="center">
<a href="https://flashcat.cloud/docs/">
<img alt="Docs" src="https://img.shields.io/badge/docs-get%20started-brightgreen"/></a>
<a href="https://hub.docker.com/u/flashcatcloud">
<img alt="Docker pulls" src="https://img.shields.io/docker/pulls/flashcatcloud/nightingale"/></a>
<a href="https://github.com/ccfos/nightingale/graphs/contributors">
<img alt="GitHub contributors" src="https://img.shields.io/github/contributors-anon/ccfos/nightingale"/></a>
<img alt="GitHub Repo stars" src="https://img.shields.io/github/stars/ccfos/nightingale">
<img alt="GitHub forks" src="https://img.shields.io/github/forks/ccfos/nightingale">
<br/><img alt="GitHub Repo issues" src="https://img.shields.io/github/issues/ccfos/nightingale">
<img alt="GitHub Repo issues closed" src="https://img.shields.io/github/issues-closed/ccfos/nightingale">
<img alt="GitHub latest release" src="https://img.shields.io/github/v/release/ccfos/nightingale"/>
<img alt="License" src="https://img.shields.io/badge/license-Apache--2.0-blue"/>
<a href="https://n9e-talk.slack.com/">
<img alt="GitHub contributors" src="https://img.shields.io/badge/join%20slack-%23n9e-brightgreen.svg"/></a>
</p>
[English](./README.md) | [中文](./README_zh.md)
## 夜莺是什么
夜莺监控Nightingale是一款侧重告警的监控类开源项目。类似 Grafana 的数据源集成方式,夜莺也是对接多种既有的数据源,不过 Grafana 侧重在可视化,夜莺是侧重在告警引擎、告警事件的处理和分发。
> 夜莺监控项目,最初由滴滴开发和开源,并于 2022 年 5 月 11 日捐赠予中国计算机学会开源发展委员会CCF ODC为 CCF ODC 成立后接受捐赠的第一个开源项目。
![](https://n9e.github.io/img/global/arch-bg.png)
## 夜莺的工作逻辑
很多用户已经自行采集了指标、日志数据此时就把存储库VictoriaMetrics、ElasticSearch等作为数据源接入夜莺即可在夜莺里配置告警规则、通知规则完成告警事件的生成和派发。
![夜莺产品架构](doc/img/readme/20240221152601.png)
夜莺项目本身不提供监控数据采集能力。推荐您使用 [Categraf](https://github.com/flashcatcloud/categraf) 作为采集器,可以和夜莺丝滑对接。
[Categraf](https://github.com/flashcatcloud/categraf) 可以采集操作系统、网络设备、各类中间件、数据库的监控数据,通过 Remote Write 协议推送给夜莺,夜莺把监控数据转存到时序库(如 Prometheus、VictoriaMetrics 等),并提供告警和可视化能力。
对于个别边缘机房,如果和中心夜莺服务端网络链路不好,希望提升告警可用性,夜莺也提供边缘机房告警引擎下沉部署模式,这个模式下,即便边缘和中心端网络割裂,告警功能也不受影响。
![边缘部署模式](doc/img/readme/20240222102119.png)
> 上图中机房A和中心机房的网络链路很好所以直接由中心端的夜莺进程做告警引擎机房B和中心机房的网络链路不好所以在机房B部署了 `n9e-edge` 做告警引擎对机房B的数据源做告警判定。
## 告警降噪、升级、协同
夜莺的侧重点是做告警引擎,即负责产生告警事件,并根据规则做灵活派发,内置支持 20 种通知媒介电话、短信、邮件、钉钉、飞书、企微、Slack 等)。
如果您有更高级的需求,比如:
- 想要把公司的多套监控系统产生的事件聚拢到一个平台,统一做收敛降噪、响应处理、数据分析
- 想要支持人员的排班,践行 On-call 文化,想要支持告警认领、升级(避免遗漏)、协同处理
那夜莺是不合适的,推荐您选用 [FlashDuty](https://flashcat.cloud/product/flashcat-duty/) 这样的 On-call 产品,产品简单易用,也有免费套餐。
## 相关资料 & 交流渠道
- 📚 [夜莺介绍PPT](https://mp.weixin.qq.com/s/Mkwx_46xrltSq8NLqAIYow) 对您了解夜莺各项关键特性会有帮助PPT链接在文末
- 👉 [文档中心](https://flashcat.cloud/docs/) 为了更快的访问速度,站点托管在 [FlashcatCloud](https://flashcat.cloud)
- ❤️ [报告 Bug](https://github.com/ccfos/nightingale/issues/new?assignees=&labels=&projects=&template=question.yml) 写清楚问题描述、复现步骤、截图等信息,更容易得到答案
- 💡 前后端代码分离,前端代码仓库:[https://github.com/n9e/fe](https://github.com/n9e/fe)
- 🎯 关注[这个公众号](https://gitlink.org.cn/UlricQin)了解更多夜莺动态和知识
- 🌟 加我微信:`picobyte`(我已关闭好友验证)拉入微信群,备注:`夜莺互助群`,如果已经把夜莺上到生产环境,可联系我拉入资深监控用户群
## 关键特性简介
![夜莺告警规则](doc/img/readme/2025-05-23_18-43-37.png)
- 夜莺支持告警规则、屏蔽规则、订阅规则、通知规则,内置支持 20 种通知媒介,支持消息模板自定义
- 支持事件管道,对告警事件做 Pipeline 处理,方便和自有系统做自动化整合,比如给告警事件附加一些元信息,对事件做 relabel
- 支持业务组概念,引入权限体系,分门别类管理各类规则
- 很多数据库、中间件内置了告警规则,可以直接导入使用,也可以直接导入 Prometheus 的告警规则
- 支持告警自愈,即告警之后自动触发一个脚本执行一些预定义的逻辑,比如清理一下磁盘、抓一下现场等
![夜莺事件大盘](doc/img/readme/2025-05-30_08-49-28.png)
- 夜莺存档了历史告警事件,支持多维度的查询和统计
- 支持灵活的聚合分组,一目了然看到公司的告警事件分布情况
![夜莺集成中心](doc/img/readme/2025-05-23_18-46-06.png)
- 夜莺内置常用操作系统、中间件、数据库的的指标说明、仪表盘、告警规则,不过都是社区贡献的,整体也是参差不齐
- 夜莺直接接收 Remote Write、OpenTSDB、Datadog、Falcon 等多种协议的数据,故而可以和各类 Agent 对接
- 夜莺支持 Prometheus、ElasticSearch、Loki、TDEngine 等多种数据源,可以对其中的数据做告警
- 夜莺可以很方便内嵌企业内部系统,比如 Grafana、CMDB 等,甚至可以配置这些内嵌系统的菜单可见性
![夜莺仪表盘](doc/img/readme/2025-05-23_18-49-02.png)
- 夜莺支持仪表盘功能,支持常见的图表类型,也内置了一些仪表盘,上图是其中一个仪表盘的截图。
- 如果你已经习惯了 Grafana建议仍然使用 Grafana 看图。Grafana 在看图方面道行更深。
- 机器相关的监控数据,如果是 Categraf 采集的,建议使用夜莺自带的仪表盘查看,因为 Categraf 的指标命名 Follow 的是 Telegraf 的命名方式,和 Node Exporter 不同
- 因为夜莺有个业务组的概念,机器可以归属不同的业务组,有时在仪表盘里只想查看当前所属业务组的机器,所以夜莺的仪表盘可以和业务组联动
## 广受关注
[![Stargazers over time](https://api.star-history.com/svg?repos=ccfos/nightingale&type=Date)](https://star-history.com/#ccfos/nightingale&Date)
## 感谢众多企业的信赖
![夜莺客户](doc/img/readme/logos.png)
## 社区共建
- ❇️ 请阅读浏览[夜莺开源项目和社区治理架构草案](./doc/community-governance.md),真诚欢迎每一位用户、开发者、公司以及组织,使用夜莺监控、积极反馈 Bug、提交功能需求、分享最佳实践共建专业、活跃的夜莺开源社区。
- ❤️ 夜莺贡献者
<a href="https://github.com/ccfos/nightingale/graphs/contributors">
<img src="https://contrib.rocks/image?repo=ccfos/nightingale" />
</a>
## License
- [Apache License V2.0](https://github.com/didi/nightingale/blob/main/LICENSE)

View File

@@ -115,7 +115,9 @@ func Start(alertc aconf.Alert, pushgwc pconf.Pushgw, syncStats *memsto.Stats, al
eval.NewScheduler(alertc, externalProcessors, alertRuleCache, targetCache, targetsOfAlertRulesCache,
busiGroupCache, alertMuteCache, datasourceCache, promClients, naming, ctx, alertStats)
dp := dispatch.NewDispatch(alertRuleCache, userCache, userGroupCache, alertSubscribeCache, targetCache, notifyConfigCache, taskTplsCache, notifyRuleCache, notifyChannelCache, messageTemplateCache, alertc.Alerting, ctx, alertStats)
eventProcessorCache := memsto.NewEventProcessorCache(ctx, syncStats)
dp := dispatch.NewDispatch(alertRuleCache, userCache, userGroupCache, alertSubscribeCache, targetCache, notifyConfigCache, taskTplsCache, notifyRuleCache, notifyChannelCache, messageTemplateCache, eventProcessorCache, alertc.Alerting, ctx, alertStats)
consumer := dispatch.NewConsumer(alertc.Alerting, ctx, dp, promClients)
notifyRecordComsumer := sender.NewNotifyRecordConsumer(ctx)

View File

@@ -17,6 +17,7 @@ type Stats struct {
CounterRuleEval *prometheus.CounterVec
CounterQueryDataErrorTotal *prometheus.CounterVec
CounterQueryDataTotal *prometheus.CounterVec
CounterVarFillingQuery *prometheus.CounterVec
CounterRecordEval *prometheus.CounterVec
CounterRecordEvalErrorTotal *prometheus.CounterVec
CounterMuteTotal *prometheus.CounterVec
@@ -24,6 +25,7 @@ type Stats struct {
CounterHeartbeatErrorTotal *prometheus.CounterVec
CounterSubEventTotal *prometheus.CounterVec
GaugeQuerySeriesCount *prometheus.GaugeVec
GaugeRuleEvalDuration *prometheus.GaugeVec
GaugeNotifyRecordQueueSize prometheus.Gauge
}
@@ -54,7 +56,7 @@ func NewSyncStats() *Stats {
Subsystem: subsystem,
Name: "query_data_total",
Help: "Number of rule eval query data.",
}, []string{"datasource"})
}, []string{"datasource", "rule_id"})
CounterRecordEval := prometheus.NewCounterVec(prometheus.CounterOpts{
Namespace: namespace,
@@ -135,6 +137,20 @@ func NewSyncStats() *Stats {
Help: "The size of notify record queue.",
})
GaugeRuleEvalDuration := prometheus.NewGaugeVec(prometheus.GaugeOpts{
Namespace: namespace,
Subsystem: subsystem,
Name: "rule_eval_duration_ms",
Help: "Duration of rule eval in milliseconds.",
}, []string{"rule_id", "datasource_id"})
CounterVarFillingQuery := prometheus.NewCounterVec(prometheus.CounterOpts{
Namespace: namespace,
Subsystem: subsystem,
Name: "var_filling_query_total",
Help: "Number of var filling query.",
}, []string{"rule_id", "datasource_id", "ref", "typ"})
prometheus.MustRegister(
CounterAlertsTotal,
GaugeAlertQueueSize,
@@ -150,7 +166,9 @@ func NewSyncStats() *Stats {
CounterHeartbeatErrorTotal,
CounterSubEventTotal,
GaugeQuerySeriesCount,
GaugeRuleEvalDuration,
GaugeNotifyRecordQueueSize,
CounterVarFillingQuery,
)
return &Stats{
@@ -168,6 +186,8 @@ func NewSyncStats() *Stats {
CounterHeartbeatErrorTotal: CounterHeartbeatErrorTotal,
CounterSubEventTotal: CounterSubEventTotal,
GaugeQuerySeriesCount: GaugeQuerySeriesCount,
GaugeRuleEvalDuration: GaugeRuleEvalDuration,
GaugeNotifyRecordQueueSize: GaugeNotifyRecordQueueSize,
CounterVarFillingQuery: CounterVarFillingQuery,
}
}

View File

@@ -35,9 +35,9 @@ func MatchGroupsName(groupName string, groupFilter []models.TagFilter) bool {
func matchTag(value string, filter models.TagFilter) bool {
switch filter.Func {
case "==":
return strings.TrimSpace(filter.Value) == strings.TrimSpace(value)
return strings.TrimSpace(fmt.Sprintf("%v", filter.Value)) == strings.TrimSpace(value)
case "!=":
return strings.TrimSpace(filter.Value) != strings.TrimSpace(value)
return strings.TrimSpace(fmt.Sprintf("%v", filter.Value)) != strings.TrimSpace(value)
case "in":
_, has := filter.Vset[value]
return has

View File

@@ -110,10 +110,6 @@ func (e *Consumer) consumeOne(event *models.AlertCurEvent) {
e.persist(event)
if event.IsRecovered && event.NotifyRecovered == 0 {
return
}
e.dispatch.HandleEventNotify(event, false)
}

View File

@@ -15,6 +15,7 @@ import (
"github.com/ccfos/nightingale/v6/alert/aconf"
"github.com/ccfos/nightingale/v6/alert/astats"
"github.com/ccfos/nightingale/v6/alert/common"
"github.com/ccfos/nightingale/v6/alert/pipeline"
"github.com/ccfos/nightingale/v6/alert/sender"
"github.com/ccfos/nightingale/v6/memsto"
"github.com/ccfos/nightingale/v6/models"
@@ -23,6 +24,15 @@ import (
"github.com/toolkits/pkg/logger"
)
var ShouldSkipNotify func(*ctx.Context, *models.AlertCurEvent, int64) bool
var SendByNotifyRule func(*ctx.Context, *memsto.UserCacheType, *memsto.UserGroupCacheType, *memsto.NotifyChannelCacheType,
[]*models.AlertCurEvent, int64, *models.NotifyConfig, *models.NotifyChannelConfig, *models.MessageTemplate)
func init() {
ShouldSkipNotify = shouldSkipNotify
SendByNotifyRule = SendNotifyRuleMessage
}
type Dispatch struct {
alertRuleCache *memsto.AlertRuleCacheType
userCache *memsto.UserCacheType
@@ -35,6 +45,7 @@ type Dispatch struct {
notifyRuleCache *memsto.NotifyRuleCacheType
notifyChannelCache *memsto.NotifyChannelCacheType
messageTemplateCache *memsto.MessageTemplateCacheType
eventProcessorCache *memsto.EventProcessorCacheType
alerting aconf.Alerting
@@ -43,9 +54,8 @@ type Dispatch struct {
tpls map[string]*template.Template
ExtraSenders map[string]sender.Sender
BeforeSenderHook func(*models.AlertCurEvent) bool
ctx *ctx.Context
Astats *astats.Stats
ctx *ctx.Context
Astats *astats.Stats
RwLock sync.RWMutex
}
@@ -54,7 +64,7 @@ type Dispatch struct {
func NewDispatch(alertRuleCache *memsto.AlertRuleCacheType, userCache *memsto.UserCacheType, userGroupCache *memsto.UserGroupCacheType,
alertSubscribeCache *memsto.AlertSubscribeCacheType, targetCache *memsto.TargetCacheType, notifyConfigCache *memsto.NotifyConfigCacheType,
taskTplsCache *memsto.TaskTplCache, notifyRuleCache *memsto.NotifyRuleCacheType, notifyChannelCache *memsto.NotifyChannelCacheType,
messageTemplateCache *memsto.MessageTemplateCacheType, alerting aconf.Alerting, ctx *ctx.Context, astats *astats.Stats) *Dispatch {
messageTemplateCache *memsto.MessageTemplateCacheType, eventProcessorCache *memsto.EventProcessorCacheType, alerting aconf.Alerting, c *ctx.Context, astats *astats.Stats) *Dispatch {
notify := &Dispatch{
alertRuleCache: alertRuleCache,
userCache: userCache,
@@ -66,6 +76,7 @@ func NewDispatch(alertRuleCache *memsto.AlertRuleCacheType, userCache *memsto.Us
notifyRuleCache: notifyRuleCache,
notifyChannelCache: notifyChannelCache,
messageTemplateCache: messageTemplateCache,
eventProcessorCache: eventProcessorCache,
alerting: alerting,
@@ -74,9 +85,15 @@ func NewDispatch(alertRuleCache *memsto.AlertRuleCacheType, userCache *memsto.Us
ExtraSenders: make(map[string]sender.Sender),
BeforeSenderHook: func(*models.AlertCurEvent) bool { return true },
ctx: ctx,
ctx: c,
Astats: astats,
}
pipeline.Init()
// 设置通知记录回调函数
notifyChannelCache.SetNotifyRecordFunc(sender.NotifyRecord)
return notify
}
@@ -141,11 +158,14 @@ func (e *Dispatch) reloadTpls() error {
return nil
}
func (e *Dispatch) HandleEventWithNotifyRule(event *models.AlertCurEvent, isSubscribe bool) {
func (e *Dispatch) HandleEventWithNotifyRule(eventOrigin *models.AlertCurEvent) {
if len(event.NotifyRuleIDs) > 0 {
for _, notifyRuleId := range event.NotifyRuleIDs {
logger.Infof("notify rule ids: %v, event: %+v", notifyRuleId, event)
if len(eventOrigin.NotifyRuleIds) > 0 {
for _, notifyRuleId := range eventOrigin.NotifyRuleIds {
// 深拷贝新的 event避免并发修改 event 冲突
eventCopy := eventOrigin.DeepCopy()
logger.Infof("notify rule ids: %v, event: %+v", notifyRuleId, eventCopy)
notifyRule := e.notifyRuleCache.Get(notifyRuleId)
if notifyRule == nil {
continue
@@ -154,35 +174,129 @@ func (e *Dispatch) HandleEventWithNotifyRule(event *models.AlertCurEvent, isSubs
if !notifyRule.Enable {
continue
}
eventCopy.NotifyRuleId = notifyRuleId
eventCopy.NotifyRuleName = notifyRule.Name
for i := range notifyRule.NotifyConfigs {
if !NotifyRuleApplicable(&notifyRule.NotifyConfigs[i], event) {
var processors []models.Processor
for _, pipelineConfig := range notifyRule.PipelineConfigs {
if !pipelineConfig.Enable {
continue
}
eventPipeline := e.eventProcessorCache.Get(pipelineConfig.PipelineId)
if eventPipeline == nil {
logger.Warningf("notify_id: %d, event:%+v, processor not found", notifyRuleId, eventCopy)
continue
}
if !pipelineApplicable(eventPipeline, eventCopy) {
logger.Debugf("notify_id: %d, event:%+v, pipeline_id: %d, not applicable", notifyRuleId, eventCopy, pipelineConfig.PipelineId)
continue
}
processors = append(processors, e.eventProcessorCache.GetProcessorsById(pipelineConfig.PipelineId)...)
}
for _, processor := range processors {
var res string
var err error
logger.Infof("before processor notify_id: %d, event:%+v, processor:%+v", notifyRuleId, eventCopy, processor)
eventCopy, res, err = processor.Process(e.ctx, eventCopy)
if eventCopy == nil {
logger.Warningf("after processor notify_id: %d, event:%+v, processor:%+v, event is nil", notifyRuleId, eventCopy, processor)
sender.NotifyRecord(e.ctx, []*models.AlertCurEvent{eventOrigin}, notifyRuleId, "", "", res, errors.New("drop by processor"))
break
}
logger.Infof("after processor notify_id: %d, event:%+v, processor:%+v, res:%v, err:%v", notifyRuleId, eventCopy, processor, res, err)
}
if ShouldSkipNotify(e.ctx, eventCopy, notifyRuleId) {
logger.Infof("notify_id: %d, event:%+v, should skip notify", notifyRuleId, eventCopy)
continue
}
// notify
for i := range notifyRule.NotifyConfigs {
err := NotifyRuleMatchCheck(&notifyRule.NotifyConfigs[i], eventCopy)
if err != nil {
logger.Errorf("notify_id: %d, event:%+v, channel_id:%d, template_id: %d, notify_config:%+v, err:%v", notifyRuleId, eventCopy, notifyRule.NotifyConfigs[i].ChannelID, notifyRule.NotifyConfigs[i].TemplateID, notifyRule.NotifyConfigs[i], err)
continue
}
notifyChannel := e.notifyChannelCache.Get(notifyRule.NotifyConfigs[i].ChannelID)
messageTemplate := e.messageTemplateCache.Get(notifyRule.NotifyConfigs[i].TemplateID)
if notifyChannel == nil {
sender.NotifyRecord(e.ctx, []*models.AlertCurEvent{event}, notifyRuleId, fmt.Sprintf("notify_channel_id:%d", notifyRule.NotifyConfigs[i].ChannelID), "", "", errors.New("notify_channel not found"))
logger.Warningf("notify_id: %d, event:%+v, channel_id:%d, template_id: %d, notify_channel not found", notifyRuleId, event, notifyRule.NotifyConfigs[i].ChannelID, notifyRule.NotifyConfigs[i].TemplateID)
sender.NotifyRecord(e.ctx, []*models.AlertCurEvent{eventCopy}, notifyRuleId, fmt.Sprintf("notify_channel_id:%d", notifyRule.NotifyConfigs[i].ChannelID), "", "", errors.New("notify_channel not found"))
logger.Warningf("notify_id: %d, event:%+v, channel_id:%d, template_id: %d, notify_channel not found", notifyRuleId, eventCopy, notifyRule.NotifyConfigs[i].ChannelID, notifyRule.NotifyConfigs[i].TemplateID)
continue
}
if notifyChannel.RequestType != "flashduty" && messageTemplate == nil {
logger.Warningf("notify_id: %d, channel_name: %v, event:%+v, template_id: %d, message_template not found", notifyRuleId, notifyChannel.Ident, event, notifyRule.NotifyConfigs[i].TemplateID)
sender.NotifyRecord(e.ctx, []*models.AlertCurEvent{event}, notifyRuleId, notifyChannel.Name, "", "", errors.New("message_template not found"))
logger.Warningf("notify_id: %d, channel_name: %v, event:%+v, template_id: %d, message_template not found", notifyRuleId, notifyChannel.Ident, eventCopy, notifyRule.NotifyConfigs[i].TemplateID)
sender.NotifyRecord(e.ctx, []*models.AlertCurEvent{eventCopy}, notifyRuleId, notifyChannel.Name, "", "", errors.New("message_template not found"))
continue
}
// todo go send
// todo 聚合 event
go e.sendV2([]*models.AlertCurEvent{event}, notifyRuleId, &notifyRule.NotifyConfigs[i], notifyChannel, messageTemplate)
go SendByNotifyRule(e.ctx, e.userCache, e.userGroupCache, e.notifyChannelCache, []*models.AlertCurEvent{eventCopy}, notifyRuleId, &notifyRule.NotifyConfigs[i], notifyChannel, messageTemplate)
}
}
}
}
func NotifyRuleApplicable(notifyConfig *models.NotifyConfig, event *models.AlertCurEvent) bool {
func shouldSkipNotify(ctx *ctx.Context, event *models.AlertCurEvent, notifyRuleId int64) bool {
if event == nil {
// 如果 eventCopy 为 nil说明 eventCopy 被 processor drop 掉了, 不再发送通知
return true
}
if event.IsRecovered && event.NotifyRecovered == 0 {
// 如果 eventCopy 是恢复事件,且 NotifyRecovered 为 0则不发送通知
return true
}
return false
}
func pipelineApplicable(pipeline *models.EventPipeline, event *models.AlertCurEvent) bool {
if pipeline == nil {
return true
}
if !pipeline.FilterEnable {
return true
}
tagMatch := true
if len(pipeline.LabelFilters) > 0 {
for i := range pipeline.LabelFilters {
if pipeline.LabelFilters[i].Func == "" {
pipeline.LabelFilters[i].Func = pipeline.LabelFilters[i].Op
}
}
tagFilters, err := models.ParseTagFilter(pipeline.LabelFilters)
if err != nil {
logger.Errorf("pipeline applicable failed to parse tag filter: %v event:%+v pipeline:%+v", err, event, pipeline)
return false
}
tagMatch = common.MatchTags(event.TagsMap, tagFilters)
}
attributesMatch := true
if len(pipeline.AttrFilters) > 0 {
tagFilters, err := models.ParseTagFilter(pipeline.AttrFilters)
if err != nil {
logger.Errorf("pipeline applicable failed to parse tag filter: %v event:%+v pipeline:%+v err:%v", tagFilters, event, pipeline, err)
return false
}
attributesMatch = common.MatchTags(event.JsonTagsAndValue(), tagFilters)
}
return tagMatch && attributesMatch
}
func NotifyRuleMatchCheck(notifyConfig *models.NotifyConfig, event *models.AlertCurEvent) error {
tm := time.Unix(event.TriggerTime, 0)
triggerTime := tm.Format("15:04")
triggerWeek := int(tm.Weekday())
@@ -234,6 +348,10 @@ func NotifyRuleApplicable(notifyConfig *models.NotifyConfig, event *models.Alert
}
}
if !timeMatch {
return fmt.Errorf("event time not match time filter")
}
severityMatch := false
for i := range notifyConfig.Severities {
if notifyConfig.Severities[i] == event.Severity {
@@ -241,6 +359,10 @@ func NotifyRuleApplicable(notifyConfig *models.NotifyConfig, event *models.Alert
}
}
if !severityMatch {
return fmt.Errorf("event severity not match severity filter")
}
tagMatch := true
if len(notifyConfig.LabelKeys) > 0 {
for i := range notifyConfig.LabelKeys {
@@ -252,23 +374,32 @@ func NotifyRuleApplicable(notifyConfig *models.NotifyConfig, event *models.Alert
tagFilters, err := models.ParseTagFilter(notifyConfig.LabelKeys)
if err != nil {
logger.Errorf("notify send failed to parse tag filter: %v event:%+v notify_config:%+v", err, event, notifyConfig)
return false
return fmt.Errorf("failed to parse tag filter: %v", err)
}
tagMatch = common.MatchTags(event.TagsMap, tagFilters)
}
if !tagMatch {
return fmt.Errorf("event tag not match tag filter")
}
attributesMatch := true
if len(notifyConfig.Attributes) > 0 {
tagFilters, err := models.ParseTagFilter(notifyConfig.Attributes)
if err != nil {
logger.Errorf("notify send failed to parse tag filter: %v event:%+v notify_config:%+v err:%v", tagFilters, event, notifyConfig, err)
return false
return fmt.Errorf("failed to parse tag filter: %v", err)
}
attributesMatch = common.MatchTags(event.JsonTagsAndValue(), tagFilters)
}
if !attributesMatch {
return fmt.Errorf("event attributes not match attributes filter")
}
logger.Infof("notify send timeMatch:%v severityMatch:%v tagMatch:%v attributesMatch:%v event:%+v notify_config:%+v", timeMatch, severityMatch, tagMatch, attributesMatch, event, notifyConfig)
return timeMatch && severityMatch && tagMatch && attributesMatch
return nil
}
func GetNotifyConfigParams(notifyConfig *models.NotifyConfig, contactKey string, userCache *memsto.UserCacheType, userGroupCache *memsto.UserGroupCacheType) ([]string, []int64, map[string]string) {
@@ -336,7 +467,8 @@ func GetNotifyConfigParams(notifyConfig *models.NotifyConfig, contactKey string,
return sendtos, flashDutyChannelIDs, customParams
}
func (e *Dispatch) sendV2(events []*models.AlertCurEvent, notifyRuleId int64, notifyConfig *models.NotifyConfig, notifyChannel *models.NotifyChannelConfig, messageTemplate *models.MessageTemplate) {
func SendNotifyRuleMessage(ctx *ctx.Context, userCache *memsto.UserCacheType, userGroupCache *memsto.UserGroupCacheType, notifyChannelCache *memsto.NotifyChannelCacheType,
events []*models.AlertCurEvent, notifyRuleId int64, notifyConfig *models.NotifyConfig, notifyChannel *models.NotifyChannelConfig, messageTemplate *models.MessageTemplate) {
if len(events) == 0 {
logger.Errorf("notify_id: %d events is empty", notifyRuleId)
return
@@ -352,51 +484,51 @@ func (e *Dispatch) sendV2(events []*models.AlertCurEvent, notifyRuleId int64, no
contactKey = notifyChannel.ParamConfig.UserInfo.ContactKey
}
sendtos, flashDutyChannelIDs, customParams := GetNotifyConfigParams(notifyConfig, contactKey, e.userCache, e.userGroupCache)
e.Astats.GaugeNotifyRecordQueueSize.Inc()
defer e.Astats.GaugeNotifyRecordQueueSize.Dec()
sendtos, flashDutyChannelIDs, customParams := GetNotifyConfigParams(notifyConfig, contactKey, userCache, userGroupCache)
switch notifyChannel.RequestType {
case "flashduty":
if len(flashDutyChannelIDs) == 0 {
flashDutyChannelIDs = []int64{0} // 如果 flashduty 通道没有配置,则使用 0, 给 SendFlashDuty 判断使用, 不给 flashduty 传 channel_id 参数
}
for i := range flashDutyChannelIDs {
respBody, err := notifyChannel.SendFlashDuty(events, flashDutyChannelIDs[i], e.notifyChannelCache.GetHttpClient(notifyChannel.ID))
start := time.Now()
respBody, err := notifyChannel.SendFlashDuty(events, flashDutyChannelIDs[i], notifyChannelCache.GetHttpClient(notifyChannel.ID))
respBody = fmt.Sprintf("duration: %d ms %s", time.Since(start).Milliseconds(), respBody)
logger.Infof("notify_id: %d, channel_name: %v, event:%+v, IntegrationUrl: %v dutychannel_id: %v, respBody: %v, err: %v", notifyRuleId, notifyChannel.Name, events[0], notifyChannel.RequestConfig.FlashDutyRequestConfig.IntegrationUrl, flashDutyChannelIDs[i], respBody, err)
sender.NotifyRecord(e.ctx, events, notifyRuleId, notifyChannel.Name, strconv.FormatInt(flashDutyChannelIDs[i], 10), respBody, err)
sender.NotifyRecord(ctx, events, notifyRuleId, notifyChannel.Name, strconv.FormatInt(flashDutyChannelIDs[i], 10), respBody, err)
}
return
case "http":
if e.notifyChannelCache.HttpConcurrencyAdd(notifyChannel.ID) {
defer e.notifyChannelCache.HttpConcurrencyDone(notifyChannel.ID)
}
if notifyChannel.RequestConfig == nil {
logger.Warningf("notify_id: %d, channel_name: %v, event:%+v, request config not found", notifyRuleId, notifyChannel.Name, events[0])
// 使用队列模式处理 http 通知
// 创建通知任务
task := &memsto.NotifyTask{
Events: events,
NotifyRuleId: notifyRuleId,
NotifyChannel: notifyChannel,
TplContent: tplContent,
CustomParams: customParams,
Sendtos: sendtos,
}
if notifyChannel.RequestConfig.HTTPRequestConfig == nil {
logger.Warningf("notify_id: %d, channel_name: %v, event:%+v, http request config not found", notifyRuleId, notifyChannel.Name, events[0])
}
if NeedBatchContacts(notifyChannel.RequestConfig.HTTPRequestConfig) || len(sendtos) == 0 {
resp, err := notifyChannel.SendHTTP(events, tplContent, customParams, sendtos, e.notifyChannelCache.GetHttpClient(notifyChannel.ID))
logger.Infof("notify_id: %d, channel_name: %v, event:%+v, tplContent:%s, customParams:%v, userInfo:%+v, respBody: %v, err: %v", notifyRuleId, notifyChannel.Name, events[0], tplContent, customParams, sendtos, resp, err)
sender.NotifyRecord(e.ctx, events, notifyRuleId, notifyChannel.Name, getSendTarget(customParams, sendtos), resp, err)
} else {
for i := range sendtos {
resp, err := notifyChannel.SendHTTP(events, tplContent, customParams, []string{sendtos[i]}, e.notifyChannelCache.GetHttpClient(notifyChannel.ID))
logger.Infof("notify_id: %d, channel_name: %v, event:%+v, tplContent:%s, customParams:%v, userInfo:%+v, respBody: %v, err: %v", notifyRuleId, notifyChannel.Name, events[0], tplContent, customParams, sendtos[i], resp, err)
sender.NotifyRecord(e.ctx, events, notifyRuleId, notifyChannel.Name, getSendTarget(customParams, []string{sendtos[i]}), resp, err)
}
// 将任务加入队列
success := notifyChannelCache.EnqueueNotifyTask(task)
if !success {
logger.Errorf("failed to enqueue notify task for channel %d, notify_id: %d", notifyChannel.ID, notifyRuleId)
// 如果入队失败,记录错误通知
sender.NotifyRecord(ctx, events, notifyRuleId, notifyChannel.Name, getSendTarget(customParams, sendtos), "", errors.New("failed to enqueue notify task, queue is full"))
}
case "smtp":
notifyChannel.SendEmail(notifyRuleId, events, tplContent, sendtos, e.notifyChannelCache.GetSmtpClient(notifyChannel.ID))
notifyChannel.SendEmail(notifyRuleId, events, tplContent, sendtos, notifyChannelCache.GetSmtpClient(notifyChannel.ID))
case "script":
start := time.Now()
target, res, err := notifyChannel.SendScript(events, tplContent, customParams, sendtos)
res = fmt.Sprintf("duration: %d ms %s", time.Since(start).Milliseconds(), res)
logger.Infof("notify_id: %d, channel_name: %v, event:%+v, tplContent:%s, customParams:%v, target:%s, res:%s, err:%v", notifyRuleId, notifyChannel.Name, events[0], tplContent, customParams, target, res, err)
sender.NotifyRecord(e.ctx, events, notifyRuleId, notifyChannel.Name, target, res, err)
sender.NotifyRecord(ctx, events, notifyRuleId, notifyChannel.Name, target, res, err)
default:
logger.Warningf("notify_id: %d, channel_name: %v, event:%+v send type not found", notifyRuleId, notifyChannel.Name, events[0])
}
@@ -411,13 +543,13 @@ func NeedBatchContacts(requestConfig *models.HTTPRequestConfig) bool {
// event: 告警/恢复事件
// isSubscribe: 告警事件是否由subscribe的配置产生
func (e *Dispatch) HandleEventNotify(event *models.AlertCurEvent, isSubscribe bool) {
rule := e.alertRuleCache.Get(event.RuleId)
if rule == nil {
go e.HandleEventWithNotifyRule(event)
if event.IsRecovered && event.NotifyRecovered == 0 {
return
}
if e.blockEventNotify(rule, event) {
logger.Infof("block event notify: rule_id:%d event:%+v", rule.Id, event)
rule := e.alertRuleCache.Get(event.RuleId)
if rule == nil {
return
}
@@ -448,8 +580,6 @@ func (e *Dispatch) HandleEventNotify(event *models.AlertCurEvent, isSubscribe bo
notifyTarget.AndMerge(handler(rule, event, notifyTarget, e))
}
// 处理事件发送,这里用一个goroutine处理一个event的所有发送事件
go e.HandleEventWithNotifyRule(event, isSubscribe)
go e.Send(rule, event, notifyTarget, isSubscribe)
// 如果是不是订阅规则出现的event, 则需要处理订阅规则的event
@@ -458,25 +588,6 @@ func (e *Dispatch) HandleEventNotify(event *models.AlertCurEvent, isSubscribe bo
}
}
func (e *Dispatch) blockEventNotify(rule *models.AlertRule, event *models.AlertCurEvent) bool {
ruleType := rule.GetRuleType()
// 若为机器则先看机器是否删除
if ruleType == models.HOST {
host, ok := e.targetCache.Get(event.TagsMap["ident"])
if !ok || host == nil {
return true
}
}
// 恢复通知,检测规则配置是否改变
// if event.IsRecovered && event.RuleHash != rule.Hash() {
// return true
// }
return false
}
func (e *Dispatch) handleSubs(event *models.AlertCurEvent) {
// handle alert subscribes
subscribes := make([]*models.AlertSubscribe, 0)
@@ -508,6 +619,10 @@ func (e *Dispatch) handleSub(sub *models.AlertSubscribe, event models.AlertCurEv
return
}
if !sub.MatchCate(event.Cate) {
return
}
if !common.MatchTags(event.TagsMap, sub.ITags) {
return
}
@@ -646,6 +761,11 @@ func (e *Dispatch) HandleIbex(rule *models.AlertRule, event *models.AlertCurEven
}
json.Unmarshal([]byte(rule.RuleConfig), &ruleConfig)
if event.IsRecovered {
// 恢复事件不需要走故障自愈的逻辑
return
}
for _, t := range ruleConfig.TaskTpls {
if t.TplId == 0 {
continue
@@ -732,10 +852,10 @@ func getSendTarget(customParams map[string]string, sendtos []string) string {
values := make([]string, 0)
for _, value := range customParams {
if len(value) <= 4 {
runes := []rune(value)
if len(runes) <= 4 {
values = append(values, value)
} else {
runes := []rune(value)
maskedValue := string(runes[:len(runes)-4]) + "****"
values = append(values, maskedValue)
}

View File

@@ -18,17 +18,18 @@ func LogEvent(event *models.AlertCurEvent, location string, err ...error) {
}
logger.Infof(
"event(%s %s) %s: rule_id=%d sub_id:%d notify_rule_ids:%v cluster:%s %v%s@%d %s",
"event(%s %s) %s: rule_id=%d sub_id:%d notify_rule_ids:%v cluster:%s %v%s@%d last_eval_time:%d %s",
event.Hash,
status,
location,
event.RuleId,
event.SubRuleId,
event.NotifyRuleIDs,
event.NotifyRuleIds,
event.Cluster,
event.TagsJSON,
event.TriggerValue,
event.TriggerTime,
event.LastEvalTime,
message,
)
}

View File

@@ -93,7 +93,7 @@ func (s *Scheduler) syncAlertRules() {
}
ruleType := rule.GetRuleType()
if rule.IsPrometheusRule() || rule.IsLokiRule() || rule.IsTdengineRule() || rule.IsClickHouseRule() || rule.IsElasticSearch() {
if rule.IsPrometheusRule() || rule.IsInnerRule() {
datasourceIds := s.datasourceCache.GetIDsByDsCateAndQueries(rule.Cate, rule.DatasourceQueries)
for _, dsId := range datasourceIds {
if !naming.DatasourceHashRing.IsHit(strconv.FormatInt(dsId, 10), fmt.Sprintf("%d", rule.Id), s.aconf.Heartbeat.Endpoint) {

View File

@@ -13,6 +13,7 @@ import (
"sync"
"time"
"github.com/ccfos/nightingale/v6/alert/astats"
"github.com/ccfos/nightingale/v6/alert/common"
"github.com/ccfos/nightingale/v6/alert/process"
"github.com/ccfos/nightingale/v6/dscache"
@@ -36,7 +37,6 @@ type AlertRuleWorker struct {
DatasourceId int64
Quit chan struct{}
Inhibit bool
Severity int
Rule *models.AlertRule
@@ -99,7 +99,7 @@ func NewAlertRuleWorker(rule *models.AlertRule, datasourceId int64, Processor *p
rule.CronPattern = fmt.Sprintf("@every %ds", interval)
}
arw.Scheduler = cron.New(cron.WithSeconds())
arw.Scheduler = cron.New(cron.WithSeconds(), cron.WithChain(cron.SkipIfStillRunning(cron.DefaultLogger)))
entryID, err := arw.Scheduler.AddFunc(rule.CronPattern, func() {
arw.Eval()
@@ -144,14 +144,24 @@ func (arw *AlertRuleWorker) Start() {
}
func (arw *AlertRuleWorker) Eval() {
logger.Infof("eval:%s started", arw.Key())
begin := time.Now()
var message string
defer func() {
if len(message) == 0 {
logger.Infof("rule_eval:%s finished, duration:%v", arw.Key(), time.Since(begin))
} else {
logger.Infof("rule_eval:%s finished, duration:%v, message:%s", arw.Key(), time.Since(begin), message)
}
}()
if arw.Processor.PromEvalInterval == 0 {
arw.Processor.PromEvalInterval = getPromEvalInterval(arw.Processor.ScheduleEntry.Schedule)
}
cachedRule := arw.Rule
if cachedRule == nil {
// logger.Errorf("rule_eval:%s Rule not found", arw.Key())
message = "rule not found"
return
}
arw.Processor.Stats.CounterRuleEval.WithLabelValues().Inc()
@@ -172,16 +182,17 @@ func (arw *AlertRuleWorker) Eval() {
case models.LOKI:
anomalyPoints, err = arw.GetPromAnomalyPoint(cachedRule.RuleConfig)
default:
anomalyPoints, recoverPoints = arw.GetAnomalyPoint(cachedRule, arw.Processor.DatasourceId())
anomalyPoints, recoverPoints, err = arw.GetAnomalyPoint(cachedRule, arw.Processor.DatasourceId())
}
if err != nil {
logger.Errorf("rule_eval:%s get anomaly point err:%s", arw.Key(), err.Error())
message = "failed to get anomaly points"
return
}
if arw.Processor == nil {
logger.Warningf("rule_eval:%s Processor is nil", arw.Key())
message = "processor is nil"
return
}
@@ -223,7 +234,7 @@ func (arw *AlertRuleWorker) Eval() {
}
func (arw *AlertRuleWorker) Stop() {
logger.Infof("rule_eval %s stopped", arw.Key())
logger.Infof("rule_eval:%s stopped", arw.Key())
close(arw.Quit)
c := arw.Scheduler.Stop()
<-c.Done()
@@ -232,7 +243,10 @@ func (arw *AlertRuleWorker) Stop() {
func (arw *AlertRuleWorker) GetPromAnomalyPoint(ruleConfig string) ([]models.AnomalyPoint, error) {
var lst []models.AnomalyPoint
var severity int
start := time.Now()
defer func() {
arw.Processor.Stats.GaugeRuleEvalDuration.WithLabelValues(fmt.Sprintf("%v", arw.Rule.Id), fmt.Sprintf("%v", arw.Processor.DatasourceId())).Set(float64(time.Since(start).Milliseconds()))
}()
var rule *models.PromRuleConfig
if err := json.Unmarshal([]byte(ruleConfig), &rule); err != nil {
@@ -259,10 +273,6 @@ func (arw *AlertRuleWorker) GetPromAnomalyPoint(ruleConfig string) ([]models.Ano
arw.Inhibit = rule.Inhibit
for i, query := range rule.Queries {
if query.Severity < severity {
arw.Severity = query.Severity
}
readerClient := arw.PromClients.GetCli(arw.DatasourceId)
if readerClient == nil {
@@ -281,9 +291,21 @@ func (arw *AlertRuleWorker) GetPromAnomalyPoint(ruleConfig string) ([]models.Ano
if hasLabelLossAggregator(query) || notExactMatch(query) {
// 若有聚合函数或非精确匹配则需要先填充变量然后查询,这个方式效率较低
anomalyPoints = arw.VarFillingBeforeQuery(query, readerClient)
arw.Processor.Stats.CounterVarFillingQuery.WithLabelValues(
fmt.Sprintf("%v", arw.Rule.Id),
fmt.Sprintf("%v", arw.Processor.DatasourceId()),
fmt.Sprintf("%v", i),
"BeforeQuery",
).Inc()
} else {
// 先查询再过滤变量,效率较高,但无法处理有聚合函数的情况
anomalyPoints = arw.VarFillingAfterQuery(query, readerClient)
arw.Processor.Stats.CounterVarFillingQuery.WithLabelValues(
fmt.Sprintf("%v", arw.Rule.Id),
fmt.Sprintf("%v", arw.Processor.DatasourceId()),
fmt.Sprintf("%v", i),
"AfterQuery",
).Inc()
}
lst = append(lst, anomalyPoints...)
} else {
@@ -302,7 +324,7 @@ func (arw *AlertRuleWorker) GetPromAnomalyPoint(ruleConfig string) ([]models.Ano
}
var warnings promsdk.Warnings
arw.Processor.Stats.CounterQueryDataTotal.WithLabelValues(fmt.Sprintf("%d", arw.DatasourceId)).Inc()
arw.Processor.Stats.CounterQueryDataTotal.WithLabelValues(fmt.Sprintf("%d", arw.DatasourceId), fmt.Sprintf("%d", arw.Rule.Id)).Inc()
value, warnings, err := readerClient.Query(context.Background(), promql, time.Now())
if err != nil {
logger.Errorf("rule_eval:%s promql:%s, error:%v", arw.Key(), promql, err)
@@ -413,6 +435,7 @@ func (arw *AlertRuleWorker) VarFillingAfterQuery(query models.PromQuery, readerC
realQuery = strings.Replace(realQuery, fmt.Sprintf("$%s", key), val, -1)
}
// 得到满足值变量的所有结果
arw.Processor.Stats.CounterQueryDataTotal.WithLabelValues(fmt.Sprintf("%d", arw.DatasourceId), fmt.Sprintf("%d", arw.Rule.Id)).Inc()
value, _, err := readerClient.Query(context.Background(), curQuery, time.Now())
if err != nil {
logger.Errorf("rule_eval:%s, promql:%s, error:%v", arw.Key(), curQuery, err)
@@ -574,7 +597,7 @@ func (arw *AlertRuleWorker) getParamPermutation(paramVal map[string]models.Param
logger.Errorf("query:%s fail to unmarshalling into string slice, error:%v", paramQuery.Query, err)
}
if len(query) == 0 {
paramsKeyAllLabel, err := getParamKeyAllLabel(varToLabel[paramKey], originPromql, readerClient)
paramsKeyAllLabel, err := getParamKeyAllLabel(varToLabel[paramKey], originPromql, readerClient, arw.DatasourceId, arw.Rule.Id, arw.Processor.Stats)
if err != nil {
logger.Errorf("rule_eval:%s, fail to getParamKeyAllLabel, error:%v query:%s", arw.Key(), err, paramQuery.Query)
}
@@ -605,7 +628,7 @@ func (arw *AlertRuleWorker) getParamPermutation(paramVal map[string]models.Param
return res, nil
}
func getParamKeyAllLabel(paramKey string, promql string, client promsdk.API) ([]string, error) {
func getParamKeyAllLabel(paramKey string, promql string, client promsdk.API, dsId int64, rid int64, stats *astats.Stats) ([]string, error) {
labels, metricName, err := promql2.GetLabelsAndMetricNameWithReplace(promql, "$")
if err != nil {
return nil, fmt.Errorf("promql:%s, get labels error:%v", promql, err)
@@ -619,6 +642,7 @@ func getParamKeyAllLabel(paramKey string, promql string, client promsdk.API) ([]
}
pr := metricName + "{" + strings.Join(labelstrs, ",") + "}"
stats.CounterQueryDataTotal.WithLabelValues(fmt.Sprintf("%d", dsId), fmt.Sprintf("%d", rid)).Inc()
value, _, err := client.Query(context.Background(), pr, time.Now())
if err != nil {
return nil, fmt.Errorf("promql: %s query error: %v", pr, err)
@@ -733,7 +757,10 @@ func combine(paramKeys []string, paraMap map[string][]string, index int, current
func (arw *AlertRuleWorker) GetHostAnomalyPoint(ruleConfig string) ([]models.AnomalyPoint, error) {
var lst []models.AnomalyPoint
var severity int
start := time.Now()
defer func() {
arw.Processor.Stats.GaugeRuleEvalDuration.WithLabelValues(fmt.Sprintf("%v", arw.Rule.Id), fmt.Sprintf("%v", arw.Processor.DatasourceId())).Set(float64(time.Since(start).Milliseconds()))
}()
var rule *models.HostRuleConfig
if err := json.Unmarshal([]byte(ruleConfig), &rule); err != nil {
@@ -761,10 +788,6 @@ func (arw *AlertRuleWorker) GetHostAnomalyPoint(ruleConfig string) ([]models.Ano
arw.Inhibit = rule.Inhibit
now := time.Now().Unix()
for _, trigger := range rule.Triggers {
if trigger.Severity < severity {
arw.Severity = trigger.Severity
}
switch trigger.Type {
case "target_miss":
t := now - int64(trigger.Duration)
@@ -1276,6 +1299,7 @@ func (arw *AlertRuleWorker) VarFillingBeforeQuery(query models.PromQuery, reader
<-semaphore
wg.Done()
}()
arw.Processor.Stats.CounterQueryDataTotal.WithLabelValues(fmt.Sprintf("%d", arw.DatasourceId), fmt.Sprintf("%d", arw.Rule.Id)).Inc()
value, _, err := readerClient.Query(context.Background(), promql, time.Now())
if err != nil {
logger.Errorf("rule_eval:%s, promql:%s, error:%v", arw.Key(), promql, err)
@@ -1409,13 +1433,18 @@ func fillVar(curRealQuery string, paramKey string, val string) string {
return curRealQuery
}
func (arw *AlertRuleWorker) GetAnomalyPoint(rule *models.AlertRule, dsId int64) ([]models.AnomalyPoint, []models.AnomalyPoint) {
func (arw *AlertRuleWorker) GetAnomalyPoint(rule *models.AlertRule, dsId int64) ([]models.AnomalyPoint, []models.AnomalyPoint, error) {
// 获取查询和规则判断条件
start := time.Now()
defer func() {
arw.Processor.Stats.GaugeRuleEvalDuration.WithLabelValues(fmt.Sprintf("%v", arw.Rule.Id), fmt.Sprintf("%v", arw.Processor.DatasourceId())).Set(float64(time.Since(start).Milliseconds()))
}()
points := []models.AnomalyPoint{}
recoverPoints := []models.AnomalyPoint{}
ruleConfig := strings.TrimSpace(rule.RuleConfig)
if ruleConfig == "" {
logger.Warningf("rule_eval:%d promql is blank", rule.Id)
logger.Warningf("rule_eval:%d ruleConfig is blank", rule.Id)
arw.Processor.Stats.CounterRuleEvalErrorTotal.WithLabelValues(fmt.Sprintf("%v", arw.Processor.DatasourceId()), GET_RULE_CONFIG, arw.Processor.BusiGroupCache.GetNameByBusiGroupId(arw.Rule.GroupId), fmt.Sprintf("%v", arw.Rule.Id)).Inc()
arw.Processor.Stats.GaugeQuerySeriesCount.WithLabelValues(
fmt.Sprintf("%v", arw.Rule.Id),
@@ -1423,7 +1452,7 @@ func (arw *AlertRuleWorker) GetAnomalyPoint(rule *models.AlertRule, dsId int64)
"",
).Set(0)
return points, recoverPoints
return points, recoverPoints, fmt.Errorf("rule_eval:%d ruleConfig is blank", rule.Id)
}
var ruleQuery models.RuleQuery
@@ -1431,7 +1460,7 @@ func (arw *AlertRuleWorker) GetAnomalyPoint(rule *models.AlertRule, dsId int64)
if err != nil {
logger.Warningf("rule_eval:%d promql parse error:%s", rule.Id, err.Error())
arw.Processor.Stats.CounterRuleEvalErrorTotal.WithLabelValues(fmt.Sprintf("%v", arw.Processor.DatasourceId()), GET_RULE_CONFIG, arw.Processor.BusiGroupCache.GetNameByBusiGroupId(arw.Rule.GroupId), fmt.Sprintf("%v", arw.Rule.Id)).Inc()
return points, recoverPoints
return points, recoverPoints, fmt.Errorf("rule_eval:%d promql parse error:%s", rule.Id, err.Error())
}
arw.Inhibit = ruleQuery.Inhibit
@@ -1451,12 +1480,13 @@ func (arw *AlertRuleWorker) GetAnomalyPoint(rule *models.AlertRule, dsId int64)
fmt.Sprintf("%v", arw.Processor.DatasourceId()),
fmt.Sprintf("%v", i),
).Set(-2)
continue
return points, recoverPoints, fmt.Errorf("rule_eval:%d datasource:%d not exists", rule.Id, dsId)
}
ctx := context.WithValue(context.Background(), "delay", int64(rule.Delay))
series, err := plug.QueryData(ctx, query)
arw.Processor.Stats.CounterQueryDataTotal.WithLabelValues(fmt.Sprintf("%d", arw.DatasourceId)).Inc()
arw.Processor.Stats.CounterQueryDataTotal.WithLabelValues(fmt.Sprintf("%d", arw.DatasourceId), fmt.Sprintf("%d", rule.Id)).Inc()
if err != nil {
logger.Warningf("rule_eval rid:%d query data error: %v", rule.Id, err)
arw.Processor.Stats.CounterRuleEvalErrorTotal.WithLabelValues(fmt.Sprintf("%v", arw.Processor.DatasourceId()), GET_CLIENT, arw.Processor.BusiGroupCache.GetNameByBusiGroupId(arw.Rule.GroupId), fmt.Sprintf("%v", arw.Rule.Id)).Inc()
@@ -1466,7 +1496,7 @@ func (arw *AlertRuleWorker) GetAnomalyPoint(rule *models.AlertRule, dsId int64)
fmt.Sprintf("%v", i),
).Set(-1)
continue
return points, recoverPoints, fmt.Errorf("rule_eval:%d query data error: %v", rule.Id, err)
}
arw.Processor.Stats.GaugeQuerySeriesCount.WithLabelValues(
@@ -1500,6 +1530,7 @@ func (arw *AlertRuleWorker) GetAnomalyPoint(rule *models.AlertRule, dsId int64)
for _, query := range ruleQuery.Queries {
ref, unit, err := GetQueryRefAndUnit(query)
if err != nil {
logger.Warningf("rule_eval rid:%d query:%+v get ref and unit error:%s", rule.Id, query, err.Error())
continue
}
unitMap[ref] = unit
@@ -1579,6 +1610,11 @@ func (arw *AlertRuleWorker) GetAnomalyPoint(rule *models.AlertRule, dsId int64)
}
}
queries := ruleQuery.Queries
if sample.Query != "" {
queries = []interface{}{sample.Query}
}
point := models.AnomalyPoint{
Key: sample.MetricName(),
Labels: sample.Metric,
@@ -1587,7 +1623,7 @@ func (arw *AlertRuleWorker) GetAnomalyPoint(rule *models.AlertRule, dsId int64)
Values: values,
Severity: trigger.Severity,
Triggered: isTriggered,
Query: fmt.Sprintf("query:%+v trigger:%+v", ruleQuery.Queries, trigger),
Query: fmt.Sprintf("query:%+v trigger:%+v", queries, trigger),
RecoverConfig: trigger.RecoverConfig,
ValuesUnit: valuesUnitMap,
}
@@ -1661,5 +1697,5 @@ func (arw *AlertRuleWorker) GetAnomalyPoint(rule *models.AlertRule, dsId int64)
}
}
return points, recoverPoints
return points, recoverPoints, nil
}

View File

@@ -9,6 +9,7 @@ import (
"github.com/ccfos/nightingale/v6/memsto"
"github.com/ccfos/nightingale/v6/models"
"github.com/pkg/errors"
"github.com/toolkits/pkg/logger"
)
@@ -44,6 +45,12 @@ func TimeSpanMuteStrategy(rule *models.AlertRule, event *models.AlertCurEvent) b
triggerTime := tm.Format("15:04")
triggerWeek := strconv.Itoa(int(tm.Weekday()))
if rule.EnableDaysOfWeek == "" {
// 如果规则没有配置生效时间,则默认全天生效
return false
}
enableStime := strings.Fields(rule.EnableStime)
enableEtime := strings.Fields(rule.EnableEtime)
enableDaysOfWeek := strings.Split(rule.EnableDaysOfWeek, ";")
@@ -129,7 +136,8 @@ func EventMuteStrategy(event *models.AlertCurEvent, alertMuteCache *memsto.Alert
}
for i := 0; i < len(mutes); i++ {
if matchMute(event, mutes[i]) {
matched, _ := MatchMute(event, mutes[i])
if matched {
return true, mutes[i].Id
}
}
@@ -137,14 +145,10 @@ func EventMuteStrategy(event *models.AlertCurEvent, alertMuteCache *memsto.Alert
return false, 0
}
// matchMute 如果传入了clock这个可选参数就表示使用这个clock表示的时间否则就从event的字段中取TriggerTime
func matchMute(event *models.AlertCurEvent, mute *models.AlertMute, clock ...int64) bool {
// MatchMute 如果传入了clock这个可选参数就表示使用这个clock表示的时间否则就从event的字段中取TriggerTime
func MatchMute(event *models.AlertCurEvent, mute *models.AlertMute, clock ...int64) (bool, error) {
if mute.Disabled == 1 {
return false
}
ts := event.TriggerTime
if len(clock) > 0 {
ts = clock[0]
return false, errors.New("mute is disabled")
}
// 如果不是全局的,判断 匹配的 datasource id
@@ -156,42 +160,26 @@ func matchMute(event *models.AlertCurEvent, mute *models.AlertMute, clock ...int
// 判断 event.datasourceId 是否包含在 idm 中
if _, has := idm[event.DatasourceId]; !has {
return false
return false, errors.New("datasource id not match")
}
}
var matchTime bool
if mute.MuteTimeType == models.TimeRange {
if ts < mute.Btime || ts > mute.Etime {
return false
if !mute.IsWithinTimeRange(event.TriggerTime) {
return false, errors.New("event trigger time not within mute time range")
}
matchTime = true
} else if mute.MuteTimeType == models.Periodic {
tm := time.Unix(event.TriggerTime, 0)
triggerTime := tm.Format("15:04")
triggerWeek := strconv.Itoa(int(tm.Weekday()))
for i := 0; i < len(mute.PeriodicMutesJson); i++ {
if strings.Contains(mute.PeriodicMutesJson[i].EnableDaysOfWeek, triggerWeek) {
if mute.PeriodicMutesJson[i].EnableStime == mute.PeriodicMutesJson[i].EnableEtime || (mute.PeriodicMutesJson[i].EnableStime == "00:00" && mute.PeriodicMutesJson[i].EnableEtime == "23:59") {
matchTime = true
break
} else if mute.PeriodicMutesJson[i].EnableStime < mute.PeriodicMutesJson[i].EnableEtime {
if triggerTime >= mute.PeriodicMutesJson[i].EnableStime && triggerTime < mute.PeriodicMutesJson[i].EnableEtime {
matchTime = true
break
}
} else {
if triggerTime >= mute.PeriodicMutesJson[i].EnableStime || triggerTime < mute.PeriodicMutesJson[i].EnableEtime {
matchTime = true
break
}
}
}
ts := event.TriggerTime
if len(clock) > 0 {
ts = clock[0]
}
}
if !matchTime {
return false
if !mute.IsWithinPeriodicMute(ts) {
return false, errors.New("event trigger time not within periodic mute range")
}
} else {
logger.Warningf("mute time type invalid, %d", mute.MuteTimeType)
return false, errors.New("mute time type invalid")
}
var matchSeverity bool
@@ -207,12 +195,14 @@ func matchMute(event *models.AlertCurEvent, mute *models.AlertMute, clock ...int
}
if !matchSeverity {
return false
return false, errors.New("event severity not match mute severity")
}
if mute.ITags == nil || len(mute.ITags) == 0 {
return true
return true, nil
}
return common.MatchTags(event.TagsMap, mute.ITags)
if !common.MatchTags(event.TagsMap, mute.ITags) {
return false, errors.New("event tags not match mute tags")
}
return true, nil
}

View File

@@ -0,0 +1,12 @@
package pipeline
import (
_ "github.com/ccfos/nightingale/v6/alert/pipeline/processor/aisummary"
_ "github.com/ccfos/nightingale/v6/alert/pipeline/processor/callback"
_ "github.com/ccfos/nightingale/v6/alert/pipeline/processor/eventdrop"
_ "github.com/ccfos/nightingale/v6/alert/pipeline/processor/eventupdate"
_ "github.com/ccfos/nightingale/v6/alert/pipeline/processor/relabel"
)
func Init() {
}

View File

@@ -0,0 +1,244 @@
package aisummary
import (
"bytes"
"crypto/tls"
"encoding/json"
"fmt"
"io"
"net/http"
"net/url"
"strconv"
"strings"
"text/template"
"time"
"github.com/ccfos/nightingale/v6/alert/pipeline/processor/callback"
"github.com/ccfos/nightingale/v6/alert/pipeline/processor/common"
"github.com/ccfos/nightingale/v6/models"
"github.com/ccfos/nightingale/v6/pkg/ctx"
"github.com/ccfos/nightingale/v6/pkg/tplx"
)
const (
HTTP_STATUS_SUCCESS_MAX = 299
)
// AISummaryConfig 配置结构体
type AISummaryConfig struct {
callback.HTTPConfig
ModelName string `json:"model_name"`
APIKey string `json:"api_key"`
PromptTemplate string `json:"prompt_template"`
CustomParams map[string]interface{} `json:"custom_params"`
}
type Message struct {
Role string `json:"role"`
Content string `json:"content"`
}
type ChatCompletionResponse struct {
Choices []struct {
Message struct {
Content string `json:"content"`
} `json:"message"`
} `json:"choices"`
}
func init() {
models.RegisterProcessor("ai_summary", &AISummaryConfig{})
}
func (c *AISummaryConfig) Init(settings interface{}) (models.Processor, error) {
result, err := common.InitProcessor[*AISummaryConfig](settings)
return result, err
}
func (c *AISummaryConfig) Process(ctx *ctx.Context, event *models.AlertCurEvent) (*models.AlertCurEvent, string, error) {
if c.Client == nil {
if err := c.initHTTPClient(); err != nil {
return event, "", fmt.Errorf("failed to initialize HTTP client: %v processor: %v", err, c)
}
}
// 准备告警事件信息
eventInfo, err := c.prepareEventInfo(event)
if err != nil {
return event, "", fmt.Errorf("failed to prepare event info: %v processor: %v", err, c)
}
// 调用AI模型生成总结
summary, err := c.generateAISummary(eventInfo)
if err != nil {
return event, "", fmt.Errorf("failed to generate AI summary: %v processor: %v", err, c)
}
// 将总结添加到annotations字段
if event.AnnotationsJSON == nil {
event.AnnotationsJSON = make(map[string]string)
}
event.AnnotationsJSON["ai_summary"] = summary
// 更新Annotations字段
b, err := json.Marshal(event.AnnotationsJSON)
if err != nil {
return event, "", fmt.Errorf("failed to marshal annotations: %v processor: %v", err, c)
}
event.Annotations = string(b)
return event, "", nil
}
func (c *AISummaryConfig) initHTTPClient() error {
transport := &http.Transport{
TLSClientConfig: &tls.Config{InsecureSkipVerify: c.SkipSSLVerify},
}
if c.Proxy != "" {
proxyURL, err := url.Parse(c.Proxy)
if err != nil {
return fmt.Errorf("failed to parse proxy url: %v", err)
}
transport.Proxy = http.ProxyURL(proxyURL)
}
c.Client = &http.Client{
Timeout: time.Duration(c.Timeout) * time.Millisecond,
Transport: transport,
}
return nil
}
func (c *AISummaryConfig) prepareEventInfo(event *models.AlertCurEvent) (string, error) {
var defs = []string{
"{{$event := .}}",
}
text := strings.Join(append(defs, c.PromptTemplate), "")
t, err := template.New("prompt").Funcs(template.FuncMap(tplx.TemplateFuncMap)).Parse(text)
if err != nil {
return "", fmt.Errorf("failed to parse prompt template: %v", err)
}
var body bytes.Buffer
err = t.Execute(&body, event)
if err != nil {
return "", fmt.Errorf("failed to execute prompt template: %v", err)
}
return body.String(), nil
}
func (c *AISummaryConfig) generateAISummary(eventInfo string) (string, error) {
// 构建基础请求参数
reqParams := map[string]interface{}{
"model": c.ModelName,
"messages": []Message{
{
Role: "user",
Content: eventInfo,
},
},
}
// 合并自定义参数
for k, v := range c.CustomParams {
converted, err := convertCustomParam(v)
if err != nil {
return "", fmt.Errorf("failed to convert custom param %s: %v", k, err)
}
reqParams[k] = converted
}
// 序列化请求体
jsonData, err := json.Marshal(reqParams)
if err != nil {
return "", fmt.Errorf("failed to marshal request body: %v", err)
}
// 创建HTTP请求
req, err := http.NewRequest("POST", c.URL, bytes.NewBuffer(jsonData))
if err != nil {
return "", fmt.Errorf("failed to create request: %v", err)
}
// 设置请求头
req.Header.Set("Authorization", "Bearer "+c.APIKey)
req.Header.Set("Content-Type", "application/json")
for k, v := range c.Headers {
req.Header.Set(k, v)
}
// 发送请求
resp, err := c.Client.Do(req)
if err != nil {
return "", fmt.Errorf("failed to send request: %v", err)
}
defer resp.Body.Close()
// 检查响应状态码
if resp.StatusCode > HTTP_STATUS_SUCCESS_MAX {
body, _ := io.ReadAll(resp.Body)
return "", fmt.Errorf("unexpected status code: %d, body: %s", resp.StatusCode, string(body))
}
// 读取响应
body, err := io.ReadAll(resp.Body)
if err != nil {
return "", fmt.Errorf("failed to read response body: %v", err)
}
// 解析响应
var chatResp ChatCompletionResponse
if err := json.Unmarshal(body, &chatResp); err != nil {
return "", fmt.Errorf("failed to unmarshal response: %v", err)
}
if len(chatResp.Choices) == 0 {
return "", fmt.Errorf("no response from AI model")
}
return chatResp.Choices[0].Message.Content, nil
}
// convertCustomParam 将前端传入的参数转换为正确的类型
func convertCustomParam(value interface{}) (interface{}, error) {
if value == nil {
return nil, nil
}
// 如果是字符串,尝试转换为其他类型
if str, ok := value.(string); ok {
// 尝试转换为数字
if f, err := strconv.ParseFloat(str, 64); err == nil {
// 检查是否为整数
if f == float64(int64(f)) {
return int64(f), nil
}
return f, nil
}
// 尝试转换为布尔值
if b, err := strconv.ParseBool(str); err == nil {
return b, nil
}
// 尝试解析为JSON数组
if strings.HasPrefix(strings.TrimSpace(str), "[") {
var arr []interface{}
if err := json.Unmarshal([]byte(str), &arr); err == nil {
return arr, nil
}
}
// 尝试解析为JSON对象
if strings.HasPrefix(strings.TrimSpace(str), "{") {
var obj map[string]interface{}
if err := json.Unmarshal([]byte(str), &obj); err == nil {
return obj, nil
}
}
}
return value, nil
}

View File

@@ -0,0 +1,139 @@
package aisummary
import (
"testing"
"github.com/ccfos/nightingale/v6/alert/pipeline/processor/callback"
"github.com/ccfos/nightingale/v6/models"
"github.com/ccfos/nightingale/v6/pkg/ctx"
"github.com/stretchr/testify/assert"
)
func TestAISummaryConfig_Process(t *testing.T) {
// 创建测试配置
config := &AISummaryConfig{
HTTPConfig: callback.HTTPConfig{
URL: "https://generativelanguage.googleapis.com/v1beta/openai/chat/completions",
Timeout: 30000,
SkipSSLVerify: true,
Headers: map[string]string{
"Content-Type": "application/json",
},
},
ModelName: "gemini-2.0-flash",
APIKey: "*",
PromptTemplate: "告警规则:{{$event.RuleName}}\n严重程度{{$event.Severity}}",
CustomParams: map[string]interface{}{
"temperature": 0.7,
"max_tokens": 2000,
"top_p": 0.9,
},
}
// 创建测试事件
event := &models.AlertCurEvent{
RuleName: "Test Rule",
Severity: 1,
TagsMap: map[string]string{
"host": "test-host",
},
AnnotationsJSON: map[string]string{
"description": "Test alert",
},
}
// 测试模板处理
eventInfo, err := config.prepareEventInfo(event)
assert.NoError(t, err)
assert.Contains(t, eventInfo, "Test Rule")
assert.Contains(t, eventInfo, "1")
// 测试配置初始化
processor, err := config.Init(config)
assert.NoError(t, err)
assert.NotNil(t, processor)
// 测试处理函数
result, _, err := processor.Process(&ctx.Context{}, event)
assert.NoError(t, err)
assert.NotNil(t, result)
assert.NotEmpty(t, result.AnnotationsJSON["ai_summary"])
// 展示处理结果
t.Log("\n=== 处理结果 ===")
t.Logf("告警规则: %s", result.RuleName)
t.Logf("严重程度: %d", result.Severity)
t.Logf("标签: %v", result.TagsMap)
t.Logf("原始注释: %v", result.AnnotationsJSON["description"])
t.Logf("AI总结: %s", result.AnnotationsJSON["ai_summary"])
}
func TestConvertCustomParam(t *testing.T) {
tests := []struct {
name string
input interface{}
expected interface{}
hasError bool
}{
{
name: "nil value",
input: nil,
expected: nil,
hasError: false,
},
{
name: "string number to int64",
input: "123",
expected: int64(123),
hasError: false,
},
{
name: "string float to float64",
input: "123.45",
expected: 123.45,
hasError: false,
},
{
name: "string boolean to bool",
input: "true",
expected: true,
hasError: false,
},
{
name: "string false to bool",
input: "false",
expected: false,
hasError: false,
},
{
name: "JSON array string to slice",
input: `["a", "b", "c"]`,
expected: []interface{}{"a", "b", "c"},
hasError: false,
},
{
name: "JSON object string to map",
input: `{"key": "value", "num": 123}`,
expected: map[string]interface{}{"key": "value", "num": float64(123)},
hasError: false,
},
{
name: "plain string remains string",
input: "hello world",
expected: "hello world",
hasError: false,
},
}
for _, test := range tests {
t.Run(test.name, func(t *testing.T) {
converted, err := convertCustomParam(test.input)
if test.hasError {
assert.Error(t, err)
return
}
assert.NoError(t, err)
assert.Equal(t, test.expected, converted)
})
}
}

View File

@@ -0,0 +1,103 @@
package callback
import (
"crypto/tls"
"encoding/json"
"fmt"
"io"
"net/http"
"net/url"
"strings"
"time"
"github.com/ccfos/nightingale/v6/alert/pipeline/processor/common"
"github.com/ccfos/nightingale/v6/models"
"github.com/ccfos/nightingale/v6/pkg/ctx"
"github.com/toolkits/pkg/logger"
)
type HTTPConfig struct {
URL string `json:"url"`
Method string `json:"method,omitempty"`
Body string `json:"body,omitempty"`
Headers map[string]string `json:"header"`
AuthUsername string `json:"auth_username"`
AuthPassword string `json:"auth_password"`
Timeout int `json:"timeout"` // 单位:ms
SkipSSLVerify bool `json:"skip_ssl_verify"`
Proxy string `json:"proxy"`
Client *http.Client `json:"-"`
}
// RelabelConfig
type CallbackConfig struct {
HTTPConfig
}
func init() {
models.RegisterProcessor("callback", &CallbackConfig{})
}
func (c *CallbackConfig) Init(settings interface{}) (models.Processor, error) {
result, err := common.InitProcessor[*CallbackConfig](settings)
return result, err
}
func (c *CallbackConfig) Process(ctx *ctx.Context, event *models.AlertCurEvent) (*models.AlertCurEvent, string, error) {
if c.Client == nil {
transport := &http.Transport{
TLSClientConfig: &tls.Config{InsecureSkipVerify: c.SkipSSLVerify},
}
if c.Proxy != "" {
proxyURL, err := url.Parse(c.Proxy)
if err != nil {
return event, "", fmt.Errorf("failed to parse proxy url: %v processor: %v", err, c)
} else {
transport.Proxy = http.ProxyURL(proxyURL)
}
}
c.Client = &http.Client{
Timeout: time.Duration(c.Timeout) * time.Millisecond,
Transport: transport,
}
}
headers := make(map[string]string)
headers["Content-Type"] = "application/json"
for k, v := range c.Headers {
headers[k] = v
}
body, err := json.Marshal(event)
if err != nil {
return event, "", fmt.Errorf("failed to marshal event: %v processor: %v", err, c)
}
req, err := http.NewRequest("POST", c.URL, strings.NewReader(string(body)))
if err != nil {
return event, "", fmt.Errorf("failed to create request: %v processor: %v", err, c)
}
for k, v := range headers {
req.Header.Set(k, v)
}
if c.AuthUsername != "" && c.AuthPassword != "" {
req.SetBasicAuth(c.AuthUsername, c.AuthPassword)
}
resp, err := c.Client.Do(req)
if err != nil {
return event, "", fmt.Errorf("failed to send request: %v processor: %v", err, c)
}
b, err := io.ReadAll(resp.Body)
if err != nil {
return event, "", fmt.Errorf("failed to read response body: %v processor: %v", err, c)
}
logger.Debugf("callback processor response body: %s", string(b))
return event, "callback success", nil
}

View File

@@ -0,0 +1,24 @@
package common
import (
"encoding/json"
)
// InitProcessor 是一个通用的初始化处理器的方法
// 使用泛型简化处理器初始化逻辑
// T 必须是 models.Processor 接口的实现
func InitProcessor[T any](settings interface{}) (T, error) {
var zero T
b, err := json.Marshal(settings)
if err != nil {
return zero, err
}
var result T
err = json.Unmarshal(b, &result)
if err != nil {
return zero, err
}
return result, nil
}

View File

@@ -0,0 +1,60 @@
package eventdrop
import (
"bytes"
"fmt"
"strings"
texttemplate "text/template"
"github.com/ccfos/nightingale/v6/alert/pipeline/processor/common"
"github.com/ccfos/nightingale/v6/models"
"github.com/ccfos/nightingale/v6/pkg/ctx"
"github.com/ccfos/nightingale/v6/pkg/tplx"
"github.com/toolkits/pkg/logger"
)
type EventDropConfig struct {
Content string `json:"content"`
}
func init() {
models.RegisterProcessor("event_drop", &EventDropConfig{})
}
func (c *EventDropConfig) Init(settings interface{}) (models.Processor, error) {
result, err := common.InitProcessor[*EventDropConfig](settings)
return result, err
}
func (c *EventDropConfig) Process(ctx *ctx.Context, event *models.AlertCurEvent) (*models.AlertCurEvent, string, error) {
// 使用背景是可以根据此处理器,实现对事件进行更加灵活的过滤的逻辑
// 在标签过滤和属性过滤都不满足需求时可以使用
// 如果模板执行结果为 true则删除该事件
var defs = []string{
"{{ $event := . }}",
"{{ $labels := .TagsMap }}",
"{{ $value := .TriggerValue }}",
}
text := strings.Join(append(defs, c.Content), "")
tpl, err := texttemplate.New("eventdrop").Funcs(tplx.TemplateFuncMap).Parse(text)
if err != nil {
return event, "", fmt.Errorf("processor failed to parse template: %v processor: %v", err, c)
}
var body bytes.Buffer
if err = tpl.Execute(&body, event); err != nil {
return event, "", fmt.Errorf("processor failed to execute template: %v processor: %v", err, c)
}
result := strings.TrimSpace(body.String())
logger.Infof("processor eventdrop result: %v", result)
if result == "true" {
logger.Infof("processor eventdrop drop event: %v", event)
return nil, "drop event success", nil
}
return event, "drop event failed", nil
}

View File

@@ -0,0 +1,96 @@
package eventupdate
import (
"crypto/tls"
"encoding/json"
"fmt"
"io"
"net/http"
"net/url"
"strings"
"time"
"github.com/ccfos/nightingale/v6/alert/pipeline/processor/callback"
"github.com/ccfos/nightingale/v6/alert/pipeline/processor/common"
"github.com/ccfos/nightingale/v6/models"
"github.com/ccfos/nightingale/v6/pkg/ctx"
"github.com/toolkits/pkg/logger"
)
// RelabelConfig
type EventUpdateConfig struct {
callback.HTTPConfig
}
func init() {
models.RegisterProcessor("event_update", &EventUpdateConfig{})
}
func (c *EventUpdateConfig) Init(settings interface{}) (models.Processor, error) {
result, err := common.InitProcessor[*EventUpdateConfig](settings)
return result, err
}
func (c *EventUpdateConfig) Process(ctx *ctx.Context, event *models.AlertCurEvent) (*models.AlertCurEvent, string, error) {
if c.Client == nil {
transport := &http.Transport{
TLSClientConfig: &tls.Config{InsecureSkipVerify: c.SkipSSLVerify},
}
if c.Proxy != "" {
proxyURL, err := url.Parse(c.Proxy)
if err != nil {
return event, "", fmt.Errorf("failed to parse proxy url: %v processor: %v", err, c)
} else {
transport.Proxy = http.ProxyURL(proxyURL)
}
}
c.Client = &http.Client{
Timeout: time.Duration(c.Timeout) * time.Millisecond,
Transport: transport,
}
}
headers := make(map[string]string)
headers["Content-Type"] = "application/json"
for k, v := range c.Headers {
headers[k] = v
}
body, err := json.Marshal(event)
if err != nil {
return event, "", fmt.Errorf("failed to marshal event: %v processor: %v", err, c)
}
req, err := http.NewRequest("POST", c.URL, strings.NewReader(string(body)))
if err != nil {
return event, "", fmt.Errorf("failed to create request: %v processor: %v", err, c)
}
for k, v := range headers {
req.Header.Set(k, v)
}
if c.AuthUsername != "" && c.AuthPassword != "" {
req.SetBasicAuth(c.AuthUsername, c.AuthPassword)
}
resp, err := c.Client.Do(req)
if err != nil {
return event, "", fmt.Errorf("failed to send request: %v processor: %v", err, c)
}
b, err := io.ReadAll(resp.Body)
if err != nil {
return nil, "", fmt.Errorf("failed to read response body: %v processor: %v", err, c)
}
logger.Debugf("event update processor response body: %s", string(b))
err = json.Unmarshal(b, &event)
if err != nil {
return event, "", fmt.Errorf("failed to unmarshal response body: %v processor: %v", err, c)
}
return event, "", nil
}

View File

@@ -0,0 +1,107 @@
package relabel
import (
"fmt"
"regexp"
"strings"
"github.com/ccfos/nightingale/v6/alert/pipeline/processor/common"
"github.com/ccfos/nightingale/v6/models"
"github.com/ccfos/nightingale/v6/pkg/ctx"
"github.com/ccfos/nightingale/v6/pushgw/pconf"
"github.com/ccfos/nightingale/v6/pushgw/writer"
"github.com/prometheus/common/model"
"github.com/prometheus/prometheus/prompb"
)
const (
REPLACE_DOT = "___"
)
// RelabelConfig
type RelabelConfig struct {
SourceLabels []string `json:"source_labels"`
Separator string `json:"separator"`
Regex string `json:"regex"`
RegexCompiled *regexp.Regexp
If string `json:"if"`
IfRegex *regexp.Regexp
Modulus uint64 `json:"modulus"`
TargetLabel string `json:"target_label"`
Replacement string `json:"replacement"`
Action string `json:"action"`
}
func init() {
models.RegisterProcessor("relabel", &RelabelConfig{})
}
func (r *RelabelConfig) Init(settings interface{}) (models.Processor, error) {
result, err := common.InitProcessor[*RelabelConfig](settings)
return result, err
}
func (r *RelabelConfig) Process(ctx *ctx.Context, event *models.AlertCurEvent) (*models.AlertCurEvent, string, error) {
sourceLabels := make([]model.LabelName, len(r.SourceLabels))
for i := range r.SourceLabels {
sourceLabels[i] = model.LabelName(strings.ReplaceAll(r.SourceLabels[i], ".", REPLACE_DOT))
}
relabelConfigs := []*pconf.RelabelConfig{
{
SourceLabels: sourceLabels,
Separator: r.Separator,
Regex: r.Regex,
RegexCompiled: r.RegexCompiled,
If: r.If,
IfRegex: r.IfRegex,
Modulus: r.Modulus,
TargetLabel: r.TargetLabel,
Replacement: r.Replacement,
Action: r.Action,
},
}
EventRelabel(event, relabelConfigs)
return event, "", nil
}
func EventRelabel(event *models.AlertCurEvent, relabelConfigs []*pconf.RelabelConfig) {
labels := make([]prompb.Label, len(event.TagsJSON))
event.OriginalTagsJSON = make([]string, len(event.TagsJSON))
for i, tag := range event.TagsJSON {
label := strings.SplitN(tag, "=", 2)
if len(label) != 2 {
continue
}
event.OriginalTagsJSON[i] = tag
label[0] = strings.ReplaceAll(string(label[0]), ".", REPLACE_DOT)
labels[i] = prompb.Label{Name: label[0], Value: label[1]}
}
for i := 0; i < len(relabelConfigs); i++ {
if relabelConfigs[i].Replacement == "" {
relabelConfigs[i].Replacement = "$1"
}
if relabelConfigs[i].Separator == "" {
relabelConfigs[i].Separator = ";"
}
if relabelConfigs[i].Regex == "" {
relabelConfigs[i].Regex = "(.*)"
}
}
gotLabels := writer.Process(labels, relabelConfigs...)
event.TagsJSON = make([]string, len(gotLabels))
event.TagsMap = make(map[string]string, len(gotLabels))
for i, label := range gotLabels {
label.Name = strings.ReplaceAll(string(label.Name), REPLACE_DOT, ".")
event.TagsJSON[i] = fmt.Sprintf("%s=%s", label.Name, label.Value)
event.TagsMap[label.Name] = label.Value
}
event.Tags = strings.Join(event.TagsJSON, ",,")
}

View File

@@ -14,14 +14,13 @@ import (
"github.com/ccfos/nightingale/v6/alert/common"
"github.com/ccfos/nightingale/v6/alert/dispatch"
"github.com/ccfos/nightingale/v6/alert/mute"
"github.com/ccfos/nightingale/v6/alert/pipeline/processor/relabel"
"github.com/ccfos/nightingale/v6/alert/queue"
"github.com/ccfos/nightingale/v6/memsto"
"github.com/ccfos/nightingale/v6/models"
"github.com/ccfos/nightingale/v6/pkg/ctx"
"github.com/ccfos/nightingale/v6/pkg/tplx"
"github.com/ccfos/nightingale/v6/pushgw/writer"
"github.com/prometheus/prometheus/prompb"
"github.com/robfig/cron/v3"
"github.com/toolkits/pkg/logger"
"github.com/toolkits/pkg/str"
@@ -61,11 +60,9 @@ type Processor struct {
pendingsUseByRecover *AlertCurEventMap
inhibit bool
tagsMap map[string]string
tagsArr []string
target string
targetNote string
groupName string
tagsMap map[string]string
tagsArr []string
groupName string
alertRuleCache *memsto.AlertRuleCacheType
TargetCache *memsto.TargetCacheType
@@ -154,7 +151,7 @@ func (p *Processor) Handle(anomalyPoints []models.AnomalyPoint, from string, inh
eventsMap := make(map[string][]*models.AlertCurEvent)
for _, anomalyPoint := range anomalyPoints {
event := p.BuildEvent(anomalyPoint, from, now, ruleHash)
event.NotifyRuleIDs = cachedRule.NotifyRuleIds
event.NotifyRuleIds = cachedRule.NotifyRuleIds
// 如果 event 被 mute 了,本质也是 fire 的状态,这里无论如何都添加到 alertingKeys 中,防止 fire 的事件自动恢复了
hash := event.Hash
alertingKeys[hash] = struct{}{}
@@ -196,7 +193,7 @@ func (p *Processor) Handle(anomalyPoints []models.AnomalyPoint, from string, inh
func (p *Processor) BuildEvent(anomalyPoint models.AnomalyPoint, from string, now int64, ruleHash string) *models.AlertCurEvent {
p.fillTags(anomalyPoint)
p.mayHandleIdent()
hash := Hash(p.rule.Id, p.datasourceId, anomalyPoint)
ds := p.datasourceCache.GetById(p.datasourceId)
var dsName string
@@ -216,8 +213,6 @@ func (p *Processor) BuildEvent(anomalyPoint models.AnomalyPoint, from string, no
event.DatasourceId = p.datasourceId
event.Cluster = dsName
event.Hash = hash
event.TargetIdent = p.target
event.TargetNote = p.targetNote
event.TriggerValue = anomalyPoint.ReadableValue()
event.TriggerValues = anomalyPoint.Values
event.TriggerValuesJson = models.EventTriggerValues{ValuesWithUnit: anomalyPoint.ValuesUnit}
@@ -249,15 +244,6 @@ func (p *Processor) BuildEvent(anomalyPoint models.AnomalyPoint, from string, no
logger.Warningf("unmarshal annotations json failed: %v, rule: %d", err, p.rule.Id)
}
if p.target != "" {
if pt, exist := p.TargetCache.Get(p.target); exist {
pt.GroupNames = p.BusiGroupCache.GetNamesByBusiGroupIds(pt.GroupIds)
event.Target = pt
} else {
logger.Infof("Target[ident: %s] doesn't exist in cache.", p.target)
}
}
if event.TriggerValues != "" && strings.Count(event.TriggerValues, "$") > 1 {
// TriggerValues 有多个变量,将多个变量都放到 TriggerValue 中
event.TriggerValue = event.TriggerValues
@@ -271,6 +257,19 @@ func (p *Processor) BuildEvent(anomalyPoint models.AnomalyPoint, from string, no
// 生成事件之后,立马进程 relabel 处理
Relabel(p.rule, event)
// 放到 Relabel(p.rule, event) 下面,为了处理 relabel 之后,标签里才出现 ident 的情况
p.mayHandleIdent(event)
if event.TargetIdent != "" {
if pt, exist := p.TargetCache.Get(event.TargetIdent); exist {
pt.GroupNames = p.BusiGroupCache.GetNamesByBusiGroupIds(pt.GroupIds)
event.Target = pt
} else {
logger.Infof("fill event target error, ident: %s doesn't exist in cache.", event.TargetIdent)
}
}
return event
}
@@ -279,44 +278,15 @@ func Relabel(rule *models.AlertRule, event *models.AlertCurEvent) {
return
}
// need to keep the original label
event.OriginalTags = event.Tags
event.OriginalTagsJSON = event.TagsJSON
if len(rule.EventRelabelConfig) == 0 {
return
}
// need to keep the original label
event.OriginalTags = event.Tags
event.OriginalTagsJSON = make([]string, len(event.TagsJSON))
labels := make([]prompb.Label, len(event.TagsJSON))
for i, tag := range event.TagsJSON {
label := strings.SplitN(tag, "=", 2)
event.OriginalTagsJSON[i] = tag
labels[i] = prompb.Label{Name: label[0], Value: label[1]}
}
for i := 0; i < len(rule.EventRelabelConfig); i++ {
if rule.EventRelabelConfig[i].Replacement == "" {
rule.EventRelabelConfig[i].Replacement = "$1"
}
if rule.EventRelabelConfig[i].Separator == "" {
rule.EventRelabelConfig[i].Separator = ";"
}
if rule.EventRelabelConfig[i].Regex == "" {
rule.EventRelabelConfig[i].Regex = "(.*)"
}
}
// relabel process
relabels := writer.Process(labels, rule.EventRelabelConfig...)
event.TagsJSON = make([]string, len(relabels))
event.TagsMap = make(map[string]string, len(relabels))
for i, label := range relabels {
event.TagsJSON[i] = fmt.Sprintf("%s=%s", label.Name, label.Value)
event.TagsMap[label.Name] = label.Value
}
event.Tags = strings.Join(event.TagsJSON, ",,")
relabel.EventRelabel(event, rule.EventRelabelConfig)
}
func (p *Processor) HandleRecover(alertingKeys map[string]struct{}, now int64, inhibit bool) {
@@ -436,8 +406,8 @@ func (p *Processor) RecoverSingle(byRecover bool, hash string, now int64, value
func (p *Processor) handleEvent(events []*models.AlertCurEvent) {
var fireEvents []*models.AlertCurEvent
// severity 初始为 4, 一定为遇到比自己优先级高的事件
severity := 4
// severity 初始为最低优先级, 一定为遇到比自己优先级高的事件
severity := models.SeverityLowest
for _, event := range events {
if event == nil {
continue
@@ -458,17 +428,18 @@ func (p *Processor) handleEvent(events []*models.AlertCurEvent) {
continue
}
var preTriggerTime int64 // 第一个 pending event 的触发时间
var preEvalTime int64 // 第一个 pending event 的检测时间
preEvent, has := p.pendings.Get(event.Hash)
if has {
p.pendings.UpdateLastEvalTime(event.Hash, event.LastEvalTime)
preTriggerTime = preEvent.TriggerTime
preEvalTime = preEvent.FirstEvalTime
} else {
event.FirstEvalTime = event.LastEvalTime
p.pendings.Set(event.Hash, event)
preTriggerTime = event.TriggerTime
preEvalTime = event.FirstEvalTime
}
if event.LastEvalTime-preTriggerTime+int64(event.PromEvalInterval) >= int64(p.rule.PromForDuration) {
if event.LastEvalTime-preEvalTime+int64(event.PromEvalInterval) >= int64(p.rule.PromForDuration) {
fireEvents = append(fireEvents, event)
if severity > event.Severity {
severity = event.Severity
@@ -497,16 +468,18 @@ func (p *Processor) fireEvent(event *models.AlertCurEvent) {
return
}
logger.Debugf("rule_eval:%s event:%+v fire", p.Key(), event)
message := "unknown"
defer func() {
logger.Infof("rule_eval:%s event-hash-%s %s", p.Key(), event.Hash, message)
}()
if fired, has := p.fires.Get(event.Hash); has {
p.fires.UpdateLastEvalTime(event.Hash, event.LastEvalTime)
event.FirstTriggerTime = fired.FirstTriggerTime
p.HandleFireEventHook(event)
if cachedRule.NotifyRepeatStep == 0 {
logger.Debugf("rule_eval:%s event:%+v repeat is zero nothing to do", p.Key(), event)
// 说明不想重复通知那就直接返回了nothing to do
// do not need to send alert again
message = "stalled, rule.notify_repeat_step is 0, no need to repeat notify"
return
}
@@ -515,21 +488,26 @@ func (p *Processor) fireEvent(event *models.AlertCurEvent) {
if cachedRule.NotifyMaxNumber == 0 {
// 最大可以发送次数如果是0表示不想限制最大发送次数一直发即可
event.NotifyCurNumber = fired.NotifyCurNumber + 1
message = fmt.Sprintf("fired, notify_repeat_step_matched(%d >= %d + %d * 60) notify_max_number_ignore(#%d / %d)", event.LastEvalTime, fired.LastSentTime, cachedRule.NotifyRepeatStep, event.NotifyCurNumber, cachedRule.NotifyMaxNumber)
p.pushEventToQueue(event)
} else {
// 有最大发送次数的限制,就要看已经发了几次了,是否达到了最大发送次数
if fired.NotifyCurNumber >= cachedRule.NotifyMaxNumber {
logger.Debugf("rule_eval:%s event:%+v reach max number", p.Key(), event)
message = fmt.Sprintf("stalled, notify_repeat_step_matched(%d >= %d + %d * 60) notify_max_number_not_matched(#%d / %d)", event.LastEvalTime, fired.LastSentTime, cachedRule.NotifyRepeatStep, fired.NotifyCurNumber, cachedRule.NotifyMaxNumber)
return
} else {
event.NotifyCurNumber = fired.NotifyCurNumber + 1
message = fmt.Sprintf("fired, notify_repeat_step_matched(%d >= %d + %d * 60) notify_max_number_matched(#%d / %d)", event.LastEvalTime, fired.LastSentTime, cachedRule.NotifyRepeatStep, event.NotifyCurNumber, cachedRule.NotifyMaxNumber)
p.pushEventToQueue(event)
}
}
} else {
message = fmt.Sprintf("stalled, notify_repeat_step_not_matched(%d < %d + %d * 60)", event.LastEvalTime, fired.LastSentTime, cachedRule.NotifyRepeatStep)
}
} else {
event.NotifyCurNumber = 1
event.FirstTriggerTime = event.TriggerTime
message = fmt.Sprintf("fired, first_trigger_time: %d", event.FirstTriggerTime)
p.HandleFireEventHook(event)
p.pushEventToQueue(event)
}
@@ -567,7 +545,7 @@ func (p *Processor) RecoverAlertCurEventFromDb() {
if alertRule == nil {
continue
}
event.NotifyRuleIDs = alertRule.NotifyRuleIds
event.NotifyRuleIds = alertRule.NotifyRuleIds
if event.Cate == models.HOST {
target, exists := p.TargetCache.Get(event.TargetIdent)
@@ -607,7 +585,9 @@ func (p *Processor) fillTags(anomalyPoint models.AnomalyPoint) {
}
// handle rule tags
for _, tag := range p.rule.AppendTagsJSON {
tags := p.rule.AppendTagsJSON
tags = append(tags, "rulename="+p.rule.Name)
for _, tag := range tags {
arr := strings.SplitN(tag, "=", 2)
var defs = []string{
@@ -633,27 +613,25 @@ func (p *Processor) fillTags(anomalyPoint models.AnomalyPoint) {
tagsMap[arr[0]] = body.String()
}
tagsMap["rulename"] = p.rule.Name
p.tagsMap = tagsMap
// handle tagsArr
p.tagsArr = labelMapToArr(tagsMap)
}
func (p *Processor) mayHandleIdent() {
func (p *Processor) mayHandleIdent(event *models.AlertCurEvent) {
// handle ident
if ident, has := p.tagsMap["ident"]; has {
if ident, has := event.TagsMap["ident"]; has {
if target, exists := p.TargetCache.Get(ident); exists {
p.target = target.Ident
p.targetNote = target.Note
event.TargetIdent = target.Ident
event.TargetNote = target.Note
} else {
p.target = ident
p.targetNote = ""
event.TargetIdent = ident
event.TargetNote = ""
}
} else {
p.target = ""
p.targetNote = ""
event.TargetIdent = ""
event.TargetNote = ""
}
}

View File

@@ -39,7 +39,7 @@ func NewRecordRuleContext(rule *models.RecordingRule, datasourceId int64, promCl
rule.CronPattern = fmt.Sprintf("@every %ds", rule.PromEvalInterval)
}
rrc.scheduler = cron.New(cron.WithSeconds())
rrc.scheduler = cron.New(cron.WithSeconds(), cron.WithChain(cron.SkipIfStillRunning(cron.DefaultLogger)))
_, err := rrc.scheduler.AddFunc(rule.CronPattern, func() {
rrc.Eval()
})
@@ -56,12 +56,13 @@ func (rrc *RecordRuleContext) Key() string {
}
func (rrc *RecordRuleContext) Hash() string {
return str.MD5(fmt.Sprintf("%d_%s_%s_%d_%s",
return str.MD5(fmt.Sprintf("%d_%s_%s_%d_%s_%s",
rrc.rule.Id,
rrc.rule.CronPattern,
rrc.rule.PromQl,
rrc.datasourceId,
rrc.rule.AppendTags,
rrc.rule.Name,
))
}

View File

@@ -1,6 +1,7 @@
package sender
import (
"fmt"
"html/template"
"net/url"
"strings"
@@ -134,7 +135,9 @@ func (c *DefaultCallBacker) CallBack(ctx CallBackContext) {
func doSendAndRecord(ctx *ctx.Context, url, token string, body interface{}, channel string,
stats *astats.Stats, events []*models.AlertCurEvent) {
start := time.Now()
res, err := doSend(url, body, channel, stats)
res = fmt.Sprintf("duration: %d ms %s", time.Since(start).Milliseconds(), res)
NotifyRecord(ctx, events, 0, channel, token, res, err)
}
@@ -166,7 +169,9 @@ func NotifyRecord(ctx *ctx.Context, evts []*models.AlertCurEvent, notifyRuleID i
func doSend(url string, body interface{}, channel string, stats *astats.Stats) (string, error) {
stats.AlertNotifyTotal.WithLabelValues(channel).Inc()
start := time.Now()
res, code, err := poster.PostJSON(url, time.Second*5, body, 3)
res = []byte(fmt.Sprintf("duration: %d ms %s", time.Since(start).Milliseconds(), res))
if err != nil {
logger.Errorf("%s_sender: result=fail url=%s code=%d error=%v req:%v response=%s", channel, url, code, err, body, string(res))
stats.AlertNotifyErrorTotal.WithLabelValues(channel).Inc()

View File

@@ -141,7 +141,7 @@ func updateSmtp(ctx *ctx.Context, ncc *memsto.NotifyConfigCacheType) {
func startEmailSender(ctx *ctx.Context, smtp aconf.SMTPConfig) {
conf := smtp
if conf.Host == "" || conf.Port == 0 {
logger.Warning("SMTP configurations invalid")
logger.Debug("SMTP configurations invalid")
<-mailQuit
return
}

View File

@@ -30,12 +30,14 @@ type IbexCallBacker struct {
func (c *IbexCallBacker) CallBack(ctx CallBackContext) {
if len(ctx.CallBackURL) == 0 || len(ctx.Events) == 0 {
logger.Warningf("event_callback_ibex: url or events is empty, url: %s, events: %+v", ctx.CallBackURL, ctx.Events)
return
}
event := ctx.Events[0]
if event.IsRecovered {
logger.Infof("event_callback_ibex: event is recovered, event: %+v", event)
return
}
@@ -43,8 +45,9 @@ func (c *IbexCallBacker) CallBack(ctx CallBackContext) {
}
func (c *IbexCallBacker) handleIbex(ctx *ctx.Context, url string, event *models.AlertCurEvent) {
logger.Infof("event_callback_ibex: url: %s, event: %+v", url, event)
if imodels.DB() == nil && ctx.IsCenter {
logger.Warning("event_callback_ibex: db is nil")
logger.Warningf("event_callback_ibex: db is nil, event: %+v", event)
return
}
@@ -63,17 +66,23 @@ func (c *IbexCallBacker) handleIbex(ctx *ctx.Context, url string, event *models.
id, err := strconv.ParseInt(idstr, 10, 64)
if err != nil {
logger.Errorf("event_callback_ibex: failed to parse url: %s", url)
logger.Errorf("event_callback_ibex: failed to parse url: %s event: %+v", url, event)
return
}
if host == "" {
// 用户在callback url中没有传入host就从event中解析
host = event.TargetIdent
if host == "" {
if ident, has := event.TagsMap["ident"]; has {
host = ident
}
}
}
if host == "" {
logger.Error("event_callback_ibex: failed to get host")
logger.Errorf("event_callback_ibex: failed to get host, id: %d, event: %+v", id, event)
return
}
@@ -83,21 +92,23 @@ func (c *IbexCallBacker) handleIbex(ctx *ctx.Context, url string, event *models.
func CallIbex(ctx *ctx.Context, id int64, host string,
taskTplCache *memsto.TaskTplCache, targetCache *memsto.TargetCacheType,
userCache *memsto.UserCacheType, event *models.AlertCurEvent) {
logger.Infof("event_callback_ibex: id: %d, host: %s, event: %+v", id, host, event)
tpl := taskTplCache.Get(id)
if tpl == nil {
logger.Errorf("event_callback_ibex: no such tpl(%d)", id)
logger.Errorf("event_callback_ibex: no such tpl(%d), event: %+v", id, event)
return
}
// check perm
// tpl.GroupId - host - account 三元组校验权限
can, err := canDoIbex(tpl.UpdateBy, tpl, host, targetCache, userCache)
if err != nil {
logger.Errorf("event_callback_ibex: check perm fail: %v", err)
logger.Errorf("event_callback_ibex: check perm fail: %v, event: %+v", err, event)
return
}
if !can {
logger.Errorf("event_callback_ibex: user(%s) no permission", tpl.UpdateBy)
logger.Errorf("event_callback_ibex: user(%s) no permission, event: %+v", tpl.UpdateBy, event)
return
}
@@ -122,7 +133,7 @@ func CallIbex(ctx *ctx.Context, id int64, host string,
tags, err := json.Marshal(tagsMap)
if err != nil {
logger.Errorf("event_callback_ibex: failed to marshal tags to json: %v", tagsMap)
logger.Errorf("event_callback_ibex: failed to marshal tags to json: %v, event: %+v", tagsMap, event)
return
}
@@ -145,7 +156,7 @@ func CallIbex(ctx *ctx.Context, id int64, host string,
id, err = TaskAdd(in, tpl.UpdateBy, ctx.IsCenter)
if err != nil {
logger.Errorf("event_callback_ibex: call ibex fail: %v", err)
logger.Errorf("event_callback_ibex: call ibex fail: %v, event: %+v", err, event)
return
}
@@ -167,7 +178,7 @@ func CallIbex(ctx *ctx.Context, id int64, host string,
}
if err = record.Add(ctx); err != nil {
logger.Errorf("event_callback_ibex: persist task_record fail: %v", err)
logger.Errorf("event_callback_ibex: persist task_record fail: %v, event: %+v", err, event)
}
}
@@ -187,7 +198,7 @@ func canDoIbex(username string, tpl *models.TaskTpl, host string, targetCache *m
func TaskAdd(f models.TaskForm, authUser string, isCenter bool) (int64, error) {
if storage.Cache == nil {
logger.Warning("event_callback_ibex: redis cache is nil")
logger.Warningf("event_callback_ibex: redis cache is nil, task: %+v", f)
return 0, fmt.Errorf("redis cache is nil")
}

View File

@@ -79,6 +79,7 @@ func alertingCallScript(ctx *ctx.Context, stdinBytes []byte, notifyScript models
cmd.Stdout = &buf
cmd.Stderr = &buf
start := time.Now()
err := startCmd(cmd)
if err != nil {
logger.Errorf("event_script_notify_fail: run cmd err: %v", err)
@@ -88,6 +89,7 @@ func alertingCallScript(ctx *ctx.Context, stdinBytes []byte, notifyScript models
err, isTimeout := sys.WrapTimeout(cmd, time.Duration(config.Timeout)*time.Second)
res := buf.String()
res = fmt.Sprintf("duration: %d ms %s", time.Since(start).Milliseconds(), res)
// 截断超出长度的输出
if len(res) > 512 {

View File

@@ -99,7 +99,9 @@ func SingleSendWebhooks(ctx *ctx.Context, webhooks map[string]*models.Webhook, e
for _, conf := range webhooks {
retryCount := 0
for retryCount < 3 {
start := time.Now()
needRetry, res, err := sendWebhook(conf, event, stats)
res = fmt.Sprintf("duration: %d ms %s", time.Since(start).Milliseconds(), res)
NotifyRecord(ctx, []*models.AlertCurEvent{event}, 0, "webhook", conf.Url, res, err)
if !needRetry {
break
@@ -169,7 +171,9 @@ func StartConsumer(ctx *ctx.Context, queue *WebhookQueue, popSize int, webhook *
retryCount := 0
for retryCount < webhook.RetryCount {
start := time.Now()
needRetry, res, err := sendWebhook(webhook, events, stats)
res = fmt.Sprintf("duration: %d ms %s", time.Since(start).Milliseconds(), res)
go NotifyRecord(ctx, events, 0, "webhook", webhook.Url, res, err)
if !needRetry {
break

View File

@@ -85,254 +85,221 @@ func MergeOperationConf() error {
const (
builtInOps = `
ops:
- name: dashboards
cname: Dashboards
ops:
- name: "/dashboards"
cname: View Dashboards
- name: "/dashboards/add"
cname: Add Dashboard
- name: "/dashboards/put"
cname: Modify Dashboard
- name: "/dashboards/del"
cname: Delete Dashboard
- name: "/embedded-dashboards/put"
cname: Modify Embedded Dashboard
- name: "/embedded-dashboards"
cname: View Embedded Dashboard
- name: "/public-dashboards"
cname: View Public Dashboard
- name: metric
cname: Time Series Metrics
ops:
- name: "/metric/explorer"
cname: View Metric Data
- name: "/object/explorer"
cname: View Object Data
- name: builtin-metrics
cname: Metric Views
ops:
- name: "/metrics-built-in"
cname: View Built-in Metrics
- name: "/builtin-metrics/add"
cname: Add Built-in Metric
- name: "/builtin-metrics/put"
cname: Modify Built-in Metric
- name: "/builtin-metrics/del"
cname: Delete Built-in Metric
- name: recording-rules
cname: Recording Rule Management
ops:
- name: "/recording-rules"
cname: View Recording Rules
- name: "/recording-rules/add"
cname: Add Recording Rule
- name: "/recording-rules/put"
cname: Modify Recording Rule
- name: "/recording-rules/del"
cname: Delete Recording Rule
- name: log
cname: Log Analysis
ops:
- name: "/log/explorer"
cname: View Logs
- name: "/log/index-patterns"
cname: View Index Patterns
- name: alert
cname: Alert Rules
ops:
- name: "/alert-rules"
cname: View Alert Rules
- name: "/alert-rules/add"
cname: Add Alert Rule
- name: "/alert-rules/put"
cname: Modify Alert Rule
- name: "/alert-rules/del"
cname: Delete Alert Rule
- name: alert-mutes
cname: Alert Silence Management
ops:
- name: "/alert-mutes"
cname: View Alert Silences
- name: "/alert-mutes/add"
cname: Add Alert Silence
- name: "/alert-mutes/put"
cname: Modify Alert Silence
- name: "/alert-mutes/del"
cname: Delete Alert Silence
- name: alert-subscribes
cname: Alert Subscription Management
ops:
- name: "/alert-subscribes"
cname: View Alert Subscriptions
- name: "/alert-subscribes/add"
cname: Add Alert Subscription
- name: "/alert-subscribes/put"
cname: Modify Alert Subscription
- name: "/alert-subscribes/del"
cname: Delete Alert Subscription
- name: alert-events
cname: Alert Event Management
ops:
- name: "/alert-cur-events"
cname: View Current Alerts
- name: "/alert-cur-events/del"
cname: Delete Current Alert
- name: "/alert-his-events"
cname: View Historical Alerts
- name: notification
cname: Alert Notification
ops:
- name: "/help/notification-settings"
cname: View Notification Settings
- name: "/help/notification-tpls"
cname: View Notification Templates
- name: job
cname: Task Management
ops:
- name: "/job-tpls"
cname: View Task Templates
- name: "/job-tpls/add"
cname: Add Task Template
- name: "/job-tpls/put"
cname: Modify Task Template
- name: "/job-tpls/del"
cname: Delete Task Template
- name: "/job-tasks"
cname: View Task Instances
- name: "/job-tasks/add"
cname: Add Task Instance
- name: "/job-tasks/put"
cname: Modify Task Instance
- name: targets
- name: Infrastructure
cname: Infrastructure
ops:
- name: "/targets"
cname: View Objects
- name: "/targets/add"
cname: Add Object
- name: "/targets/put"
cname: Modify Object
- name: "/targets/del"
cname: Delete Object
- name: "/targets/bind"
cname: Bind Object
- name: /targets
cname: Host - View
- name: /targets/put
cname: Host - Modify
- name: /targets/del
cname: Host - Delete
- name: /targets/bind
cname: Host - Bind Uncategorized
- name: user
cname: User Management
- name: Explorer
cname: Explorer
ops:
- name: "/users"
cname: View User List
- name: "/user-groups"
cname: View User Groups
- name: "/user-groups/add"
cname: Add User Group
- name: "/user-groups/put"
cname: Modify User Group
- name: "/user-groups/del"
cname: Delete User Group
- name: /metric/explorer
cname: Metrics Explorer
- name: /object/explorer
cname: Quick View
- name: /metrics-built-in
cname: Built-in Metric - View
- name: /builtin-metrics/add
cname: Built-in Metric - Add
- name: /builtin-metrics/put
cname: Built-in Metric - Modify
- name: /builtin-metrics/del
cname: Built-in Metric - Delete
- name: /recording-rules
cname: Recording Rule - View
- name: /recording-rules/add
cname: Recording Rule - Add
- name: /recording-rules/put
cname: Recording Rule - Modify
- name: /recording-rules/del
cname: Recording Rule - Delete
- name: /log/explorer
cname: Logs Explorer
- name: /log/index-patterns # 前端有个管理索引模式的页面,所以需要一个权限点来控制,后面应该改成侧拉板
cname: Index Pattern - View
- name: /log/index-patterns/add
cname: Index Pattern - Add
- name: /log/index-patterns/put
cname: Index Pattern - Modify
- name: /log/index-patterns/del
cname: Index Pattern - Delete
- name: /dashboards
cname: Dashboard - View
- name: /dashboards/add
cname: Dashboard - Add
- name: /dashboards/put
cname: Dashboard - Modify
- name: /dashboards/del
cname: Dashboard - Delete
- name: /public-dashboards
cname: Dashboard - View Public
- name: busi-groups
cname: Business Group Management
- name: alerting
cname: Alerting
ops:
- name: "/busi-groups"
cname: View Business Groups
- name: "/busi-groups/add"
cname: Add Business Group
- name: "/busi-groups/put"
cname: Modify Business Group
- name: "/busi-groups/del"
cname: Delete Business Group
- name: /alert-rules
cname: Alerting Rule - View
- name: /alert-rules/add
cname: Alerting Rule - Add
- name: /alert-rules/put
cname: Alerting Rule - Modify
- name: /alert-rules/del
cname: Alerting Rule - Delete
- name: /alert-mutes
cname: Mutting Rule - View
- name: /alert-mutes/add
cname: Mutting Rule - Add
- name: /alert-mutes/put
cname: Mutting Rule - Modify
- name: /alert-mutes/del
cname: Mutting Rule - Delete
- name: /alert-subscribes
cname: Subscribing Rule - View
- name: /alert-subscribes/add
cname: Subscribing Rule - Add
- name: /alert-subscribes/put
cname: Subscribing Rule - Modify
- name: /alert-subscribes/del
cname: Subscribing Rule - Delete
- name: /job-tpls
cname: Self-healing-Script - View
- name: /job-tpls/add
cname: Self-healing-Script - Add
- name: /job-tpls/put
cname: Self-healing-Script - Modify
- name: /job-tpls/del
cname: Self-healing-Script - Delete
- name: /job-tasks
cname: Self-healing-Job - View
- name: /job-tasks/add
cname: Self-healing-Job - Add
- name: /job-tasks/put
cname: Self-healing-Job - Modify
- name: /alert-cur-events
cname: Active Event - View
- name: /alert-cur-events/del
cname: Active Event - Delete
- name: /alert-his-events
cname: Historical Event - View
- name: permissions
cname: Permission Management
- name: Notification
cname: Notification
ops:
- name: "/permissions"
cname: View Permission Settings
- name: contacts
cname: User Contact Management
ops:
- name: "/contacts"
cname: User Contact Management
- name: /notification-rules
cname: Notification Rule - View
- name: /notification-rules/add
cname: Notification Rule - Add
- name: /notification-rules/put
cname: Notification Rule - Modify
- name: /notification-rules/del
cname: Notification Rule - Delete
- name: /notification-channels
cname: Media Type - View
- name: /notification-channels/add
cname: Media Type - Add
- name: /notification-channels/put
cname: Media Type - Modify
- name: /notification-channels/del
cname: Media Type - Delete
- name: /notification-templates
cname: Message Template - View
- name: /notification-templates/add
cname: Message Template - Add
- name: /notification-templates/put
cname: Message Template - Modify
- name: /notification-templates/del
cname: Message Template - Delete
- name: /event-pipelines
cname: Event Pipeline - View
- name: /event-pipelines/add
cname: Event Pipeline - Add
- name: /event-pipelines/put
cname: Event Pipeline - Modify
- name: /event-pipelines/del
cname: Event Pipeline - Delete
- name: /help/notification-settings # 用于控制老版本的通知设置菜单是否展示
cname: Notification Settings - View
- name: /help/notification-tpls # 用于控制老版本的通知模板菜单是否展示
cname: Notification Templates - View
- name: built-in-components
cname: Template Center
- name: Integrations
cname: Integrations
ops:
- name: "/built-in-components"
cname: View Built-in Components
- name: "/built-in-components/add"
cname: Add Built-in Component
- name: "/built-in-components/put"
cname: Modify Built-in Component
- name: "/built-in-components/del"
cname: Delete Built-in Component
- name: /datasources # 用于控制能否看到数据源列表页面的菜单。只有 Admin 才能修改、删除数据源
cname: Data Source - View
- name: /components
cname: Component - View
- name: /components/add
cname: Component - Add
- name: /components/put
cname: Component - Modify
- name: /components/del
cname: Component - Delete
- name: /embedded-products
cname: Embedded Product - View
- name: /embedded-product/add
cname: Embedded Product - Add
- name: /embedded-product/put
cname: Embedded Product - Modify
- name: /embedded-product/delete
cname: Embedded Product - Delete
- name: datasource
cname: Data Source Management
- name: Organization
cname: Organization
ops:
- name: "/help/source"
cname: View Data Source Configuration
- name: /users
cname: User - View
- name: /users/add
cname: User - Add
- name: /users/put
cname: User - Modify
- name: /users/del
cname: User - Delete
- name: /user-groups
cname: Team - View
- name: /user-groups/add
cname: Team - Add
- name: /user-groups/put
cname: Team - Modify
- name: /user-groups/del
cname: Team - Delete
- name: /busi-groups
cname: Business Group - View
- name: /busi-groups/add
cname: Business Group - Add
- name: /busi-groups/put
cname: Business Group - Modify
- name: /busi-groups/del
cname: Business Group - Delete
- name: /roles
cname: Role - View
- name: /roles/add
cname: Role - Add
- name: /roles/put
cname: Role - Modify
- name: /roles/del
cname: Role - Delete
- name: system
cname: System Information
- name: System Settings
cname: System Settings
ops:
- name: "/help/variable-configs"
cname: View Variable Configuration
- name: "/help/version"
cname: View Version Information
- name: "/help/servers"
cname: View Server Information
- name: "/help/sso"
cname: View SSO Configuration
- name: "/site-settings"
- name: /system/site-settings # 仅用于控制能否展示菜单,只有 Admin 才能修改、删除
cname: View Site Settings
- name: /system/variable-settings
cname: View Variable Settings
- name: /system/sso-settings
cname: View SSO Settings
- name: /system/alerting-engines
cname: View Alerting Engines
- name: /system/version
cname: View Product Version
- name: message-templates
cname: Message Templates
ops:
- name: "/notification-templates"
cname: View Message Templates
- name: "/notification-templates/add"
cname: Add Message Templates
- name: "/notification-templates/put"
cname: Modify Message Templates
- name: "/notification-templates/del"
cname: Delete Message Templates
- name: notify-rules
cname: Notify Rules
ops:
- name: "/notification-rules"
cname: View Notify Rules
- name: "/notification-rules/add"
cname: Add Notify Rules
- name: "/notification-rules/put"
cname: Modify Notify Rules
- name: "/notification-rules/del"
cname: Delete Notify Rules
- name: notify-channels
cname: Notify Channels
ops:
- name: "/notification-channels"
cname: View Notify Channels
- name: "/notification-channels/add"
cname: Add Notify Channels
- name: "/notification-channels/put"
cname: Modify Notify Channels
- name: "/notification-channels/del"
cname: Delete Notify Channels
`
)

View File

@@ -25,4 +25,34 @@ var Plugins = []Plugin{
Type: "tdengine",
TypeName: "TDengine",
},
{
Id: 5,
Category: "logging",
Type: "ck",
TypeName: "ClickHouse",
},
{
Id: 6,
Category: "timeseries",
Type: "mysql",
TypeName: "MySQL",
},
{
Id: 7,
Category: "timeseries",
Type: "pgsql",
TypeName: "PostgreSQL",
},
{
Id: 8,
Category: "logging",
Type: "doris",
TypeName: "Doris",
},
{
Id: 9,
Category: "logging",
Type: "opensearch",
TypeName: "OpenSearch",
},
}

View File

@@ -13,7 +13,6 @@ import (
alertrt "github.com/ccfos/nightingale/v6/alert/router"
"github.com/ccfos/nightingale/v6/center/cconf"
"github.com/ccfos/nightingale/v6/center/cconf/rsa"
"github.com/ccfos/nightingale/v6/center/cstats"
"github.com/ccfos/nightingale/v6/center/integration"
"github.com/ccfos/nightingale/v6/center/metas"
centerrt "github.com/ccfos/nightingale/v6/center/router"
@@ -60,7 +59,6 @@ func Initialize(configDir string, cryptoKey string) (func(), error) {
}
i18nx.Init(configDir)
cstats.Init()
flashduty.Init(config.Center.FlashDuty)
db, err := storage.New(config.DB)
@@ -86,7 +84,7 @@ func Initialize(configDir string, cryptoKey string) (func(), error) {
}
metas := metas.New(redis)
idents := idents.New(ctx, redis)
idents := idents.New(ctx, redis, config.Pushgw)
syncStats := memsto.NewSyncStats()
alertStats := astats.NewSyncStats()
@@ -94,6 +92,9 @@ func Initialize(configDir string, cryptoKey string) (func(), error) {
if config.Center.MigrateBusiGroupLabel || models.CanMigrateBg(ctx) {
models.MigrateBg(ctx, config.Pushgw.BusiGroupLabelKey)
}
if models.CanMigrateEP(ctx) {
models.MigrateEP(ctx)
}
configCache := memsto.NewConfigCache(ctx, syncStats, config.HTTP.RSA.RSAPrivateKey, config.HTTP.RSA.RSAPassWord)
busiGroupCache := memsto.NewBusiGroupCache(ctx, syncStats)

View File

@@ -6,40 +6,49 @@ import (
"github.com/prometheus/client_golang/prometheus"
)
const Service = "n9e-center"
const (
namespace = "n9e"
subsystem = "center"
)
var (
labels = []string{"service", "code", "path", "method"}
uptime = prometheus.NewCounterVec(
uptime = prometheus.NewCounter(
prometheus.CounterOpts{
Name: "uptime",
Help: "HTTP service uptime.",
}, []string{"service"},
)
RequestCounter = prometheus.NewCounterVec(
prometheus.CounterOpts{
Name: "http_request_count_total",
Help: "Total number of HTTP requests made.",
}, labels,
Namespace: namespace,
Subsystem: subsystem,
Name: "uptime",
Help: "HTTP service uptime.",
},
)
RequestDuration = prometheus.NewHistogramVec(
prometheus.HistogramOpts{
Buckets: []float64{.01, .1, 1, 10},
Name: "http_request_duration_seconds",
Help: "HTTP request latencies in seconds.",
}, labels,
Namespace: namespace,
Subsystem: subsystem,
Buckets: prometheus.DefBuckets,
Name: "http_request_duration_seconds",
Help: "HTTP request latencies in seconds.",
}, []string{"code", "path", "method"},
)
RedisOperationLatency = prometheus.NewHistogramVec(
prometheus.HistogramOpts{
Namespace: namespace,
Subsystem: subsystem,
Name: "redis_operation_latency_seconds",
Help: "Histogram of latencies for Redis operations",
Buckets: []float64{.005, .01, .025, .05, .1, .25, .5, 1, 2.5, 5},
},
[]string{"operation", "status"},
)
)
func Init() {
func init() {
// Register the summary and the histogram with Prometheus's default registry.
prometheus.MustRegister(
uptime,
RequestCounter,
RequestDuration,
RedisOperationLatency,
)
go recordUptime()
@@ -48,6 +57,6 @@ func Init() {
// recordUptime increases service uptime per second.
func recordUptime() {
for range time.Tick(time.Second) {
uptime.WithLabelValues(Service).Inc()
uptime.Inc()
}
}

View File

@@ -3,11 +3,15 @@ package integration
import (
"encoding/json"
"path"
"sort"
"strings"
"time"
"github.com/ccfos/nightingale/v6/models"
"github.com/ccfos/nightingale/v6/pkg/ctx"
"github.com/pkg/errors"
"github.com/toolkits/pkg/container/set"
"github.com/toolkits/pkg/file"
"github.com/toolkits/pkg/logger"
"github.com/toolkits/pkg/runner"
@@ -15,7 +19,18 @@ import (
const SYSTEM = "system"
var BuiltinPayloadInFile *BuiltinPayloadInFileType
type BuiltinPayloadInFileType struct {
Data map[uint64]map[string]map[string][]*models.BuiltinPayload // map[componet_id]map[type]map[cate][]*models.BuiltinPayload
IndexData map[int64]*models.BuiltinPayload // map[uuid]payload
BuiltinMetrics map[string]*models.BuiltinMetric
}
func Init(ctx *ctx.Context, builtinIntegrationsDir string) {
BuiltinPayloadInFile = NewBuiltinPayloadInFileType()
err := models.InitBuiltinPayloads(ctx)
if err != nil {
logger.Warning("init old builtinPayloads fail ", err)
@@ -146,11 +161,10 @@ func Init(ctx *ctx.Context, builtinIntegrationsDir string) {
}
newAlerts := []models.AlertRule{}
writeAlertFileFlag := false
for _, alert := range alerts {
if alert.UUID == 0 {
writeAlertFileFlag = true
alert.UUID = time.Now().UnixNano()
time.Sleep(time.Microsecond)
alert.UUID = time.Now().UnixMicro()
}
newAlerts = append(newAlerts, alert)
@@ -169,47 +183,13 @@ func Init(ctx *ctx.Context, builtinIntegrationsDir string) {
Tags: alert.AppendTags,
Content: string(content),
UUID: alert.UUID,
ID: alert.UUID,
CreatedBy: SYSTEM,
UpdatedBy: SYSTEM,
}
BuiltinPayloadInFile.AddBuiltinPayload(&builtinAlert)
old, err := models.BuiltinPayloadGet(ctx, "uuid = ?", alert.UUID)
if err != nil {
logger.Warning("get builtin alert fail ", builtinAlert, err)
continue
}
if old == nil {
err := builtinAlert.Add(ctx, SYSTEM)
if err != nil {
logger.Warning("add builtin alert fail ", builtinAlert, err)
}
continue
}
if old.UpdatedBy == SYSTEM {
old.ComponentID = component.ID
old.Content = string(content)
old.Name = alert.Name
old.Tags = alert.AppendTags
err = models.DB(ctx).Model(old).Select("*").Updates(old).Error
if err != nil {
logger.Warningf("update builtin alert:%+v fail %v", builtinAlert, err)
}
}
}
if writeAlertFileFlag {
bs, err = json.MarshalIndent(newAlerts, "", " ")
if err != nil {
logger.Warning("marshal builtin alerts fail ", newAlerts, err)
continue
}
_, err = file.WriteBytes(fp, bs)
if err != nil {
logger.Warning("write builtin alerts file fail ", f, err)
}
}
}
}
@@ -261,32 +241,11 @@ func Init(ctx *ctx.Context, builtinIntegrationsDir string) {
Tags: dashboard.Tags,
Content: string(content),
UUID: dashboard.UUID,
ID: dashboard.UUID,
CreatedBy: SYSTEM,
UpdatedBy: SYSTEM,
}
old, err := models.BuiltinPayloadGet(ctx, "uuid = ?", dashboard.UUID)
if err != nil {
logger.Warning("get builtin alert fail ", builtinDashboard, err)
continue
}
if old == nil {
err := builtinDashboard.Add(ctx, SYSTEM)
if err != nil {
logger.Warning("add builtin alert fail ", builtinDashboard, err)
}
continue
}
if old.UpdatedBy == SYSTEM {
old.ComponentID = component.ID
old.Content = string(content)
old.Name = dashboard.Name
old.Tags = dashboard.Tags
err = models.DB(ctx).Model(old).Select("*").Updates(old).Error
if err != nil {
logger.Warningf("update builtin alert:%+v fail %v", builtinDashboard, err)
}
}
BuiltinPayloadInFile.AddBuiltinPayload(&builtinDashboard)
}
} else if err != nil {
logger.Warningf("read builtin component dash dir fail %s %v", component.Ident, err)
@@ -304,64 +263,23 @@ func Init(ctx *ctx.Context, builtinIntegrationsDir string) {
}
metrics := []models.BuiltinMetric{}
newMetrics := []models.BuiltinMetric{}
err = json.Unmarshal(bs, &metrics)
if err != nil {
logger.Warning("parse builtin component metrics file fail", f, err)
continue
}
writeMetricFileFlag := false
for _, metric := range metrics {
if metric.UUID == 0 {
writeMetricFileFlag = true
metric.UUID = time.Now().UnixNano()
time.Sleep(time.Microsecond)
metric.UUID = time.Now().UnixMicro()
}
newMetrics = append(newMetrics, metric)
metric.ID = metric.UUID
metric.CreatedBy = SYSTEM
metric.UpdatedBy = SYSTEM
old, err := models.BuiltinMetricGet(ctx, "uuid = ?", metric.UUID)
if err != nil {
logger.Warning("get builtin metrics fail ", metric, err)
continue
}
if old == nil {
err := metric.Add(ctx, SYSTEM)
if err != nil {
logger.Warning("add builtin metrics fail ", metric, err)
}
continue
}
if old.UpdatedBy == SYSTEM {
old.Collector = metric.Collector
old.Typ = metric.Typ
old.Name = metric.Name
old.Unit = metric.Unit
old.Note = metric.Note
old.Lang = metric.Lang
old.Expression = metric.Expression
err = models.DB(ctx).Model(old).Select("*").Updates(old).Error
if err != nil {
logger.Warningf("update builtin metric:%+v fail %v", metric, err)
}
}
BuiltinPayloadInFile.BuiltinMetrics[metric.Expression] = &metric
}
if writeMetricFileFlag {
bs, err = json.MarshalIndent(newMetrics, "", " ")
if err != nil {
logger.Warning("marshal builtin metrics fail ", newMetrics, err)
continue
}
_, err = file.WriteBytes(fp, bs)
if err != nil {
logger.Warning("write builtin metrics file fail ", f, err)
}
}
}
} else if err != nil {
logger.Warningf("read builtin component metrics dir fail %s %v", component.Ident, err)
@@ -387,3 +305,322 @@ type BuiltinBoard struct {
Hide int `json:"hide"` // 0: false, 1: true
UUID int64 `json:"uuid"`
}
func NewBuiltinPayloadInFileType() *BuiltinPayloadInFileType {
return &BuiltinPayloadInFileType{
Data: make(map[uint64]map[string]map[string][]*models.BuiltinPayload),
IndexData: make(map[int64]*models.BuiltinPayload),
BuiltinMetrics: make(map[string]*models.BuiltinMetric),
}
}
func (b *BuiltinPayloadInFileType) AddBuiltinPayload(bp *models.BuiltinPayload) {
if _, exists := b.Data[bp.ComponentID]; !exists {
b.Data[bp.ComponentID] = make(map[string]map[string][]*models.BuiltinPayload)
}
bpInType := b.Data[bp.ComponentID]
if _, exists := bpInType[bp.Type]; !exists {
bpInType[bp.Type] = make(map[string][]*models.BuiltinPayload)
}
bpInCate := bpInType[bp.Type]
if _, exists := bpInCate[bp.Cate]; !exists {
bpInCate[bp.Cate] = make([]*models.BuiltinPayload, 0)
}
bpInCate[bp.Cate] = append(bpInCate[bp.Cate], bp)
b.IndexData[bp.UUID] = bp
}
func (b *BuiltinPayloadInFileType) GetBuiltinPayload(typ, cate, query string, componentId uint64) ([]*models.BuiltinPayload, error) {
var result []*models.BuiltinPayload
source := b.Data[componentId]
if source == nil {
return nil, nil
}
typeMap, exists := source[typ]
if !exists {
return nil, nil
}
if cate != "" {
payloads, exists := typeMap[cate]
if !exists {
return nil, nil
}
result = append(result, filterByQuery(payloads, query)...)
} else {
for _, payloads := range typeMap {
result = append(result, filterByQuery(payloads, query)...)
}
}
if len(result) > 0 {
sort.Slice(result, func(i, j int) bool {
return result[i].Name < result[j].Name
})
}
return result, nil
}
func (b *BuiltinPayloadInFileType) GetBuiltinPayloadCates(typ string, componentId uint64) ([]string, error) {
var result []string
source := b.Data[componentId]
if source == nil {
return result, nil
}
typeData := source[typ]
if typeData == nil {
return result, nil
}
for cate := range typeData {
result = append(result, cate)
}
sort.Strings(result)
return result, nil
}
func filterByQuery(payloads []*models.BuiltinPayload, query string) []*models.BuiltinPayload {
if query == "" {
return payloads
}
queryLower := strings.ToLower(query)
var filtered []*models.BuiltinPayload
for _, p := range payloads {
if strings.Contains(strings.ToLower(p.Name), queryLower) || strings.Contains(strings.ToLower(p.Tags), queryLower) {
filtered = append(filtered, p)
}
}
return filtered
}
func (b *BuiltinPayloadInFileType) BuiltinMetricGets(metricsInDB []*models.BuiltinMetric, lang, collector, typ, query, unit string, limit, offset int) ([]*models.BuiltinMetric, int, error) {
var filteredMetrics []*models.BuiltinMetric
expressionSet := set.NewStringSet()
builtinMetricsByDB := convertBuiltinMetricByDB(metricsInDB)
builtinMetricsMap := make(map[string]*models.BuiltinMetric)
for expression, metric := range builtinMetricsByDB {
builtinMetricsMap[expression] = metric
}
for expression, metric := range b.BuiltinMetrics {
builtinMetricsMap[expression] = metric
}
for _, metric := range builtinMetricsMap {
if !applyFilter(metric, collector, typ, query, unit) {
continue
}
// Skip if expression is already in db cache
// NOTE: 忽略重复的expression特别的在旧版本中用户可能已经创建了重复的metrics需要覆盖掉ByFile中相同的Metrics
// NOTE: Ignore duplicate expressions, especially in the old version, users may have created duplicate metrics,
if expressionSet.Exists(metric.Expression) {
continue
}
// Add db expression in set.
expressionSet.Add(metric.Expression)
// Apply language
trans, err := getTranslationWithLanguage(metric, lang)
if err != nil {
logger.Errorf("Error getting translation for metric %s: %v", metric.Name, err)
continue // Skip if translation not found
}
metric.Name = trans.Name
metric.Note = trans.Note
filteredMetrics = append(filteredMetrics, metric)
}
// Sort metrics
sort.Slice(filteredMetrics, func(i, j int) bool {
if filteredMetrics[i].Collector != filteredMetrics[j].Collector {
return filteredMetrics[i].Collector < filteredMetrics[j].Collector
}
if filteredMetrics[i].Typ != filteredMetrics[j].Typ {
return filteredMetrics[i].Typ < filteredMetrics[j].Typ
}
return filteredMetrics[i].Expression < filteredMetrics[j].Expression
})
totalCount := len(filteredMetrics)
// Validate parameters
if offset < 0 {
offset = 0
}
if limit < 0 {
limit = 0
}
// Handle edge cases
if offset >= totalCount || limit == 0 {
return []*models.BuiltinMetric{}, totalCount, nil
}
// Apply pagination
end := offset + limit
if end > totalCount {
end = totalCount
}
return filteredMetrics[offset:end], totalCount, nil
}
func (b *BuiltinPayloadInFileType) BuiltinMetricTypes(lang, collector, query string) []string {
typeSet := set.NewStringSet()
for _, metric := range b.BuiltinMetrics {
if !applyFilter(metric, collector, "", query, "") {
continue
}
typeSet.Add(metric.Typ)
}
return typeSet.ToSlice()
}
func (b *BuiltinPayloadInFileType) BuiltinMetricCollectors(lang, typ, query string) []string {
collectorSet := set.NewStringSet()
for _, metric := range b.BuiltinMetrics {
if !applyFilter(metric, "", typ, query, "") {
continue
}
collectorSet.Add(metric.Collector)
}
return collectorSet.ToSlice()
}
func applyFilter(metric *models.BuiltinMetric, collector, typ, query, unit string) bool {
if collector != "" && collector != metric.Collector {
return false
}
if typ != "" && typ != metric.Typ {
return false
}
if unit != "" && !containsUnit(unit, metric.Unit) {
return false
}
if query != "" && !applyQueryFilter(metric, query) {
return false
}
return true
}
func containsUnit(unit, metricUnit string) bool {
us := strings.Split(unit, ",")
for _, u := range us {
if u == metricUnit {
return true
}
}
return false
}
func applyQueryFilter(metric *models.BuiltinMetric, query string) bool {
qs := strings.Split(query, " ")
for _, q := range qs {
if strings.HasPrefix(q, "-") {
q = strings.TrimPrefix(q, "-")
if strings.Contains(metric.Name, q) || strings.Contains(metric.Note, q) || strings.Contains(metric.Expression, q) {
return false
}
} else {
if !strings.Contains(metric.Name, q) && !strings.Contains(metric.Note, q) && !strings.Contains(metric.Expression, q) {
return false
}
}
}
return true
}
func getTranslationWithLanguage(bm *models.BuiltinMetric, lang string) (*models.Translation, error) {
var defaultTranslation *models.Translation
for _, t := range bm.Translation {
if t.Lang == lang {
return &t, nil
}
if t.Lang == "en_US" {
defaultTranslation = &t
}
}
if defaultTranslation != nil {
return defaultTranslation, nil
}
return nil, errors.Errorf("translation not found for metric %s", bm.Name)
}
func convertBuiltinMetricByDB(metricsInDB []*models.BuiltinMetric) map[string]*models.BuiltinMetric {
builtinMetricsByDB := make(map[string]*models.BuiltinMetric)
builtinMetricsByDBList := make(map[string][]*models.BuiltinMetric)
for _, metric := range metricsInDB {
builtinMetrics, ok := builtinMetricsByDBList[metric.Expression]
if !ok {
builtinMetrics = []*models.BuiltinMetric{}
}
builtinMetrics = append(builtinMetrics, metric)
builtinMetricsByDBList[metric.Expression] = builtinMetrics
}
for expression, builtinMetrics := range builtinMetricsByDBList {
if len(builtinMetrics) == 0 {
continue
}
// NOTE: 为兼容旧版本用户已经创建的 metrics同时将修改 metrics 收敛到同一个记录上,
// 我们选择使用 expression 相同但是 id 最小的 metric 记录作为主要的 Metric。
sort.Slice(builtinMetrics, func(i, j int) bool {
return builtinMetrics[i].ID < builtinMetrics[j].ID
})
currentBuiltinMetric := builtinMetrics[0]
// User have no customed translation, so we can merge it
if len(currentBuiltinMetric.Translation) == 0 {
translationMap := make(map[string]models.Translation)
for _, bm := range builtinMetrics {
for _, t := range getDefaultTranslation(bm) {
translationMap[t.Lang] = t
}
}
currentBuiltinMetric.Translation = make([]models.Translation, 0, len(translationMap))
for _, t := range translationMap {
currentBuiltinMetric.Translation = append(currentBuiltinMetric.Translation, t)
}
}
builtinMetricsByDB[expression] = currentBuiltinMetric
}
return builtinMetricsByDB
}
func getDefaultTranslation(bm *models.BuiltinMetric) []models.Translation {
if len(bm.Translation) != 0 {
return bm.Translation
}
return []models.Translation{{
Lang: bm.Lang,
Name: bm.Name,
Note: bm.Note,
}}
}

View File

@@ -6,6 +6,7 @@ import (
"sync"
"time"
"github.com/ccfos/nightingale/v6/center/cstats"
"github.com/ccfos/nightingale/v6/models"
"github.com/ccfos/nightingale/v6/storage"
@@ -115,15 +116,23 @@ func (s *Set) updateTargets(m map[string]models.HostMeta) error {
}
newMap[models.WrapIdent(ident)] = meta
}
start := time.Now()
err := storage.MSet(context.Background(), s.redis, newMap)
if err != nil {
cstats.RedisOperationLatency.WithLabelValues("mset_target_meta", "fail").Observe(time.Since(start).Seconds())
return err
} else {
cstats.RedisOperationLatency.WithLabelValues("mset_target_meta", "success").Observe(time.Since(start).Seconds())
}
if len(extendMap) > 0 {
err = storage.MSet(context.Background(), s.redis, extendMap)
if err != nil {
cstats.RedisOperationLatency.WithLabelValues("mset_target_extend", "fail").Observe(time.Since(start).Seconds())
return err
} else {
cstats.RedisOperationLatency.WithLabelValues("mset_target_extend", "success").Observe(time.Since(start).Seconds())
}
}

View File

@@ -93,10 +93,9 @@ func stat() gin.HandlerFunc {
code := fmt.Sprintf("%d", c.Writer.Status())
method := c.Request.Method
labels := []string{cstats.Service, code, c.FullPath(), method}
labels := []string{code, c.FullPath(), method}
cstats.RequestCounter.WithLabelValues(labels...).Inc()
cstats.RequestDuration.WithLabelValues(labels...).Observe(float64(time.Since(start).Seconds()))
cstats.RequestDuration.WithLabelValues(labels...).Observe(time.Since(start).Seconds())
}
}
@@ -178,6 +177,7 @@ func (rt *Router) Config(r *gin.Engine) {
pages := r.Group(pagesPrefix)
{
pages.DELETE("/datasource/series", rt.auth(), rt.admin(), rt.deleteDatasourceSeries)
if rt.Center.AnonymousAccess.PromQuerier {
pages.Any("/proxy/:id/*url", rt.dsProxy)
pages.POST("/query-range-batch", rt.promBatchQueryRange)
@@ -232,6 +232,11 @@ func (rt *Router) Config(r *gin.Engine) {
pages.POST("/log-query", rt.QueryLog)
}
// OpenSearch 专用接口
pages.POST("/os-indices", rt.QueryOSIndices)
pages.POST("/os-variable", rt.QueryOSVariable)
pages.POST("/os-fields", rt.QueryOSFields)
pages.GET("/sql-template", rt.QuerySqlTemplate)
pages.POST("/auth/login", rt.jwtMock(), rt.loginPost)
pages.POST("/auth/logout", rt.jwtMock(), rt.auth(), rt.user(), rt.logoutPost)
@@ -255,6 +260,7 @@ func (rt *Router) Config(r *gin.Engine) {
pages.GET("/notify-channels", rt.notifyChannelsGets)
pages.GET("/contact-keys", rt.contactKeysGets)
pages.GET("/install-date", rt.installDateGet)
pages.GET("/self/perms", rt.auth(), rt.user(), rt.permsGets)
pages.GET("/self/profile", rt.auth(), rt.user(), rt.selfProfileGet)
@@ -265,11 +271,11 @@ func (rt *Router) Config(r *gin.Engine) {
pages.DELETE("/self/token/:id", rt.auth(), rt.user(), rt.deleteToken)
pages.GET("/users", rt.auth(), rt.user(), rt.perm("/users"), rt.userGets)
pages.POST("/users", rt.auth(), rt.admin(), rt.userAddPost)
pages.POST("/users", rt.auth(), rt.user(), rt.perm("/users/add"), rt.userAddPost)
pages.GET("/user/:id/profile", rt.auth(), rt.userProfileGet)
pages.PUT("/user/:id/profile", rt.auth(), rt.admin(), rt.userProfilePut)
pages.PUT("/user/:id/password", rt.auth(), rt.admin(), rt.userPasswordPut)
pages.DELETE("/user/:id", rt.auth(), rt.admin(), rt.userDel)
pages.PUT("/user/:id/profile", rt.auth(), rt.user(), rt.perm("/users/put"), rt.userProfilePut)
pages.PUT("/user/:id/password", rt.auth(), rt.user(), rt.perm("/users/put"), rt.userPasswordPut)
pages.DELETE("/user/:id", rt.auth(), rt.user(), rt.perm("/users/del"), rt.userDel)
pages.GET("/metric-views", rt.auth(), rt.metricViewGets)
pages.DELETE("/metric-views", rt.auth(), rt.user(), rt.metricViewDel)
@@ -310,6 +316,7 @@ func (rt *Router) Config(r *gin.Engine) {
pages.GET("/busi-groups/tags", rt.auth(), rt.user(), rt.busiGroupsGetTags)
pages.GET("/targets", rt.auth(), rt.user(), rt.targetGets)
pages.POST("/target-update", rt.auth(), rt.targetUpdate)
pages.GET("/target/extra-meta", rt.auth(), rt.user(), rt.targetExtendInfoByIdent)
pages.POST("/target/list", rt.auth(), rt.user(), rt.targetGetsByHostFilter)
pages.DELETE("/targets", rt.auth(), rt.user(), rt.perm("/targets/del"), rt.targetDel)
@@ -373,6 +380,8 @@ func (rt *Router) Config(r *gin.Engine) {
pages.POST("/relabel-test", rt.auth(), rt.user(), rt.relabelTest)
pages.POST("/busi-group/:id/alert-rules/clone", rt.auth(), rt.user(), rt.perm("/alert-rules/add"), rt.bgrw(), rt.cloneToMachine)
pages.POST("/busi-groups/alert-rules/clones", rt.auth(), rt.user(), rt.perm("/alert-rules/add"), rt.batchAlertRuleClone)
pages.POST("/busi-group/alert-rules/notify-tryrun", rt.auth(), rt.user(), rt.perm("/alert-rules/add"), rt.alertRuleNotifyTryRun)
pages.POST("/busi-group/alert-rules/enable-tryrun", rt.auth(), rt.user(), rt.perm("/alert-rules/add"), rt.alertRuleEnableTryRun)
pages.GET("/busi-groups/recording-rules", rt.auth(), rt.user(), rt.perm("/recording-rules"), rt.recordingRuleGetsByGids)
pages.GET("/busi-group/:id/recording-rules", rt.auth(), rt.user(), rt.perm("/recording-rules"), rt.recordingRuleGets)
@@ -390,6 +399,7 @@ func (rt *Router) Config(r *gin.Engine) {
pages.PUT("/busi-group/:id/alert-mute/:amid", rt.auth(), rt.user(), rt.perm("/alert-mutes/put"), rt.alertMutePutByFE)
pages.GET("/busi-group/:id/alert-mute/:amid", rt.auth(), rt.user(), rt.perm("/alert-mutes"), rt.alertMuteGet)
pages.PUT("/busi-group/:id/alert-mutes/fields", rt.auth(), rt.user(), rt.perm("/alert-mutes/put"), rt.bgrw(), rt.alertMutePutFields)
pages.POST("/alert-mute-tryrun", rt.auth(), rt.user(), rt.perm("/alert-mutes/add"), rt.alertMuteTryRun)
pages.GET("/busi-groups/alert-subscribes", rt.auth(), rt.user(), rt.perm("/alert-subscribes"), rt.alertSubscribeGetsByGids)
pages.GET("/busi-group/:id/alert-subscribes", rt.auth(), rt.user(), rt.perm("/alert-subscribes"), rt.bgro(), rt.alertSubscribeGets)
@@ -397,22 +407,18 @@ func (rt *Router) Config(r *gin.Engine) {
pages.POST("/busi-group/:id/alert-subscribes", rt.auth(), rt.user(), rt.perm("/alert-subscribes/add"), rt.bgrw(), rt.alertSubscribeAdd)
pages.PUT("/busi-group/:id/alert-subscribes", rt.auth(), rt.user(), rt.perm("/alert-subscribes/put"), rt.bgrw(), rt.alertSubscribePut)
pages.DELETE("/busi-group/:id/alert-subscribes", rt.auth(), rt.user(), rt.perm("/alert-subscribes/del"), rt.bgrw(), rt.alertSubscribeDel)
pages.POST("/alert-subscribe/alert-subscribes-tryrun", rt.auth(), rt.user(), rt.perm("/alert-subscribes/add"), rt.alertSubscribeTryRun)
if rt.Center.AnonymousAccess.AlertDetail {
pages.GET("/alert-cur-event/:eid", rt.alertCurEventGet)
pages.GET("/alert-his-event/:eid", rt.alertHisEventGet)
pages.GET("/event-notify-records/:eid", rt.notificationRecordList)
} else {
pages.GET("/alert-cur-event/:eid", rt.auth(), rt.user(), rt.alertCurEventGet)
pages.GET("/alert-his-event/:eid", rt.auth(), rt.user(), rt.alertHisEventGet)
pages.GET("/event-notify-records/:eid", rt.auth(), rt.user(), rt.notificationRecordList)
}
pages.GET("/alert-cur-event/:eid", rt.alertCurEventGet)
pages.GET("/alert-his-event/:eid", rt.alertHisEventGet)
pages.GET("/event-notify-records/:eid", rt.notificationRecordList)
// card logic
pages.GET("/alert-cur-events/list", rt.auth(), rt.user(), rt.alertCurEventsList)
pages.GET("/alert-cur-events/card", rt.auth(), rt.user(), rt.alertCurEventsCard)
pages.POST("/alert-cur-events/card/details", rt.auth(), rt.alertCurEventsCardDetails)
pages.GET("/alert-his-events/list", rt.auth(), rt.user(), rt.alertHisEventsList)
pages.DELETE("/alert-his-events", rt.auth(), rt.admin(), rt.alertHisEventsDelete)
pages.DELETE("/alert-cur-events", rt.auth(), rt.user(), rt.perm("/alert-cur-events/del"), rt.alertCurEventDel)
pages.GET("/alert-cur-events/stats", rt.auth(), rt.alertCurEventsStatistics)
@@ -444,13 +450,13 @@ func (rt *Router) Config(r *gin.Engine) {
pages.POST("/datasource/status/update", rt.auth(), rt.admin(), rt.datasourceUpdataStatus)
pages.DELETE("/datasource/", rt.auth(), rt.admin(), rt.datasourceDel)
pages.GET("/roles", rt.auth(), rt.admin(), rt.roleGets)
pages.POST("/roles", rt.auth(), rt.admin(), rt.roleAdd)
pages.PUT("/roles", rt.auth(), rt.admin(), rt.rolePut)
pages.DELETE("/role/:id", rt.auth(), rt.admin(), rt.roleDel)
pages.GET("/roles", rt.auth(), rt.user(), rt.roleGets)
pages.POST("/roles", rt.auth(), rt.user(), rt.perm("/roles/add"), rt.roleAdd)
pages.PUT("/roles", rt.auth(), rt.user(), rt.perm("/roles/put"), rt.rolePut)
pages.DELETE("/role/:id", rt.auth(), rt.user(), rt.perm("/roles/del"), rt.roleDel)
pages.GET("/role/:id/ops", rt.auth(), rt.admin(), rt.operationOfRole)
pages.PUT("/role/:id/ops", rt.auth(), rt.admin(), rt.roleBindOperation)
pages.GET("/role/:id/ops", rt.auth(), rt.user(), rt.perm("/roles"), rt.operationOfRole)
pages.PUT("/role/:id/ops", rt.auth(), rt.user(), rt.perm("/roles/put"), rt.roleBindOperation)
pages.GET("/operation", rt.operations)
pages.GET("/notify-tpls", rt.auth(), rt.user(), rt.notifyTplGets)
@@ -472,7 +478,7 @@ func (rt *Router) Config(r *gin.Engine) {
pages.GET("/notify-channel", rt.auth(), rt.user(), rt.perm("/help/notification-settings"), rt.notifyChannelGets)
pages.PUT("/notify-channel", rt.auth(), rt.admin(), rt.notifyChannelPuts)
pages.GET("/notify-contact", rt.auth(), rt.user(), rt.perm("/help/notification-settings"), rt.notifyContactGets)
pages.GET("/notify-contact", rt.auth(), rt.user(), rt.notifyContactGets)
pages.PUT("/notify-contact", rt.auth(), rt.admin(), rt.notifyContactPuts)
pages.GET("/notify-config", rt.auth(), rt.user(), rt.perm("/help/notification-settings"), rt.notifyConfigGet)
@@ -481,13 +487,20 @@ func (rt *Router) Config(r *gin.Engine) {
pages.GET("/es-index-pattern", rt.auth(), rt.esIndexPatternGet)
pages.GET("/es-index-pattern-list", rt.auth(), rt.esIndexPatternGetList)
pages.POST("/es-index-pattern", rt.auth(), rt.admin(), rt.esIndexPatternAdd)
pages.PUT("/es-index-pattern", rt.auth(), rt.admin(), rt.esIndexPatternPut)
pages.DELETE("/es-index-pattern", rt.auth(), rt.admin(), rt.esIndexPatternDel)
pages.POST("/es-index-pattern", rt.auth(), rt.user(), rt.perm("/log/index-patterns/add"), rt.esIndexPatternAdd)
pages.PUT("/es-index-pattern", rt.auth(), rt.user(), rt.perm("/log/index-patterns/put"), rt.esIndexPatternPut)
pages.DELETE("/es-index-pattern", rt.auth(), rt.user(), rt.perm("/log/index-patterns/del"), rt.esIndexPatternDel)
pages.GET("/embedded-dashboards", rt.auth(), rt.user(), rt.perm("/embedded-dashboards"), rt.embeddedDashboardsGet)
pages.PUT("/embedded-dashboards", rt.auth(), rt.user(), rt.perm("/embedded-dashboards/put"), rt.embeddedDashboardsPut)
// 获取 embedded-product 列表
pages.GET("/embedded-product", rt.auth(), rt.user(), rt.embeddedProductGets)
pages.GET("/embedded-product/:id", rt.auth(), rt.user(), rt.embeddedProductGet)
pages.POST("/embedded-product", rt.auth(), rt.user(), rt.perm("/embedded-product/add"), rt.embeddedProductAdd)
pages.PUT("/embedded-product/:id", rt.auth(), rt.user(), rt.perm("/embedded-product/put"), rt.embeddedProductPut)
pages.DELETE("/embedded-product/:id", rt.auth(), rt.user(), rt.perm("/embedded-product/delete"), rt.embeddedProductDelete)
pages.GET("/user-variable-configs", rt.auth(), rt.user(), rt.perm("/help/variable-configs"), rt.userVariableConfigGets)
pages.POST("/user-variable-config", rt.auth(), rt.user(), rt.perm("/help/variable-configs"), rt.userVariableConfigAdd)
pages.PUT("/user-variable-config/:id", rt.auth(), rt.user(), rt.perm("/help/variable-configs"), rt.userVariableConfigPut)
@@ -497,21 +510,23 @@ func (rt *Router) Config(r *gin.Engine) {
pages.PUT("/config", rt.auth(), rt.admin(), rt.configPutByKey)
pages.GET("/site-info", rt.siteInfo)
// source token 相关路由
pages.POST("/source-token", rt.auth(), rt.user(), rt.sourceTokenAdd)
// for admin api
pages.GET("/user/busi-groups", rt.auth(), rt.admin(), rt.userBusiGroupsGets)
pages.GET("/builtin-components", rt.auth(), rt.user(), rt.builtinComponentsGets)
pages.POST("/builtin-components", rt.auth(), rt.user(), rt.perm("/built-in-components/add"), rt.builtinComponentsAdd)
pages.PUT("/builtin-components", rt.auth(), rt.user(), rt.perm("/built-in-components/put"), rt.builtinComponentsPut)
pages.DELETE("/builtin-components", rt.auth(), rt.user(), rt.perm("/built-in-components/del"), rt.builtinComponentsDel)
pages.POST("/builtin-components", rt.auth(), rt.user(), rt.perm("/components/add"), rt.builtinComponentsAdd)
pages.PUT("/builtin-components", rt.auth(), rt.user(), rt.perm("/components/put"), rt.builtinComponentsPut)
pages.DELETE("/builtin-components", rt.auth(), rt.user(), rt.perm("/components/del"), rt.builtinComponentsDel)
pages.GET("/builtin-payloads", rt.auth(), rt.user(), rt.builtinPayloadsGets)
pages.GET("/builtin-payloads/cates", rt.auth(), rt.user(), rt.builtinPayloadcatesGet)
pages.POST("/builtin-payloads", rt.auth(), rt.user(), rt.perm("/built-in-components/add"), rt.builtinPayloadsAdd)
pages.GET("/builtin-payload/:id", rt.auth(), rt.user(), rt.perm("/built-in-components"), rt.builtinPayloadGet)
pages.PUT("/builtin-payloads", rt.auth(), rt.user(), rt.perm("/built-in-components/put"), rt.builtinPayloadsPut)
pages.DELETE("/builtin-payloads", rt.auth(), rt.user(), rt.perm("/built-in-components/del"), rt.builtinPayloadsDel)
pages.GET("/builtin-payload", rt.auth(), rt.user(), rt.builtinPayloadsGetByUUIDOrID)
pages.POST("/builtin-payloads", rt.auth(), rt.user(), rt.perm("/components/add"), rt.builtinPayloadsAdd)
pages.PUT("/builtin-payloads", rt.auth(), rt.user(), rt.perm("/components/put"), rt.builtinPayloadsPut)
pages.DELETE("/builtin-payloads", rt.auth(), rt.user(), rt.perm("/components/del"), rt.builtinPayloadsDel)
pages.GET("/builtin-payload", rt.auth(), rt.user(), rt.builtinPayloadsGetByUUID)
pages.POST("/message-templates", rt.auth(), rt.user(), rt.perm("/notification-templates/add"), rt.messageTemplatesAdd)
pages.DELETE("/message-templates", rt.auth(), rt.user(), rt.perm("/notification-templates/del"), rt.messageTemplatesDel)
@@ -527,6 +542,19 @@ func (rt *Router) Config(r *gin.Engine) {
pages.GET("/notify-rules", rt.auth(), rt.user(), rt.perm("/notification-rules"), rt.notifyRulesGet)
pages.POST("/notify-rule/test", rt.auth(), rt.user(), rt.perm("/notification-rules"), rt.notifyTest)
pages.GET("/notify-rule/custom-params", rt.auth(), rt.user(), rt.perm("/notification-rules"), rt.notifyRuleCustomParamsGet)
pages.POST("/notify-rule/event-pipelines-tryrun", rt.auth(), rt.user(), rt.perm("/notification-rules/add"), rt.tryRunEventProcessorByNotifyRule)
pages.GET("/event-tagkeys", rt.auth(), rt.user(), rt.eventTagKeys)
pages.GET("/event-tagvalues", rt.auth(), rt.user(), rt.eventTagValues)
// 事件Pipeline相关路由
pages.GET("/event-pipelines", rt.auth(), rt.user(), rt.perm("/event-pipelines"), rt.eventPipelinesList)
pages.POST("/event-pipeline", rt.auth(), rt.user(), rt.perm("/event-pipelines/add"), rt.addEventPipeline)
pages.PUT("/event-pipeline", rt.auth(), rt.user(), rt.perm("/event-pipelines/put"), rt.updateEventPipeline)
pages.GET("/event-pipeline/:id", rt.auth(), rt.user(), rt.perm("/event-pipelines"), rt.getEventPipeline)
pages.DELETE("/event-pipelines", rt.auth(), rt.user(), rt.perm("/event-pipelines/del"), rt.deleteEventPipelines)
pages.POST("/event-pipeline-tryrun", rt.auth(), rt.user(), rt.perm("/event-pipelines"), rt.tryRunEventPipeline)
pages.POST("/event-processor-tryrun", rt.auth(), rt.user(), rt.perm("/event-pipelines"), rt.tryRunEventProcessor)
pages.POST("/notify-channel-configs", rt.auth(), rt.user(), rt.perm("/notification-channels/add"), rt.notifyChannelsAdd)
pages.DELETE("/notify-channel-configs", rt.auth(), rt.user(), rt.perm("/notification-channels/del"), rt.notifyChannelsDel)
@@ -647,6 +675,11 @@ func (rt *Router) Config(r *gin.Engine) {
service.GET("/message-templates", rt.messageTemplateGets)
service.GET("/event-pipelines", rt.eventPipelinesListByService)
// 手机号加密存储配置接口
service.POST("/users/phone/encrypt", rt.usersPhoneEncrypt)
service.POST("/users/phone/decrypt", rt.usersPhoneDecrypt)
}
}

View File

@@ -1,50 +1,55 @@
package router
import (
"fmt"
"net/http"
"sort"
"strings"
"time"
"github.com/ccfos/nightingale/v6/models"
"github.com/ccfos/nightingale/v6/pkg/ctx"
"github.com/ccfos/nightingale/v6/pkg/strx"
"github.com/gin-gonic/gin"
"github.com/toolkits/pkg/ginx"
"github.com/toolkits/pkg/logger"
)
func parseAggrRules(c *gin.Context) []*models.AggrRule {
aggrRules := strings.Split(ginx.QueryStr(c, "rule", ""), "::") // e.g. field:group_name::field:severity::tagkey:ident
if len(aggrRules) == 0 {
ginx.Bomb(http.StatusBadRequest, "rule empty")
func getUserGroupIds(ctx *gin.Context, rt *Router, myGroups bool) ([]int64, error) {
if !myGroups {
return nil, nil
}
rules := make([]*models.AggrRule, len(aggrRules))
for i := 0; i < len(aggrRules); i++ {
pair := strings.Split(aggrRules[i], ":")
if len(pair) != 2 {
ginx.Bomb(http.StatusBadRequest, "rule invalid")
}
if !(pair[0] == "field" || pair[0] == "tagkey") {
ginx.Bomb(http.StatusBadRequest, "rule invalid")
}
rules[i] = &models.AggrRule{
Type: pair[0],
Value: pair[1],
}
}
return rules
me := ctx.MustGet("user").(*models.User)
return models.MyGroupIds(rt.Ctx, me.Id)
}
func (rt *Router) alertCurEventsCard(c *gin.Context) {
stime, etime := getTimeRange(c)
severity := ginx.QueryInt(c, "severity", -1)
severity := strx.IdsInt64ForAPI(ginx.QueryStr(c, "severity", ""), ",")
query := ginx.QueryStr(c, "query", "")
myGroups := ginx.QueryBool(c, "my_groups", false) // 是否只看自己组默认false
var gids []int64
var err error
if myGroups {
gids, err = getUserGroupIds(c, rt, myGroups)
ginx.Dangerous(err)
if len(gids) == 0 {
gids = append(gids, -1)
}
}
viewId := ginx.QueryInt64(c, "view_id")
alertView, err := models.GetAlertAggrViewByViewID(rt.Ctx, viewId)
ginx.Dangerous(err)
if alertView == nil {
ginx.Bomb(http.StatusNotFound, "alert aggr view not found")
}
dsIds := queryDatasourceIds(c)
rules := parseAggrRules(c)
prod := ginx.QueryStr(c, "prods", "")
if prod == "" {
@@ -61,17 +66,18 @@ func (rt *Router) alertCurEventsCard(c *gin.Context) {
cates = strings.Split(cate, ",")
}
bgids, err := GetBusinessGroupIds(c, rt.Ctx, rt.Center.EventHistoryGroupView)
bgids, err := GetBusinessGroupIds(c, rt.Ctx, rt.Center.EventHistoryGroupView, myGroups)
ginx.Dangerous(err)
// 最多获取50000个获取太多也没啥意义
list, err := models.AlertCurEventsGet(rt.Ctx, prods, bgids, stime, etime, severity, dsIds,
cates, 0, query, 50000, 0)
cates, 0, query, 50000, 0, []int64{})
ginx.Dangerous(err)
cardmap := make(map[string]*AlertCard)
for _, event := range list {
title := event.GenCardTitle(rules)
title, err := event.GenCardTitle(alertView.Rule)
ginx.Dangerous(err)
if _, has := cardmap[title]; has {
cardmap[title].Total++
cardmap[title].EventIds = append(cardmap[title].EventIds, event.Id)
@@ -86,6 +92,10 @@ func (rt *Router) alertCurEventsCard(c *gin.Context) {
Severity: event.Severity,
}
}
if cardmap[title].Severity < 1 {
cardmap[title].Severity = 3
}
}
titles := make([]string, 0, len(cardmap))
@@ -142,11 +152,15 @@ func (rt *Router) alertCurEventsGetByRid(c *gin.Context) {
// 列表方式,拉取活跃告警
func (rt *Router) alertCurEventsList(c *gin.Context) {
stime, etime := getTimeRange(c)
severity := ginx.QueryInt(c, "severity", -1)
severity := strx.IdsInt64ForAPI(ginx.QueryStr(c, "severity", ""), ",")
query := ginx.QueryStr(c, "query", "")
limit := ginx.QueryInt(c, "limit", 20)
myGroups := ginx.QueryBool(c, "my_groups", false) // 是否只看自己组默认false
dsIds := queryDatasourceIds(c)
eventIds := strx.IdsInt64ForAPI(ginx.QueryStr(c, "event_ids", ""), ",")
prod := ginx.QueryStr(c, "prods", "")
if prod == "" {
prod = ginx.QueryStr(c, "rule_prods", "")
@@ -165,18 +179,19 @@ func (rt *Router) alertCurEventsList(c *gin.Context) {
ruleId := ginx.QueryInt64(c, "rid", 0)
bgids, err := GetBusinessGroupIds(c, rt.Ctx, rt.Center.EventHistoryGroupView)
bgids, err := GetBusinessGroupIds(c, rt.Ctx, rt.Center.EventHistoryGroupView, myGroups)
ginx.Dangerous(err)
total, err := models.AlertCurEventTotal(rt.Ctx, prods, bgids, stime, etime, severity, dsIds,
cates, ruleId, query)
cates, ruleId, query, eventIds)
ginx.Dangerous(err)
list, err := models.AlertCurEventsGet(rt.Ctx, prods, bgids, stime, etime, severity, dsIds,
cates, ruleId, query, limit, ginx.Offset(c, limit))
cates, ruleId, query, limit, ginx.Offset(c, limit), eventIds)
ginx.Dangerous(err)
cache := make(map[int64]*models.UserGroup)
for i := 0; i < len(list); i++ {
list[i].FillNotifyGroups(rt.Ctx, cache)
}
@@ -218,24 +233,68 @@ func (rt *Router) checkCurEventBusiGroupRWPermission(c *gin.Context, ids []int64
func (rt *Router) alertCurEventGet(c *gin.Context) {
eid := ginx.UrlParamInt64(c, "eid")
event, err := models.AlertCurEventGetById(rt.Ctx, eid)
ginx.Dangerous(err)
event, err := GetCurEventDetail(rt.Ctx, eid)
if event == nil {
ginx.Bomb(404, "No such active event")
}
if !rt.Center.AnonymousAccess.AlertDetail && rt.Center.EventHistoryGroupView {
hasPermission := HasPermission(rt.Ctx, c, "event", fmt.Sprintf("%d", eid), rt.Center.AnonymousAccess.AlertDetail)
if !hasPermission {
rt.auth()(c)
rt.user()(c)
rt.bgroCheck(c, event.GroupId)
}
ruleConfig, needReset := models.FillRuleConfigTplName(rt.Ctx, event.RuleConfig)
ginx.NewRender(c).Data(event, err)
}
func GetCurEventDetail(ctx *ctx.Context, eid int64) (*models.AlertCurEvent, error) {
event, err := models.AlertCurEventGetById(ctx, eid)
if err != nil {
return nil, err
}
if event == nil {
return nil, fmt.Errorf("no such active event")
}
ruleConfig, needReset := models.FillRuleConfigTplName(ctx, event.RuleConfig)
if needReset {
event.RuleConfigJson = ruleConfig
}
event.LastEvalTime = event.TriggerTime
ginx.NewRender(c).Data(event, nil)
event.NotifyVersion, err = GetEventNotifyVersion(ctx, event.RuleId, event.NotifyRuleIds)
ginx.Dangerous(err)
event.NotifyRules, err = GetEventNorifyRuleNames(ctx, event.NotifyRuleIds)
return event, err
}
func GetEventNorifyRuleNames(ctx *ctx.Context, notifyRuleIds []int64) ([]*models.EventNotifyRule, error) {
notifyRuleNames := make([]*models.EventNotifyRule, 0)
notifyRules, err := models.NotifyRulesGet(ctx, "id in ?", notifyRuleIds)
if err != nil {
return nil, err
}
for _, notifyRule := range notifyRules {
notifyRuleNames = append(notifyRuleNames, &models.EventNotifyRule{
Id: notifyRule.ID,
Name: notifyRule.Name,
})
}
return notifyRuleNames, nil
}
func GetEventNotifyVersion(ctx *ctx.Context, ruleId int64, notifyRuleIds []int64) (int, error) {
if len(notifyRuleIds) != 0 {
// 如果存在 notify_rule_ids则认为使用新的告警通知方式
return 1, nil
}
rule, err := models.AlertRuleGetById(ctx, ruleId)
if err != nil {
return 0, err
}
return rule.NotifyVersion, nil
}
func (rt *Router) alertCurEventsStatistics(c *gin.Context) {
@@ -247,3 +306,123 @@ func (rt *Router) alertCurEventDelByHash(c *gin.Context) {
hash := ginx.QueryStr(c, "hash")
ginx.NewRender(c).Message(models.AlertCurEventDelByHash(rt.Ctx, hash))
}
func (rt *Router) eventTagKeys(c *gin.Context) {
// 获取最近1天的活跃告警事件
now := time.Now().Unix()
stime := now - 24*3600
etime := now
// 获取用户可见的业务组ID列表
bgids, err := GetBusinessGroupIds(c, rt.Ctx, rt.Center.EventHistoryGroupView, false)
if err != nil {
logger.Warningf("failed to get business group ids: %v", err)
ginx.NewRender(c).Data([]string{"ident", "app", "service", "instance"}, nil)
return
}
// 查询活跃告警事件,限制数量以提高性能
events, err := models.AlertCurEventsGet(rt.Ctx, []string{}, bgids, stime, etime, []int64{}, []int64{}, []string{}, 0, "", 200, 0, []int64{})
if err != nil {
logger.Warningf("failed to get current alert events: %v", err)
ginx.NewRender(c).Data([]string{"ident", "app", "service", "instance"}, nil)
return
}
// 如果没有查到事件,返回默认标签
if len(events) == 0 {
ginx.NewRender(c).Data([]string{"ident", "app", "service", "instance"}, nil)
return
}
// 收集所有标签键并去重
tagKeys := make(map[string]struct{})
for _, event := range events {
for key := range event.TagsMap {
tagKeys[key] = struct{}{}
}
}
// 转换为字符串切片
var result []string
for key := range tagKeys {
result = append(result, key)
}
// 如果没有收集到任何标签键,返回默认值
if len(result) == 0 {
result = []string{"ident", "app", "service", "instance"}
}
ginx.NewRender(c).Data(result, nil)
}
func (rt *Router) eventTagValues(c *gin.Context) {
// 获取标签key
tagKey := ginx.QueryStr(c, "key")
// 获取最近1天的活跃告警事件
now := time.Now().Unix()
stime := now - 24*3600
etime := now
// 获取用户可见的业务组ID列表
bgids, err := GetBusinessGroupIds(c, rt.Ctx, rt.Center.EventHistoryGroupView, false)
if err != nil {
logger.Warningf("failed to get business group ids: %v", err)
ginx.NewRender(c).Data([]string{}, nil)
return
}
// 查询活跃告警事件,获取更多数据以保证统计准确性
events, err := models.AlertCurEventsGet(rt.Ctx, []string{}, bgids, stime, etime, []int64{}, []int64{}, []string{}, 0, "", 1000, 0, []int64{})
if err != nil {
logger.Warningf("failed to get current alert events: %v", err)
ginx.NewRender(c).Data([]string{}, nil)
return
}
// 如果没有查到事件,返回空数组
if len(events) == 0 {
ginx.NewRender(c).Data([]string{}, nil)
return
}
// 统计标签值出现次数
valueCount := make(map[string]int)
for _, event := range events {
// TagsMap已经在AlertCurEventsGet中处理直接使用
if value, exists := event.TagsMap[tagKey]; exists && value != "" {
valueCount[value]++
}
}
// 转换为切片并按出现次数降序排序
type tagValue struct {
value string
count int
}
tagValues := make([]tagValue, 0, len(valueCount))
for value, count := range valueCount {
tagValues = append(tagValues, tagValue{value, count})
}
// 按出现次数降序排序
sort.Slice(tagValues, func(i, j int) bool {
return tagValues[i].count > tagValues[j].count
})
// 只取Top20并转换为字符串数组
limit := 20
if len(tagValues) < limit {
limit = len(tagValues)
}
result := make([]string, 0, limit)
for i := 0; i < limit; i++ {
result = append(result, tagValues[i].value)
}
ginx.NewRender(c).Data(result, nil)
}

View File

@@ -2,6 +2,7 @@ package router
import (
"fmt"
"net/http"
"strings"
"time"
@@ -10,6 +11,7 @@ import (
"github.com/gin-gonic/gin"
"github.com/toolkits/pkg/ginx"
"github.com/toolkits/pkg/logger"
"golang.org/x/exp/slices"
)
@@ -56,15 +58,15 @@ func (rt *Router) alertHisEventsList(c *gin.Context) {
ruleId := ginx.QueryInt64(c, "rid", 0)
bgids, err := GetBusinessGroupIds(c, rt.Ctx, rt.Center.EventHistoryGroupView)
bgids, err := GetBusinessGroupIds(c, rt.Ctx, rt.Center.EventHistoryGroupView, false)
ginx.Dangerous(err)
total, err := models.AlertHisEventTotal(rt.Ctx, prods, bgids, stime, etime, severity,
recovered, dsIds, cates, ruleId, query)
recovered, dsIds, cates, ruleId, query, []int64{})
ginx.Dangerous(err)
list, err := models.AlertHisEventGets(rt.Ctx, prods, bgids, stime, etime, severity, recovered,
dsIds, cates, ruleId, query, limit, ginx.Offset(c, limit))
dsIds, cates, ruleId, query, limit, ginx.Offset(c, limit), []int64{})
ginx.Dangerous(err)
cache := make(map[int64]*models.UserGroup)
@@ -78,16 +80,67 @@ func (rt *Router) alertHisEventsList(c *gin.Context) {
}, nil)
}
type alertHisEventsDeleteForm struct {
Severities []int `json:"severities"`
Timestamp int64 `json:"timestamp" binding:"required"`
}
func (rt *Router) alertHisEventsDelete(c *gin.Context) {
var f alertHisEventsDeleteForm
ginx.BindJSON(c, &f)
// 校验
if f.Timestamp == 0 {
ginx.Bomb(http.StatusBadRequest, "timestamp parameter is required")
return
}
user := c.MustGet("user").(*models.User)
// 启动后台清理任务
go func() {
limit := 100
for {
n, err := models.AlertHisEventBatchDelete(rt.Ctx, f.Timestamp, f.Severities, limit)
if err != nil {
logger.Errorf("Failed to delete alert history events: operator=%s, timestamp=%d, severities=%v, error=%v",
user.Username, f.Timestamp, f.Severities, err)
break
}
logger.Debugf("Successfully deleted alert history events: operator=%s, timestamp=%d, severities=%v, deleted=%d",
user.Username, f.Timestamp, f.Severities, n)
if n < int64(limit) {
break // 已经删完
}
time.Sleep(100 * time.Millisecond) // 防止锁表
}
}()
ginx.NewRender(c).Data("Alert history events deletion started", nil)
}
var TransferEventToCur func(*ctx.Context, *models.AlertHisEvent) *models.AlertCurEvent
func init() {
TransferEventToCur = transferEventToCur
}
func transferEventToCur(ctx *ctx.Context, event *models.AlertHisEvent) *models.AlertCurEvent {
cur := event.ToCur()
return cur
}
func (rt *Router) alertHisEventGet(c *gin.Context) {
eid := ginx.UrlParamInt64(c, "eid")
event, err := models.AlertHisEventGetById(rt.Ctx, eid)
ginx.Dangerous(err)
if event == nil {
ginx.Bomb(404, "No such alert event")
}
if !rt.Center.AnonymousAccess.AlertDetail && rt.Center.EventHistoryGroupView {
hasPermission := HasPermission(rt.Ctx, c, "event", fmt.Sprintf("%d", eid), rt.Center.AnonymousAccess.AlertDetail)
if !hasPermission {
rt.auth()(c)
rt.user()(c)
rt.bgroCheck(c, event.GroupId)
}
@@ -96,46 +149,54 @@ func (rt *Router) alertHisEventGet(c *gin.Context) {
event.RuleConfigJson = ruleConfig
}
ginx.NewRender(c).Data(event, err)
event.NotifyVersion, err = GetEventNotifyVersion(rt.Ctx, event.RuleId, event.NotifyRuleIds)
ginx.Dangerous(err)
event.NotifyRules, err = GetEventNorifyRuleNames(rt.Ctx, event.NotifyRuleIds)
ginx.NewRender(c).Data(TransferEventToCur(rt.Ctx, event), err)
}
func GetBusinessGroupIds(c *gin.Context, ctx *ctx.Context, eventHistoryGroupView bool) ([]int64, error) {
func GetBusinessGroupIds(c *gin.Context, ctx *ctx.Context, onlySelfGroupView bool, myGroups bool) ([]int64, error) {
bgid := ginx.QueryInt64(c, "bgid", 0)
var bgids []int64
if !eventHistoryGroupView || strings.HasPrefix(c.Request.URL.Path, "/v1") {
if strings.HasPrefix(c.Request.URL.Path, "/v1") {
// 如果请求路径以 /v1 开头,不查询用户信息
if bgid > 0 {
return []int64{bgid}, nil
}
return bgids, nil
}
user := c.MustGet("user").(*models.User)
if user.IsAdmin() {
if myGroups || (onlySelfGroupView && !user.IsAdmin()) {
// 1. 页面上勾选了我的业务组,需要查询用户所属的业务组
// 2. 如果 onlySelfGroupView 为 true表示只允许查询用户所属的业务组
bussGroupIds, err := models.MyBusiGroupIds(ctx, user.Id)
if err != nil {
return nil, err
}
if len(bussGroupIds) == 0 {
// 如果没查到用户属于任何业务组需要返回一个0否则会导致查询到全部告警历史
return []int64{0}, nil
}
if bgid > 0 {
if !slices.Contains(bussGroupIds, bgid) && !user.IsAdmin() {
return nil, fmt.Errorf("business group ID not allowed")
}
return []int64{bgid}, nil
}
return bgids, nil
}
bussGroupIds, err := models.MyBusiGroupIds(ctx, user.Id)
if err != nil {
return nil, err
}
if len(bussGroupIds) == 0 {
// 如果没查到用户属于任何业务组需要返回一个0否则会导致查询到全部告警历史
return []int64{0}, nil
}
if bgid > 0 && !slices.Contains(bussGroupIds, bgid) {
return nil, fmt.Errorf("business group ID not allowed")
return bussGroupIds, nil
}
if bgid > 0 {
// Pass filter parameters, priority to use
return []int64{bgid}, nil
}
return bussGroupIds, nil
return bgids, nil
}

View File

@@ -11,16 +11,18 @@ import (
"gopkg.in/yaml.v2"
"github.com/ccfos/nightingale/v6/alert/mute"
"github.com/ccfos/nightingale/v6/models"
"github.com/ccfos/nightingale/v6/pkg/strx"
"github.com/ccfos/nightingale/v6/pushgw/pconf"
"github.com/ccfos/nightingale/v6/pushgw/writer"
"github.com/gin-gonic/gin"
"github.com/jinzhu/copier"
"github.com/pkg/errors"
"github.com/prometheus/prometheus/prompb"
"github.com/toolkits/pkg/ginx"
"github.com/toolkits/pkg/i18n"
"github.com/toolkits/pkg/str"
)
type AlertRuleModifyHookFunc func(ar *models.AlertRule)
@@ -33,13 +35,12 @@ func (rt *Router) alertRuleGets(c *gin.Context) {
cache := make(map[int64]*models.UserGroup)
for i := 0; i < len(ars); i++ {
ars[i].FillNotifyGroups(rt.Ctx, cache)
ars[i].FillSeverities()
}
}
ginx.NewRender(c).Data(ars, err)
}
func getAlertCueEventTimeRange(c *gin.Context) (stime, etime int64) {
func GetAlertCueEventTimeRange(c *gin.Context) (stime, etime int64) {
stime = ginx.QueryInt64(c, "stime", 0)
etime = ginx.QueryInt64(c, "etime", 0)
if etime == 0 {
@@ -52,7 +53,7 @@ func getAlertCueEventTimeRange(c *gin.Context) (stime, etime int64) {
}
func (rt *Router) alertRuleGetsByGids(c *gin.Context) {
gids := str.IdsInt64(ginx.QueryStr(c, "gids", ""), ",")
gids := strx.IdsInt64ForAPI(ginx.QueryStr(c, "gids", ""), ",")
if len(gids) > 0 {
for _, gid := range gids {
rt.bgroCheck(c, gid)
@@ -78,7 +79,6 @@ func (rt *Router) alertRuleGetsByGids(c *gin.Context) {
names := make([]string, 0, len(ars))
for i := 0; i < len(ars); i++ {
ars[i].FillNotifyGroups(rt.Ctx, cache)
ars[i].FillSeverities()
if len(ars[i].DatasourceQueries) != 0 {
ars[i].DatasourceIdsJson = rt.DatasourceCache.GetIDsByDsCateAndQueries(ars[i].Cate, ars[i].DatasourceQueries)
@@ -88,7 +88,7 @@ func (rt *Router) alertRuleGetsByGids(c *gin.Context) {
names = append(names, ars[i].UpdateBy)
}
stime, etime := getAlertCueEventTimeRange(c)
stime, etime := GetAlertCueEventTimeRange(c)
cnt := models.AlertCurEventCountByRuleId(rt.Ctx, rids, stime, etime)
if cnt != nil {
for i := 0; i < len(ars); i++ {
@@ -157,6 +157,120 @@ func (rt *Router) alertRuleAddByFE(c *gin.Context) {
ginx.NewRender(c).Data(reterr, nil)
}
type AlertRuleTryRunForm struct {
EventId int64 `json:"event_id" binding:"required"`
AlertRuleConfig models.AlertRule `json:"config" binding:"required"`
}
func (rt *Router) alertRuleNotifyTryRun(c *gin.Context) {
// check notify channels of old version
var f AlertRuleTryRunForm
ginx.BindJSON(c, &f)
hisEvent, err := models.AlertHisEventGetById(rt.Ctx, f.EventId)
ginx.Dangerous(err)
if hisEvent == nil {
ginx.Bomb(http.StatusNotFound, "event not found")
}
curEvent := *hisEvent.ToCur()
curEvent.SetTagsMap()
if f.AlertRuleConfig.NotifyVersion == 1 {
for _, id := range f.AlertRuleConfig.NotifyRuleIds {
notifyRule, err := models.GetNotifyRule(rt.Ctx, id)
ginx.Dangerous(err)
for _, notifyConfig := range notifyRule.NotifyConfigs {
_, err = SendNotifyChannelMessage(rt.Ctx, rt.UserCache, rt.UserGroupCache, notifyConfig, []*models.AlertCurEvent{&curEvent})
ginx.Dangerous(err)
}
}
ginx.NewRender(c).Data("notification test ok", nil)
return
}
if len(f.AlertRuleConfig.NotifyChannelsJSON) == 0 {
ginx.Bomb(http.StatusOK, "no notify channels selected")
}
if len(f.AlertRuleConfig.NotifyGroupsJSON) == 0 {
ginx.Bomb(http.StatusOK, "no notify groups selected")
}
ancs := make([]string, 0, len(curEvent.NotifyChannelsJSON))
ugids := f.AlertRuleConfig.NotifyGroupsJSON
ngids := make([]int64, 0)
for i := 0; i < len(ugids); i++ {
if gid, err := strconv.ParseInt(ugids[i], 10, 64); err == nil {
ngids = append(ngids, gid)
}
}
userGroups := rt.UserGroupCache.GetByUserGroupIds(ngids)
uids := make([]int64, 0)
for i := range userGroups {
uids = append(uids, userGroups[i].UserIds...)
}
users := rt.UserCache.GetByUserIds(uids)
for _, NotifyChannels := range curEvent.NotifyChannelsJSON {
flag := true
// ignore non-default channels
switch NotifyChannels {
case models.Dingtalk, models.Wecom, models.Feishu, models.Mm,
models.Telegram, models.Email, models.FeishuCard:
// do nothing
default:
continue
}
// default channels
for ui := range users {
if _, b := users[ui].ExtractToken(NotifyChannels); b {
flag = false
break
}
}
if flag {
ancs = append(ancs, NotifyChannels)
}
}
if len(ancs) > 0 {
ginx.Dangerous(errors.New(fmt.Sprintf("All users are missing notify channel configurations. Please check for missing tokens (each channel should be configured with at least one user). %v", ancs)))
}
ginx.NewRender(c).Data("notification test ok", nil)
}
func (rt *Router) alertRuleEnableTryRun(c *gin.Context) {
// check notify channels of old version
var f AlertRuleTryRunForm
ginx.BindJSON(c, &f)
hisEvent, err := models.AlertHisEventGetById(rt.Ctx, f.EventId)
ginx.Dangerous(err)
if hisEvent == nil {
ginx.Bomb(http.StatusNotFound, "event not found")
}
curEvent := *hisEvent.ToCur()
curEvent.SetTagsMap()
if f.AlertRuleConfig.Disabled == 1 {
ginx.Bomb(http.StatusOK, "rule is disabled")
}
if mute.TimeSpanMuteStrategy(&f.AlertRuleConfig, &curEvent) {
ginx.Bomb(http.StatusOK, "event is not match for period of time")
}
if mute.BgNotMatchMuteStrategy(&f.AlertRuleConfig, &curEvent, rt.TargetCache) {
ginx.Bomb(http.StatusOK, "event target busi group not match rule busi group")
}
ginx.NewRender(c).Data("event is effective", nil)
}
func (rt *Router) alertRuleAddByImport(c *gin.Context) {
username := c.MustGet("username").(string)
@@ -174,6 +288,15 @@ func (rt *Router) alertRuleAddByImport(c *gin.Context) {
models.DataSourceQueryAll,
}
}
// 将导入的规则统一转为新版本的通知规则配置
lst[i].NotifyVersion = 1
lst[i].NotifyChannelsJSON = []string{}
lst[i].NotifyGroupsJSON = []string{}
lst[i].NotifyChannels = ""
lst[i].NotifyGroups = ""
lst[i].Callbacks = ""
lst[i].CallbacksJSON = []string{}
}
bgid := ginx.UrlParamInt64(c, "id")
@@ -192,19 +315,52 @@ func (rt *Router) alertRuleAddByImportPromRule(c *gin.Context) {
var f promRuleForm
ginx.Dangerous(c.BindJSON(&f))
// 首先尝试解析带 groups 的格式
var pr struct {
Groups []models.PromRuleGroup `yaml:"groups"`
}
err := yaml.Unmarshal([]byte(f.Payload), &pr)
if err != nil {
ginx.Bomb(http.StatusBadRequest, "invalid yaml format, please use the example format. err: %v", err)
var groups []models.PromRuleGroup
if err != nil || len(pr.Groups) == 0 {
// 如果解析失败或没有 groups尝试解析规则数组格式
var rules []models.PromRule
err = yaml.Unmarshal([]byte(f.Payload), &rules)
if err != nil {
// 最后尝试解析单个规则格式
var singleRule models.PromRule
err = yaml.Unmarshal([]byte(f.Payload), &singleRule)
if err != nil {
ginx.Bomb(http.StatusBadRequest, "invalid yaml format. err: %v", err)
}
// 验证单个规则是否有效
if singleRule.Alert == "" && singleRule.Record == "" {
ginx.Bomb(http.StatusBadRequest, "input yaml is empty or invalid")
}
rules = []models.PromRule{singleRule}
}
// 验证规则数组是否为空
if len(rules) == 0 {
ginx.Bomb(http.StatusBadRequest, "input yaml contains no rules")
}
// 将规则数组包装成 group
groups = []models.PromRuleGroup{
{
Name: "imported_rules",
Rules: rules,
},
}
} else {
// 使用已解析的 groups
groups = pr.Groups
}
if len(pr.Groups) == 0 {
ginx.Bomb(http.StatusBadRequest, "input yaml is empty")
}
lst := models.DealPromGroup(pr.Groups, f.DatasourceQueries, f.Disabled)
lst := models.DealPromGroup(groups, f.DatasourceQueries, f.Disabled)
username := c.MustGet("username").(string)
bgid := ginx.UrlParamInt64(c, "id")
ginx.NewRender(c).Data(rt.alertRuleAdd(lst, username, bgid, c.GetHeader("X-Language")), nil)
@@ -349,8 +505,8 @@ func (rt *Router) alertRulePutFields(c *gin.Context) {
ginx.Bomb(http.StatusBadRequest, "fields empty")
}
f.Fields["update_by"] = c.MustGet("username").(string)
f.Fields["update_at"] = time.Now().Unix()
updateBy := c.MustGet("username").(string)
updateAt := time.Now().Unix()
for i := 0; i < len(f.Ids); i++ {
ar, err := models.AlertRuleGetById(rt.Ctx, f.Ids[i])
@@ -367,7 +523,6 @@ func (rt *Router) alertRulePutFields(c *gin.Context) {
b, err := json.Marshal(originRule)
ginx.Dangerous(err)
ginx.Dangerous(ar.UpdateFieldsMap(rt.Ctx, map[string]interface{}{"rule_config": string(b)}))
continue
}
}
@@ -380,7 +535,6 @@ func (rt *Router) alertRulePutFields(c *gin.Context) {
b, err := json.Marshal(ar.AnnotationsJSON)
ginx.Dangerous(err)
ginx.Dangerous(ar.UpdateFieldsMap(rt.Ctx, map[string]interface{}{"annotations": string(b)}))
continue
}
}
@@ -393,7 +547,6 @@ func (rt *Router) alertRulePutFields(c *gin.Context) {
b, err := json.Marshal(ar.AnnotationsJSON)
ginx.Dangerous(err)
ginx.Dangerous(ar.UpdateFieldsMap(rt.Ctx, map[string]interface{}{"annotations": string(b)}))
continue
}
}
@@ -403,7 +556,6 @@ func (rt *Router) alertRulePutFields(c *gin.Context) {
callback := callbacks.(string)
if !strings.Contains(ar.Callbacks, callback) {
ginx.Dangerous(ar.UpdateFieldsMap(rt.Ctx, map[string]interface{}{"callbacks": ar.Callbacks + " " + callback}))
continue
}
}
}
@@ -413,7 +565,6 @@ func (rt *Router) alertRulePutFields(c *gin.Context) {
if callbacks, has := f.Fields["callbacks"]; has {
callback := callbacks.(string)
ginx.Dangerous(ar.UpdateFieldsMap(rt.Ctx, map[string]interface{}{"callbacks": strings.ReplaceAll(ar.Callbacks, callback, "")}))
continue
}
}
@@ -423,7 +574,6 @@ func (rt *Router) alertRulePutFields(c *gin.Context) {
bytes, err := json.Marshal(datasourceQueries)
ginx.Dangerous(err)
ginx.Dangerous(ar.UpdateFieldsMap(rt.Ctx, map[string]interface{}{"datasource_queries": bytes}))
continue
}
}
@@ -439,6 +589,12 @@ func (rt *Router) alertRulePutFields(c *gin.Context) {
ginx.Dangerous(ar.UpdateColumn(rt.Ctx, k, v))
}
}
// 统一更新更新时间和更新人,只有更新时间变了,告警规则才会被引擎拉取
ginx.Dangerous(ar.UpdateFieldsMap(rt.Ctx, map[string]interface{}{
"update_by": updateBy,
"update_at": updateAt,
}))
}
ginx.NewRender(c).Message(nil)

View File

@@ -2,13 +2,17 @@ package router
import (
"net/http"
"strconv"
"strings"
"time"
"github.com/ccfos/nightingale/v6/alert/common"
"github.com/ccfos/nightingale/v6/models"
"github.com/ccfos/nightingale/v6/pkg/strx"
"github.com/gin-gonic/gin"
"github.com/toolkits/pkg/ginx"
"github.com/toolkits/pkg/str"
"github.com/toolkits/pkg/i18n"
)
// Return all, front-end search and paging
@@ -31,7 +35,7 @@ func (rt *Router) alertSubscribeGets(c *gin.Context) {
}
func (rt *Router) alertSubscribeGetsByGids(c *gin.Context) {
gids := str.IdsInt64(ginx.QueryStr(c, "gids", ""), ",")
gids := strx.IdsInt64ForAPI(ginx.QueryStr(c, "gids", ""), ",")
if len(gids) > 0 {
for _, gid := range gids {
rt.bgroCheck(c, gid)
@@ -104,6 +108,148 @@ func (rt *Router) alertSubscribeAdd(c *gin.Context) {
ginx.NewRender(c).Message(f.Add(rt.Ctx))
}
type SubscribeTryRunForm struct {
EventId int64 `json:"event_id" binding:"required"`
SubscribeConfig models.AlertSubscribe `json:"config" binding:"required"`
}
func (rt *Router) alertSubscribeTryRun(c *gin.Context) {
var f SubscribeTryRunForm
ginx.BindJSON(c, &f)
ginx.Dangerous(f.SubscribeConfig.Verify())
hisEvent, err := models.AlertHisEventGetById(rt.Ctx, f.EventId)
ginx.Dangerous(err)
if hisEvent == nil {
ginx.Bomb(http.StatusNotFound, "event not found")
}
curEvent := *hisEvent.ToCur()
curEvent.SetTagsMap()
lang := c.GetHeader("X-Language")
// 先判断匹配条件
if !f.SubscribeConfig.MatchCluster(curEvent.DatasourceId) {
ginx.Bomb(http.StatusBadRequest, i18n.Sprintf(lang, "event datasource not match"))
}
if len(f.SubscribeConfig.RuleIds) != 0 {
match := false
for _, rid := range f.SubscribeConfig.RuleIds {
if rid == curEvent.RuleId {
match = true
break
}
}
if !match {
ginx.Bomb(http.StatusBadRequest, i18n.Sprintf(lang, "event rule id not match"))
}
}
// 匹配 tag
f.SubscribeConfig.Parse()
if !common.MatchTags(curEvent.TagsMap, f.SubscribeConfig.ITags) {
ginx.Bomb(http.StatusBadRequest, i18n.Sprintf(lang, "event tags not match"))
}
// 匹配group name
if !common.MatchGroupsName(curEvent.GroupName, f.SubscribeConfig.IBusiGroups) {
ginx.Bomb(http.StatusBadRequest, i18n.Sprintf(lang, "event group name not match"))
}
// 检查严重级别Severity匹配
if len(f.SubscribeConfig.SeveritiesJson) != 0 {
match := false
for _, s := range f.SubscribeConfig.SeveritiesJson {
if s == curEvent.Severity || s == 0 {
match = true
break
}
}
if !match {
ginx.Bomb(http.StatusBadRequest, i18n.Sprintf(lang, "event severity not match"))
}
}
// 新版本通知规则
if f.SubscribeConfig.NotifyVersion == 1 {
if len(f.SubscribeConfig.NotifyRuleIds) == 0 {
ginx.Bomb(http.StatusBadRequest, i18n.Sprintf(lang, "no notify rules selected"))
}
for _, id := range f.SubscribeConfig.NotifyRuleIds {
notifyRule, err := models.GetNotifyRule(rt.Ctx, id)
if err != nil {
ginx.Bomb(http.StatusNotFound, i18n.Sprintf(lang, "subscribe notify rule not found: %v", err))
}
for _, notifyConfig := range notifyRule.NotifyConfigs {
_, err = SendNotifyChannelMessage(rt.Ctx, rt.UserCache, rt.UserGroupCache, notifyConfig, []*models.AlertCurEvent{&curEvent})
if err != nil {
ginx.Bomb(http.StatusBadRequest, i18n.Sprintf(lang, "notify rule send error: %v", err))
}
}
}
ginx.NewRender(c).Data(i18n.Sprintf(lang, "event match subscribe and notification test ok"), nil)
return
}
// 旧版通知方式
f.SubscribeConfig.ModifyEvent(&curEvent)
if len(curEvent.NotifyChannelsJSON) == 0 {
ginx.Bomb(http.StatusBadRequest, i18n.Sprintf(lang, "no notify channels selected"))
}
if len(curEvent.NotifyGroupsJSON) == 0 {
ginx.Bomb(http.StatusOK, i18n.Sprintf(lang, "no notify groups selected"))
}
ancs := make([]string, 0, len(curEvent.NotifyChannelsJSON))
ugids := strings.Fields(f.SubscribeConfig.UserGroupIds)
ngids := make([]int64, 0)
for i := 0; i < len(ugids); i++ {
if gid, err := strconv.ParseInt(ugids[i], 10, 64); err == nil {
ngids = append(ngids, gid)
}
}
userGroups := rt.UserGroupCache.GetByUserGroupIds(ngids)
uids := make([]int64, 0)
for i := range userGroups {
uids = append(uids, userGroups[i].UserIds...)
}
users := rt.UserCache.GetByUserIds(uids)
for _, NotifyChannels := range curEvent.NotifyChannelsJSON {
flag := true
// ignore non-default channels
switch NotifyChannels {
case models.Dingtalk, models.Wecom, models.Feishu, models.Mm,
models.Telegram, models.Email, models.FeishuCard:
// do nothing
default:
continue
}
// default channels
for ui := range users {
if _, b := users[ui].ExtractToken(NotifyChannels); b {
flag = false
break
}
}
if flag {
ancs = append(ancs, NotifyChannels)
}
}
if len(ancs) > 0 {
ginx.Bomb(http.StatusBadRequest, i18n.Sprintf(lang, "all users missing notify channel configurations: %v", ancs))
}
ginx.NewRender(c).Data(i18n.Sprintf(lang, "event match subscribe and notify settings ok"), nil)
}
func (rt *Router) alertSubscribePut(c *gin.Context) {
var fs []models.AlertSubscribe
ginx.BindJSON(c, &fs)
@@ -142,6 +288,7 @@ func (rt *Router) alertSubscribePut(c *gin.Context) {
"busi_groups",
"note",
"notify_rule_ids",
"notify_version",
))
}

View File

@@ -6,11 +6,11 @@ import (
"time"
"github.com/ccfos/nightingale/v6/models"
"github.com/ccfos/nightingale/v6/pkg/strx"
"github.com/gin-gonic/gin"
"github.com/toolkits/pkg/ginx"
"github.com/toolkits/pkg/i18n"
"github.com/toolkits/pkg/str"
)
type boardForm struct {
@@ -51,9 +51,14 @@ func (rt *Router) boardAdd(c *gin.Context) {
func (rt *Router) boardGet(c *gin.Context) {
bid := ginx.UrlParamStr(c, "bid")
board, err := models.BoardGet(rt.Ctx, "id = ? or ident = ?", bid, bid)
board, err := models.BoardGet(rt.Ctx, "ident = ?", bid)
ginx.Dangerous(err)
if board == nil {
board, err = models.BoardGet(rt.Ctx, "id = ?", bid)
ginx.Dangerous(err)
}
if board == nil {
ginx.Bomb(http.StatusNotFound, "No such dashboard")
}
@@ -96,7 +101,7 @@ func (rt *Router) boardGet(c *gin.Context) {
// 根据 bids 参数,获取多个 board
func (rt *Router) boardGetsByBids(c *gin.Context) {
bids := str.IdsInt64(ginx.QueryStr(c, "bids", ""), ",")
bids := strx.IdsInt64ForAPI(ginx.QueryStr(c, "bids", ""), ",")
boards, err := models.BoardGetsByBids(rt.Ctx, bids)
ginx.Dangerous(err)
ginx.NewRender(c).Data(boards, err)
@@ -265,7 +270,7 @@ func (rt *Router) publicBoardGets(c *gin.Context) {
}
func (rt *Router) boardGetsByGids(c *gin.Context) {
gids := str.IdsInt64(ginx.QueryStr(c, "gids", ""), ",")
gids := strx.IdsInt64ForAPI(ginx.QueryStr(c, "gids", ""), ",")
query := ginx.QueryStr(c, "query", "")
if len(gids) > 0 {

View File

@@ -57,7 +57,7 @@ func (rt *Router) metricFilterDel(c *gin.Context) {
ginx.Dangerous(err)
if !HasPerm(gids, old.GroupsPerm, true) {
ginx.NewRender(c).Message("no permission")
ginx.NewRender(c).Message("forbidden")
return
}
}
@@ -79,7 +79,7 @@ func (rt *Router) metricFilterPut(c *gin.Context) {
ginx.Dangerous(err)
if !HasPerm(gids, old.GroupsPerm, true) {
ginx.NewRender(c).Message("no permission")
ginx.NewRender(c).Message("forbidden")
return
}
}

View File

@@ -2,8 +2,10 @@ package router
import (
"net/http"
"sort"
"time"
"github.com/ccfos/nightingale/v6/center/integration"
"github.com/ccfos/nightingale/v6/models"
"github.com/gin-gonic/gin"
@@ -29,7 +31,7 @@ func (rt *Router) builtinMetricsAdd(c *gin.Context) {
reterr := make(map[string]string)
for i := 0; i < count; i++ {
lst[i].Lang = lang
lst[i].UUID = time.Now().UnixNano()
lst[i].UUID = time.Now().UnixMicro()
if err := lst[i].Add(rt.Ctx, username); err != nil {
reterr[lst[i].Name] = i18n.Sprintf(c.GetHeader("X-Language"), err.Error())
}
@@ -48,11 +50,12 @@ func (rt *Router) builtinMetricsGets(c *gin.Context) {
lang = "zh_CN"
}
bm, err := models.BuiltinMetricGets(rt.Ctx, lang, collector, typ, query, unit, limit, ginx.Offset(c, limit))
bmInDB, err := models.BuiltinMetricGets(rt.Ctx, "", collector, typ, query, unit, limit, ginx.Offset(c, limit))
ginx.Dangerous(err)
total, err := models.BuiltinMetricCount(rt.Ctx, lang, collector, typ, query, unit)
bm, total, err := integration.BuiltinPayloadInFile.BuiltinMetricGets(bmInDB, lang, collector, typ, query, unit, limit, ginx.Offset(c, limit))
ginx.Dangerous(err)
ginx.NewRender(c).Data(gin.H{
"list": bm,
"total": total,
@@ -86,15 +89,11 @@ func (rt *Router) builtinMetricsDel(c *gin.Context) {
func (rt *Router) builtinMetricsDefaultTypes(c *gin.Context) {
lst := []string{
"Linux",
"Procstat",
"cAdvisor",
"Ping",
"MySQL",
"Redis",
"Kafka",
"Elasticsearch",
"PostgreSQL",
"MongoDB",
"Memcached",
"ClickHouse",
}
ginx.NewRender(c).Data(lst, nil)
}
@@ -102,29 +101,28 @@ func (rt *Router) builtinMetricsDefaultTypes(c *gin.Context) {
func (rt *Router) builtinMetricsTypes(c *gin.Context) {
collector := ginx.QueryStr(c, "collector", "")
query := ginx.QueryStr(c, "query", "")
disabled := ginx.QueryInt(c, "disabled", -1)
lang := c.GetHeader("X-Language")
metricTypeList, err := models.BuiltinMetricTypes(rt.Ctx, lang, collector, query)
metricTypeListInDB, err := models.BuiltinMetricTypes(rt.Ctx, lang, collector, query)
ginx.Dangerous(err)
componentList, err := models.BuiltinComponentGets(rt.Ctx, "", disabled)
ginx.Dangerous(err)
metricTypeListInFile := integration.BuiltinPayloadInFile.BuiltinMetricTypes(lang, collector, query)
// 创建一个 map 来存储 componentList 中的类型
componentTypes := make(map[string]struct{})
for _, comp := range componentList {
componentTypes[comp.Ident] = struct{}{}
typeMap := make(map[string]struct{})
for _, metricType := range metricTypeListInDB {
typeMap[metricType] = struct{}{}
}
for _, metricType := range metricTypeListInFile {
typeMap[metricType] = struct{}{}
}
filteredMetricTypeList := make([]string, 0)
for _, metricType := range metricTypeList {
if _, exists := componentTypes[metricType]; exists {
filteredMetricTypeList = append(filteredMetricTypeList, metricType)
}
metricTypeList := make([]string, 0, len(typeMap))
for metricType := range typeMap {
metricTypeList = append(metricTypeList, metricType)
}
sort.Strings(metricTypeList)
ginx.NewRender(c).Data(filteredMetricTypeList, nil)
ginx.NewRender(c).Data(metricTypeList, nil)
}
func (rt *Router) builtinMetricsCollectors(c *gin.Context) {
@@ -132,5 +130,24 @@ func (rt *Router) builtinMetricsCollectors(c *gin.Context) {
query := ginx.QueryStr(c, "query", "")
lang := c.GetHeader("X-Language")
ginx.NewRender(c).Data(models.BuiltinMetricCollectors(rt.Ctx, lang, typ, query))
collectorListInDB, err := models.BuiltinMetricCollectors(rt.Ctx, lang, typ, query)
ginx.Dangerous(err)
collectorListInFile := integration.BuiltinPayloadInFile.BuiltinMetricCollectors(lang, typ, query)
collectorMap := make(map[string]struct{})
for _, collector := range collectorListInDB {
collectorMap[collector] = struct{}{}
}
for _, collector := range collectorListInFile {
collectorMap[collector] = struct{}{}
}
collectorList := make([]string, 0, len(collectorMap))
for collector := range collectorMap {
collectorList = append(collectorList, collector)
}
sort.Strings(collectorList)
ginx.NewRender(c).Data(collectorList, nil)
}

View File

@@ -7,6 +7,7 @@ import (
"time"
"github.com/BurntSushi/toml"
"github.com/ccfos/nightingale/v6/center/integration"
"github.com/ccfos/nightingale/v6/models"
"github.com/gin-gonic/gin"
"github.com/toolkits/pkg/ginx"
@@ -192,13 +193,26 @@ func (rt *Router) builtinPayloadsAdd(c *gin.Context) {
func (rt *Router) builtinPayloadsGets(c *gin.Context) {
typ := ginx.QueryStr(c, "type", "")
if typ == "" {
ginx.Bomb(http.StatusBadRequest, "type is required")
return
}
ComponentID := ginx.QueryInt64(c, "component_id", 0)
cate := ginx.QueryStr(c, "cate", "")
query := ginx.QueryStr(c, "query", "")
lst, err := models.BuiltinPayloadGets(rt.Ctx, uint64(ComponentID), typ, cate, query)
ginx.NewRender(c).Data(lst, err)
ginx.Dangerous(err)
lstInFile, err := integration.BuiltinPayloadInFile.GetBuiltinPayload(typ, cate, query, uint64(ComponentID))
ginx.Dangerous(err)
if len(lstInFile) > 0 {
lst = append(lst, lstInFile...)
}
ginx.NewRender(c).Data(lst, nil)
}
func (rt *Router) builtinPayloadcatesGet(c *gin.Context) {
@@ -206,21 +220,31 @@ func (rt *Router) builtinPayloadcatesGet(c *gin.Context) {
ComponentID := ginx.QueryInt64(c, "component_id", 0)
cates, err := models.BuiltinPayloadCates(rt.Ctx, typ, uint64(ComponentID))
ginx.NewRender(c).Data(cates, err)
}
ginx.Dangerous(err)
func (rt *Router) builtinPayloadGet(c *gin.Context) {
id := ginx.UrlParamInt64(c, "id")
catesInFile, err := integration.BuiltinPayloadInFile.GetBuiltinPayloadCates(typ, uint64(ComponentID))
ginx.Dangerous(err)
bp, err := models.BuiltinPayloadGet(rt.Ctx, "id = ?", id)
if err != nil {
ginx.Bomb(http.StatusInternalServerError, err.Error())
}
if bp == nil {
ginx.Bomb(http.StatusNotFound, "builtin payload not found")
// 使用 map 进行去重
cateMap := make(map[string]bool)
// 添加数据库中的分类
for _, cate := range cates {
cateMap[cate] = true
}
ginx.NewRender(c).Data(bp, nil)
// 添加文件中的分类
for _, cate := range catesInFile {
cateMap[cate] = true
}
// 将去重后的结果转换回切片
result := make([]string, 0, len(cateMap))
for cate := range cateMap {
result = append(result, cate)
}
ginx.NewRender(c).Data(result, nil)
}
func (rt *Router) builtinPayloadsPut(c *gin.Context) {
@@ -273,14 +297,15 @@ func (rt *Router) builtinPayloadsDel(c *gin.Context) {
ginx.NewRender(c).Message(models.BuiltinPayloadDels(rt.Ctx, req.Ids))
}
func (rt *Router) builtinPayloadsGetByUUIDOrID(c *gin.Context) {
uuid := ginx.QueryInt64(c, "uuid", 0)
// 优先以 uuid 为准
if uuid != 0 {
ginx.NewRender(c).Data(models.BuiltinPayloadGet(rt.Ctx, "uuid = ?", uuid))
return
}
func (rt *Router) builtinPayloadsGetByUUID(c *gin.Context) {
uuid := ginx.QueryInt64(c, "uuid")
id := ginx.QueryInt64(c, "id", 0)
ginx.NewRender(c).Data(models.BuiltinPayloadGet(rt.Ctx, "id = ?", id))
bp, err := models.BuiltinPayloadGet(rt.Ctx, "uuid = ?", uuid)
ginx.Dangerous(err)
if bp != nil {
ginx.NewRender(c).Data(bp, nil)
} else {
ginx.NewRender(c).Data(integration.BuiltinPayloadInFile.IndexData[uuid], nil)
}
}

View File

@@ -4,11 +4,11 @@ import (
"net/http"
"github.com/ccfos/nightingale/v6/models"
"github.com/ccfos/nightingale/v6/pkg/strx"
"github.com/gin-gonic/gin"
"github.com/toolkits/pkg/ginx"
"github.com/toolkits/pkg/logger"
"github.com/toolkits/pkg/str"
)
type busiGroupForm struct {
@@ -131,7 +131,7 @@ func (rt *Router) busiGroupGetsByService(c *gin.Context) {
// 这个接口只有在活跃告警页面才调用获取各个BG的活跃告警数量
func (rt *Router) busiGroupAlertingsGets(c *gin.Context) {
ids := ginx.QueryStr(c, "ids", "")
ret, err := models.AlertNumbers(rt.Ctx, str.IdsInt64(ids))
ret, err := models.AlertNumbers(rt.Ctx, strx.IdsInt64ForAPI(ids))
ginx.NewRender(c).Data(ret, err)
}
@@ -142,7 +142,7 @@ func (rt *Router) busiGroupGet(c *gin.Context) {
}
func (rt *Router) busiGroupsGetTags(c *gin.Context) {
bgids := str.IdsInt64(ginx.QueryStr(c, "gids", ""), ",")
bgids := strx.IdsInt64ForAPI(ginx.QueryStr(c, "gids", ""), ",")
targetIdents, err := models.TargetIndentsGetByBgids(rt.Ctx, bgids)
ginx.Dangerous(err)
tags, err := models.TargetGetTags(rt.Ctx, targetIdents, true, "busigroup")

View File

@@ -4,15 +4,15 @@ import (
"time"
"github.com/ccfos/nightingale/v6/models"
"github.com/ccfos/nightingale/v6/pkg/strx"
"github.com/gin-gonic/gin"
"github.com/toolkits/pkg/ginx"
"github.com/toolkits/pkg/str"
)
func (rt *Router) chartShareGets(c *gin.Context) {
ids := ginx.QueryStr(c, "ids", "")
lst, err := models.ChartShareGetsByIds(rt.Ctx, str.IdsInt64(ids, ","))
lst, err := models.ChartShareGetsByIds(rt.Ctx, strx.IdsInt64ForAPI(ids, ","))
ginx.NewRender(c).Data(lst, err)
}

View File

@@ -2,16 +2,19 @@ package router
import (
"crypto/tls"
"encoding/json"
"fmt"
"io"
"net/http"
"net/url"
"strings"
"github.com/ccfos/nightingale/v6/datasource/opensearch"
"github.com/ccfos/nightingale/v6/models"
"github.com/gin-gonic/gin"
"github.com/toolkits/pkg/ginx"
"github.com/toolkits/pkg/i18n"
"github.com/toolkits/pkg/logger"
)
@@ -57,15 +60,21 @@ func (rt *Router) datasourceBriefs(c *gin.Context) {
for _, item := range list {
item.AuthJson.BasicAuthPassword = ""
if item.PluginType != models.PROMETHEUS {
item.SettingsJson = nil
} else {
if item.PluginType == models.PROMETHEUS {
for k, v := range item.SettingsJson {
if strings.HasPrefix(k, "prometheus.") {
item.SettingsJson[strings.TrimPrefix(k, "prometheus.")] = v
delete(item.SettingsJson, k)
}
}
} else if item.PluginType == "cloudwatch" {
for k := range item.SettingsJson {
if !strings.Contains(k, "region") {
delete(item.SettingsJson, k)
}
}
} else {
item.SettingsJson = nil
}
dss = append(dss, item)
}
@@ -94,7 +103,7 @@ func (rt *Router) datasourceUpsert(c *gin.Context) {
if !req.ForceSave {
if req.PluginType == models.PROMETHEUS || req.PluginType == models.LOKI || req.PluginType == models.TDENGINE {
err = DatasourceCheck(req)
err = DatasourceCheck(c, req)
if err != nil {
Dangerous(c, err)
return
@@ -102,6 +111,48 @@ func (rt *Router) datasourceUpsert(c *gin.Context) {
}
}
for k, v := range req.SettingsJson {
if strings.Contains(k, "cluster_name") {
req.ClusterName = v.(string)
break
}
}
if req.PluginType == models.OPENSEARCH {
b, err := json.Marshal(req.SettingsJson)
if err != nil {
logger.Warningf("marshal settings fail: %v", err)
return
}
var os opensearch.OpenSearch
err = json.Unmarshal(b, &os)
if err != nil {
logger.Warningf("unmarshal settings fail: %v", err)
return
}
if len(os.Nodes) == 0 {
logger.Warningf("nodes empty, %+v", req)
return
}
req.HTTPJson = models.HTTP{
Timeout: os.Timeout,
Url: os.Nodes[0],
Headers: os.Headers,
TLS: models.TLS{
SkipTlsVerify: os.TLS.SkipTlsVerify,
},
}
req.AuthJson = models.Auth{
BasicAuth: os.Basic.Enable,
BasicAuthUser: os.Basic.Username,
BasicAuthPassword: os.Basic.Password,
}
}
if req.Id == 0 {
req.CreatedBy = username
req.Status = "enabled"
@@ -117,13 +168,13 @@ func (rt *Router) datasourceUpsert(c *gin.Context) {
}
err = req.Add(rt.Ctx)
} else {
err = req.Update(rt.Ctx, "name", "description", "cluster_name", "settings", "http", "auth", "updated_by", "updated_at", "is_default")
err = req.Update(rt.Ctx, "name", "identifier", "description", "cluster_name", "settings", "http", "auth", "updated_by", "updated_at", "is_default")
}
Render(c, nil, err)
}
func DatasourceCheck(ds models.Datasource) error {
func DatasourceCheck(c *gin.Context, ds models.Datasource) error {
if ds.PluginType == models.PROMETHEUS || ds.PluginType == models.LOKI || ds.PluginType == models.TDENGINE {
if ds.HTTPJson.Url == "" {
return fmt.Errorf("url is empty")
@@ -142,11 +193,12 @@ func DatasourceCheck(ds models.Datasource) error {
},
}
ds.HTTPJson.Url = strings.TrimRight(ds.HTTPJson.Url, "/")
var fullURL string
req, err := ds.HTTPJson.NewReq(&fullURL)
if err != nil {
logger.Errorf("Error creating request: %v", err)
return fmt.Errorf("request urls:%v failed", ds.HTTPJson.GetUrls())
return fmt.Errorf("request urls:%v failed: %v", ds.HTTPJson.GetUrls(), err)
}
if ds.PluginType == models.PROMETHEUS {
@@ -162,14 +214,14 @@ func DatasourceCheck(ds models.Datasource) error {
req, err = http.NewRequest("GET", fullURL, nil)
if err != nil {
logger.Errorf("Error creating request: %v", err)
return fmt.Errorf("request url:%s failed", fullURL)
return fmt.Errorf("request url:%s failed: %v", fullURL, err)
}
} else if ds.PluginType == models.TDENGINE {
fullURL = fmt.Sprintf("%s/rest/sql", ds.HTTPJson.Url)
req, err = http.NewRequest("POST", fullURL, strings.NewReader("show databases"))
if err != nil {
logger.Errorf("Error creating request: %v", err)
return fmt.Errorf("request url:%s failed", fullURL)
return fmt.Errorf("request url:%s failed: %v", fullURL, err)
}
}
@@ -181,7 +233,11 @@ func DatasourceCheck(ds models.Datasource) error {
req, err = http.NewRequest("GET", fullURL, nil)
if err != nil {
logger.Errorf("Error creating request: %v", err)
return fmt.Errorf("request url:%s failed", fullURL)
if !strings.Contains(ds.HTTPJson.Url, "/loki") {
lang := c.GetHeader("X-Language")
return fmt.Errorf(i18n.Sprintf(lang, "/loki suffix is miss, please add /loki to the url: %s", ds.HTTPJson.Url+"/loki"))
}
return fmt.Errorf("request url:%s failed: %v", fullURL, err)
}
}
@@ -196,12 +252,16 @@ func DatasourceCheck(ds models.Datasource) error {
resp, err := client.Do(req)
if err != nil {
logger.Errorf("Error making request: %v\n", err)
return fmt.Errorf("request url:%s failed", fullURL)
return fmt.Errorf("request url:%s failed: %v", fullURL, err)
}
defer resp.Body.Close()
if resp.StatusCode != 200 {
logger.Errorf("Error making request: %v\n", resp.StatusCode)
if resp.StatusCode == 404 && ds.PluginType == models.LOKI && !strings.Contains(ds.HTTPJson.Url, "/loki") {
lang := c.GetHeader("X-Language")
return fmt.Errorf(i18n.Sprintf(lang, "/loki suffix is miss, please add /loki to the url: %s", ds.HTTPJson.Url+"/loki"))
}
body, _ := io.ReadAll(resp.Body)
return fmt.Errorf("request url:%s failed code:%d body:%s", fullURL, resp.StatusCode, string(body))
}

View File

@@ -0,0 +1,141 @@
package router
import (
"time"
"github.com/ccfos/nightingale/v6/models"
"github.com/ccfos/nightingale/v6/pkg/ctx"
"github.com/gin-gonic/gin"
"github.com/toolkits/pkg/ginx"
)
func (rt *Router) embeddedProductGets(c *gin.Context) {
products, err := models.EmbeddedProductGets(rt.Ctx)
ginx.Dangerous(err)
// 获取当前用户可访问的Group ID 列表
me := c.MustGet("user").(*models.User)
if me.IsAdmin() {
ginx.NewRender(c).Data(products, err)
return
}
gids, err := models.MyGroupIds(rt.Ctx, me.Id)
bgSet := make(map[int64]struct{}, len(gids))
for _, id := range gids {
bgSet[id] = struct{}{}
}
// 过滤出公开或有权限访问的私有 product link
var result []*models.EmbeddedProduct
for _, product := range products {
if !product.IsPrivate {
result = append(result, product)
continue
}
for _, tid := range product.TeamIDs {
if _, ok := bgSet[tid]; ok {
result = append(result, product)
break
}
}
}
ginx.NewRender(c).Data(result, err)
}
func (rt *Router) embeddedProductGet(c *gin.Context) {
id := ginx.UrlParamInt64(c, "id")
if id <= 0 {
ginx.Bomb(400, "invalid id")
}
data, err := models.GetEmbeddedProductByID(rt.Ctx, id)
ginx.Dangerous(err)
me := c.MustGet("user").(*models.User)
hashPermission, err := hasEmbeddedProductAccess(rt.Ctx, me, data)
ginx.Dangerous(err)
if !hashPermission {
ginx.Bomb(403, "forbidden")
}
ginx.NewRender(c).Data(data, nil)
}
func (rt *Router) embeddedProductAdd(c *gin.Context) {
var eps []models.EmbeddedProduct
ginx.BindJSON(c, &eps)
me := c.MustGet("user").(*models.User)
for i := range eps {
eps[i].CreateBy = me.Nickname
eps[i].UpdateBy = me.Nickname
}
err := models.AddEmbeddedProduct(rt.Ctx, eps)
ginx.NewRender(c).Message(err)
}
func (rt *Router) embeddedProductPut(c *gin.Context) {
var ep models.EmbeddedProduct
id := ginx.UrlParamInt64(c, "id")
ginx.BindJSON(c, &ep)
if id <= 0 {
ginx.Bomb(400, "invalid id")
}
oldProduct, err := models.GetEmbeddedProductByID(rt.Ctx, id)
ginx.Dangerous(err)
me := c.MustGet("user").(*models.User)
now := time.Now().Unix()
oldProduct.Name = ep.Name
oldProduct.URL = ep.URL
oldProduct.IsPrivate = ep.IsPrivate
oldProduct.TeamIDs = ep.TeamIDs
oldProduct.UpdateBy = me.Username
oldProduct.UpdateAt = now
err = models.UpdateEmbeddedProduct(rt.Ctx, oldProduct)
ginx.NewRender(c).Message(err)
}
func (rt *Router) embeddedProductDelete(c *gin.Context) {
id := ginx.UrlParamInt64(c, "id")
if id <= 0 {
ginx.Bomb(400, "invalid id")
}
err := models.DeleteEmbeddedProduct(rt.Ctx, id)
ginx.NewRender(c).Message(err)
}
func hasEmbeddedProductAccess(ctx *ctx.Context, user *models.User, ep *models.EmbeddedProduct) (bool, error) {
if user.IsAdmin() || !ep.IsPrivate {
return true, nil
}
gids, err := models.MyGroupIds(ctx, user.Id)
if err != nil {
return false, err
}
groupSet := make(map[int64]struct{}, len(gids))
for _, gid := range gids {
groupSet[gid] = struct{}{}
}
for _, tid := range ep.TeamIDs {
if _, ok := groupSet[tid]; ok {
return true, nil
}
}
return false, nil
}

View File

@@ -0,0 +1,254 @@
package router
import (
"net/http"
"time"
"github.com/ccfos/nightingale/v6/models"
"github.com/gin-gonic/gin"
"github.com/toolkits/pkg/ginx"
"github.com/toolkits/pkg/i18n"
)
// 获取事件Pipeline列表
func (rt *Router) eventPipelinesList(c *gin.Context) {
me := c.MustGet("user").(*models.User)
pipelines, err := models.ListEventPipelines(rt.Ctx)
ginx.Dangerous(err)
allTids := make([]int64, 0)
for _, pipeline := range pipelines {
allTids = append(allTids, pipeline.TeamIds...)
}
ugMap, err := models.UserGroupIdAndNameMap(rt.Ctx, allTids)
ginx.Dangerous(err)
for _, pipeline := range pipelines {
for _, tid := range pipeline.TeamIds {
pipeline.TeamNames = append(pipeline.TeamNames, ugMap[tid])
}
}
gids, err := models.MyGroupIdsMap(rt.Ctx, me.Id)
ginx.Dangerous(err)
if me.IsAdmin() {
ginx.NewRender(c).Data(pipelines, nil)
return
}
res := make([]*models.EventPipeline, 0)
for _, pipeline := range pipelines {
for _, tid := range pipeline.TeamIds {
if _, ok := gids[tid]; ok {
res = append(res, pipeline)
break
}
}
}
ginx.NewRender(c).Data(res, nil)
}
// 获取单个事件Pipeline详情
func (rt *Router) getEventPipeline(c *gin.Context) {
me := c.MustGet("user").(*models.User)
id := ginx.UrlParamInt64(c, "id")
pipeline, err := models.GetEventPipeline(rt.Ctx, id)
ginx.Dangerous(err)
ginx.Dangerous(me.CheckGroupPermission(rt.Ctx, pipeline.TeamIds))
err = pipeline.FillTeamNames(rt.Ctx)
ginx.Dangerous(err)
ginx.NewRender(c).Data(pipeline, nil)
}
// 创建事件Pipeline
func (rt *Router) addEventPipeline(c *gin.Context) {
var pipeline models.EventPipeline
ginx.BindJSON(c, &pipeline)
user := c.MustGet("user").(*models.User)
now := time.Now().Unix()
pipeline.CreateBy = user.Username
pipeline.CreateAt = now
pipeline.UpdateAt = now
pipeline.UpdateBy = user.Username
err := pipeline.Verify()
if err != nil {
ginx.Bomb(http.StatusBadRequest, err.Error())
}
ginx.Dangerous(user.CheckGroupPermission(rt.Ctx, pipeline.TeamIds))
err = models.CreateEventPipeline(rt.Ctx, &pipeline)
ginx.NewRender(c).Message(err)
}
// 更新事件Pipeline
func (rt *Router) updateEventPipeline(c *gin.Context) {
var f models.EventPipeline
ginx.BindJSON(c, &f)
me := c.MustGet("user").(*models.User)
f.UpdateBy = me.Username
f.UpdateAt = time.Now().Unix()
pipeline, err := models.GetEventPipeline(rt.Ctx, f.ID)
if err != nil {
ginx.Bomb(http.StatusNotFound, "No such event pipeline")
}
ginx.Dangerous(me.CheckGroupPermission(rt.Ctx, pipeline.TeamIds))
ginx.NewRender(c).Message(pipeline.Update(rt.Ctx, &f))
}
// 删除事件Pipeline
func (rt *Router) deleteEventPipelines(c *gin.Context) {
var f struct {
Ids []int64 `json:"ids"`
}
ginx.BindJSON(c, &f)
if len(f.Ids) == 0 {
ginx.Bomb(http.StatusBadRequest, "ids required")
}
me := c.MustGet("user").(*models.User)
for _, id := range f.Ids {
pipeline, err := models.GetEventPipeline(rt.Ctx, id)
ginx.Dangerous(err)
ginx.Dangerous(me.CheckGroupPermission(rt.Ctx, pipeline.TeamIds))
}
err := models.DeleteEventPipelines(rt.Ctx, f.Ids)
ginx.NewRender(c).Message(err)
}
// 测试事件Pipeline
func (rt *Router) tryRunEventPipeline(c *gin.Context) {
var f struct {
EventId int64 `json:"event_id"`
PipelineConfig models.EventPipeline `json:"pipeline_config"`
}
ginx.BindJSON(c, &f)
hisEvent, err := models.AlertHisEventGetById(rt.Ctx, f.EventId)
if err != nil || hisEvent == nil {
ginx.Bomb(http.StatusBadRequest, "event not found")
}
event := hisEvent.ToCur()
lang := c.GetHeader("X-Language")
var result string
for _, p := range f.PipelineConfig.ProcessorConfigs {
processor, err := models.GetProcessorByType(p.Typ, p.Config)
if err != nil {
ginx.Bomb(http.StatusBadRequest, "get processor: %+v err: %+v", p, err)
}
event, result, err = processor.Process(rt.Ctx, event)
if err != nil {
ginx.Bomb(http.StatusBadRequest, "processor: %+v err: %+v", p, err)
}
if event == nil {
ginx.NewRender(c).Data(map[string]interface{}{
"event": event,
"result": i18n.Sprintf(lang, "event is dropped"),
}, nil)
return
}
}
m := map[string]interface{}{
"event": event,
"result": i18n.Sprintf(lang, result),
}
ginx.NewRender(c).Data(m, nil)
}
// 测试事件处理器
func (rt *Router) tryRunEventProcessor(c *gin.Context) {
var f struct {
EventId int64 `json:"event_id"`
ProcessorConfig models.ProcessorConfig `json:"processor_config"`
}
ginx.BindJSON(c, &f)
hisEvent, err := models.AlertHisEventGetById(rt.Ctx, f.EventId)
if err != nil || hisEvent == nil {
ginx.Bomb(http.StatusBadRequest, "event not found")
}
event := hisEvent.ToCur()
processor, err := models.GetProcessorByType(f.ProcessorConfig.Typ, f.ProcessorConfig.Config)
if err != nil {
ginx.Bomb(200, "get processor err: %+v", err)
}
event, res, err := processor.Process(rt.Ctx, event)
if err != nil {
ginx.Bomb(200, "processor err: %+v", err)
}
lang := c.GetHeader("X-Language")
ginx.NewRender(c).Data(map[string]interface{}{
"event": event,
"result": i18n.Sprintf(lang, res),
}, nil)
}
func (rt *Router) tryRunEventProcessorByNotifyRule(c *gin.Context) {
var f struct {
EventId int64 `json:"event_id"`
PipelineConfigs []models.PipelineConfig `json:"pipeline_configs"`
}
ginx.BindJSON(c, &f)
hisEvent, err := models.AlertHisEventGetById(rt.Ctx, f.EventId)
if err != nil || hisEvent == nil {
ginx.Bomb(http.StatusBadRequest, "event not found")
}
event := hisEvent.ToCur()
pids := make([]int64, 0)
for _, pc := range f.PipelineConfigs {
if pc.Enable {
pids = append(pids, pc.PipelineId)
}
}
pipelines, err := models.GetEventPipelinesByIds(rt.Ctx, pids)
if err != nil {
ginx.Bomb(http.StatusBadRequest, "processors not found")
}
for _, pl := range pipelines {
for _, p := range pl.ProcessorConfigs {
processor, err := models.GetProcessorByType(p.Typ, p.Config)
if err != nil {
ginx.Bomb(http.StatusBadRequest, "get processor: %+v err: %+v", p, err)
}
event, _, err := processor.Process(rt.Ctx, event)
if err != nil {
ginx.Bomb(http.StatusBadRequest, "processor: %+v err: %+v", p, err)
}
if event == nil {
lang := c.GetHeader("X-Language")
ginx.NewRender(c).Data(map[string]interface{}{
"event": event,
"result": i18n.Sprintf(lang, "event is dropped"),
}, nil)
return
}
}
}
ginx.NewRender(c).Data(event, nil)
}
func (rt *Router) eventPipelinesListByService(c *gin.Context) {
pipelines, err := models.ListEventPipelines(rt.Ctx)
ginx.NewRender(c).Data(pipelines, err)
}

View File

@@ -40,6 +40,10 @@ func (rt *Router) statistic(c *gin.Context) {
model = models.NotifyRule{}
case "notify_channel":
model = models.NotifyChannel{}
case "event_pipeline":
statistics, err = models.EventPipelineStatistics(rt.Ctx)
ginx.NewRender(c).Data(statistics, err)
return
case "datasource":
// datasource update_at is different from others
statistics, err = models.DatasourceStatistics(rt.Ctx)
@@ -169,3 +173,38 @@ func Username(c *gin.Context) string {
}
return username
}
func HasPermission(ctx *ctx.Context, c *gin.Context, sourceType, sourceId string, isAnonymousAccess bool) bool {
if sourceType == "event" && isAnonymousAccess {
return true
}
// 尝试从请求中获取 __token 参数
token := ginx.QueryStr(c, "__token", "")
// 如果有 __token 参数,验证其合法性
if token != "" {
return ValidateSourceToken(ctx, sourceType, sourceId, token)
}
return false
}
func ValidateSourceToken(ctx *ctx.Context, sourceType, sourceId, token string) bool {
if token == "" {
return false
}
// 根据源类型、源ID和令牌获取源令牌记录
sourceToken, err := models.GetSourceTokenBySource(ctx, sourceType, sourceId, token)
if err != nil {
return false
}
// 检查令牌是否过期
if sourceToken.IsExpired() {
return false
}
return true
}

View File

@@ -152,6 +152,13 @@ func (rt *Router) refreshPost(c *gin.Context) {
return
}
// 看这个 token 是否还存在 redis 中
val, err := rt.fetchAuth(c.Request.Context(), refreshUuid)
if err != nil || val == "" {
ginx.NewRender(c, http.StatusUnauthorized).Message("refresh token expired")
return
}
userIdentity, ok := claims["user_identity"].(string)
if !ok {
// Theoretically impossible

View File

@@ -10,10 +10,12 @@ import (
"github.com/ccfos/nightingale/v6/models"
"github.com/ccfos/nightingale/v6/pkg/slice"
"github.com/ccfos/nightingale/v6/pkg/strx"
"github.com/ccfos/nightingale/v6/pkg/tplx"
"github.com/gin-gonic/gin"
"github.com/google/uuid"
"github.com/toolkits/pkg/ginx"
"github.com/toolkits/pkg/str"
)
func (rt *Router) messageTemplatesAdd(c *gin.Context) {
@@ -30,9 +32,12 @@ func (rt *Router) messageTemplatesAdd(c *gin.Context) {
ginx.Dangerous(err)
now := time.Now().Unix()
for _, tpl := range lst {
// 生成一个唯一的标识符,以后也不允许修改,前端不需要传这个参数
tpl.Ident = uuid.New().String()
ginx.Dangerous(tpl.Verify())
if !isAdmin && !slice.HaveIntersection(gids, tpl.UserGroupIds) {
ginx.Bomb(http.StatusForbidden, "no permission")
ginx.Bomb(http.StatusForbidden, "forbidden")
}
idents = append(idents, tpl.Ident)
@@ -75,8 +80,8 @@ func (rt *Router) messageTemplatesDel(c *gin.Context) {
gids, err := models.MyGroupIds(rt.Ctx, me.Id)
ginx.Dangerous(err)
for _, t := range lst {
if !slice.HaveIntersection[int64](gids, t.UserGroupIds) {
ginx.Bomb(http.StatusForbidden, "no permission")
if !slice.HaveIntersection(gids, t.UserGroupIds) {
ginx.Bomb(http.StatusForbidden, "forbidden")
}
}
}
@@ -105,8 +110,8 @@ func (rt *Router) messageTemplatePut(c *gin.Context) {
if !me.IsAdmin() {
gids, err := models.MyGroupIds(rt.Ctx, me.Id)
ginx.Dangerous(err)
if !slice.HaveIntersection[int64](gids, mt.UserGroupIds) {
ginx.Bomb(http.StatusForbidden, "no permission")
if !slice.HaveIntersection(gids, mt.UserGroupIds) {
ginx.Bomb(http.StatusForbidden, "forbidden")
}
}
@@ -125,8 +130,8 @@ func (rt *Router) messageTemplateGet(c *gin.Context) {
if mt == nil {
ginx.Bomb(http.StatusNotFound, "message template not found")
}
if mt.Private == 1 && !slice.HaveIntersection[int64](gids, mt.UserGroupIds) {
ginx.Bomb(http.StatusForbidden, "no permission")
if mt.Private == 1 && !slice.HaveIntersection(gids, mt.UserGroupIds) {
ginx.Bomb(http.StatusForbidden, "forbidden")
}
ginx.NewRender(c).Data(mt, nil)
@@ -137,7 +142,7 @@ func (rt *Router) messageTemplatesGet(c *gin.Context) {
if tmp := ginx.QueryStr(c, "notify_channel_idents", ""); tmp != "" {
notifyChannelIdents = strings.Split(tmp, ",")
}
notifyChannelIds := str.IdsInt64(ginx.QueryStr(c, "notify_channel_ids", ""))
notifyChannelIds := strx.IdsInt64ForAPI(ginx.QueryStr(c, "notify_channel_ids", ""))
if len(notifyChannelIds) > 0 {
ginx.Dangerous(models.DB(rt.Ctx).Model(models.NotifyChannelConfig{}).
Where("id in (?)", notifyChannelIds).Pluck("ident", &notifyChannelIdents).Error)
@@ -188,10 +193,9 @@ func (rt *Router) eventsMessage(c *gin.Context) {
events[i] = he.ToCur()
}
var defs = []string{
"{{$events := .}}",
"{{$event := index . 0}}",
}
renderData := make(map[string]interface{})
renderData["events"] = events
defs := models.GetDefs(renderData)
ret := make(map[string]string, len(req.Tpl.Content))
for k, v := range req.Tpl.Content {
text := strings.Join(append(defs, v), "")
@@ -202,7 +206,7 @@ func (rt *Router) eventsMessage(c *gin.Context) {
}
var buf bytes.Buffer
err = tpl.Execute(&buf, events)
err = tpl.Execute(&buf, renderData)
if err != nil {
ret[k] = err.Error()
continue

View File

@@ -6,23 +6,27 @@ import (
"time"
"github.com/ccfos/nightingale/v6/alert/common"
"github.com/ccfos/nightingale/v6/alert/mute"
"github.com/ccfos/nightingale/v6/models"
"github.com/ccfos/nightingale/v6/pkg/strx"
"github.com/gin-gonic/gin"
"github.com/toolkits/pkg/ginx"
"github.com/toolkits/pkg/str"
"github.com/toolkits/pkg/i18n"
)
// Return all, front-end search and paging
func (rt *Router) alertMuteGetsByBG(c *gin.Context) {
bgid := ginx.UrlParamInt64(c, "id")
lst, err := models.AlertMuteGetsByBG(rt.Ctx, bgid)
prods := strings.Fields(ginx.QueryStr(c, "prods", ""))
query := ginx.QueryStr(c, "query", "")
lst, err := models.AlertMuteGets(rt.Ctx, prods, bgid, -1, query)
ginx.NewRender(c).Data(lst, err)
}
func (rt *Router) alertMuteGetsByGids(c *gin.Context) {
gids := str.IdsInt64(ginx.QueryStr(c, "gids", ""), ",")
gids := strx.IdsInt64ForAPI(ginx.QueryStr(c, "gids", ""), ",")
if len(gids) > 0 {
for _, gid := range gids {
rt.bgroCheck(c, gid)
@@ -63,10 +67,58 @@ func (rt *Router) alertMuteAdd(c *gin.Context) {
username := c.MustGet("username").(string)
f.CreateBy = username
f.UpdateBy = username
f.GroupId = ginx.UrlParamInt64(c, "id")
ginx.NewRender(c).Message(f.Add(rt.Ctx))
}
type MuteTestForm struct {
EventId int64 `json:"event_id" binding:"required"`
AlertMute models.AlertMute `json:"config" binding:"required"`
PassTimeCheck bool `json:"pass_time_check"`
}
func (rt *Router) alertMuteTryRun(c *gin.Context) {
var f MuteTestForm
ginx.BindJSON(c, &f)
ginx.Dangerous(f.AlertMute.Verify())
hisEvent, err := models.AlertHisEventGetById(rt.Ctx, f.EventId)
ginx.Dangerous(err)
if hisEvent == nil {
ginx.Bomb(http.StatusNotFound, "event not found")
}
curEvent := *hisEvent.ToCur()
curEvent.SetTagsMap()
if f.PassTimeCheck {
f.AlertMute.MuteTimeType = models.Periodic
f.AlertMute.PeriodicMutesJson = []models.PeriodicMute{
{
EnableDaysOfWeek: "0 1 2 3 4 5 6",
EnableStime: "00:00",
EnableEtime: "00:00",
},
}
}
match, err := mute.MatchMute(&curEvent, &f.AlertMute)
if err != nil {
// 对错误信息进行 i18n 翻译
translatedErr := i18n.Sprintf(c.GetHeader("X-Language"), err.Error())
ginx.Bomb(http.StatusBadRequest, translatedErr)
}
if !match {
ginx.NewRender(c).Data("event not match mute", nil)
return
}
ginx.NewRender(c).Data("event match mute", nil)
}
// Preview events (alert_cur_event) that match the mute strategy based on the following criteria:
// business group ID (group_id, group_id), product (prod, rule_prod),
// alert event severity (severities, severity), and event tags (tags, tags).

View File

@@ -9,6 +9,7 @@ import (
"strings"
"time"
"github.com/ccfos/nightingale/v6/center/cstats"
"github.com/ccfos/nightingale/v6/models"
"github.com/gin-gonic/gin"
@@ -335,6 +336,12 @@ func (rt *Router) extractTokenMetadata(r *http.Request) (*AccessDetails, error)
return nil, errors.New("failed to parse access_uuid from jwt")
}
// accessUuid 在 redis 里存在才放行
val, err := rt.fetchAuth(r.Context(), accessUuid)
if err != nil || val == "" {
return nil, errors.New("unauthorized")
}
return &AccessDetails{
AccessUuid: accessUuid,
UserIdentity: claims["user_identity"].(string),
@@ -355,29 +362,72 @@ func (rt *Router) extractToken(r *http.Request) string {
}
func (rt *Router) createAuth(ctx context.Context, userIdentity string, td *TokenDetails) error {
username := strings.Split(userIdentity, "-")[1]
// 如果只能有一个账号登录,那么就删除之前的 token
if rt.HTTP.JWTAuth.SingleLogin {
delKeys, err := rt.Redis.SMembers(ctx, rt.wrapJwtKey(username)).Result()
if err != nil {
return err
}
if len(delKeys) > 0 {
errDel := rt.Redis.Del(ctx, delKeys...).Err()
if errDel != nil {
return errDel
}
}
if errDel := rt.Redis.Del(ctx, rt.wrapJwtKey(username)).Err(); errDel != nil {
return errDel
}
}
at := time.Unix(td.AtExpires, 0)
rte := time.Unix(td.RtExpires, 0)
now := time.Now()
errAccess := rt.Redis.Set(ctx, rt.wrapJwtKey(td.AccessUuid), userIdentity, at.Sub(now)).Err()
if errAccess != nil {
return errAccess
if err := rt.Redis.Set(ctx, rt.wrapJwtKey(td.AccessUuid), userIdentity, at.Sub(now)).Err(); err != nil {
cstats.RedisOperationLatency.WithLabelValues("set_token", "fail").Observe(time.Since(now).Seconds())
return err
}
errRefresh := rt.Redis.Set(ctx, rt.wrapJwtKey(td.RefreshUuid), userIdentity, rte.Sub(now)).Err()
if errRefresh != nil {
return errRefresh
if err := rt.Redis.Set(ctx, rt.wrapJwtKey(td.RefreshUuid), userIdentity, rte.Sub(now)).Err(); err != nil {
cstats.RedisOperationLatency.WithLabelValues("set_token", "fail").Observe(time.Since(now).Seconds())
return err
}
cstats.RedisOperationLatency.WithLabelValues("set_token", "success").Observe(time.Since(now).Seconds())
if rt.HTTP.JWTAuth.SingleLogin {
if err := rt.Redis.SAdd(ctx, rt.wrapJwtKey(username), rt.wrapJwtKey(td.AccessUuid), rt.wrapJwtKey(td.RefreshUuid)).Err(); err != nil {
return err
}
}
return nil
}
func (rt *Router) fetchAuth(ctx context.Context, givenUuid string) (string, error) {
return rt.Redis.Get(ctx, rt.wrapJwtKey(givenUuid)).Result()
now := time.Now()
ret, err := rt.Redis.Get(ctx, rt.wrapJwtKey(givenUuid)).Result()
if err != nil {
cstats.RedisOperationLatency.WithLabelValues("get_token", "fail").Observe(time.Since(now).Seconds())
} else {
cstats.RedisOperationLatency.WithLabelValues("get_token", "success").Observe(time.Since(now).Seconds())
}
return ret, err
}
func (rt *Router) deleteAuth(ctx context.Context, givenUuid string) error {
return rt.Redis.Del(ctx, rt.wrapJwtKey(givenUuid)).Err()
err := rt.Redis.Del(ctx, rt.wrapJwtKey(givenUuid)).Err()
if err != nil {
cstats.RedisOperationLatency.WithLabelValues("del_token", "fail").Observe(time.Since(time.Now()).Seconds())
} else {
cstats.RedisOperationLatency.WithLabelValues("del_token", "success").Observe(time.Since(time.Now()).Seconds())
}
return err
}
func (rt *Router) deleteTokens(ctx context.Context, authD *AccessDetails) error {

View File

@@ -17,9 +17,6 @@ import (
func (rt *Router) notifyChannelsAdd(c *gin.Context) {
me := c.MustGet("user").(*models.User)
if !me.IsAdmin() {
ginx.Bomb(http.StatusForbidden, "no permission")
}
var lst []*models.NotifyChannelConfig
ginx.BindJSON(c, &lst)
@@ -55,11 +52,6 @@ func (rt *Router) notifyChannelsAdd(c *gin.Context) {
}
func (rt *Router) notifyChannelsDel(c *gin.Context) {
me := c.MustGet("user").(*models.User)
if !me.IsAdmin() {
ginx.Bomb(http.StatusForbidden, "no permission")
}
var f idsForm
ginx.BindJSON(c, &f)
f.Verify()
@@ -79,9 +71,6 @@ func (rt *Router) notifyChannelsDel(c *gin.Context) {
func (rt *Router) notifyChannelPut(c *gin.Context) {
me := c.MustGet("user").(*models.User)
if !me.IsAdmin() {
ginx.Bomb(http.StatusForbidden, "no permission")
}
var f models.NotifyChannelConfig
ginx.BindJSON(c, &f)

View File

@@ -6,11 +6,12 @@ import (
"time"
"github.com/ccfos/nightingale/v6/alert/dispatch"
"github.com/ccfos/nightingale/v6/memsto"
"github.com/ccfos/nightingale/v6/models"
"github.com/ccfos/nightingale/v6/pkg/ctx"
"github.com/ccfos/nightingale/v6/pkg/slice"
"github.com/gin-gonic/gin"
"github.com/pkg/errors"
"github.com/toolkits/pkg/ginx"
"github.com/toolkits/pkg/logger"
)
@@ -31,7 +32,7 @@ func (rt *Router) notifyRulesAdd(c *gin.Context) {
for _, nr := range lst {
ginx.Dangerous(nr.Verify())
if !isAdmin && !slice.HaveIntersection(gids, nr.UserGroupIds) {
ginx.Bomb(http.StatusForbidden, "no permission")
ginx.Bomb(http.StatusForbidden, "forbidden")
}
nr.CreateBy = me.Username
@@ -56,8 +57,8 @@ func (rt *Router) notifyRulesDel(c *gin.Context) {
gids, err := models.MyGroupIds(rt.Ctx, me.Id)
ginx.Dangerous(err)
for _, t := range lst {
if !slice.HaveIntersection[int64](gids, t.UserGroupIds) {
ginx.Bomb(http.StatusForbidden, "no permission")
if !slice.HaveIntersection(gids, t.UserGroupIds) {
ginx.Bomb(http.StatusForbidden, "forbidden")
}
}
}
@@ -79,8 +80,8 @@ func (rt *Router) notifyRulePut(c *gin.Context) {
me := c.MustGet("user").(*models.User)
gids, err := models.MyGroupIds(rt.Ctx, me.Id)
ginx.Dangerous(err)
if !slice.HaveIntersection[int64](gids, nr.UserGroupIds) && !me.IsAdmin() {
ginx.Bomb(http.StatusForbidden, "no permission")
if !slice.HaveIntersection(gids, nr.UserGroupIds) && !me.IsAdmin() {
ginx.Bomb(http.StatusForbidden, "forbidden")
}
f.UpdateBy = me.Username
@@ -99,8 +100,8 @@ func (rt *Router) notifyRuleGet(c *gin.Context) {
ginx.Bomb(http.StatusNotFound, "notify rule not found")
}
if !slice.HaveIntersection[int64](gids, nr.UserGroupIds) && !me.IsAdmin() {
ginx.Bomb(http.StatusForbidden, "no permission")
if !slice.HaveIntersection(gids, nr.UserGroupIds) && !me.IsAdmin() {
ginx.Bomb(http.StatusForbidden, "forbidden")
}
ginx.NewRender(c).Data(nr, nil)
@@ -152,100 +153,117 @@ func (rt *Router) notifyTest(c *gin.Context) {
for _, he := range hisEvents {
event := he.ToCur()
event.SetTagsMap()
if dispatch.NotifyRuleApplicable(&f.NotifyConfig, event) {
events = append(events, event)
if err := dispatch.NotifyRuleMatchCheck(&f.NotifyConfig, event); err != nil {
ginx.Bomb(http.StatusBadRequest, err.Error())
}
events = append(events, event)
}
if len(events) == 0 {
ginx.Bomb(http.StatusBadRequest, "not events applicable")
resp, err := SendNotifyChannelMessage(rt.Ctx, rt.UserCache, rt.UserGroupCache, f.NotifyConfig, events)
if resp == "" {
resp = "success"
}
ginx.NewRender(c).Data(resp, err)
}
func SendNotifyChannelMessage(ctx *ctx.Context, userCache *memsto.UserCacheType, userGroup *memsto.UserGroupCacheType, notifyConfig models.NotifyConfig, events []*models.AlertCurEvent) (string, error) {
notifyChannels, err := models.NotifyChannelGets(ctx, notifyConfig.ChannelID, "", "", -1)
if err != nil {
return "", fmt.Errorf("failed to get notify channels: %v", err)
}
notifyChannels, err := models.NotifyChannelGets(rt.Ctx, f.NotifyConfig.ChannelID, "", "", -1)
ginx.Dangerous(err)
if len(notifyChannels) == 0 {
ginx.Bomb(http.StatusBadRequest, "notify channel not found")
return "", fmt.Errorf("notify channel not found")
}
notifyChannel := notifyChannels[0]
if !notifyChannel.Enable {
ginx.Bomb(http.StatusBadRequest, "notify channel not enabled, please enable it first")
return "", fmt.Errorf("notify channel not enabled, please enable it first")
}
tplContent := make(map[string]interface{})
if notifyChannel.RequestType != "flashtudy" {
messageTemplates, err := models.MessageTemplateGets(rt.Ctx, f.NotifyConfig.TemplateID, "", "")
ginx.Dangerous(err)
if notifyChannel.RequestType != "flashduty" {
messageTemplates, err := models.MessageTemplateGets(ctx, notifyConfig.TemplateID, "", "")
if err != nil {
return "", fmt.Errorf("failed to get message templates: %v", err)
}
if len(messageTemplates) == 0 {
ginx.Bomb(http.StatusBadRequest, "message template not found")
return "", fmt.Errorf("message template not found")
}
tplContent = messageTemplates[0].RenderEvent(events)
}
var contactKey string
if notifyChannel.ParamConfig != nil && notifyChannel.ParamConfig.UserInfo != nil {
contactKey = notifyChannel.ParamConfig.UserInfo.ContactKey
}
sendtos, flashDutyChannelIDs, customParams := dispatch.GetNotifyConfigParams(&f.NotifyConfig, contactKey, rt.UserCache, rt.UserGroupCache)
sendtos, flashDutyChannelIDs, customParams := dispatch.GetNotifyConfigParams(&notifyConfig, contactKey, userCache, userGroup)
var resp string
switch notifyChannel.RequestType {
case "flashduty":
client, err := models.GetHTTPClient(notifyChannel)
ginx.Dangerous(err)
if err != nil {
return "", fmt.Errorf("failed to get http client: %v", err)
}
for i := range flashDutyChannelIDs {
resp, err = notifyChannel.SendFlashDuty(events, flashDutyChannelIDs[i], client)
if err != nil {
break
return "", fmt.Errorf("failed to send flashduty notify: %v", err)
}
}
logger.Infof("channel_name: %v, event:%+v, tplContent:%s, customParams:%v, respBody: %v, err: %v", notifyChannel.Name, events[0], tplContent, customParams, resp, err)
ginx.NewRender(c).Data(resp, err)
return resp, nil
case "http":
client, err := models.GetHTTPClient(notifyChannel)
ginx.Dangerous(err)
if err != nil {
return "", fmt.Errorf("failed to get http client: %v", err)
}
if notifyChannel.RequestConfig == nil {
ginx.Bomb(http.StatusBadRequest, "request config not found")
return "", fmt.Errorf("request config is nil")
}
if notifyChannel.RequestConfig.HTTPRequestConfig == nil {
ginx.Bomb(http.StatusBadRequest, "http request config not found")
return "", fmt.Errorf("http request config is nil")
}
if dispatch.NeedBatchContacts(notifyChannel.RequestConfig.HTTPRequestConfig) || len(sendtos) == 0 {
resp, err = notifyChannel.SendHTTP(events, tplContent, customParams, sendtos, client)
logger.Infof("channel_name: %v, event:%+v, sendtos:%+v, tplContent:%s, customParams:%v, respBody: %v, err: %v", notifyChannel.Name, events[0], sendtos, tplContent, customParams, resp, err)
if err != nil {
logger.Errorf("failed to send http notify: %v", err)
return "", fmt.Errorf("failed to send http notify: %v", err)
}
ginx.NewRender(c).Data(resp, err)
return resp, nil
} else {
for i := range sendtos {
resp, err = notifyChannel.SendHTTP(events, tplContent, customParams, []string{sendtos[i]}, client)
logger.Infof("channel_name: %v, event:%+v, tplContent:%s, customParams:%v, sendto:%+v, respBody: %v, err: %v", notifyChannel.Name, events[0], tplContent, customParams, sendtos[i], resp, err)
if err != nil {
logger.Errorf("failed to send http notify: %v", err)
ginx.NewRender(c).Message(err)
return
return "", fmt.Errorf("failed to send http notify: %v", err)
}
}
ginx.NewRender(c).Message(err)
return resp, nil
}
case "smtp":
if len(sendtos) == 0 {
return "", fmt.Errorf("no valid email address in the user and team")
}
err := notifyChannel.SendEmailNow(events, tplContent, sendtos)
ginx.NewRender(c).Message(err)
if err != nil {
return "", fmt.Errorf("failed to send email notify: %v", err)
}
return resp, nil
case "script":
resp, _, err := notifyChannel.SendScript(events, tplContent, customParams, sendtos)
logger.Infof("channel_name: %v, event:%+v, tplContent:%s, customParams:%v, respBody: %v, err: %v", notifyChannel.Name, events[0], tplContent, customParams, resp, err)
ginx.NewRender(c).Data(resp, err)
return resp, err
default:
logger.Errorf("unsupported request type: %v", notifyChannel.RequestType)
ginx.NewRender(c).Message(errors.New("unsupported request type"))
return "", fmt.Errorf("unsupported request type")
}
}

View File

@@ -45,7 +45,7 @@ func (rt *Router) notifyTplUpdateContent(c *gin.Context) {
ginx.Dangerous(err)
if notifyTpl.CreateBy != user.Username && !user.IsAdmin() {
ginx.Bomb(403, "no permission")
ginx.Bomb(403, "forbidden")
}
f.UpdateAt = time.Now().Unix()
@@ -64,7 +64,7 @@ func (rt *Router) notifyTplUpdate(c *gin.Context) {
ginx.Dangerous(err)
if notifyTpl.CreateBy != user.Username && !user.IsAdmin() {
ginx.Bomb(403, "no permission")
ginx.Bomb(403, "forbidden")
}
// get the count of the same channel and name but different id
@@ -188,7 +188,7 @@ func (rt *Router) notifyTplDel(c *gin.Context) {
ginx.Dangerous(err)
if notifyTpl.CreateBy != user.Username && !user.IsAdmin() {
ginx.Bomb(403, "no permission")
ginx.Bomb(403, "forbidden")
}
ginx.NewRender(c).Message(f.NotifyTplDelete(rt.Ctx, id))

View File

@@ -0,0 +1,58 @@
package router
import (
"github.com/ccfos/nightingale/v6/datasource/opensearch"
"github.com/ccfos/nightingale/v6/dscache"
"github.com/gin-gonic/gin"
"github.com/toolkits/pkg/ginx"
"github.com/toolkits/pkg/logger"
)
func (rt *Router) QueryOSIndices(c *gin.Context) {
var f IndexReq
ginx.BindJSON(c, &f)
plug, exists := dscache.DsCache.Get(f.Cate, f.DatasourceId)
if !exists {
logger.Warningf("cluster:%d not exists", f.DatasourceId)
ginx.Bomb(200, "cluster not exists")
}
indices, err := plug.(*opensearch.OpenSearch).QueryIndices()
ginx.Dangerous(err)
ginx.NewRender(c).Data(indices, nil)
}
func (rt *Router) QueryOSFields(c *gin.Context) {
var f IndexReq
ginx.BindJSON(c, &f)
plug, exists := dscache.DsCache.Get(f.Cate, f.DatasourceId)
if !exists {
logger.Warningf("cluster:%d not exists", f.DatasourceId)
ginx.Bomb(200, "cluster not exists")
}
fields, err := plug.(*opensearch.OpenSearch).QueryFields([]string{f.Index})
ginx.Dangerous(err)
ginx.NewRender(c).Data(fields, nil)
}
func (rt *Router) QueryOSVariable(c *gin.Context) {
var f FieldValueReq
ginx.BindJSON(c, &f)
plug, exists := dscache.DsCache.Get(f.Cate, f.DatasourceId)
if !exists {
logger.Warningf("cluster:%d not exists", f.DatasourceId)
ginx.Bomb(200, "cluster not exists")
}
fields, err := plug.(*opensearch.OpenSearch).QueryFieldValue([]string{f.Index}, f.Query.Field, f.Query.Query)
ginx.Dangerous(err)
ginx.NewRender(c).Data(fields, nil)
}

View File

@@ -7,16 +7,20 @@ import (
"net"
"net/http"
"net/http/httputil"
"regexp"
"strconv"
"strings"
"sync"
"time"
"github.com/ccfos/nightingale/v6/pkg/poster"
pkgprom "github.com/ccfos/nightingale/v6/pkg/prom"
"github.com/ccfos/nightingale/v6/prom"
"github.com/gin-gonic/gin"
"github.com/prometheus/common/model"
"github.com/toolkits/pkg/ginx"
"github.com/toolkits/pkg/logger"
"github.com/toolkits/pkg/net/httplib"
)
type QueryFormItem struct {
@@ -144,6 +148,8 @@ func (rt *Router) dsProxy(c *gin.Context) {
if ds.AuthJson.BasicAuthUser != "" {
req.SetBasicAuth(ds.AuthJson.BasicAuthUser, ds.AuthJson.BasicAuthPassword)
} else {
req.Header.Del("Authorization")
}
headerCount := len(ds.HTTPJson.Headers)
@@ -235,3 +241,94 @@ func transportPut(dsid, updatedat int64, tran http.RoundTripper) {
updatedAts[dsid] = updatedat
transportsLock.Unlock()
}
const (
DatasourceTypePrometheus = "Prometheus"
DatasourceTypeVictoriaMetrics = "VictoriaMetrics"
)
type deleteDatasourceSeriesForm struct {
DatasourceID int64 `json:"datasource_id"`
Match []string `json:"match"`
Start string `json:"start"`
End string `json:"end"`
}
func (rt *Router) deleteDatasourceSeries(c *gin.Context) {
var ddsf deleteDatasourceSeriesForm
ginx.BindJSON(c, &ddsf)
ds := rt.DatasourceCache.GetById(ddsf.DatasourceID)
if ds == nil {
ginx.Bomb(http.StatusBadRequest, "no such datasource")
return
}
// Get datasource type, now only support prometheus and victoriametrics
datasourceType, ok := ds.SettingsJson["prometheus.tsdb_type"]
if !ok {
ginx.Bomb(http.StatusBadRequest, "datasource type not found, please check your datasource settings")
return
}
target, err := ds.HTTPJson.ParseUrl()
if err != nil {
ginx.Bomb(http.StatusInternalServerError, "invalid urls: %s", ds.HTTPJson.GetUrls())
return
}
timeout := time.Duration(ds.HTTPJson.DialTimeout) * time.Millisecond
matchQuerys := make([]string, 0)
for _, match := range ddsf.Match {
matchQuerys = append(matchQuerys, fmt.Sprintf("match[]=%s", match))
}
matchQuery := strings.Join(matchQuerys, "&")
switch datasourceType {
case DatasourceTypePrometheus:
// Prometheus delete api need POST method
// https://prometheus.io/docs/prometheus/latest/querying/api/#delete-series
url := fmt.Sprintf("http://%s/api/v1/admin/tsdb/delete_series?%s&start=%s&end=%s", target.Host, matchQuery, ddsf.Start, ddsf.End)
go func() {
resp, _, err := poster.PostJSON(url, timeout, nil)
if err != nil {
logger.Errorf("delete series error datasource_id: %d, datasource_name: %s, match: %s, start: %s, end: %s, err: %v",
ddsf.DatasourceID, ds.Name, ddsf.Match, ddsf.Start, ddsf.End, err)
return
}
logger.Infof("delete datasource series datasource_id: %d, datasource_name: %s, match: %s, start: %s, end: %s, respBody: %s",
ddsf.DatasourceID, ds.Name, ddsf.Match, ddsf.Start, ddsf.End, string(resp))
}()
case DatasourceTypeVictoriaMetrics:
// Delete API doesnt support the deletion of specific time ranges.
// Refer: https://docs.victoriametrics.com/victoriametrics/single-server-victoriametrics/#how-to-delete-time-series
var url string
// Check VictoriaMetrics is single node or cluster
// Cluster will have /select/<accountID>/prometheus pattern
re := regexp.MustCompile(`/select/(\d+)/prometheus`)
matches := re.FindStringSubmatch(ds.HTTPJson.Url)
if len(matches) > 0 && matches[1] != "" {
accountID, err := strconv.Atoi(matches[1])
if err != nil {
ginx.Bomb(http.StatusInternalServerError, "invalid accountID: %s", matches[1])
}
url = fmt.Sprintf("http://%s/delete/%d/prometheus/api/v1/admin/tsdb/delete_series?%s", target.Host, accountID, matchQuery)
} else {
url = fmt.Sprintf("http://%s/api/v1/admin/tsdb/delete_series?%s", target.Host, matchQuery)
}
go func() {
resp, err := httplib.Get(url).SetTimeout(timeout).Response()
if err != nil {
logger.Errorf("delete series failed | datasource_id: %d, datasource_name: %s, match: %s, start: %s, end: %s, err: %v",
ddsf.DatasourceID, ds.Name, ddsf.Match, ddsf.Start, ddsf.End, err)
return
}
logger.Infof("sending delete series request | datasource_id: %d, datasource_name: %s, match: %s, start: %s, end: %s, respBody: %s",
ddsf.DatasourceID, ds.Name, ddsf.Match, ddsf.Start, ddsf.End, resp.Body)
}()
default:
ginx.Bomb(http.StatusBadRequest, "not support delete series yet")
}
ginx.NewRender(c).Data(nil, nil)
}

View File

@@ -3,6 +3,7 @@ package router
import (
"fmt"
"sort"
"sync"
"github.com/ccfos/nightingale/v6/dscache"
"github.com/ccfos/nightingale/v6/models"
@@ -38,71 +39,116 @@ type LogResp struct {
List []interface{} `json:"list"`
}
func (rt *Router) QueryLogBatch(c *gin.Context) {
var f QueryFrom
ginx.BindJSON(c, &f)
func QueryLogBatchConcurrently(anonymousAccess bool, ctx *gin.Context, f QueryFrom) (LogResp, error) {
var resp LogResp
var errMsg string
var mu sync.Mutex
var wg sync.WaitGroup
var errs []error
for _, q := range f.Queries {
if !rt.Center.AnonymousAccess.PromQuerier && !CheckDsPerm(c, q.Did, q.DsCate, q) {
ginx.Bomb(200, "no permission")
if !anonymousAccess && !CheckDsPerm(ctx, q.Did, q.DsCate, q) {
return LogResp{}, fmt.Errorf("forbidden")
}
plug, exists := dscache.DsCache.Get(q.DsCate, q.Did)
if !exists {
logger.Warningf("cluster:%d not exists query:%+v", q.Did, q)
ginx.Bomb(200, "cluster not exists")
return LogResp{}, fmt.Errorf("cluster not exists")
}
data, total, err := plug.QueryLog(c.Request.Context(), q.Query)
if err != nil {
errMsg += fmt.Sprintf("query data error: %v query:%v\n ", err, q)
logger.Warningf("query data error: %v query:%v", err, q)
continue
}
wg.Add(1)
go func(query Query) {
defer wg.Done()
m := make(map[string]interface{})
m["ref"] = q.Ref
m["ds_id"] = q.Did
m["ds_cate"] = q.DsCate
m["data"] = data
resp.List = append(resp.List, m)
resp.Total += total
data, total, err := plug.QueryLog(ctx.Request.Context(), query.Query)
mu.Lock()
defer mu.Unlock()
if err != nil {
errMsg := fmt.Sprintf("query data error: %v query:%v\n ", err, query)
logger.Warningf(errMsg)
errs = append(errs, err)
return
}
m := make(map[string]interface{})
m["ref"] = query.Ref
m["ds_id"] = query.Did
m["ds_cate"] = query.DsCate
m["data"] = data
resp.List = append(resp.List, m)
resp.Total += total
}(q)
}
if errMsg != "" || len(resp.List) == 0 {
ginx.Bomb(200, errMsg)
wg.Wait()
if len(errs) > 0 {
return LogResp{}, errs[0]
}
if len(resp.List) == 0 {
return LogResp{}, fmt.Errorf("no data")
}
return resp, nil
}
func (rt *Router) QueryLogBatch(c *gin.Context) {
var f QueryFrom
ginx.BindJSON(c, &f)
resp, err := QueryLogBatchConcurrently(rt.Center.AnonymousAccess.PromQuerier, c, f)
if err != nil {
ginx.Bomb(200, "err:%v", err)
}
ginx.NewRender(c).Data(resp, nil)
}
func (rt *Router) QueryData(c *gin.Context) {
var f models.QueryParam
ginx.BindJSON(c, &f)
func QueryDataConcurrently(anonymousAccess bool, ctx *gin.Context, f models.QueryParam) ([]models.DataResp, error) {
var resp []models.DataResp
var err error
var mu sync.Mutex
var wg sync.WaitGroup
var errs []error
for _, q := range f.Querys {
if !rt.Center.AnonymousAccess.PromQuerier && !CheckDsPerm(c, f.DatasourceId, f.Cate, q) {
ginx.Bomb(403, "no permission")
if !anonymousAccess && !CheckDsPerm(ctx, f.DatasourceId, f.Cate, q) {
return nil, fmt.Errorf("forbidden")
}
plug, exists := dscache.DsCache.Get(f.Cate, f.DatasourceId)
if !exists {
logger.Warningf("cluster:%d not exists", f.DatasourceId)
ginx.Bomb(200, "cluster not exists")
return nil, fmt.Errorf("cluster not exists")
}
var datas []models.DataResp
datas, err = plug.QueryData(c.Request.Context(), q)
if err != nil {
logger.Warningf("query data error: req:%+v err:%v", q, err)
ginx.Bomb(200, "err:%v", err)
}
logger.Debugf("query data: req:%+v resp:%+v", q, datas)
resp = append(resp, datas...)
wg.Add(1)
go func(query interface{}) {
defer wg.Done()
datas, err := plug.QueryData(ctx.Request.Context(), query)
if err != nil {
logger.Warningf("query data error: req:%+v err:%v", query, err)
mu.Lock()
errs = append(errs, err)
mu.Unlock()
return
}
logger.Debugf("query data: req:%+v resp:%+v", query, datas)
mu.Lock()
resp = append(resp, datas...)
mu.Unlock()
}(q)
}
wg.Wait()
if len(errs) > 0 {
return nil, errs[0]
}
// 面向API的统一处理
// 按照 .Metric 排序
// 确保仪表盘中相同图例的曲线颜色相同
@@ -115,41 +161,80 @@ func (rt *Router) QueryData(c *gin.Context) {
})
}
ginx.NewRender(c).Data(resp, err)
return resp, nil
}
func (rt *Router) QueryData(c *gin.Context) {
var f models.QueryParam
ginx.BindJSON(c, &f)
resp, err := QueryDataConcurrently(rt.Center.AnonymousAccess.PromQuerier, c, f)
if err != nil {
ginx.Bomb(200, "err:%v", err)
}
ginx.NewRender(c).Data(resp, nil)
}
// QueryLogConcurrently 并发查询日志
func QueryLogConcurrently(anonymousAccess bool, ctx *gin.Context, f models.QueryParam) (LogResp, error) {
var resp LogResp
var mu sync.Mutex
var wg sync.WaitGroup
var errs []error
for _, q := range f.Querys {
if !anonymousAccess && !CheckDsPerm(ctx, f.DatasourceId, f.Cate, q) {
return LogResp{}, fmt.Errorf("forbidden")
}
plug, exists := dscache.DsCache.Get(f.Cate, f.DatasourceId)
if !exists {
logger.Warningf("cluster:%d not exists query:%+v", f.DatasourceId, f)
return LogResp{}, fmt.Errorf("cluster not exists")
}
wg.Add(1)
go func(query interface{}) {
defer wg.Done()
data, total, err := plug.QueryLog(ctx.Request.Context(), query)
logger.Debugf("query log: req:%+v resp:%+v", query, data)
if err != nil {
errMsg := fmt.Sprintf("query data error: %v query:%v\n ", err, query)
logger.Warningf(errMsg)
mu.Lock()
errs = append(errs, err)
mu.Unlock()
return
}
mu.Lock()
resp.List = append(resp.List, data...)
resp.Total += total
mu.Unlock()
}(q)
}
wg.Wait()
if len(errs) > 0 {
return LogResp{}, errs[0]
}
if len(resp.List) == 0 {
return LogResp{}, fmt.Errorf("no data")
}
return resp, nil
}
func (rt *Router) QueryLogV2(c *gin.Context) {
var f models.QueryParam
ginx.BindJSON(c, &f)
var resp LogResp
var errMsg string
for _, q := range f.Querys {
if !rt.Center.AnonymousAccess.PromQuerier && !CheckDsPerm(c, f.DatasourceId, f.Cate, q) {
ginx.Bomb(200, "no permission")
}
plug, exists := dscache.DsCache.Get(f.Cate, f.DatasourceId)
if !exists {
logger.Warningf("cluster:%d not exists query:%+v", f.DatasourceId, f)
ginx.Bomb(200, "cluster not exists")
}
data, total, err := plug.QueryLog(c.Request.Context(), q)
if err != nil {
errMsg += fmt.Sprintf("query data error: %v query:%v\n ", err, q)
logger.Warningf("query data error: %v query:%v", err, q)
continue
}
resp.List = append(resp.List, data...)
resp.Total += total
}
if errMsg != "" || len(resp.List) == 0 {
ginx.Bomb(200, errMsg)
}
ginx.NewRender(c).Data(resp, nil)
resp, err := QueryLogConcurrently(rt.Center.AnonymousAccess.PromQuerier, c, f)
ginx.NewRender(c).Data(resp, err)
}
func (rt *Router) QueryLog(c *gin.Context) {
@@ -159,7 +244,7 @@ func (rt *Router) QueryLog(c *gin.Context) {
var resp []interface{}
for _, q := range f.Querys {
if !rt.Center.AnonymousAccess.PromQuerier && !CheckDsPerm(c, f.DatasourceId, f.Cate, q) {
ginx.Bomb(200, "no permission")
ginx.Bomb(200, "forbidden")
}
plug, exists := dscache.DsCache.Get("elasticsearch", f.DatasourceId)

View File

@@ -6,10 +6,10 @@ import (
"time"
"github.com/ccfos/nightingale/v6/models"
"github.com/ccfos/nightingale/v6/pkg/strx"
"github.com/gin-gonic/gin"
"github.com/toolkits/pkg/ginx"
"github.com/toolkits/pkg/str"
)
func (rt *Router) recordingRuleGets(c *gin.Context) {
@@ -19,7 +19,7 @@ func (rt *Router) recordingRuleGets(c *gin.Context) {
}
func (rt *Router) recordingRuleGetsByGids(c *gin.Context) {
gids := str.IdsInt64(ginx.QueryStr(c, "gids", ""), ",")
gids := strx.IdsInt64ForAPI(ginx.QueryStr(c, "gids", ""), ",")
if len(gids) > 0 {
for _, gid := range gids {
rt.bgroCheck(c, gid)
@@ -149,6 +149,12 @@ func (rt *Router) recordingRulePutFields(c *gin.Context) {
f.Fields["datasource_queries"] = string(bytes)
}
if datasourceIds, ok := f.Fields["datasource_ids"]; ok {
bytes, err := json.Marshal(datasourceIds)
ginx.Dangerous(err)
f.Fields["datasource_ids"] = string(bytes)
}
for i := 0; i < len(f.Ids); i++ {
ar, err := models.RecordingRuleGetById(rt.Ctx, f.Ids[i])
ginx.Dangerous(err)

View File

@@ -0,0 +1,36 @@
package router
import (
"net/http"
"time"
"github.com/ccfos/nightingale/v6/models"
"github.com/google/uuid"
"github.com/gin-gonic/gin"
"github.com/toolkits/pkg/ginx"
)
// sourceTokenAdd 生成新的源令牌
func (rt *Router) sourceTokenAdd(c *gin.Context) {
var f models.SourceToken
ginx.BindJSON(c, &f)
if f.ExpireAt > 0 && f.ExpireAt <= time.Now().Unix() {
ginx.Bomb(http.StatusBadRequest, "expire time must be in the future")
}
token := uuid.New().String()
username := c.MustGet("username").(string)
f.Token = token
f.CreateBy = username
f.CreateAt = time.Now().Unix()
err := f.Add(rt.Ctx)
ginx.Dangerous(err)
go models.CleanupExpiredTokens(rt.Ctx)
ginx.NewRender(c).Data(token, nil)
}

View File

@@ -10,13 +10,14 @@ import (
"github.com/ccfos/nightingale/v6/models"
"github.com/ccfos/nightingale/v6/pkg/ctx"
"github.com/ccfos/nightingale/v6/pkg/strx"
"github.com/ccfos/nightingale/v6/pushgw/idents"
"github.com/ccfos/nightingale/v6/storage"
"github.com/gin-gonic/gin"
"github.com/prometheus/common/model"
"github.com/toolkits/pkg/ginx"
"github.com/toolkits/pkg/logger"
"github.com/toolkits/pkg/str"
)
type TargetQuery struct {
@@ -44,7 +45,7 @@ func (rt *Router) targetGetsByHostFilter(c *gin.Context) {
}
func (rt *Router) targetGets(c *gin.Context) {
bgids := str.IdsInt64(ginx.QueryStr(c, "gids", ""), ",")
bgids := strx.IdsInt64ForAPI(ginx.QueryStr(c, "gids", ""), ",")
query := ginx.QueryStr(c, "query", "")
limit := ginx.QueryInt(c, "limit", 30)
downtime := ginx.QueryInt64(c, "downtime", 0)
@@ -56,7 +57,14 @@ func (rt *Router) targetGets(c *gin.Context) {
hosts := queryStrListField(c, "hosts", ",", " ", "\n")
var err error
if len(bgids) == 0 {
if len(bgids) > 0 {
// 如果用户当前查看的是未归组机器,会传入 bgids = [0],此时是不需要校验的,故而排除这种情况
if !(len(bgids) == 1 && bgids[0] == 0) {
for _, gid := range bgids {
rt.bgroCheck(c, gid)
}
}
} else {
user := c.MustGet("user").(*models.User)
if !user.IsAdmin() {
// 如果是非 admin 用户,全部对象的情况,找到用户有权限的业务组
@@ -454,7 +462,7 @@ func (rt *Router) targetBindBgids(c *gin.Context) {
ginx.Dangerous(err)
if !can {
ginx.Bomb(http.StatusForbidden, "No permission. You are not admin of BG(%s)", bg.Name)
ginx.Bomb(http.StatusForbidden, "forbidden")
}
}
isNeverGrouped, checkErr := haveNeverGroupedIdent(rt.Ctx, f.Idents)
@@ -464,7 +472,7 @@ func (rt *Router) targetBindBgids(c *gin.Context) {
can, err := user.CheckPerm(rt.Ctx, "/targets/bind")
ginx.Dangerous(err)
if !can {
ginx.Bomb(http.StatusForbidden, "No permission. Only admin can assign BG")
ginx.Bomb(http.StatusForbidden, "forbidden")
}
}
}
@@ -549,7 +557,7 @@ func (rt *Router) checkTargetPerm(c *gin.Context, idents []string) {
ginx.Dangerous(err)
if len(nopri) > 0 {
ginx.Bomb(http.StatusForbidden, "No permission to operate the targets: %s", strings.Join(nopri, ", "))
ginx.Bomb(http.StatusForbidden, "forbidden")
}
}
@@ -571,6 +579,15 @@ func (rt *Router) targetsOfAlertRule(c *gin.Context) {
ginx.NewRender(c).Data(ret, err)
}
func (rt *Router) checkTargetsExistByIndent(idents []string) {
notExists, err := models.TargetNoExistIdents(rt.Ctx, idents)
ginx.Dangerous(err)
if len(notExists) > 0 {
ginx.Bomb(http.StatusBadRequest, "targets not exist: %s", strings.Join(notExists, ", "))
}
}
func (rt *Router) targetsOfHostQuery(c *gin.Context) {
var queries []models.HostQuery
ginx.BindJSON(c, &queries)
@@ -585,3 +602,10 @@ func (rt *Router) targetsOfHostQuery(c *gin.Context) {
ginx.NewRender(c).Data(lst, nil)
}
func (rt *Router) targetUpdate(c *gin.Context) {
var f idents.TargetUpdate
ginx.BindJSON(c, &f)
ginx.NewRender(c).Message(rt.IdentSet.UpdateTargets(f.Lst, f.Now))
}

View File

@@ -1,15 +1,16 @@
package router
import (
"strings"
"time"
"github.com/ccfos/nightingale/v6/alert/sender"
"github.com/ccfos/nightingale/v6/models"
"github.com/ccfos/nightingale/v6/pkg/strx"
"github.com/gin-gonic/gin"
"github.com/toolkits/pkg/ginx"
"github.com/toolkits/pkg/i18n"
"github.com/toolkits/pkg/str"
)
func (rt *Router) taskGets(c *gin.Context) {
@@ -40,7 +41,7 @@ func (rt *Router) taskGets(c *gin.Context) {
}
func (rt *Router) taskGetsByGids(c *gin.Context) {
gids := str.IdsInt64(ginx.QueryStr(c, "gids", ""), ",")
gids := strx.IdsInt64ForAPI(ginx.QueryStr(c, "gids", ""), ",")
if len(gids) > 0 {
for _, gid := range gids {
rt.bgroCheck(c, gid)
@@ -84,20 +85,6 @@ func (rt *Router) taskGetsByGids(c *gin.Context) {
}, nil)
}
type taskForm struct {
Title string `json:"title" binding:"required"`
Account string `json:"account" binding:"required"`
Batch int `json:"batch"`
Tolerance int `json:"tolerance"`
Timeout int `json:"timeout"`
Pause string `json:"pause"`
Script string `json:"script" binding:"required"`
Args string `json:"args"`
Action string `json:"action" binding:"required"`
Creator string `json:"creator"`
Hosts []string `json:"hosts" binding:"required"`
}
func (rt *Router) taskRecordAdd(c *gin.Context) {
var f *models.TaskRecord
ginx.BindJSON(c, &f)
@@ -112,11 +99,21 @@ func (rt *Router) taskAdd(c *gin.Context) {
var f models.TaskForm
ginx.BindJSON(c, &f)
// 把 f.Hosts 中的空字符串过滤掉
hosts := make([]string, 0, len(f.Hosts))
for i := range f.Hosts {
if strings.TrimSpace(f.Hosts[i]) != "" {
hosts = append(hosts, strings.TrimSpace(f.Hosts[i]))
}
}
f.Hosts = hosts
bgid := ginx.UrlParamInt64(c, "id")
user := c.MustGet("user").(*models.User)
f.Creator = user.Username
rt.checkTargetsExistByIndent(f.Hosts)
err := f.Verify()
ginx.Dangerous(err)

View File

@@ -7,6 +7,7 @@ import (
"time"
"github.com/ccfos/nightingale/v6/models"
"github.com/ccfos/nightingale/v6/pkg/strx"
"github.com/gin-gonic/gin"
"github.com/toolkits/pkg/ginx"
@@ -35,7 +36,7 @@ func (rt *Router) taskTplGetsByGids(c *gin.Context) {
query := ginx.QueryStr(c, "query", "")
limit := ginx.QueryInt(c, "limit", 20)
gids := str.IdsInt64(ginx.QueryStr(c, "gids", ""), ",")
gids := strx.IdsInt64ForAPI(ginx.QueryStr(c, "gids", ""), ",")
if len(gids) > 0 {
for _, gid := range gids {
rt.bgroCheck(c, gid)
@@ -118,6 +119,18 @@ type taskTplForm struct {
Hosts []string `json:"hosts"`
}
func (f *taskTplForm) Verify() {
// 传入的 f.Hosts 可能是 []string{"", "a", "b"},需要过滤掉空字符串
args := make([]string, 0, len(f.Hosts))
for _, ident := range f.Hosts {
if strings.TrimSpace(ident) != "" {
args = append(args, strings.TrimSpace(ident))
}
}
f.Hosts = args
}
func (rt *Router) taskTplAdd(c *gin.Context) {
if !rt.Ibex.Enable {
ginx.Bomb(400, i18n.Sprintf(c.GetHeader("X-Language"), "This functionality has not been enabled. Please contact the system administrator to activate it."))
@@ -126,10 +139,13 @@ func (rt *Router) taskTplAdd(c *gin.Context) {
var f taskTplForm
ginx.BindJSON(c, &f)
f.Verify()
user := c.MustGet("user").(*models.User)
now := time.Now().Unix()
rt.checkTargetsExistByIndent(f.Hosts)
sort.Strings(f.Tags)
tpl := &models.TaskTpl{
@@ -167,6 +183,9 @@ func (rt *Router) taskTplPut(c *gin.Context) {
var f taskTplForm
ginx.BindJSON(c, &f)
f.Verify()
rt.checkTargetsExistByIndent(f.Hosts)
sort.Strings(f.Tags)

View File

@@ -1,6 +1,7 @@
package router
import (
"fmt"
"net/http"
"strings"
@@ -12,6 +13,7 @@ import (
"github.com/gin-gonic/gin"
"github.com/toolkits/pkg/ginx"
"github.com/toolkits/pkg/logger"
"gorm.io/gorm"
)
func (rt *Router) userBusiGroupsGets(c *gin.Context) {
@@ -47,12 +49,27 @@ func (rt *Router) userGets(c *gin.Context) {
query := ginx.QueryStr(c, "query", "")
order := ginx.QueryStr(c, "order", "username")
desc := ginx.QueryBool(c, "desc", false)
usernames := strings.Split(ginx.QueryStr(c, "usernames", ""), ",")
phones := strings.Split(ginx.QueryStr(c, "phones", ""), ",")
emails := strings.Split(ginx.QueryStr(c, "emails", ""), ",")
if len(usernames) == 1 && usernames[0] == "" {
usernames = []string{}
}
if len(phones) == 1 && phones[0] == "" {
phones = []string{}
}
if len(emails) == 1 && emails[0] == "" {
emails = []string{}
}
go rt.UserCache.UpdateUsersLastActiveTime()
total, err := models.UserTotal(rt.Ctx, query, stime, etime)
ginx.Dangerous(err)
list, err := models.UserGets(rt.Ctx, query, limit, ginx.Offset(c, limit), stime, etime, order, desc)
list, err := models.UserGets(rt.Ctx, query, limit, ginx.Offset(c, limit), stime, etime, order, desc, usernames, phones, emails)
ginx.Dangerous(err)
user := c.MustGet("user").(*models.User)
@@ -220,3 +237,217 @@ func (rt *Router) userDel(c *gin.Context) {
ginx.NewRender(c).Message(target.Del(rt.Ctx))
}
func (rt *Router) installDateGet(c *gin.Context) {
rootUser, err := models.UserGetByUsername(rt.Ctx, "root")
if err != nil {
logger.Errorf("get root user failed: %v", err)
ginx.NewRender(c).Data(0, nil)
return
}
if rootUser == nil {
logger.Errorf("root user not found")
ginx.NewRender(c).Data(0, nil)
return
}
ginx.NewRender(c).Data(rootUser.CreateAt, nil)
}
// usersPhoneEncrypt 统一手机号加密
func (rt *Router) usersPhoneEncrypt(c *gin.Context) {
users, err := models.UserGetAll(rt.Ctx)
if err != nil {
ginx.NewRender(c).Message(fmt.Errorf("get users failed: %v", err))
return
}
// 获取RSA密钥
_, publicKey, _, err := models.GetRSAKeys(rt.Ctx)
if err != nil {
ginx.NewRender(c).Message(fmt.Errorf("get RSA keys failed: %v", err))
return
}
// 先启用手机号加密功能
err = models.SetPhoneEncryptionEnabled(rt.Ctx, true)
if err != nil {
ginx.NewRender(c).Message(fmt.Errorf("enable phone encryption failed: %v", err))
return
}
// 刷新配置缓存
err = models.RefreshPhoneEncryptionCache(rt.Ctx)
if err != nil {
logger.Errorf("Failed to refresh phone encryption cache: %v", err)
// 回滚配置
models.SetPhoneEncryptionEnabled(rt.Ctx, false)
ginx.NewRender(c).Message(fmt.Errorf("refresh cache failed: %v", err))
return
}
successCount := 0
failCount := 0
var failedUsers []string
// 使用事务处理所有用户的手机号加密
err = models.DB(rt.Ctx).Transaction(func(tx *gorm.DB) error {
// 对每个用户的手机号进行加密
for _, user := range users {
if user.Phone == "" {
continue
}
if isPhoneEncrypted(user.Phone) {
continue
}
encryptedPhone, err := secu.EncryptValue(user.Phone, publicKey)
if err != nil {
logger.Errorf("Failed to encrypt phone for user %s: %v", user.Username, err)
failCount++
failedUsers = append(failedUsers, user.Username)
continue
}
err = tx.Model(&models.User{}).Where("id = ?", user.Id).Update("phone", encryptedPhone).Error
if err != nil {
logger.Errorf("Failed to update phone for user %s: %v", user.Username, err)
failCount++
failedUsers = append(failedUsers, user.Username)
continue
}
successCount++
logger.Debugf("Successfully encrypted phone for user %s", user.Username)
}
// 如果有失败的用户,回滚事务
if failCount > 0 {
return fmt.Errorf("encrypt failed users: %d, failed users: %v", failCount, failedUsers)
}
return nil
})
if err != nil {
// 加密失败,回滚配置
models.SetPhoneEncryptionEnabled(rt.Ctx, false)
models.RefreshPhoneEncryptionCache(rt.Ctx)
ginx.NewRender(c).Message(fmt.Errorf("encrypt phone failed: %v", err))
return
}
ginx.NewRender(c).Data(gin.H{
"success_count": successCount,
"fail_count": failCount,
}, nil)
}
// usersPhoneDecrypt 统一手机号解密
func (rt *Router) usersPhoneDecrypt(c *gin.Context) {
// 先关闭手机号加密功能
err := models.SetPhoneEncryptionEnabled(rt.Ctx, false)
if err != nil {
ginx.NewRender(c).Message(fmt.Errorf("disable phone encryption failed: %v", err))
return
}
// 刷新配置缓存
err = models.RefreshPhoneEncryptionCache(rt.Ctx)
if err != nil {
logger.Errorf("Failed to refresh phone encryption cache: %v", err)
// 回滚配置
models.SetPhoneEncryptionEnabled(rt.Ctx, true)
ginx.NewRender(c).Message(fmt.Errorf("refresh cache failed: %v", err))
return
}
// 获取所有用户(此时加密开关已关闭,直接读取数据库原始数据)
var users []*models.User
err = models.DB(rt.Ctx).Find(&users).Error
if err != nil {
// 回滚配置
models.SetPhoneEncryptionEnabled(rt.Ctx, true)
models.RefreshPhoneEncryptionCache(rt.Ctx)
ginx.NewRender(c).Message(fmt.Errorf("get users failed: %v", err))
return
}
// 获取RSA密钥
privateKey, _, password, err := models.GetRSAKeys(rt.Ctx)
if err != nil {
// 回滚配置
models.SetPhoneEncryptionEnabled(rt.Ctx, true)
models.RefreshPhoneEncryptionCache(rt.Ctx)
ginx.NewRender(c).Message(fmt.Errorf("get RSA keys failed: %v", err))
return
}
successCount := 0
failCount := 0
var failedUsers []string
// 使用事务处理所有用户的手机号解密
err = models.DB(rt.Ctx).Transaction(func(tx *gorm.DB) error {
// 对每个用户的手机号进行解密
for _, user := range users {
if user.Phone == "" {
continue
}
// 检查是否是加密的手机号
if !isPhoneEncrypted(user.Phone) {
continue
}
// 对手机号进行解密
decryptedPhone, err := secu.Decrypt(user.Phone, privateKey, password)
if err != nil {
logger.Errorf("Failed to decrypt phone for user %s: %v", user.Username, err)
failCount++
failedUsers = append(failedUsers, user.Username)
continue
}
// 直接更新数据库中的手机号字段绕过GORM钩子
err = tx.Model(&models.User{}).Where("id = ?", user.Id).Update("phone", decryptedPhone).Error
if err != nil {
logger.Errorf("Failed to update phone for user %s: %v", user.Username, err)
failCount++
failedUsers = append(failedUsers, user.Username)
continue
}
successCount++
logger.Debugf("Successfully decrypted phone for user %s", user.Username)
}
// 如果有失败的用户,回滚事务
if failCount > 0 {
return fmt.Errorf("decrypt failed users: %d, failed users: %v", failCount, failedUsers)
}
return nil
})
if err != nil {
// 解密失败,回滚配置
models.SetPhoneEncryptionEnabled(rt.Ctx, true)
models.RefreshPhoneEncryptionCache(rt.Ctx)
ginx.NewRender(c).Message(fmt.Errorf("decrypt phone failed: %v", err))
return
}
ginx.NewRender(c).Data(gin.H{
"success_count": successCount,
"fail_count": failCount,
}, nil)
}
// isPhoneEncrypted 检查手机号是否已经加密
func isPhoneEncrypted(phone string) bool {
// 检查是否有 "enc:" 前缀标记
return len(phone) > 4 && phone[:4] == "enc:"
}

View File

@@ -6,11 +6,11 @@ import (
"github.com/ccfos/nightingale/v6/models"
"github.com/ccfos/nightingale/v6/pkg/flashduty"
"github.com/ccfos/nightingale/v6/pkg/strx"
"github.com/gin-gonic/gin"
"github.com/toolkits/pkg/ginx"
"github.com/toolkits/pkg/logger"
"github.com/toolkits/pkg/str"
)
func (rt *Router) checkBusiGroupPerm(c *gin.Context) {
@@ -32,7 +32,7 @@ func (rt *Router) userGroupGets(c *gin.Context) {
}
func (rt *Router) userGroupGetsByService(c *gin.Context) {
ids := str.IdsInt64(ginx.QueryStr(c, "ids", ""))
ids := strx.IdsInt64ForAPI(ginx.QueryStr(c, "ids", ""))
if len(ids) == 0 {
lst, err := models.UserGroupGetAll(rt.Ctx)
@@ -111,7 +111,6 @@ func (rt *Router) userGroupPut(c *gin.Context) {
me := c.MustGet("user").(*models.User)
ug := c.MustGet("user_group").(*models.UserGroup)
oldUGName := ug.Name
if ug.Name != f.Name {
// name changed, check duplication
@@ -130,7 +129,7 @@ func (rt *Router) userGroupPut(c *gin.Context) {
if f.IsSyncToFlashDuty || flashduty.NeedSyncTeam(rt.Ctx) {
ugs, err := flashduty.NewUserGroupSyncer(rt.Ctx, ug)
ginx.Dangerous(err)
err = ugs.SyncUGPut(oldUGName)
err = ugs.SyncUGPut()
ginx.Dangerous(err)
}
ginx.NewRender(c).Message(ug.Update(rt.Ctx, "Name", "Note", "UpdateAt", "UpdateBy"))
@@ -159,8 +158,11 @@ func (rt *Router) userGroupDel(c *gin.Context) {
if isSyncToFlashDuty || flashduty.NeedSyncTeam(rt.Ctx) {
ugs, err := flashduty.NewUserGroupSyncer(rt.Ctx, ug)
ginx.Dangerous(err)
err = ugs.SyncUGDel(ug.Name)
ginx.Dangerous(err)
err = ugs.SyncUGDel()
// 如果team 在 duty 被引用或者已经删除,会报错,可以忽略报错
if err != nil {
logger.Warningf("failed to sync user group %s to flashduty's team: %v", ug.Name, err)
}
}
ginx.NewRender(c).Message(ug.Del(rt.Ctx))

View File

@@ -40,7 +40,7 @@ func (rt *Router) userVariableConfigPut(context *gin.Context) {
user := context.MustGet("user").(*models.User)
if !user.IsAdmin() && f.CreateBy != user.Username {
// only admin or creator can update
ginx.Bomb(403, "no permission")
ginx.Bomb(403, "forbidden")
}
ginx.NewRender(context).Message(models.ConfigsUserVariableUpdate(rt.Ctx, f))
@@ -54,7 +54,7 @@ func (rt *Router) userVariableConfigDel(context *gin.Context) {
user := context.MustGet("user").(*models.User)
if !user.IsAdmin() && configs.CreateBy != user.Username {
// only admin or creator can delete
ginx.Bomb(403, "no permission")
ginx.Bomb(403, "forbidden")
}
if configs != nil && configs.External == models.ConfigExternal {

View File

@@ -54,7 +54,7 @@ func Initialize(configDir string, cryptoKey string) (func(), error) {
targetCache := memsto.NewTargetCache(ctx, syncStats, redis)
busiGroupCache := memsto.NewBusiGroupCache(ctx, syncStats)
configCvalCache := memsto.NewCvalCache(ctx, syncStats)
idents := idents.New(ctx, redis)
idents := idents.New(ctx, redis, config.Pushgw)
metas := metas.New(redis)
writers := writer.NewWriters(config.Pushgw)
pushgwRouter := pushgwrt.New(config.HTTP, config.Pushgw, config.Alert, targetCache, busiGroupCache, idents, metas, writers, ctx)

View File

@@ -14,6 +14,13 @@ func decryptConfig(config *ConfigType, cryptoKey string) error {
config.DB.DSN = decryptDsn
decryptRedisPwd, err := secu.DealWithDecrypt(config.Redis.Password, cryptoKey)
if err != nil {
return fmt.Errorf("failed to decrypt the redis password: %s", err)
}
config.Redis.Password = decryptRedisPwd
for k := range config.HTTP.APIForService.BasicAuth {
decryptPwd, err := secu.DealWithDecrypt(config.HTTP.APIForService.BasicAuth[k], cryptoKey)
if err != nil {

View File

@@ -10,12 +10,20 @@ import (
"github.com/araddon/dateparse"
"github.com/bitly/go-simplejson"
"github.com/ccfos/nightingale/v6/memsto"
"github.com/ccfos/nightingale/v6/models"
"github.com/mitchellh/mapstructure"
"github.com/olivere/elastic/v7"
"github.com/prometheus/common/model"
"github.com/toolkits/pkg/logger"
"github.com/ccfos/nightingale/v6/memsto"
"github.com/ccfos/nightingale/v6/models"
)
type FixedField string
const (
FieldIndex FixedField = "_index"
FieldId FixedField = "_id"
)
type Query struct {
@@ -24,6 +32,7 @@ type Query struct {
Index string `json:"index" mapstructure:"index"`
IndexPatternId int64 `json:"index_pattern" mapstructure:"index_pattern"`
Filter string `json:"filter" mapstructure:"filter"`
Offset int64 `json:"offset" mapstructure:"offset"`
MetricAggr MetricAggr `json:"value" mapstructure:"value"`
GroupBy []GroupBy `json:"group_by" mapstructure:"group_by"`
DateField string `json:"date_field" mapstructure:"date_field"`
@@ -36,6 +45,18 @@ type Query struct {
Timeout int `json:"timeout" mapstructure:"timeout"`
MaxShard int `json:"max_shard" mapstructure:"max_shard"`
SearchAfter *SearchAfter `json:"search_after" mapstructure:"search_after"`
}
type SortField struct {
Field string `json:"field" mapstructure:"field"`
Ascending bool `json:"ascending" mapstructure:"ascending"`
}
type SearchAfter struct {
SortFields []SortField `json:"sort_fields" mapstructure:"sort_fields"` // 指定排序字段, 一般是timestamp:desc, _index:asc, _id:asc 三者组合,构成唯一的排序字段
SearchAfter []interface{} `json:"search_after" mapstructure:"search_after"` // 指定排序字段的搜索值搜索值必须和sort_fields的顺序一致为上一次查询的最后一条日志的值
}
type MetricAggr struct {
@@ -270,7 +291,10 @@ func MakeLogQuery(ctx context.Context, query interface{}, eventTags []string, st
}
for i := 0; i < len(eventTags); i++ {
eventTags[i] = strings.Replace(eventTags[i], "=", ":", 1)
arr := strings.SplitN(eventTags[i], "=", 2)
if len(arr) == 2 {
eventTags[i] = fmt.Sprintf("%s:%s", arr[0], strconv.Quote(arr[1]))
}
}
if len(eventTags) > 0 {
@@ -294,7 +318,10 @@ func MakeTSQuery(ctx context.Context, query interface{}, eventTags []string, sta
}
for i := 0; i < len(eventTags); i++ {
eventTags[i] = strings.Replace(eventTags[i], "=", ":", 1)
arr := strings.SplitN(eventTags[i], "=", 2)
if len(arr) == 2 {
eventTags[i] = fmt.Sprintf("%s:%s", arr[0], strconv.Quote(arr[1]))
}
}
if len(eventTags) > 0 {
@@ -347,12 +374,14 @@ func QueryData(ctx context.Context, queryParam interface{}, cliTimeout int64, ve
if ip, ok := GetEsIndexPatternCacheType().Get(param.IndexPatternId); ok {
param.DateField = ip.TimeField
indexArr = []string{ip.Name}
param.Index = ip.Name
} else {
return nil, fmt.Errorf("index pattern:%d not found", param.IndexPatternId)
}
} else {
indexArr = strings.Split(param.Index, ",")
}
q := elastic.NewRangeQuery(param.DateField)
now := time.Now().Unix()
var start, end int64
@@ -370,6 +399,11 @@ func QueryData(ctx context.Context, queryParam interface{}, cliTimeout int64, ve
start = start - delay
}
if param.Offset > 0 {
end = end - param.Offset
start = start - param.Offset
}
q.Gte(time.Unix(start, 0).UnixMilli())
q.Lte(time.Unix(end, 0).UnixMilli())
q.Format("epoch_millis")
@@ -408,7 +442,9 @@ func QueryData(ctx context.Context, queryParam interface{}, cliTimeout int64, ve
MinDocCount(1)
if strings.HasPrefix(version, "7") {
tsAggr.FixedInterval(fmt.Sprintf("%ds", param.Interval))
// 添加偏移量使第一个分桶bucket的左边界对齐为 start 时间
offset := (start % param.Interval) - param.Interval
tsAggr.FixedInterval(fmt.Sprintf("%ds", param.Interval)).Offset(fmt.Sprintf("%ds", offset))
} else {
// 兼容 7.0 以下的版本
// OpenSearch 也使用这个字段
@@ -481,7 +517,7 @@ func QueryData(ctx context.Context, queryParam interface{}, cliTimeout int64, ve
source, _ := queryString.Source()
b, _ := json.Marshal(source)
logger.Debugf("query_data q:%+v tsAggr:%+v query_string:%s", param, tsAggr, string(b))
logger.Debugf("query_data q:%+v indexArr:%+v tsAggr:%+v query_string:%s", param, indexArr, tsAggr, string(b))
searchSource := elastic.NewSearchSource().
Query(queryString).
@@ -528,7 +564,16 @@ func QueryData(ctx context.Context, queryParam interface{}, cliTimeout int64, ve
GetBuckts("", keys, bucketsData, metrics, "", 0, param.MetricAggr.Func)
return TransferData(fmt.Sprintf("%s_%s", field, param.MetricAggr.Func), param.Ref, metrics.Data), nil
items, err := TransferData(fmt.Sprintf("%s_%s", field, param.MetricAggr.Func), param.Ref, metrics.Data), nil
var m map[string]interface{}
bs, _ := json.Marshal(queryParam)
json.Unmarshal(bs, &m)
m["index"] = param.Index
for i := range items {
items[i].Query = fmt.Sprintf("%+v", m)
}
return items, nil
}
func HitFilter(typ string) bool {
@@ -586,14 +631,27 @@ func QueryLog(ctx context.Context, queryParam interface{}, timeout int64, versio
if param.MaxShard < 1 {
param.MaxShard = maxShard
}
// from+size 分页方式获取日志受es 的max_result_window参数限制默认最多返回1w条日志, 可以使用search_after方式获取更多日志
source := elastic.NewSearchSource().
TrackTotalHits(true).
Query(queryString).
From(param.P).
Size(param.Limit).
Sort(param.DateField, param.Ascending)
Size(param.Limit)
// 是否使用search_after方式
if param.SearchAfter != nil {
// 设置默认排序字段
if len(param.SearchAfter.SortFields) == 0 {
source = source.Sort(param.DateField, param.Ascending).Sort(string(FieldIndex), true).Sort(string(FieldId), true)
} else {
for _, field := range param.SearchAfter.SortFields {
source = source.Sort(field.Field, field.Ascending)
}
}
if len(param.SearchAfter.SearchAfter) > 0 {
source = source.SearchAfter(param.SearchAfter.SearchAfter...)
}
} else {
source = source.From(param.P).Sort(param.DateField, param.Ascending)
}
result, err := search(ctx, indexArr, source, param.Timeout, param.MaxShard)
if err != nil {
logger.Warningf("query data error:%v", err)

View File

@@ -53,6 +53,20 @@ func init() {
PluginType: "ck",
PluginTypeName: "ClickHouse",
}
DatasourceTypes[5] = DatasourceType{
Id: 5,
Category: "timeseries",
PluginType: "mysql",
PluginTypeName: "MySQL",
}
DatasourceTypes[6] = DatasourceType{
Id: 6,
Category: "timeseries",
PluginType: "pgsql",
PluginTypeName: "PostgreSQL",
}
}
type NewDatasrouceFn func(settings map[string]interface{}) (Datasource, error)
@@ -98,6 +112,7 @@ func GetDatasourceByType(typ string, settings map[string]interface{}) (Datasourc
type DatasourceInfo struct {
Id int64 `json:"id"`
Name string `json:"name"`
Identifier string `json:"identifier"`
Description string `json:"description"`
ClusterName string `json:"cluster_name"`
Category string `json:"category"`

213
datasource/doris/doris.go Normal file
View File

@@ -0,0 +1,213 @@
package doris
import (
"context"
"fmt"
"strings"
"github.com/ccfos/nightingale/v6/datasource"
"github.com/ccfos/nightingale/v6/dskit/doris"
"github.com/ccfos/nightingale/v6/dskit/types"
"github.com/ccfos/nightingale/v6/pkg/macros"
"github.com/ccfos/nightingale/v6/models"
"github.com/mitchellh/mapstructure"
"github.com/toolkits/pkg/logger"
)
const (
DorisType = "doris"
)
func init() {
datasource.RegisterDatasource(DorisType, new(Doris))
}
type Doris struct {
doris.Doris `json:",inline" mapstructure:",squash"`
}
type QueryParam struct {
Ref string `json:"ref" mapstructure:"ref"`
Database string `json:"database" mapstructure:"database"`
Table string `json:"table" mapstructure:"table"`
SQL string `json:"sql" mapstructure:"sql"`
Keys datasource.Keys `json:"keys" mapstructure:"keys"`
Limit int `json:"limit" mapstructure:"limit"`
From int64 `json:"from" mapstructure:"from"`
To int64 `json:"to" mapstructure:"to"`
TimeField string `json:"time_field" mapstructure:"time_field"`
TimeFormat string `json:"time_format" mapstructure:"time_format"`
}
func (d *Doris) InitClient() error {
if len(d.Addr) == 0 {
return fmt.Errorf("not found doris addr, please check datasource config")
}
if _, err := d.NewConn(context.TODO(), ""); err != nil {
return err
}
return nil
}
func (d *Doris) Init(settings map[string]interface{}) (datasource.Datasource, error) {
newest := new(Doris)
err := mapstructure.Decode(settings, newest)
return newest, err
}
func (d *Doris) Validate(ctx context.Context) error {
if len(d.Addr) == 0 || len(strings.TrimSpace(d.Addr)) == 0 {
return fmt.Errorf("doris addr is invalid, please check datasource setting")
}
if len(strings.TrimSpace(d.User)) == 0 {
return fmt.Errorf("doris user is invalid, please check datasource setting")
}
return nil
}
// Equal compares whether two objects are the same, used for caching
func (d *Doris) Equal(p datasource.Datasource) bool {
newest, ok := p.(*Doris)
if !ok {
logger.Errorf("unexpected plugin type, expected is doris")
return false
}
// only compare first shard
if d.Addr != newest.Addr {
return false
}
if d.User != newest.User {
return false
}
if d.Password != newest.Password {
return false
}
if d.EnableWrite != newest.EnableWrite {
return false
}
if d.FeAddr != newest.FeAddr {
return false
}
if d.MaxQueryRows != newest.MaxQueryRows {
return false
}
if d.Timeout != newest.Timeout {
return false
}
if d.MaxIdleConns != newest.MaxIdleConns {
return false
}
if d.MaxOpenConns != newest.MaxOpenConns {
return false
}
if d.ConnMaxLifetime != newest.ConnMaxLifetime {
return false
}
if d.ClusterName != newest.ClusterName {
return false
}
return true
}
func (d *Doris) MakeLogQuery(ctx context.Context, query interface{}, eventTags []string, start, end int64) (interface{}, error) {
return nil, nil
}
func (d *Doris) MakeTSQuery(ctx context.Context, query interface{}, eventTags []string, start, end int64) (interface{}, error) {
return nil, nil
}
func (d *Doris) QueryMapData(ctx context.Context, query interface{}) ([]map[string]string, error) {
return nil, nil
}
func (d *Doris) QueryData(ctx context.Context, query interface{}) ([]models.DataResp, error) {
dorisQueryParam := new(QueryParam)
if err := mapstructure.Decode(query, dorisQueryParam); err != nil {
return nil, err
}
if dorisQueryParam.Keys.ValueKey == "" {
return nil, fmt.Errorf("valueKey is required")
}
items, err := d.QueryTimeseries(context.TODO(), &doris.QueryParam{
Database: dorisQueryParam.Database,
Sql: dorisQueryParam.SQL,
Keys: types.Keys{
ValueKey: dorisQueryParam.Keys.ValueKey,
LabelKey: dorisQueryParam.Keys.LabelKey,
TimeKey: dorisQueryParam.Keys.TimeKey,
},
})
if err != nil {
logger.Warningf("query:%+v get data err:%v", dorisQueryParam, err)
return []models.DataResp{}, err
}
data := make([]models.DataResp, 0)
for i := range items {
data = append(data, models.DataResp{
Ref: dorisQueryParam.Ref,
Metric: items[i].Metric,
Values: items[i].Values,
})
}
// parse resp to time series data
logger.Infof("req:%+v keys:%+v \n data:%v", dorisQueryParam, dorisQueryParam.Keys, data)
return data, nil
}
func (d *Doris) QueryLog(ctx context.Context, query interface{}) ([]interface{}, int64, error) {
dorisQueryParam := new(QueryParam)
if err := mapstructure.Decode(query, dorisQueryParam); err != nil {
return nil, 0, err
}
if strings.Contains(dorisQueryParam.SQL, "$__") {
var err error
dorisQueryParam.SQL, err = macros.Macro(dorisQueryParam.SQL, dorisQueryParam.From, dorisQueryParam.To)
if err != nil {
return nil, 0, err
}
}
items, err := d.QueryLogs(ctx, &doris.QueryParam{
Database: dorisQueryParam.Database,
Sql: dorisQueryParam.SQL,
})
if err != nil {
logger.Warningf("query:%+v get data err:%v", dorisQueryParam, err)
return []interface{}{}, 0, err
}
logs := make([]interface{}, 0)
for i := range items {
logs = append(logs, items[i])
}
return logs, int64(len(logs)), nil
}
func (d *Doris) DescribeTable(ctx context.Context, query interface{}) ([]*types.ColumnProperty, error) {
dorisQueryParam := new(QueryParam)
if err := mapstructure.Decode(query, dorisQueryParam); err != nil {
return nil, err
}
return d.DescTable(ctx, dorisQueryParam.Database, dorisQueryParam.Table)
}

View File

@@ -187,6 +187,7 @@ func (e *Elasticsearch) QueryData(ctx context.Context, queryParam interface{}) (
search := func(ctx context.Context, indices []string, source interface{}, timeout int, maxShard int) (*elastic.SearchResult, error) {
return e.Client.Search().
Index(indices...).
IgnoreUnavailable(true).
Source(source).
Timeout(fmt.Sprintf("%ds", timeout)).
MaxConcurrentShardRequests(maxShard).
@@ -204,7 +205,7 @@ func (e *Elasticsearch) QueryIndices() ([]string, error) {
func (e *Elasticsearch) QueryFields(indexs []string) ([]string, error) {
var fields []string
result, err := elastic.NewGetFieldMappingService(e.Client).Index(indexs...).Do(context.Background())
result, err := elastic.NewGetFieldMappingService(e.Client).Index(indexs...).IgnoreUnavailable(true).Do(context.Background())
if err != nil {
return fields, err
}
@@ -264,6 +265,7 @@ func (e *Elasticsearch) QueryLog(ctx context.Context, queryParam interface{}) ([
return e.Client.Search().
Index(indices...).
IgnoreUnavailable(true).
MaxConcurrentShardRequests(maxShard).
Source(source).
Timeout(fmt.Sprintf("%ds", timeout)).
@@ -276,6 +278,7 @@ func (e *Elasticsearch) QueryLog(ctx context.Context, queryParam interface{}) ([
func (e *Elasticsearch) QueryFieldValue(indexs []string, field string, query string) ([]string, error) {
var values []string
search := e.Client.Search().
IgnoreUnavailable(true).
Index(indexs...).
Size(0)
@@ -359,6 +362,7 @@ func (e *Elasticsearch) QueryMapData(ctx context.Context, query interface{}) ([]
return e.Client.Search().
Index(indices...).
IgnoreUnavailable(true).
Source(source).
Timeout(fmt.Sprintf("%ds", timeout)).
Do(ctx)

227
datasource/mysql/mysql.go Normal file
View File

@@ -0,0 +1,227 @@
package mysql
import (
"context"
"fmt"
"strings"
"time"
"github.com/ccfos/nightingale/v6/datasource"
"github.com/ccfos/nightingale/v6/dskit/mysql"
"github.com/ccfos/nightingale/v6/dskit/sqlbase"
"github.com/ccfos/nightingale/v6/dskit/types"
"github.com/ccfos/nightingale/v6/models"
"github.com/ccfos/nightingale/v6/pkg/macros"
"github.com/mitchellh/mapstructure"
"github.com/toolkits/pkg/logger"
)
const (
MySQLType = "mysql"
)
func init() {
datasource.RegisterDatasource(MySQLType, new(MySQL))
}
type MySQL struct {
mysql.MySQL `json:",inline" mapstructure:",squash"`
}
type QueryParam struct {
Ref string `json:"ref" mapstructure:"ref"`
Database string `json:"database" mapstructure:"database"`
Table string `json:"table" mapstructure:"table"`
SQL string `json:"sql" mapstructure:"sql"`
Keys datasource.Keys `json:"keys" mapstructure:"keys"`
From int64 `json:"from" mapstructure:"from"`
To int64 `json:"to" mapstructure:"to"`
}
func (m *MySQL) InitClient() error {
if len(m.Shards) == 0 {
return fmt.Errorf("not found mysql addr, please check datasource config")
}
if _, err := m.NewConn(context.TODO(), ""); err != nil {
return err
}
return nil
}
func (m *MySQL) Init(settings map[string]interface{}) (datasource.Datasource, error) {
newest := new(MySQL)
err := mapstructure.Decode(settings, newest)
return newest, err
}
func (m *MySQL) Validate(ctx context.Context) error {
if len(m.Shards) == 0 || len(strings.TrimSpace(m.Shards[0].Addr)) == 0 {
return fmt.Errorf("mysql addr is invalid, please check datasource setting")
}
if len(strings.TrimSpace(m.Shards[0].User)) == 0 {
return fmt.Errorf("mysql user is invalid, please check datasource setting")
}
return nil
}
// Equal compares whether two objects are the same, used for caching
func (m *MySQL) Equal(p datasource.Datasource) bool {
newest, ok := p.(*MySQL)
if !ok {
logger.Errorf("unexpected plugin type, expected is mysql")
return false
}
if len(m.Shards) == 0 || len(newest.Shards) == 0 {
return false
}
oldShard := m.Shards[0]
newShard := newest.Shards[0]
if oldShard.Addr != newShard.Addr {
return false
}
if oldShard.User != newShard.User {
return false
}
if oldShard.Password != newShard.Password {
return false
}
if oldShard.MaxQueryRows != newShard.MaxQueryRows {
return false
}
if oldShard.Timeout != newShard.Timeout {
return false
}
if oldShard.MaxIdleConns != newShard.MaxIdleConns {
return false
}
if oldShard.MaxOpenConns != newShard.MaxOpenConns {
return false
}
if oldShard.ConnMaxLifetime != newShard.ConnMaxLifetime {
return false
}
return true
}
func (m *MySQL) MakeLogQuery(ctx context.Context, query interface{}, eventTags []string, start, end int64) (interface{}, error) {
return nil, nil
}
func (m *MySQL) MakeTSQuery(ctx context.Context, query interface{}, eventTags []string, start, end int64) (interface{}, error) {
return nil, nil
}
func (m *MySQL) QueryMapData(ctx context.Context, query interface{}) ([]map[string]string, error) {
return nil, nil
}
func (m *MySQL) QueryData(ctx context.Context, query interface{}) ([]models.DataResp, error) {
mysqlQueryParam := new(QueryParam)
if err := mapstructure.Decode(query, mysqlQueryParam); err != nil {
return nil, err
}
if strings.Contains(mysqlQueryParam.SQL, "$__") {
var err error
mysqlQueryParam.SQL, err = macros.Macro(mysqlQueryParam.SQL, mysqlQueryParam.From, mysqlQueryParam.To)
if err != nil {
return nil, err
}
}
if mysqlQueryParam.Keys.ValueKey == "" {
return nil, fmt.Errorf("valueKey is required")
}
timeout := m.Shards[0].Timeout
if timeout == 0 {
timeout = 60
}
timeoutCtx, cancel := context.WithTimeout(ctx, time.Duration(timeout)*time.Second)
defer cancel()
items, err := m.QueryTimeseries(timeoutCtx, &sqlbase.QueryParam{
Sql: mysqlQueryParam.SQL,
Keys: types.Keys{
ValueKey: mysqlQueryParam.Keys.ValueKey,
LabelKey: mysqlQueryParam.Keys.LabelKey,
TimeKey: mysqlQueryParam.Keys.TimeKey,
},
})
if err != nil {
logger.Warningf("query:%+v get data err:%v", mysqlQueryParam, err)
return []models.DataResp{}, err
}
data := make([]models.DataResp, 0)
for i := range items {
data = append(data, models.DataResp{
Ref: mysqlQueryParam.Ref,
Metric: items[i].Metric,
Values: items[i].Values,
})
}
return data, nil
}
func (m *MySQL) QueryLog(ctx context.Context, query interface{}) ([]interface{}, int64, error) {
mysqlQueryParam := new(QueryParam)
if err := mapstructure.Decode(query, mysqlQueryParam); err != nil {
return nil, 0, err
}
if strings.Contains(mysqlQueryParam.SQL, "$__") {
var err error
mysqlQueryParam.SQL, err = macros.Macro(mysqlQueryParam.SQL, mysqlQueryParam.From, mysqlQueryParam.To)
if err != nil {
return nil, 0, err
}
}
timeout := m.Shards[0].Timeout
if timeout == 0 {
timeout = 60
}
timeoutCtx, cancel := context.WithTimeout(ctx, time.Duration(timeout)*time.Second)
defer cancel()
items, err := m.Query(timeoutCtx, &sqlbase.QueryParam{
Sql: mysqlQueryParam.SQL,
})
if err != nil {
logger.Warningf("query:%+v get data err:%v", mysqlQueryParam, err)
return []interface{}{}, 0, err
}
logs := make([]interface{}, 0)
for i := range items {
logs = append(logs, items[i])
}
return logs, 0, nil
}
func (m *MySQL) DescribeTable(ctx context.Context, query interface{}) ([]*types.ColumnProperty, error) {
mysqlQueryParam := new(QueryParam)
if err := mapstructure.Decode(query, mysqlQueryParam); err != nil {
return nil, err
}
return m.DescTable(ctx, mysqlQueryParam.Database, mysqlQueryParam.Table)
}

View File

@@ -0,0 +1,401 @@
package opensearch
import (
"bytes"
"context"
"encoding/json"
"fmt"
"io"
"net"
"net/http"
"net/url"
"reflect"
"regexp"
"sort"
"strings"
"time"
"github.com/ccfos/nightingale/v6/datasource"
"github.com/ccfos/nightingale/v6/datasource/commons/eslike"
"github.com/ccfos/nightingale/v6/models"
"github.com/ccfos/nightingale/v6/pkg/tlsx"
"github.com/mitchellh/mapstructure"
"github.com/olivere/elastic/v7"
oscliv2 "github.com/opensearch-project/opensearch-go/v2"
osapiv2 "github.com/opensearch-project/opensearch-go/v2/opensearchapi"
)
const (
OpenSearchType = "opensearch"
)
type OpenSearch struct {
Addr string `json:"os.addr" mapstructure:"os.addr"`
Nodes []string `json:"os.nodes" mapstructure:"os.nodes"`
Timeout int64 `json:"os.timeout" mapstructure:"os.timeout"` // millis
Basic BasicAuth `json:"os.basic" mapstructure:"os.basic"`
TLS TLS `json:"os.tls" mapstructure:"os.tls"`
Version string `json:"os.version" mapstructure:"os.version"`
Headers map[string]string `json:"os.headers" mapstructure:"os.headers"`
MinInterval int `json:"os.min_interval" mapstructure:"os.min_interval"` // seconds
MaxShard int `json:"os.max_shard" mapstructure:"os.max_shard"`
ClusterName string `json:"os.cluster_name" mapstructure:"os.cluster_name"`
Client *oscliv2.Client `json:"os.client" mapstructure:"os.client"`
}
type TLS struct {
SkipTlsVerify bool `json:"os.tls.skip_tls_verify" mapstructure:"os.tls.skip_tls_verify"`
}
type BasicAuth struct {
Enable bool `json:"os.auth.enable" mapstructure:"os.auth.enable"`
Username string `json:"os.user" mapstructure:"os.user"`
Password string `json:"os.password" mapstructure:"os.password"`
}
func init() {
datasource.RegisterDatasource(OpenSearchType, new(OpenSearch))
}
func (os *OpenSearch) Init(settings map[string]interface{}) (datasource.Datasource, error) {
newest := new(OpenSearch)
err := mapstructure.Decode(settings, newest)
return newest, err
}
func (os *OpenSearch) InitClient() error {
transport := &http.Transport{
Proxy: http.ProxyFromEnvironment,
DialContext: (&net.Dialer{
Timeout: time.Duration(os.Timeout) * time.Millisecond,
}).DialContext,
ResponseHeaderTimeout: time.Duration(os.Timeout) * time.Millisecond,
}
if len(os.Nodes) > 0 {
os.Addr = os.Nodes[0]
}
if strings.Contains(os.Addr, "https") {
tlsConfig := tlsx.ClientConfig{
InsecureSkipVerify: os.TLS.SkipTlsVerify,
UseTLS: true,
}
cfg, err := tlsConfig.TLSConfig()
if err != nil {
return err
}
transport.TLSClientConfig = cfg
}
headers := http.Header{}
for k, v := range os.Headers {
headers[k] = []string{v}
}
options := oscliv2.Config{
Addresses: os.Nodes,
Transport: transport,
Header: headers,
}
// 只要有用户名就添加认证,不依赖 Enable 字段
if os.Basic.Username != "" {
options.Username = os.Basic.Username
options.Password = os.Basic.Password
}
var err = error(nil)
os.Client, err = oscliv2.NewClient(options)
return err
}
func (os *OpenSearch) Equal(other datasource.Datasource) bool {
sort.Strings(os.Nodes)
sort.Strings(other.(*OpenSearch).Nodes)
if strings.Join(os.Nodes, ",") != strings.Join(other.(*OpenSearch).Nodes, ",") {
return false
}
if os.Basic.Username != other.(*OpenSearch).Basic.Username {
return false
}
if os.Basic.Password != other.(*OpenSearch).Basic.Password {
return false
}
if os.TLS.SkipTlsVerify != other.(*OpenSearch).TLS.SkipTlsVerify {
return false
}
if os.Timeout != other.(*OpenSearch).Timeout {
return false
}
if !reflect.DeepEqual(os.Headers, other.(*OpenSearch).Headers) {
return false
}
return true
}
func (os *OpenSearch) Validate(ctx context.Context) (err error) {
if len(os.Nodes) == 0 {
return fmt.Errorf("need a valid addr")
}
for _, addr := range os.Nodes {
_, err = url.Parse(addr)
if err != nil {
return fmt.Errorf("parse addr error: %v", err)
}
}
// 如果提供了用户名,必须同时提供密码
if len(os.Basic.Username) > 0 && len(os.Basic.Password) == 0 {
return fmt.Errorf("password is required when username is provided")
}
if os.MaxShard == 0 {
os.MaxShard = 5
}
if os.MinInterval < 10 {
os.MinInterval = 10
}
if os.Timeout == 0 {
os.Timeout = 6000
}
if !strings.HasPrefix(os.Version, "2") {
return fmt.Errorf("version must be 2.0+")
}
return nil
}
func (os *OpenSearch) MakeLogQuery(ctx context.Context, query interface{}, eventTags []string, start, end int64) (interface{}, error) {
return eslike.MakeLogQuery(ctx, query, eventTags, start, end)
}
func (os *OpenSearch) MakeTSQuery(ctx context.Context, query interface{}, eventTags []string, start, end int64) (interface{}, error) {
return eslike.MakeTSQuery(ctx, query, eventTags, start, end)
}
func search(ctx context.Context, indices []string, source interface{}, timeout int, cli *oscliv2.Client) (*elastic.SearchResult, error) {
var body *bytes.Buffer = nil
if source != nil {
body = new(bytes.Buffer)
err := json.NewEncoder(body).Encode(source)
if err != nil {
return nil, err
}
}
req := osapiv2.SearchRequest{
Index: indices,
Body: body,
}
if timeout > 0 {
req.Timeout = time.Second * time.Duration(timeout)
}
resp, err := req.Do(ctx, cli)
if err != nil {
return nil, err
}
defer resp.Body.Close()
if resp.StatusCode < 200 || resp.StatusCode >= 300 {
return nil, fmt.Errorf("opensearch response not 2xx, resp is %v", resp)
}
bs, err := io.ReadAll(resp.Body)
if err != nil {
return nil, err
}
result := new(elastic.SearchResult)
err = json.Unmarshal(bs, &result)
if err != nil {
return nil, err
}
return result, nil
}
func (os *OpenSearch) QueryData(ctx context.Context, queryParam interface{}) ([]models.DataResp, error) {
search := func(ctx context.Context, indices []string, source interface{}, timeout int, maxShard int) (*elastic.SearchResult, error) {
return search(ctx, indices, source, timeout, os.Client)
}
return eslike.QueryData(ctx, queryParam, os.Timeout, os.Version, search)
}
func (os *OpenSearch) QueryIndices() ([]string, error) {
cir := osapiv2.CatIndicesRequest{
Format: "json",
}
rsp, err := cir.Do(context.Background(), os.Client)
if err != nil {
return nil, err
}
defer rsp.Body.Close()
bs, err := io.ReadAll(rsp.Body)
if err != nil {
return nil, err
}
resp := make([]struct {
Index string `json:"index"`
}, 0)
err = json.Unmarshal(bs, &resp)
if err != nil {
return nil, err
}
var ret []string
for _, k := range resp {
ret = append(ret, k.Index)
}
return ret, nil
}
func (os *OpenSearch) QueryFields(indices []string) ([]string, error) {
var fields []string
mappingRequest := osapiv2.IndicesGetMappingRequest{
Index: indices,
}
resp, err := mappingRequest.Do(context.Background(), os.Client)
if err != nil {
return fields, err
}
defer resp.Body.Close()
bs, err := io.ReadAll(resp.Body)
if err != nil {
return fields, err
}
result := map[string]interface{}{}
err = json.Unmarshal(bs, &result)
if err != nil {
return fields, err
}
idx := ""
if len(indices) > 0 {
idx = indices[0]
}
mappingIndex := ""
indexReg, _ := regexp.Compile(idx)
for key, value := range result {
mappings, ok := value.(map[string]interface{})
if !ok {
continue
}
if len(mappings) == 0 {
continue
}
if key == idx || strings.Contains(key, idx) ||
(indexReg != nil && indexReg.MatchString(key)) {
mappingIndex = key
break
}
}
if len(mappingIndex) == 0 {
return fields, nil
}
fields = propertyMappingRange(result[mappingIndex], 1)
sort.Strings(fields)
return fields, nil
}
func propertyMappingRange(v interface{}, depth int) (fields []string) {
mapping, ok := v.(map[string]interface{})
if !ok {
return
}
if len(mapping) == 0 {
return
}
for key, value := range mapping {
if reflect.TypeOf(value).Kind() == reflect.Map {
valueMap := value.(map[string]interface{})
if prop, found := valueMap["properties"]; found {
subFields := propertyMappingRange(prop, depth+1)
for i := range subFields {
if depth == 1 {
fields = append(fields, subFields[i])
} else {
fields = append(fields, key+"."+subFields[i])
}
}
} else if typ, found := valueMap["type"]; found {
if eslike.HitFilter(typ.(string)) {
continue
}
fields = append(fields, key)
}
}
}
return
}
func (os *OpenSearch) QueryLog(ctx context.Context, queryParam interface{}) ([]interface{}, int64, error) {
search := func(ctx context.Context, indices []string, source interface{}, timeout int, maxShard int) (*elastic.SearchResult, error) {
return search(ctx, indices, source, timeout, os.Client)
}
return eslike.QueryLog(ctx, queryParam, os.Timeout, os.Version, 0, search)
}
func (os *OpenSearch) QueryFieldValue(indexs []string, field string, query string) ([]string, error) {
var values []string
source := elastic.NewSearchSource().
Size(0)
if query != "" {
source = source.Query(elastic.NewBoolQuery().Must(elastic.NewQueryStringQuery(query)))
}
source = source.Aggregation("distinct", elastic.NewTermsAggregation().Field(field).Size(10000))
result, err := search(context.Background(), indexs, source, 0, os.Client)
if err != nil {
return values, err
}
agg, found := result.Aggregations.Terms("distinct")
if !found {
return values, nil
}
for _, bucket := range agg.Buckets {
values = append(values, bucket.Key.(string))
}
return values, nil
}
func (os *OpenSearch) QueryMapData(ctx context.Context, query interface{}) ([]map[string]string, error) {
return nil, nil
}

View File

@@ -0,0 +1,358 @@
package postgresql
import (
"context"
"fmt"
"regexp"
"strings"
"time"
"github.com/ccfos/nightingale/v6/datasource"
"github.com/ccfos/nightingale/v6/pkg/macros"
"github.com/ccfos/nightingale/v6/dskit/postgres"
"github.com/ccfos/nightingale/v6/dskit/sqlbase"
"github.com/ccfos/nightingale/v6/dskit/types"
"github.com/ccfos/nightingale/v6/models"
"github.com/mitchellh/mapstructure"
"github.com/toolkits/pkg/logger"
)
const (
PostgreSQLType = "pgsql"
)
var (
regx = `(?i)from\s+((?:"[^"]+"|[a-zA-Z0-9_]+))\.((?:"[^"]+"|[a-zA-Z0-9_]+))\.((?:"[^"]+"|[a-zA-Z0-9_]+))`
)
func init() {
datasource.RegisterDatasource(PostgreSQLType, new(PostgreSQL))
}
type PostgreSQL struct {
Shards []*postgres.PostgreSQL `json:"pgsql.shards" mapstructure:"pgsql.shards"`
}
type QueryParam struct {
Ref string `json:"ref" mapstructure:"ref"`
Database string `json:"database" mapstructure:"database"`
Table string `json:"table" mapstructure:"table"`
SQL string `json:"sql" mapstructure:"sql"`
Keys datasource.Keys `json:"keys" mapstructure:"keys"`
From int64 `json:"from" mapstructure:"from"`
To int64 `json:"to" mapstructure:"to"`
}
func (p *PostgreSQL) InitClient() error {
if len(p.Shards) == 0 {
return fmt.Errorf("not found postgresql addr, please check datasource config")
}
for _, shard := range p.Shards {
if db, err := shard.NewConn(context.TODO(), "postgres"); err != nil {
defer sqlbase.CloseDB(db)
return err
}
}
return nil
}
func (p *PostgreSQL) Init(settings map[string]interface{}) (datasource.Datasource, error) {
newest := new(PostgreSQL)
err := mapstructure.Decode(settings, newest)
return newest, err
}
func (p *PostgreSQL) Validate(ctx context.Context) error {
if len(p.Shards) == 0 || len(strings.TrimSpace(p.Shards[0].Addr)) == 0 {
return fmt.Errorf("postgresql addr is invalid, please check datasource setting")
}
if len(strings.TrimSpace(p.Shards[0].User)) == 0 {
return fmt.Errorf("postgresql user is invalid, please check datasource setting")
}
return nil
}
// Equal compares whether two objects are the same, used for caching
func (p *PostgreSQL) Equal(d datasource.Datasource) bool {
newest, ok := d.(*PostgreSQL)
if !ok {
logger.Errorf("unexpected plugin type, expected is postgresql")
return false
}
if len(p.Shards) == 0 || len(newest.Shards) == 0 {
return false
}
oldShard := p.Shards[0]
newShard := newest.Shards[0]
if oldShard.Addr != newShard.Addr {
return false
}
if oldShard.User != newShard.User {
return false
}
if oldShard.Password != newShard.Password {
return false
}
if oldShard.MaxQueryRows != newShard.MaxQueryRows {
return false
}
if oldShard.Timeout != newShard.Timeout {
return false
}
if oldShard.MaxIdleConns != newShard.MaxIdleConns {
return false
}
if oldShard.MaxOpenConns != newShard.MaxOpenConns {
return false
}
if oldShard.ConnMaxLifetime != newShard.ConnMaxLifetime {
return false
}
return true
}
func (p *PostgreSQL) ShowDatabases(ctx context.Context) ([]string, error) {
return p.Shards[0].ShowDatabases(ctx, "")
}
func (p *PostgreSQL) ShowTables(ctx context.Context, database string) ([]string, error) {
p.Shards[0].DB = database
rets, err := p.Shards[0].ShowTables(ctx, "")
if err != nil {
return nil, err
}
tables := make([]string, 0, len(rets))
for scheme, tabs := range rets {
for _, tab := range tabs {
tables = append(tables, scheme+"."+tab)
}
}
return tables, nil
}
func (p *PostgreSQL) MakeLogQuery(ctx context.Context, query interface{}, eventTags []string, start, end int64) (interface{}, error) {
return nil, nil
}
func (p *PostgreSQL) MakeTSQuery(ctx context.Context, query interface{}, eventTags []string, start, end int64) (interface{}, error) {
return nil, nil
}
func (p *PostgreSQL) QueryMapData(ctx context.Context, query interface{}) ([]map[string]string, error) {
return nil, nil
}
func (p *PostgreSQL) QueryData(ctx context.Context, query interface{}) ([]models.DataResp, error) {
postgresqlQueryParam := new(QueryParam)
if err := mapstructure.Decode(query, postgresqlQueryParam); err != nil {
return nil, err
}
postgresqlQueryParam.SQL = formatSQLDatabaseNameWithRegex(postgresqlQueryParam.SQL)
if strings.Contains(postgresqlQueryParam.SQL, "$__") {
var err error
postgresqlQueryParam.SQL, err = macros.Macro(postgresqlQueryParam.SQL, postgresqlQueryParam.From, postgresqlQueryParam.To)
if err != nil {
return nil, err
}
}
if postgresqlQueryParam.Database != "" {
p.Shards[0].DB = postgresqlQueryParam.Database
} else {
db, err := parseDBName(postgresqlQueryParam.SQL)
if err != nil {
return nil, err
}
p.Shards[0].DB = db
}
timeout := p.Shards[0].Timeout
if timeout == 0 {
timeout = 60
}
timeoutCtx, cancel := context.WithTimeout(ctx, time.Duration(timeout)*time.Second)
defer cancel()
items, err := p.Shards[0].QueryTimeseries(timeoutCtx, &sqlbase.QueryParam{
Sql: postgresqlQueryParam.SQL,
Keys: types.Keys{
ValueKey: postgresqlQueryParam.Keys.ValueKey,
LabelKey: postgresqlQueryParam.Keys.LabelKey,
TimeKey: postgresqlQueryParam.Keys.TimeKey,
},
})
if err != nil {
logger.Warningf("query:%+v get data err:%v", postgresqlQueryParam, err)
return []models.DataResp{}, err
}
data := make([]models.DataResp, 0)
for i := range items {
data = append(data, models.DataResp{
Ref: postgresqlQueryParam.Ref,
Metric: items[i].Metric,
Values: items[i].Values,
})
}
// parse resp to time series data
logger.Infof("req:%+v keys:%+v \n data:%v", postgresqlQueryParam, postgresqlQueryParam.Keys, data)
return data, nil
}
func (p *PostgreSQL) QueryLog(ctx context.Context, query interface{}) ([]interface{}, int64, error) {
postgresqlQueryParam := new(QueryParam)
if err := mapstructure.Decode(query, postgresqlQueryParam); err != nil {
return nil, 0, err
}
if postgresqlQueryParam.Database != "" {
p.Shards[0].DB = postgresqlQueryParam.Database
} else {
db, err := parseDBName(postgresqlQueryParam.SQL)
if err != nil {
return nil, 0, err
}
p.Shards[0].DB = db
}
postgresqlQueryParam.SQL = formatSQLDatabaseNameWithRegex(postgresqlQueryParam.SQL)
if strings.Contains(postgresqlQueryParam.SQL, "$__") {
var err error
postgresqlQueryParam.SQL, err = macros.Macro(postgresqlQueryParam.SQL, postgresqlQueryParam.From, postgresqlQueryParam.To)
if err != nil {
return nil, 0, err
}
}
timeout := p.Shards[0].Timeout
if timeout == 0 {
timeout = 60
}
timeoutCtx, cancel := context.WithTimeout(ctx, time.Duration(timeout)*time.Second)
defer cancel()
items, err := p.Shards[0].Query(timeoutCtx, &sqlbase.QueryParam{
Sql: postgresqlQueryParam.SQL,
})
if err != nil {
logger.Warningf("query:%+v get data err:%v", postgresqlQueryParam, err)
return []interface{}{}, 0, err
}
logs := make([]interface{}, 0)
for i := range items {
logs = append(logs, items[i])
}
return logs, 0, nil
}
func (p *PostgreSQL) DescribeTable(ctx context.Context, query interface{}) ([]*types.ColumnProperty, error) {
postgresqlQueryParam := new(QueryParam)
if err := mapstructure.Decode(query, postgresqlQueryParam); err != nil {
return nil, err
}
p.Shards[0].DB = postgresqlQueryParam.Database
pairs := strings.Split(postgresqlQueryParam.Table, ".") // format: scheme.table_name
scheme := ""
table := postgresqlQueryParam.Table
if len(pairs) == 2 {
scheme = pairs[0]
table = pairs[1]
}
return p.Shards[0].DescTable(ctx, scheme, table)
}
func parseDBName(sql string) (db string, err error) {
re := regexp.MustCompile(regx)
matches := re.FindStringSubmatch(sql)
if len(matches) != 4 {
return "", fmt.Errorf("no valid table name in format database.schema.table found")
}
return strings.Trim(matches[1], `"`), nil
}
// formatSQLDatabaseNameWithRegex 只对 dbname.scheme.tabname 格式进行数据库名称格式化,转为 "dbname".scheme.tabname
// 在pgsql中大小写是通过"" 双引号括起来区分的,默认pg都是转为小写的所以这里转为 "dbname".scheme."tabname"
func formatSQLDatabaseNameWithRegex(sql string) string {
// 匹配 from dbname.scheme.table_name 的模式
// 使用捕获组来精确匹配数据库名称确保后面跟着scheme和table
re := regexp.MustCompile(`(?i)\bfrom\s+([a-zA-Z_][a-zA-Z0-9_]*)\s*\.\s*([a-zA-Z_][a-zA-Z0-9_]*)\s*\.\s*([a-zA-Z_][a-zA-Z0-9_]*)`)
return re.ReplaceAllString(sql, `from "$1"."$2"."$3"`)
}
func extractColumns(sql string) ([]string, error) {
// 将 SQL 转换为小写以简化匹配
sql = strings.ToLower(sql)
// 匹配 SELECT 和 FROM 之间的内容
re := regexp.MustCompile(`select\s+(.*?)\s+from`)
matches := re.FindStringSubmatch(sql)
if len(matches) < 2 {
return nil, fmt.Errorf("no columns found or invalid SQL syntax")
}
// 提取列部分
columnsString := matches[1]
// 分割列
columns := splitColumns(columnsString)
// 清理每个列名
for i, col := range columns {
columns[i] = strings.TrimSpace(col)
}
return columns, nil
}
func splitColumns(columnsString string) []string {
var columns []string
var currentColumn strings.Builder
parenthesesCount := 0
inQuotes := false
for _, char := range columnsString {
switch char {
case '(':
parenthesesCount++
currentColumn.WriteRune(char)
case ')':
parenthesesCount--
currentColumn.WriteRune(char)
case '\'', '"':
inQuotes = !inQuotes
currentColumn.WriteRune(char)
case ',':
if parenthesesCount == 0 && !inQuotes {
columns = append(columns, currentColumn.String())
currentColumn.Reset()
} else {
currentColumn.WriteRune(char)
}
default:
currentColumn.WriteRune(char)
}
}
if currentColumn.Len() > 0 {
columns = append(columns, currentColumn.String())
}
return columns
}

Binary file not shown.

After

Width:  |  Height:  |  Size: 384 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 345 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 336 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 497 KiB

BIN
doc/img/readme/logos.png Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 956 KiB

View File

@@ -5,7 +5,7 @@ RunMode = "release"
# log write dir
Dir = "logs"
# log level: DEBUG INFO WARNING ERROR
Level = "DEBUG"
Level = "INFO"
# stdout, stderr, file
Output = "stdout"
# # rotate by time
@@ -75,15 +75,6 @@ DefaultRoles = ["Standard"]
[HTTP.RSA]
# open RSA
OpenRSA = false
# Before replacing the key file, make sure that there are no encrypted variables in the database "configs".
# It is recommended to decrypt and remove all encrypted values from the database before replacing the key file.
# This will prevent any potential issues with accessing or decrypting the variables using the new key file.
# RSA public key (auto carete)
RSAPublicKeyPath = "etc/rsa/public.pem"
# RSA private key (auto carete)
RSAPrivateKeyPath = "etc/rsa/private.pem"
# RSA private key password
RSAPassWord = "n9e@n9e!"
[DB]
# postgres: DSN="host=127.0.0.1 port=5432 user=root dbname=n9e_v6 password=1234 sslmode=disable"
@@ -138,8 +129,6 @@ AlertDetail = false
[Pushgw]
# use target labels in database instead of in series
LabelRewrite = true
# # default busigroup key name
# BusiGroupLabelKey = "busigroup"
ForceUseServerTS = true
# [Pushgw.DebugSample]

View File

@@ -5,7 +5,7 @@ RunMode = "release"
# log write dir
Dir = "logs"
# log level: DEBUG INFO WARNING ERROR
Level = "DEBUG"
Level = "INFO"
# stdout, stderr, file
Output = "file"
# # rotate by time
@@ -71,15 +71,6 @@ DefaultRoles = ["Standard"]
[HTTP.RSA]
# open RSA
OpenRSA = false
# Before replacing the key file, make sure that there are no encrypted variables in the database "configs".
# It is recommended to decrypt and remove all encrypted values from the database before replacing the key file.
# This will prevent any potential issues with accessing or decrypting the variables using the new key file.
# RSA public key (auto carete)
RSAPublicKeyPath = "etc/rsa/public.pem"
# RSA private key (auto carete)
RSAPrivateKeyPath = "etc/rsa/private.pem"
# RSA private key password
RSAPassWord = "n9e@n9e!"
[DB]
# postgres: host=%s port=%s user=%s dbname=%s password=%s sslmode=%s
@@ -135,8 +126,6 @@ AlertDetail = true
[Pushgw]
# use target labels in database instead of in series
LabelRewrite = true
# # default busigroup key name
# BusiGroupLabelKey = "busigroup"
ForceUseServerTS = true
# [Pushgw.DebugSample]

View File

@@ -5,7 +5,7 @@ RunMode = "release"
# log write dir
Dir = "logs"
# log level: DEBUG INFO WARNING ERROR
Level = "DEBUG"
Level = "INFO"
# stdout, stderr, file
Output = "stdout"
# # rotate by time
@@ -71,15 +71,6 @@ DefaultRoles = ["Standard"]
[HTTP.RSA]
# open RSA
OpenRSA = false
# Before replacing the key file, make sure that there are no encrypted variables in the database "configs".
# It is recommended to decrypt and remove all encrypted values from the database before replacing the key file.
# This will prevent any potential issues with accessing or decrypting the variables using the new key file.
# RSA public key (auto carete)
RSAPublicKeyPath = "etc/rsa/public.pem"
# RSA private key (auto carete)
RSAPrivateKeyPath = "etc/rsa/private.pem"
# RSA private key password
RSAPassWord = "n9e@n9e!"
[DB]
# postgres: host=%s port=%s user=%s dbname=%s password=%s sslmode=%s
@@ -135,8 +126,6 @@ AlertDetail = true
[Pushgw]
# use target labels in database instead of in series
LabelRewrite = true
# # default busigroup key name
# BusiGroupLabelKey = "busigroup"
ForceUseServerTS = true
# [Pushgw.DebugSample]

View File

@@ -204,6 +204,7 @@ CREATE TABLE board (
public smallint not null default 0 ,
built_in smallint not null default 0 ,
hide smallint not null default 0 ,
public_cate bigint NOT NULL DEFAULT 0,
create_at bigint not null default 0,
create_by varchar(64) not null default '',
update_at bigint not null default 0,
@@ -217,6 +218,7 @@ COMMENT ON COLUMN board.tags IS 'split by space';
COMMENT ON COLUMN board.public IS '0:false 1:true';
COMMENT ON COLUMN board.built_in IS '0:false 1:true';
COMMENT ON COLUMN board.hide IS '0:false 1:true';
COMMENT ON COLUMN board.public_cate IS '0 anonymous 1 login 2 busi';
-- for dashboard new version
@@ -429,43 +431,31 @@ CREATE TABLE target (
ident varchar(191) not null,
note varchar(255) not null default '',
tags varchar(512) not null default '',
host_tags text,
host_ip varchar(15) default '',
agent_version varchar(255) default '',
engine_name varchar(255) default '',
os varchar(31) default '',
update_at bigint not null default 0,
PRIMARY KEY (id),
UNIQUE (ident)
);
CREATE INDEX ON target (group_id);
CREATE INDEX idx_host_ip ON target (host_ip);
CREATE INDEX idx_agent_version ON target (agent_version);
CREATE INDEX idx_engine_name ON target (engine_name);
CREATE INDEX idx_os ON target (os);
COMMENT ON COLUMN target.group_id IS 'busi group id';
COMMENT ON COLUMN target.ident IS 'target id';
COMMENT ON COLUMN target.note IS 'append to alert event as field';
COMMENT ON COLUMN target.tags IS 'append to series data as tags, split by space, append external space at suffix';
COMMENT ON COLUMN target.host_tags IS 'global labels set in conf file';
COMMENT ON COLUMN target.host_ip IS 'IPv4 string';
COMMENT ON COLUMN target.agent_version IS 'agent version';
COMMENT ON COLUMN target.engine_name IS 'engine_name';
-- case1: target_idents; case2: target_tags
-- CREATE TABLE collect_rule (
-- id bigserial,
-- group_id bigint not null default 0 comment 'busi group id',
-- cluster varchar(128) not null,
-- target_idents varchar(512) not null default '' comment 'ident list, split by space',
-- target_tags varchar(512) not null default '' comment 'filter targets by tags, split by space',
-- name varchar(191) not null default '',
-- note varchar(255) not null default '',
-- step int not null,
-- type varchar(64) not null comment 'e.g. port proc log plugin',
-- data text not null,
-- append_tags varchar(255) not null default '' comment 'split by space: e.g. mod=n9e dept=cloud',
-- create_at bigint not null default 0,
-- create_by varchar(64) not null default '',
-- update_at bigint not null default 0,
-- update_by varchar(64) not null default '',
-- PRIMARY KEY (id),
-- KEY (group_id, type, name)
-- ) ;
COMMENT ON COLUMN target.os IS 'os type';
CREATE TABLE metric_view (
id bigserial,
@@ -734,6 +724,7 @@ CREATE TABLE datasource
(
id serial,
name varchar(191) not null default '',
identifier varchar(255) not null default '',
description varchar(255) not null default '',
category varchar(255) not null default '',
plugin_id int not null default 0,
@@ -751,8 +742,8 @@ CREATE TABLE datasource
updated_by varchar(64) not null default '',
UNIQUE (name),
PRIMARY KEY (id)
) ;
) ;
CREATE TABLE builtin_cate (
id bigserial,
name varchar(191) not null,
@@ -795,10 +786,12 @@ CREATE TABLE es_index_pattern (
create_by varchar(64) default '',
update_at bigint default '0',
update_by varchar(64) default '',
note varchar(4096) not null default '',
PRIMARY KEY (id),
UNIQUE (datasource_id, name)
) ;
COMMENT ON COLUMN es_index_pattern.datasource_id IS 'datasource id';
COMMENT ON COLUMN es_index_pattern.note IS 'description of metric in Chinese';
CREATE TABLE builtin_metrics (
id bigserial,
@@ -813,6 +806,7 @@ CREATE TABLE builtin_metrics (
created_by varchar(191) NOT NULL DEFAULT '',
updated_at bigint NOT NULL DEFAULT 0,
updated_by varchar(191) NOT NULL DEFAULT '',
uuid BIGINT NOT NULL DEFAULT 0,
PRIMARY KEY (id),
UNIQUE (lang, collector, typ, name)
);
@@ -834,6 +828,7 @@ COMMENT ON COLUMN builtin_metrics.created_at IS 'create time';
COMMENT ON COLUMN builtin_metrics.created_by IS 'creator';
COMMENT ON COLUMN builtin_metrics.updated_at IS 'update time';
COMMENT ON COLUMN builtin_metrics.updated_by IS 'updater';
COMMENT ON COLUMN builtin_metrics.uuid IS 'unique identifier';
CREATE TABLE metric_filter (
id BIGSERIAL PRIMARY KEY,
@@ -903,4 +898,128 @@ CREATE TABLE dash_annotation (
create_by varchar(64) not null default '',
update_at bigint not null default 0,
update_by varchar(64) not null default ''
);
CREATE TABLE source_token (
id bigserial PRIMARY KEY,
source_type varchar(64) NOT NULL DEFAULT '',
source_id varchar(255) NOT NULL DEFAULT '',
token varchar(255) NOT NULL DEFAULT '',
expire_at bigint NOT NULL DEFAULT 0,
create_at bigint NOT NULL DEFAULT 0,
create_by varchar(64) NOT NULL DEFAULT ''
);
CREATE INDEX idx_source_token_type_id_token ON source_token (source_type, source_id, token);
CREATE TABLE notification_record (
id BIGSERIAL PRIMARY KEY,
notify_rule_id BIGINT NOT NULL DEFAULT 0,
event_id bigint NOT NULL,
sub_id bigint DEFAULT NULL,
channel varchar(255) NOT NULL,
status bigint DEFAULT NULL,
target varchar(1024) NOT NULL,
details varchar(2048) DEFAULT '',
created_at bigint NOT NULL
);
CREATE INDEX idx_evt ON notification_record (event_id);
COMMENT ON COLUMN notification_record.event_id IS 'event history id';
COMMENT ON COLUMN notification_record.sub_id IS 'subscribed rule id';
COMMENT ON COLUMN notification_record.channel IS 'notification channel name';
COMMENT ON COLUMN notification_record.status IS 'notification status';
COMMENT ON COLUMN notification_record.target IS 'notification target';
COMMENT ON COLUMN notification_record.details IS 'notification other info';
COMMENT ON COLUMN notification_record.created_at IS 'create time';
CREATE TABLE target_busi_group (
id BIGSERIAL PRIMARY KEY,
target_ident varchar(191) NOT NULL,
group_id bigint NOT NULL,
update_at bigint NOT NULL
);
CREATE UNIQUE INDEX idx_target_group ON target_busi_group (target_ident, group_id);
CREATE TABLE user_token (
id BIGSERIAL PRIMARY KEY,
username varchar(255) NOT NULL DEFAULT '',
token_name varchar(255) NOT NULL DEFAULT '',
token varchar(255) NOT NULL DEFAULT '',
create_at bigint NOT NULL DEFAULT 0,
last_used bigint NOT NULL DEFAULT 0
);
CREATE TABLE notify_rule (
id bigserial PRIMARY KEY,
name varchar(255) NOT NULL,
description text,
enable boolean DEFAULT false,
user_group_ids varchar(255) NOT NULL DEFAULT '',
notify_configs text,
pipeline_configs text,
create_at bigint NOT NULL DEFAULT 0,
create_by varchar(64) NOT NULL DEFAULT '',
update_at bigint NOT NULL DEFAULT 0,
update_by varchar(64) NOT NULL DEFAULT ''
);
CREATE TABLE notify_channel (
id bigserial PRIMARY KEY,
name varchar(255) NOT NULL,
ident varchar(255) NOT NULL,
description text,
enable boolean DEFAULT false,
param_config text,
request_type varchar(50) NOT NULL,
request_config text,
weight int NOT NULL DEFAULT 0,
create_at bigint NOT NULL DEFAULT 0,
create_by varchar(64) NOT NULL DEFAULT '',
update_at bigint NOT NULL DEFAULT 0,
update_by varchar(64) NOT NULL DEFAULT ''
);
CREATE TABLE message_template (
id bigserial PRIMARY KEY,
name varchar(64) NOT NULL,
ident varchar(64) NOT NULL,
content text,
user_group_ids varchar(64),
notify_channel_ident varchar(64) NOT NULL DEFAULT '',
private int NOT NULL DEFAULT 0,
weight int NOT NULL DEFAULT 0,
create_at bigint NOT NULL DEFAULT 0,
create_by varchar(64) NOT NULL DEFAULT '',
update_at bigint NOT NULL DEFAULT 0,
update_by varchar(64) NOT NULL DEFAULT ''
);
CREATE TABLE event_pipeline (
id bigserial PRIMARY KEY,
name varchar(128) NOT NULL,
team_ids text,
description varchar(255) NOT NULL DEFAULT '',
filter_enable smallint NOT NULL DEFAULT 0,
label_filters text,
attribute_filters text,
processors text,
create_at bigint NOT NULL DEFAULT 0,
create_by varchar(64) NOT NULL DEFAULT '',
update_at bigint NOT NULL DEFAULT 0,
update_by varchar(64) NOT NULL DEFAULT ''
);
CREATE TABLE embedded_product (
id bigserial PRIMARY KEY,
name varchar(255) DEFAULT NULL,
url varchar(255) DEFAULT NULL,
is_private boolean DEFAULT NULL,
team_ids varchar(255),
create_at bigint NOT NULL DEFAULT 0,
create_by varchar(64) NOT NULL DEFAULT '',
update_at bigint NOT NULL DEFAULT 0,
update_by varchar(64) NOT NULL DEFAULT ''
);

View File

@@ -5,7 +5,7 @@ RunMode = "release"
# log write dir
Dir = "logs"
# log level: DEBUG INFO WARNING ERROR
Level = "DEBUG"
Level = "INFO"
# stdout, stderr, file
Output = "stdout"
# # rotate by time
@@ -130,8 +130,6 @@ AlertDetail = true
[Pushgw]
# use target labels in database instead of in series
LabelRewrite = true
# # default busigroup key name
# BusiGroupLabelKey = "busigroup"
ForceUseServerTS = true
# [Pushgw.DebugSample]

View File

@@ -1,25 +0,0 @@
#### {{if .IsRecovered}}<font color="#008800">💚{{.RuleName}}</font>{{else}}<font color="#FF0000">💔{{.RuleName}}</font>{{end}}
---
{{$time_duration := sub now.Unix .FirstTriggerTime }}{{if .IsRecovered}}{{$time_duration = sub .LastEvalTime .FirstTriggerTime }}{{end}}
- **告警级别**: {{.Severity}}
{{- if .RuleNote}}
- **规则备注**: {{.RuleNote}}
{{- end}}
{{- if not .IsRecovered}}
- **当次触发时值**: {{.TriggerValue}}
- **当次触发时间**: {{timeformat .TriggerTime}}
- **告警持续时长**: {{humanizeDurationInterface $time_duration}}
{{- else}}
{{- if .AnnotationsJSON.recovery_value}}
- **恢复时值**: {{formatDecimal .AnnotationsJSON.recovery_value 4}}
{{- end}}
- **恢复时间**: {{timeformat .LastEvalTime}}
- **告警持续时长**: {{humanizeDurationInterface $time_duration}}
{{- end}}
- **告警事件标签**:
{{- range $key, $val := .TagsMap}}
{{- if ne $key "rulename" }}
- `{{$key}}`: `{{$val}}`
{{- end}}
{{- end}}

View File

@@ -1,224 +0,0 @@
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8">
<meta http-equiv="X-UA-Compatible" content="ie=edge">
<title>夜莺告警通知</title>
<style type="text/css">
.wrapper {
background-color: #f8f8f8;
padding: 15px;
height: 100%;
}
.main {
width: 600px;
padding: 30px;
margin: 0 auto;
background-color: #fff;
font-size: 12px;
font-family: verdana,'Microsoft YaHei',Consolas,'Deja Vu Sans Mono','Bitstream Vera Sans Mono';
}
header {
border-radius: 2px 2px 0 0;
}
header .title {
font-size: 14px;
color: #333333;
margin: 0;
}
header .sub-desc {
color: #333;
font-size: 14px;
margin-top: 6px;
margin-bottom: 0;
}
hr {
margin: 20px 0;
height: 0;
border: none;
border-top: 1px solid #e5e5e5;
}
em {
font-weight: 600;
}
table {
margin: 20px 0;
width: 100%;
}
table tbody tr{
font-weight: 200;
font-size: 12px;
color: #666;
height: 32px;
}
.succ {
background-color: green;
color: #fff;
}
.fail {
background-color: red;
color: #fff;
}
.succ th, .succ td, .fail th, .fail td {
color: #fff;
}
table tbody tr th {
width: 80px;
text-align: right;
}
.text-right {
text-align: right;
}
.body {
margin-top: 24px;
}
.body-text {
color: #666666;
-webkit-font-smoothing: antialiased;
}
.body-extra {
-webkit-font-smoothing: antialiased;
}
.body-extra.text-right a {
text-decoration: none;
color: #333;
}
.body-extra.text-right a:hover {
color: #666;
}
.button {
width: 200px;
height: 50px;
margin-top: 20px;
text-align: center;
border-radius: 2px;
background: #2D77EE;
line-height: 50px;
font-size: 20px;
color: #FFFFFF;
cursor: pointer;
}
.button:hover {
background: rgb(25, 115, 255);
border-color: rgb(25, 115, 255);
color: #fff;
}
footer {
margin-top: 10px;
text-align: right;
}
.footer-logo {
text-align: right;
}
.footer-logo-image {
width: 108px;
height: 27px;
margin-right: 10px;
}
.copyright {
margin-top: 10px;
font-size: 12px;
text-align: right;
color: #999;
-webkit-font-smoothing: antialiased;
}
</style>
</head>
<body>
<div class="wrapper">
<div class="main">
<header>
<h3 class="title">{{.RuleName}}</h3>
<p class="sub-desc"></p>
</header>
<hr>
<div class="body">
<table cellspacing="0" cellpadding="0" border="0">
<tbody>
{{if .IsRecovered}}
<tr class="succ">
<th>级别状态:</th>
<td>S{{.Severity}} Recovered</td>
</tr>
{{else}}
<tr class="fail">
<th>级别状态:</th>
<td>S{{.Severity}} Triggered</td>
</tr>
{{end}}
<tr>
<th>策略备注:</th>
<td>{{.RuleNote}}</td>
</tr>
<tr>
<th>设备备注:</th>
<td>{{.TargetNote}}</td>
</tr>
{{if not .IsRecovered}}
<tr>
<th>触发时值:</th>
<td>{{.TriggerValue}}</td>
</tr>
{{end}}
{{if .TargetIdent}}
<tr>
<th>监控对象:</th>
<td>{{.TargetIdent}}</td>
</tr>
{{end}}
<tr>
<th>监控指标:</th>
<td>{{.TagsJSON}}</td>
</tr>
{{if .IsRecovered}}
<tr>
<th>恢复时间:</th>
<td>{{timeformat .LastEvalTime}}</td>
</tr>
{{else}}
<tr>
<th>触发时间:</th>
<td>
{{timeformat .TriggerTime}}
</td>
</tr>
{{end}}
<tr>
<th>发送时间:</th>
<td>
{{timestamp}}
</td>
</tr>
<tr>
<th>PromQL</th>
<td>
{{.PromQl}}
</td>
</tr>
</tbody>
</table>
<hr>
<footer>
<div class="copyright" style="font-style: italic">
报警太多?使用 <a href="https://flashcat.cloud/product/flashduty/" target="_blank">FlashDuty</a> 做告警聚合降噪、排班OnCall
</div>
</footer>
</div>
</div>
</div>
</body>
</html>

View File

@@ -1,7 +0,0 @@
级别状态: S{{.Severity}} {{if .IsRecovered}}Recovered{{else}}Triggered{{end}}
规则名称: {{.RuleName}}{{if .RuleNote}}
规则备注: {{.RuleNote}}{{end}}
监控指标: {{.TagsJSON}}
{{if .IsRecovered}}恢复时间:{{timeformat .LastEvalTime}}{{else}}触发时间: {{timeformat .TriggerTime}}
触发时值: {{.TriggerValue}}{{end}}
发送时间: {{timestamp}}

View File

@@ -1,7 +0,0 @@
级别状态: S{{.Severity}} {{if .IsRecovered}}Recovered{{else}}Triggered{{end}}
规则名称: {{.RuleName}}{{if .RuleNote}}
规则备注: {{.RuleNote}}{{end}}
监控指标: {{.TagsJSON}}
{{if .IsRecovered}}恢复时间:{{timeformat .LastEvalTime}}{{else}}触发时间: {{timeformat .TriggerTime}}
触发时值: {{.TriggerValue}}{{end}}
发送时间: {{timestamp}}

View File

@@ -1 +0,0 @@
{{if .IsRecovered}}Recovered{{else}}Triggered{{end}}: {{.RuleName}} {{.TagsJSON}}

View File

@@ -1,7 +0,0 @@
**级别状态**: {{if .IsRecovered}}<font color="info">S{{.Severity}} Recovered</font>{{else}}<font color="warning">S{{.Severity}} Triggered</font>{{end}}
**规则标题**: {{.RuleName}}{{if .RuleNote}}
**规则备注**: {{.RuleNote}}{{end}}
**监控指标**: {{.TagsJSON}}
{{if .IsRecovered}}**恢复时间**{{timeformat .LastEvalTime}}{{else}}**触发时间**: {{timeformat .TriggerTime}}
**触发时值**: {{.TriggerValue}}{{end}}
**发送时间**: {{timestamp}}

View File

@@ -1,7 +0,0 @@
**级别状态**: {{if .IsRecovered}}<font color="info">S{{.Severity}} Recovered</font>{{else}}<font color="warning">S{{.Severity}} Triggered</font>{{end}}
**规则标题**: {{.RuleName}}{{if .RuleNote}}
**规则备注**: {{.RuleNote}}{{end}}
**监控指标**: {{.TagsJSON}}
{{if .IsRecovered}}**恢复时间**{{timeformat .LastEvalTime}}{{else}}**触发时间**: {{timeformat .TriggerTime}}
**触发时值**: {{.TriggerValue}}{{end}}
**发送时间**: {{timestamp}}

View File

@@ -107,12 +107,6 @@ insert into `role_operation`(role_name, operation) values('Standard', '/help/mig
insert into `role_operation`(role_name, operation) values('Standard', '/alert-rules-built-in');
insert into `role_operation`(role_name, operation) values('Standard', '/dashboards-built-in');
insert into `role_operation`(role_name, operation) values('Standard', '/trace/dependencies');
insert into `role_operation`(role_name, operation) values('Admin', '/help/source');
insert into `role_operation`(role_name, operation) values('Admin', '/help/sso');
insert into `role_operation`(role_name, operation) values('Admin', '/help/notification-tpls');
insert into `role_operation`(role_name, operation) values('Admin', '/help/notification-settings');
insert into `role_operation`(role_name, operation) values('Standard', '/users');
insert into `role_operation`(role_name, operation) values('Standard', '/user-groups');
insert into `role_operation`(role_name, operation) values('Standard', '/user-groups/add');
@@ -291,6 +285,8 @@ CREATE TABLE `alert_rule` (
`append_tags` varchar(255) not null default '' comment 'split by space: service=n9e mod=api',
`annotations` text not null comment 'annotations',
`extra_config` text,
`notify_rule_ids` varchar(1024) DEFAULT '',
`notify_version` int DEFAULT 0,
`create_at` bigint not null default 0,
`create_by` varchar(64) not null default '',
`update_at` bigint not null default 0,
@@ -351,6 +347,8 @@ CREATE TABLE `alert_subscribe` (
`extra_config` text,
`redefine_webhooks` tinyint(1) default 0,
`for_duration` bigint not null default 0,
`notify_rule_ids` varchar(1024) DEFAULT '',
`notify_version` int DEFAULT 0,
`create_at` bigint not null default 0,
`create_by` varchar(64) not null default '',
`update_at` bigint not null default 0,
@@ -467,6 +465,7 @@ CREATE TABLE `alert_cur_event` (
`rule_config` text not null comment 'annotations',
`tags` varchar(1024) not null default '' comment 'merge data_tags rule_tags, split by ,,',
`original_tags` text comment 'labels key=val,,k2=v2',
`notify_rule_ids` text COMMENT 'notify rule ids',
PRIMARY KEY (`id`),
KEY (`hash`),
KEY (`rule_id`),
@@ -509,6 +508,7 @@ CREATE TABLE `alert_his_event` (
`original_tags` text comment 'labels key=val,,k2=v2',
`annotations` text not null comment 'annotations',
`rule_config` text not null comment 'annotations',
`notify_rule_ids` text COMMENT 'notify rule ids',
PRIMARY KEY (`id`),
INDEX `idx_last_eval_time` (`last_eval_time`),
KEY (`hash`),
@@ -533,7 +533,7 @@ CREATE TABLE `builtin_components` (
`updated_by` varchar(191) NOT NULL DEFAULT '' COMMENT '''updater''',
`disabled` int NOT NULL DEFAULT 0 COMMENT '''is disabled or not''',
PRIMARY KEY (`id`),
UNIQUE KEY `idx_ident` (`ident`)
KEY (`ident`)
) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4;
CREATE TABLE `builtin_payloads` (
@@ -560,6 +560,7 @@ CREATE TABLE `builtin_payloads` (
CREATE TABLE notification_record (
`id` BIGINT PRIMARY KEY AUTO_INCREMENT,
`notify_rule_id` BIGINT NOT NULL DEFAULT 0,
`event_id` bigint NOT NULL COMMENT 'event history id',
`sub_id` bigint COMMENT 'subscribed rule id',
`channel` varchar(255) NOT NULL COMMENT 'notification channel name',
@@ -640,6 +641,7 @@ CREATE TABLE `datasource`
(
`id` int unsigned NOT NULL AUTO_INCREMENT,
`name` varchar(191) not null default '',
`identifier` varchar(255) not null default '',
`description` varchar(255) not null default '',
`category` varchar(255) not null default '',
`plugin_id` int unsigned not null default 0,
@@ -696,6 +698,7 @@ CREATE TABLE `es_index_pattern` (
`allow_hide_system_indices` tinyint(1) not null default 0,
`fields_format` varchar(4096) not null default '',
`cross_cluster_enabled` int not null default 0,
`note` varchar(1024) not null default '',
`create_at` bigint default '0',
`create_by` varchar(64) default '',
`update_at` bigint default '0',
@@ -720,7 +723,6 @@ CREATE TABLE `builtin_metrics` (
`updated_by` varchar(191) NOT NULL DEFAULT '' COMMENT '''updater''',
`uuid` bigint NOT NULL DEFAULT 0 COMMENT '''uuid''',
PRIMARY KEY (`id`),
UNIQUE KEY `idx_collector_typ_name` (`lang`,`collector`, `typ`, `name`),
INDEX `idx_uuid` (`uuid`),
INDEX `idx_collector` (`collector`),
INDEX `idx_typ` (`typ`),
@@ -786,6 +788,7 @@ CREATE TABLE `notify_rule` (
`enable` tinyint(1) not null default 0,
`user_group_ids` varchar(255) not null default '',
`notify_configs` text,
`pipeline_configs` text,
`create_at` bigint not null default 0,
`create_by` varchar(64) not null default '',
`update_at` bigint not null default 0,
@@ -802,6 +805,7 @@ CREATE TABLE `notify_channel` (
`param_config` text,
`request_type` varchar(50) not null,
`request_config` text,
`weight` int not null default 0,
`create_at` bigint not null default 0,
`create_by` varchar(64) not null default '',
`update_at` bigint not null default 0,
@@ -817,6 +821,7 @@ CREATE TABLE `message_template` (
`user_group_ids` varchar(64),
`notify_channel_ident` varchar(64) not null default '',
`private` int not null default 0,
`weight` int not null default 0,
`create_at` bigint not null default 0,
`create_by` varchar(64) not null default '',
`update_at` bigint not null default 0,
@@ -824,6 +829,35 @@ CREATE TABLE `message_template` (
PRIMARY KEY (`id`)
) ENGINE = InnoDB DEFAULT CHARSET = utf8mb4;
CREATE TABLE `event_pipeline` (
`id` bigint unsigned not null auto_increment,
`name` varchar(128) not null,
`team_ids` text,
`description` varchar(255) not null default '',
`filter_enable` tinyint(1) not null default 0,
`label_filters` text,
`attr_filters` text,
`processor_configs` text,
`create_at` bigint not null default 0,
`create_by` varchar(64) not null default '',
`update_at` bigint not null default 0,
`update_by` varchar(64) not null default '',
PRIMARY KEY (`id`)
) ENGINE = InnoDB DEFAULT CHARSET = utf8mb4;
CREATE TABLE `embedded_product` (
`id` bigint unsigned NOT NULL AUTO_INCREMENT,
`name` varchar(255) DEFAULT NULL,
`url` varchar(255) DEFAULT NULL,
`is_private` boolean DEFAULT NULL,
`team_ids` varchar(255),
`create_at` bigint not null default 0,
`create_by` varchar(64) not null default '',
`update_at` bigint not null default 0,
`update_by` varchar(64) not null default '',
PRIMARY KEY (`id`)
) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4;
CREATE TABLE `task_meta`
(
`id` bigint unsigned NOT NULL AUTO_INCREMENT,
@@ -2180,4 +2214,16 @@ CREATE TABLE task_host_99
UNIQUE KEY `idx_id_host` (`id`, `host`),
PRIMARY KEY (`ii`)
) ENGINE = InnoDB
DEFAULT CHARSET = utf8mb4;
DEFAULT CHARSET = utf8mb4;
CREATE TABLE `source_token` (
`id` bigint unsigned NOT NULL AUTO_INCREMENT,
`source_type` varchar(64) NOT NULL DEFAULT '' COMMENT 'source type',
`source_id` varchar(255) NOT NULL DEFAULT '' COMMENT 'source identifier',
`token` varchar(255) NOT NULL DEFAULT '' COMMENT 'access token',
`expire_at` bigint NOT NULL DEFAULT 0 COMMENT 'expire timestamp',
`create_at` bigint NOT NULL DEFAULT 0 COMMENT 'create timestamp',
`create_by` varchar(64) NOT NULL DEFAULT '' COMMENT 'creator',
PRIMARY KEY (`id`),
KEY `idx_source_type_id_token` (`source_type`, `source_id`, `token`)
) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4;

View File

@@ -13,7 +13,6 @@ CREATE TABLE `builtin_metrics` (
`updated_at` bigint NOT NULL DEFAULT 0 COMMENT 'update time',
`updated_by` varchar(191) NOT NULL DEFAULT '' COMMENT 'updater',
PRIMARY KEY (`id`),
UNIQUE KEY `idx_collector_typ_name` (`lang`,`collector`, `typ`, `name`),
INDEX `idx_collector` (`collector`),
INDEX `idx_typ` (`typ`),
INDEX `idx_name` (`name`),
@@ -210,7 +209,77 @@ CREATE TABLE `message_template` (
PRIMARY KEY (`id`)
) ENGINE = InnoDB DEFAULT CHARSET = utf8mb4;
ALTER TABLE `alert_rule` ADD COLUMN `notify_rule_ids` varchar(1024) DEFAULT '';
ALTER TABLE `alert_rule` ADD COLUMN `notify_version` int DEFAULT 0;
ALTER TABLE `alert_subscribe` ADD COLUMN `notify_rule_ids` varchar(1024) DEFAULT '';
ALTER TABLE `alert_subscribe` ADD COLUMN `notify_version` int DEFAULT 0;
ALTER TABLE `notification_record` ADD COLUMN `notify_rule_id` BIGINT NOT NULL DEFAULT 0;
/* v8.0.0-beta.9 2025-03-17 */
ALTER TABLE `message_template` ADD COLUMN `weight` int not null default 0;
ALTER TABLE `notify_channel` ADD COLUMN `weight` int not null default 0;
/* v8.0.0-beta.11 2025-04-10 */
ALTER TABLE `es_index_pattern` ADD COLUMN `note` varchar(1024) not null default '';
ALTER TABLE `datasource` ADD COLUMN `identifier` varchar(255) not null default '';
/* v8.0.0-beta.11 2025-05-15 */
ALTER TABLE `notify_rule` ADD COLUMN `pipeline_configs` text;
CREATE TABLE `event_pipeline` (
`id` bigint unsigned not null auto_increment,
`name` varchar(128) not null,
`team_ids` text,
`description` varchar(255) not null default '',
`filter_enable` tinyint(1) not null default 0,
`attr_filters` text,
`processor_configs` text,
`create_at` bigint not null default 0,
`create_by` varchar(64) not null default '',
`update_at` bigint not null default 0,
`update_by` varchar(64) not null default '',
PRIMARY KEY (`id`)
) ENGINE = InnoDB DEFAULT CHARSET = utf8mb4;
/* v8.0.0 2025-05-15 */
CREATE TABLE `embedded_product` (
`id` bigint unsigned NOT NULL AUTO_INCREMENT,
`name` varchar(255) DEFAULT NULL,
`url` varchar(255) DEFAULT NULL,
`is_private` boolean DEFAULT NULL,
`team_ids` varchar(255),
`create_at` bigint not null default 0,
`create_by` varchar(64) not null default '',
`update_at` bigint not null default 0,
`update_by` varchar(64) not null default '',
PRIMARY KEY (`id`)
) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4;
/* v8.0.0 2025-05-29 */
CREATE TABLE `source_token` (
`id` bigint unsigned NOT NULL AUTO_INCREMENT,
`source_type` varchar(64) NOT NULL DEFAULT '' COMMENT 'source type',
`source_id` varchar(255) NOT NULL DEFAULT '' COMMENT 'source identifier',
`token` varchar(255) NOT NULL DEFAULT '' COMMENT 'access token',
`expire_at` bigint NOT NULL DEFAULT 0 COMMENT 'expire timestamp',
`create_at` bigint NOT NULL DEFAULT 0 COMMENT 'create timestamp',
`create_by` varchar(64) NOT NULL DEFAULT '' COMMENT 'creator',
PRIMARY KEY (`id`),
KEY `idx_source_type_id_token` (`source_type`, `source_id`, `token`)
) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4;
/* Add translation column for builtin metrics */
ALTER TABLE `builtin_metrics` ADD COLUMN `translation` TEXT COMMENT 'translation of metric' AFTER `lang`;
/* v8.0.0-beta.12 2025-06-03 */
ALTER TABLE `alert_his_event` ADD COLUMN `notify_rule_ids` text COMMENT 'notify rule ids';
ALTER TABLE `alert_cur_event` ADD COLUMN `notify_rule_ids` text COMMENT 'notify rule ids';
/* v8.0.0-beta.13 */
-- 删除 builtin_metrics 表的 idx_collector_typ_name 唯一索引
DROP INDEX IF EXISTS `idx_collector_typ_name` ON `builtin_metrics`;

View File

@@ -656,7 +656,6 @@ CREATE TABLE `builtin_metrics` (
`uuid integer` not null default 0
);
CREATE UNIQUE INDEX idx_collector_typ_name ON builtin_metrics (lang, collector, typ, name);
CREATE INDEX idx_collector ON builtin_metrics (collector);
CREATE INDEX idx_typ ON builtin_metrics (typ);
CREATE INDEX idx_builtinmetric_name ON builtin_metrics (name);

View File

@@ -8,7 +8,11 @@ import (
"github.com/ccfos/nightingale/v6/datasource"
_ "github.com/ccfos/nightingale/v6/datasource/ck"
_ "github.com/ccfos/nightingale/v6/datasource/doris"
"github.com/ccfos/nightingale/v6/datasource/es"
_ "github.com/ccfos/nightingale/v6/datasource/mysql"
_ "github.com/ccfos/nightingale/v6/datasource/opensearch"
_ "github.com/ccfos/nightingale/v6/datasource/postgresql"
"github.com/ccfos/nightingale/v6/dskit/tdengine"
"github.com/ccfos/nightingale/v6/models"
"github.com/ccfos/nightingale/v6/pkg/ctx"
@@ -18,6 +22,8 @@ import (
var FromAPIHook func()
var DatasourceProcessHook func(items []datasource.DatasourceInfo) []datasource.DatasourceInfo
func Init(ctx *ctx.Context, fromAPI bool) {
go getDatasourcesFromDBLoop(ctx, fromAPI)
}
@@ -47,6 +53,7 @@ var PromDefaultDatasourceId int64
func getDatasourcesFromDBLoop(ctx *ctx.Context, fromAPI bool) {
for {
if !fromAPI {
foundDefaultDatasource := false
items, err := models.GetDatasources(ctx)
if err != nil {
logger.Errorf("get datasource from database fail: %v", err)
@@ -58,6 +65,7 @@ func getDatasourcesFromDBLoop(ctx *ctx.Context, fromAPI bool) {
for _, item := range items {
if item.PluginType == "prometheus" && item.IsDefault {
atomic.StoreInt64(&PromDefaultDatasourceId, item.Id)
foundDefaultDatasource = true
}
logger.Debugf("get datasource: %+v", item)
@@ -78,8 +86,6 @@ func getDatasourcesFromDBLoop(ctx *ctx.Context, fromAPI bool) {
if item.PluginType == "elasticsearch" {
esN9eToDatasourceInfo(&ds, item)
} else if item.PluginType == "opensearch" {
osN9eToDatasourceInfo(&ds, item)
} else if item.PluginType == "tdengine" {
tdN9eToDatasourceInfo(&ds, item)
} else {
@@ -90,6 +96,16 @@ func getDatasourcesFromDBLoop(ctx *ctx.Context, fromAPI bool) {
}
dss = append(dss, ds)
}
if !foundDefaultDatasource && atomic.LoadInt64(&PromDefaultDatasourceId) != 0 {
logger.Debugf("no default datasource found")
atomic.StoreInt64(&PromDefaultDatasourceId, 0)
}
if DatasourceProcessHook != nil {
dss = DatasourceProcessHook(dss)
}
PutDatasources(dss)
} else {
FromAPIHook()
@@ -134,24 +150,6 @@ func esN9eToDatasourceInfo(ds *datasource.DatasourceInfo, item models.Datasource
ds.Settings["es.enable_write"] = item.SettingsJson["enable_write"]
}
// for opensearch
func osN9eToDatasourceInfo(ds *datasource.DatasourceInfo, item models.Datasource) {
ds.Settings = make(map[string]interface{})
ds.Settings["os.nodes"] = []string{item.HTTPJson.Url}
ds.Settings["os.timeout"] = item.HTTPJson.Timeout
ds.Settings["os.basic"] = es.BasicAuth{
Username: item.AuthJson.BasicAuthUser,
Password: item.AuthJson.BasicAuthPassword,
}
ds.Settings["os.tls"] = es.TLS{
SkipTlsVerify: item.HTTPJson.TLS.SkipTlsVerify,
}
ds.Settings["os.version"] = item.SettingsJson["version"]
ds.Settings["os.headers"] = item.HTTPJson.Headers
ds.Settings["os.min_interval"] = item.SettingsJson["min_interval"]
ds.Settings["os.max_shard"] = item.SettingsJson["max_shard"]
}
func PutDatasources(items []datasource.DatasourceInfo) {
ids := make([]int64, 0)
for _, item := range items {
@@ -171,7 +169,7 @@ func PutDatasources(items []datasource.DatasourceInfo) {
ds, err := datasource.GetDatasourceByType(typ, item.Settings)
if err != nil {
logger.Warningf("get plugin:%+v fail: %v", item, err)
logger.Debugf("get plugin:%+v fail: %v", item, err)
continue
}
@@ -183,7 +181,14 @@ func PutDatasources(items []datasource.DatasourceInfo) {
ids = append(ids, item.Id)
// 异步初始化 client 不然数据源同步的会很慢
go DsCache.Put(typ, item.Id, ds)
go func() {
defer func() {
if r := recover(); r != nil {
logger.Errorf("panic in datasource item: %+v panic:%v", item, r)
}
}()
DsCache.Put(typ, item.Id, ds)
}()
}
logger.Debugf("get plugin by type success Ids:%v", ids)

View File

@@ -129,9 +129,7 @@ func (c *Clickhouse) QueryRows(ctx context.Context, query string) (*sql.Rows, er
// ShowDatabases lists all databases in Clickhouse
func (c *Clickhouse) ShowDatabases(ctx context.Context) ([]string, error) {
var (
res []string
)
res := make([]string, 0)
rows, err := c.QueryRows(ctx, ShowDatabases)
if err != nil {
@@ -151,9 +149,7 @@ func (c *Clickhouse) ShowDatabases(ctx context.Context) ([]string, error) {
// ShowTables lists all tables in a given database
func (c *Clickhouse) ShowTables(ctx context.Context, database string) ([]string, error) {
var (
res []string
)
res := make([]string, 0)
showTables := fmt.Sprintf(ShowTables, database)
rows, err := c.QueryRows(ctx, showTables)

543
dskit/doris/doris.go Normal file
View File

@@ -0,0 +1,543 @@
package doris
import (
"context"
"database/sql"
"encoding/json"
"errors"
"fmt"
"reflect"
"strings"
"time"
"unicode"
"github.com/ccfos/nightingale/v6/dskit/pool"
"github.com/ccfos/nightingale/v6/dskit/types"
_ "github.com/go-sql-driver/mysql" // MySQL driver
"github.com/mitchellh/mapstructure"
)
// Doris struct to hold connection details and the connection object
type Doris struct {
Addr string `json:"doris.addr" mapstructure:"doris.addr"` // fe mysql endpoint
FeAddr string `json:"doris.fe_addr" mapstructure:"doris.fe_addr"` // fe http endpoint
User string `json:"doris.user" mapstructure:"doris.user"` //
Password string `json:"doris.password" mapstructure:"doris.password"` //
Timeout int `json:"doris.timeout" mapstructure:"doris.timeout"`
MaxIdleConns int `json:"doris.max_idle_conns" mapstructure:"doris.max_idle_conns"`
MaxOpenConns int `json:"doris.max_open_conns" mapstructure:"doris.max_open_conns"`
ConnMaxLifetime int `json:"doris.conn_max_lifetime" mapstructure:"doris.conn_max_lifetime"`
MaxQueryRows int `json:"doris.max_query_rows" mapstructure:"doris.max_query_rows"`
ClusterName string `json:"doris.cluster_name" mapstructure:"doris.cluster_name"`
EnableWrite bool `json:"doris.enable_write" mapstructure:"doris.enable_write"`
}
// NewDorisWithSettings initializes a new Doris instance with the given settings
func NewDorisWithSettings(ctx context.Context, settings interface{}) (*Doris, error) {
newest := new(Doris)
settingsMap := map[string]interface{}{}
if reflect.TypeOf(settings).Kind() == reflect.String {
if err := json.Unmarshal([]byte(settings.(string)), &settingsMap); err != nil {
return nil, err
}
} else {
var assert bool
settingsMap, assert = settings.(map[string]interface{})
if !assert {
return nil, errors.New("settings type invalid")
}
}
if err := mapstructure.Decode(settingsMap, newest); err != nil {
return nil, err
}
return newest, nil
}
// NewConn establishes a new connection to Doris
func (d *Doris) NewConn(ctx context.Context, database string) (*sql.DB, error) {
if len(d.Addr) == 0 {
return nil, errors.New("empty fe-node addr")
}
// Set default values similar to postgres implementation
if d.Timeout == 0 {
d.Timeout = 60
}
if d.MaxIdleConns == 0 {
d.MaxIdleConns = 10
}
if d.MaxOpenConns == 0 {
d.MaxOpenConns = 100
}
if d.ConnMaxLifetime == 0 {
d.ConnMaxLifetime = 14400
}
if d.MaxQueryRows == 0 {
d.MaxQueryRows = 500
}
var keys []string
keys = append(keys, d.Addr)
keys = append(keys, d.Password, d.User)
if len(database) > 0 {
keys = append(keys, database)
}
cachedkey := strings.Join(keys, ":")
// cache conn with database
conn, ok := pool.PoolClient.Load(cachedkey)
if ok {
return conn.(*sql.DB), nil
}
var db *sql.DB
var err error
defer func() {
if db != nil && err == nil {
pool.PoolClient.Store(cachedkey, db)
}
}()
// Simplified connection logic for Doris using MySQL driver
dsn := fmt.Sprintf("%s:%s@tcp(%s)/%s?charset=utf8", d.User, d.Password, d.Addr, database)
db, err = sql.Open("mysql", dsn)
if err != nil {
return nil, err
}
// Set connection pool configuration
db.SetMaxIdleConns(d.MaxIdleConns)
db.SetMaxOpenConns(d.MaxOpenConns)
db.SetConnMaxLifetime(time.Duration(d.ConnMaxLifetime) * time.Second)
return db, nil
}
// createTimeoutContext creates a context with timeout based on Doris configuration
func (d *Doris) createTimeoutContext(ctx context.Context) (context.Context, context.CancelFunc) {
timeout := d.Timeout
if timeout == 0 {
timeout = 60
}
return context.WithTimeout(ctx, time.Duration(timeout)*time.Second)
}
// ShowDatabases lists all databases in Doris
func (d *Doris) ShowDatabases(ctx context.Context) ([]string, error) {
timeoutCtx, cancel := d.createTimeoutContext(ctx)
defer cancel()
db, err := d.NewConn(timeoutCtx, "")
if err != nil {
return []string{}, err
}
rows, err := db.QueryContext(timeoutCtx, "SHOW DATABASES")
if err != nil {
return nil, err
}
defer rows.Close()
databases := make([]string, 0)
for rows.Next() {
var dbName string
if err := rows.Scan(&dbName); err != nil {
continue
}
databases = append(databases, dbName)
}
return databases, nil
}
// ShowResources lists all resources with type resourceType in Doris
func (d *Doris) ShowResources(ctx context.Context, resourceType string) ([]string, error) {
timeoutCtx, cancel := d.createTimeoutContext(ctx)
defer cancel()
db, err := d.NewConn(timeoutCtx, "")
if err != nil {
return []string{}, err
}
// 使用 SHOW RESOURCES 命令
query := fmt.Sprintf("SHOW RESOURCES WHERE RESOURCETYPE = '%s'", resourceType)
rows, err := db.QueryContext(timeoutCtx, query)
if err != nil {
return nil, fmt.Errorf("failed to execute query: %w", err)
}
defer rows.Close()
distinctName := make(map[string]struct{})
// 获取列信息
columns, err := rows.Columns()
if err != nil {
return nil, fmt.Errorf("failed to get columns: %w", err)
}
// 准备接收数据的变量
values := make([]interface{}, len(columns))
valuePtrs := make([]interface{}, len(columns))
for i := range values {
valuePtrs[i] = &values[i]
}
// 遍历结果集
for rows.Next() {
err := rows.Scan(valuePtrs...)
if err != nil {
return nil, fmt.Errorf("error scanning row: %w", err)
}
// 提取资源名称并添加到 map 中(自动去重)
if name, ok := values[0].([]byte); ok {
distinctName[string(name)] = struct{}{}
} else if nameStr, ok := values[0].(string); ok {
distinctName[nameStr] = struct{}{}
}
}
if err := rows.Err(); err != nil {
return nil, fmt.Errorf("error iterating rows: %w", err)
}
// 将 map 转换为切片
resources := make([]string, 0)
for name := range distinctName {
resources = append(resources, name)
}
return resources, nil
}
// ShowTables lists all tables in a given database
func (d *Doris) ShowTables(ctx context.Context, database string) ([]string, error) {
timeoutCtx, cancel := d.createTimeoutContext(ctx)
defer cancel()
db, err := d.NewConn(timeoutCtx, database)
if err != nil {
return nil, err
}
query := fmt.Sprintf("SHOW TABLES IN %s", database)
rows, err := db.QueryContext(timeoutCtx, query)
if err != nil {
return nil, err
}
defer rows.Close()
tables := make([]string, 0)
for rows.Next() {
var tableName string
if err := rows.Scan(&tableName); err != nil {
continue
}
tables = append(tables, tableName)
}
return tables, nil
}
// DescTable describes the schema of a specified table in Doris
func (d *Doris) DescTable(ctx context.Context, database, table string) ([]*types.ColumnProperty, error) {
timeoutCtx, cancel := d.createTimeoutContext(ctx)
defer cancel()
db, err := d.NewConn(timeoutCtx, database)
if err != nil {
return nil, err
}
query := fmt.Sprintf("DESCRIBE %s.%s", database, table)
rows, err := db.QueryContext(timeoutCtx, query)
if err != nil {
return nil, err
}
defer rows.Close()
// 日志报表中需要把 .type 转化成内部类型
// TODO: 是否有复合类型, Array/JSON/Tuple/Nested, 是否有更多的类型
convertDorisType := func(origin string) (string, bool) {
lower := strings.ToLower(origin)
switch lower {
case "double":
return types.LogExtractValueTypeFloat, true
case "datetime", "date":
return types.LogExtractValueTypeDate, false
case "text":
return types.LogExtractValueTypeText, true
default:
if strings.Contains(lower, "int") {
return types.LogExtractValueTypeLong, true
}
// 日期类型统一按照.date处理
if strings.HasPrefix(lower, "date") {
return types.LogExtractValueTypeDate, false
}
if strings.HasPrefix(lower, "varchar") || strings.HasPrefix(lower, "char") {
return types.LogExtractValueTypeText, true
}
if strings.HasPrefix(lower, "decimal") {
return types.LogExtractValueTypeFloat, true
}
}
return origin, false
}
var columns []*types.ColumnProperty
for rows.Next() {
var (
field string
typ string
null string
key string
defaultValue sql.NullString
extra string
)
if err := rows.Scan(&field, &typ, &null, &key, &defaultValue, &extra); err != nil {
continue
}
type2, indexable := convertDorisType(typ)
columns = append(columns, &types.ColumnProperty{
Field: field,
Type: typ, // You might want to convert MySQL types to your custom types
Type2: type2,
Indexable: indexable,
})
}
return columns, nil
}
// SelectRows selects rows from a specified table in Doris based on a given query with MaxQueryRows check
func (d *Doris) SelectRows(ctx context.Context, database, table, query string) ([]map[string]interface{}, error) {
sql := fmt.Sprintf("SELECT * FROM %s.%s", database, table)
if query != "" {
sql += " " + query
}
// 检查查询结果行数
err := d.CheckMaxQueryRows(ctx, database, sql)
if err != nil {
return nil, err
}
return d.ExecQuery(ctx, database, sql)
}
// ExecQuery executes a given SQL query in Doris and returns the results
func (d *Doris) ExecQuery(ctx context.Context, database string, sql string) ([]map[string]interface{}, error) {
timeoutCtx, cancel := d.createTimeoutContext(ctx)
defer cancel()
db, err := d.NewConn(timeoutCtx, database)
if err != nil {
return nil, err
}
rows, err := db.QueryContext(timeoutCtx, sql)
if err != nil {
return nil, err
}
defer rows.Close()
columns, err := rows.Columns()
if err != nil {
return nil, err
}
var results []map[string]interface{}
for rows.Next() {
columnValues := make([]interface{}, len(columns))
columnPointers := make([]interface{}, len(columns))
for i := range columnValues {
columnPointers[i] = &columnValues[i]
}
if err := rows.Scan(columnPointers...); err != nil {
continue
}
rowMap := make(map[string]interface{})
for i, colName := range columns {
val := columnValues[i]
bytes, ok := val.([]byte)
if ok {
rowMap[colName] = string(bytes)
} else {
rowMap[colName] = val
}
}
results = append(results, rowMap)
}
return results, nil
}
// ExecContext executes a given SQL query in Doris and returns the results
func (d *Doris) ExecContext(ctx context.Context, database string, sql string) error {
timeoutCtx, cancel := d.createTimeoutContext(ctx)
defer cancel()
db, err := d.NewConn(timeoutCtx, database)
if err != nil {
return err
}
_, err = db.ExecContext(timeoutCtx, sql)
return err
}
// ExecBatchSQL 执行多条 SQL 语句
func (d *Doris) ExecBatchSQL(ctx context.Context, database string, sqlBatch string) error {
// 分割 SQL 语句
sqlStatements := SplitSQLStatements(sqlBatch)
// 逐条执行 SQL 语句
for _, ql := range sqlStatements {
// 跳过空语句
ql = strings.TrimSpace(ql)
if ql == "" {
continue
}
// 检查是否是 CREATE DATABASE 语句
isCreateDB := strings.HasPrefix(strings.ToUpper(ql), "CREATE DATABASE")
// strings.HasPrefix(strings.ToUpper(sql), "CREATE SCHEMA") // 暂时不支持CREATE SCHEMA
// 对于 CREATE DATABASE 语句,使用空数据库名连接
currentDB := database
if isCreateDB {
currentDB = ""
}
// 执行单条 SQLExecContext 内部已经包含超时处理
err := d.ExecContext(ctx, currentDB, ql)
if err != nil {
return fmt.Errorf("exec sql failed, sql:%s, err:%w", sqlBatch, err)
}
}
return nil
}
// SplitSQLStatements 将多条 SQL 语句分割成单独的语句
func SplitSQLStatements(sqlBatch string) []string {
var statements []string
var currentStatement strings.Builder
// 状态标记
var (
inString bool // 是否在字符串内
inComment bool // 是否在单行注释内
inMultilineComment bool // 是否在多行注释内
escaped bool // 前一个字符是否为转义字符
)
for i := 0; i < len(sqlBatch); i++ {
char := sqlBatch[i]
currentStatement.WriteByte(char)
// 处理转义字符
if inString && char == '\\' {
escaped = !escaped
continue
}
// 处理字符串
if char == '\'' && !inComment && !inMultilineComment {
if !escaped {
inString = !inString
}
escaped = false
continue
}
// 处理单行注释
if !inString && !inMultilineComment && !inComment && char == '-' && i+1 < len(sqlBatch) && sqlBatch[i+1] == '-' {
inComment = true
currentStatement.WriteByte(sqlBatch[i+1]) // 写入第二个'-'
i++
continue
}
// 处理多行注释开始
if !inString && !inComment && char == '/' && i+1 < len(sqlBatch) && sqlBatch[i+1] == '*' {
inMultilineComment = true
currentStatement.WriteByte(sqlBatch[i+1]) // 写入'*'
i++
continue
}
// 处理多行注释结束
if inMultilineComment && char == '*' && i+1 < len(sqlBatch) && sqlBatch[i+1] == '/' {
inMultilineComment = false
currentStatement.WriteByte(sqlBatch[i+1]) // 写入'/'
i++
continue
}
// 处理换行符,结束单行注释
if inComment && (char == '\n' || char == '\r') {
inComment = false
}
// 分割SQL语句
if char == ';' && !inString && !inMultilineComment && !inComment {
// 收集到分号后面的单行注释(如果有)
for j := i + 1; j < len(sqlBatch); j++ {
nextChar := sqlBatch[j]
// 检查是否是注释开始
if nextChar == '-' && j+1 < len(sqlBatch) && sqlBatch[j+1] == '-' {
// 找到了注释,添加到当前语句
currentStatement.WriteByte(nextChar) // 添加'-'
currentStatement.WriteByte(sqlBatch[j+1]) // 添加第二个'-'
j++
// 读取直到行尾
for k := j + 1; k < len(sqlBatch); k++ {
commentChar := sqlBatch[k]
currentStatement.WriteByte(commentChar)
j = k
if commentChar == '\n' || commentChar == '\r' {
break
}
}
i = j
break
} else if !isWhitespace(nextChar) {
// 非注释且非空白字符,停止收集
break
} else {
// 是空白字符,添加到当前语句
currentStatement.WriteByte(nextChar)
i = j
}
}
statements = append(statements, strings.TrimSpace(currentStatement.String()))
currentStatement.Reset()
continue
}
escaped = false
}
// 处理最后一条可能没有分号的语句
lastStatement := strings.TrimSpace(currentStatement.String())
if lastStatement != "" {
statements = append(statements, lastStatement)
}
return statements
}
// 判断字符是否为空白字符
func isWhitespace(c byte) bool {
return unicode.IsSpace(rune(c))
}

36
dskit/doris/logs.go Normal file
View File

@@ -0,0 +1,36 @@
package doris
import (
"context"
"sort"
)
// 日志相关的操作
const (
TimeseriesAggregationTimestamp = "__ts__"
)
// TODO: 待测试, MAP/ARRAY/STRUCT/JSON 等类型能否处理
func (d *Doris) QueryLogs(ctx context.Context, query *QueryParam) ([]map[string]interface{}, error) {
// 等同于 Query()
return d.Query(ctx, query)
}
// 本质是查询时序数据, 取第一组, SQL由上层封装, 不再做复杂的解析和截断
func (d *Doris) QueryHistogram(ctx context.Context, query *QueryParam) ([][]float64, error) {
values, err := d.QueryTimeseries(ctx, query)
if err != nil {
return [][]float64{}, nil
}
if len(values) > 0 && len(values[0].Values) > 0 {
items := values[0].Values
sort.Slice(items, func(i, j int) bool {
if len(items[i]) > 0 && len(items[j]) > 0 {
return items[i][0] < items[j][0]
}
return false
})
return items, nil
}
return [][]float64{}, nil
}

Some files were not shown because too many files have changed in this diff Show More