Compare commits

...

263 Commits

Author SHA1 Message Date
ning
b4c16b98f3 fix: edge eval with var 2025-03-17 15:51:11 +08:00
Yening Qin
30519fe909 refactor: optimize post func (#2455) 2025-02-08 18:27:56 +08:00
ning
5b8fb144ac refactor: print log 2025-01-22 20:17:22 +08:00
Yening Qin
696f058975 feat: es alert support nodata alert (#2446) 2025-01-21 18:03:47 +08:00
Yening Qin
6195904715 refactor: optimize data recv and push 2025-01-16 15:52:57 +08:00
Yening Qin
47ace00ffa refactor data recv limit (#2430) 2025-01-14 16:17:27 +08:00
Yening Qin
d8feeec345 refactor: change metric type filter (#2426) 2025-01-10 17:32:57 +08:00
ning
6a31521b62 fix: es group by int 2025-01-09 12:09:48 +08:00
ning
61512857a5 refactor: builtin_components 2025-01-08 20:48:37 +08:00
ning
cb56037ef8 refactor: change eval sleep 2025-01-08 19:40:04 +08:00
ning
2ebd64dfa0 Merge branch 'main' of github.com:ccfos/nightingale 2025-01-08 11:28:58 +08:00
ning
4d2ffdf096 refactor: change builtin component model 2025-01-08 11:24:50 +08:00
kongfei605
1915701ce0 chore: default interval for cloudwatch collecting (#2418) 2025-01-07 10:35:53 +08:00
kongfei605
7fd9cd5a3d chore: update tpl and readme for cloudwatch (#2417) 2025-01-06 19:13:45 +08:00
ning
0e2f386419 refactor: change tdengine 2025-01-06 17:29:58 +08:00
ning
b96b08fb9e refactor: change tdengine 2025-01-06 17:22:22 +08:00
ning
eebd1021de refactor: change tdengine 2025-01-06 17:04:46 +08:00
ning
ef61a4cfa7 refactor: change tdengine 2025-01-06 16:50:29 +08:00
ning
2563d2891d feat: add cron pattern validation for alert rule 2025-01-06 11:45:43 +08:00
ning
6ae8ef0d9f feat: add random delay before starting alert rule worker 2025-01-06 11:21:26 +08:00
Yening Qin
38adbefe9c feat: dashboard support add annotations (#2416) 2025-01-05 17:46:17 +08:00
Yening Qin
3f5e0c056d feat: support elasticsearch alert (#2400)
Co-authored-by: Xu Bin <140785332+Reditiny@users.noreply.github.com>
2025-01-05 17:44:17 +08:00
flashbo
b0131a3799 feat: support setting builtin component to disabled (#2406) 2025-01-02 11:11:54 +08:00
Yening Qin
cbb03a7c63 refactor: optimize enum type for alert rule with var
Co-authored-by: Xu Bin <140785332+Reditiny@users.noreply.github.com>
2024-12-28 22:18:39 +08:00
ning
080d412124 docs: add sql 2024-12-26 14:29:50 +08:00
ning
752e02f32d docs: add sql 2024-12-26 14:26:15 +08:00
CRISPpp
e05d59d72a refactor: update target update group part (#2388) 2024-12-25 15:35:19 +08:00
Yening Qin
854e30551a fix: docker compose of postgres init error (#2370) (#2392)
Co-authored-by: CRISPpp <78430796+CRISPpp@users.noreply.github.com>
2024-12-24 15:55:03 +08:00
Yening Qin
0b6dc5beba refactor get user from context (#2391)
Co-authored-by: flashbo <36443248+lwb0214@users.noreply.github.com>
2024-12-24 15:52:55 +08:00
ning
8685a95fa5 Merge branch 'main' of github.com:ccfos/nightingale 2024-12-24 15:40:22 +08:00
ning
7ca7fd8d66 refactor: event set AnnotationsJSON 2024-12-24 15:40:08 +08:00
Yening Qin
1b5dc81b6c fix: the dedup logic when adding tags to target (#2386)
Co-authored-by: flashbo <36443248+lwb0214@users.noreply.github.com>
2024-12-24 11:48:58 +08:00
Ulric Qin
04495f0892 set ignore_host to true 2024-12-20 18:10:48 +08:00
Yening Qin
8158ce1b90 refactor: global webhook add env proxy (#2375)
Co-authored-by: Xu Bin <140785332+Reditiny@users.noreply.github.com>
2024-12-20 14:21:47 +08:00
Yening Qin
a43952e168 refactor: es_index_pattern add cross_cluster_enabled (#2372) 2024-12-19 14:12:27 +08:00
Yening Qin
5702fc81d0 refactor: group delete check (#2368)
Co-authored-by: Xu Bin <140785332+Reditiny@users.noreply.github.com>
2024-12-18 17:00:18 +08:00
Xu Bin
7cc65a2ca7 refactor: add id for configsGetAll (#2361) 2024-12-16 20:38:53 +08:00
ning
7bb6c6541a chore: uodate gomod 2024-12-15 19:42:00 +08:00
ning
8b4cfe65e3 Merge branch 'main' of github.com:ccfos/nightingale 2024-12-13 10:56:27 +08:00
ning
7227de8c22 docs: update migrate.sql 2024-12-13 10:56:15 +08:00
CRISPpp
069e267af8 docs: update sqlite.sql (#2356) 2024-12-13 10:18:59 +08:00
ning
7c5c9a95c3 refactor: change sqlite driver 2024-12-12 21:32:51 +08:00
ning
e3da7f344b docs: update goreleaser.yaml 2024-12-12 21:12:57 +08:00
Yening Qin
dd741a177f docs: rename es integration 2024-12-12 19:27:36 +08:00
ning
4fdd25f020 docs: set HTTP.APIForService.Enable to false 2024-12-12 19:24:07 +08:00
Yening Qin
62350bfbc6 fix: alert rule with var (#2357) 2024-12-12 16:59:09 +08:00
CRISPpp
5ee1baaf07 feat: add config dir and config file check (#2350)
Co-authored-by: Yening Qin <710leo@gmail.com>
2024-12-12 13:24:11 +08:00
Xu Bin
fa12889f06 fix: alert rule check with var when not exact match (#2354) 2024-12-12 11:04:48 +08:00
Yening Qin
39306a5bf0 refactor: optimize webhook send (#2352) 2024-12-11 17:51:20 +08:00
ning
0aea38e564 refactor: write queue limit 2024-12-10 21:03:55 +08:00
CRISPpp
45e9253b2a feat: add global metric write rate control (#2347) 2024-12-10 20:43:02 +08:00
CRISPpp
9385ca9931 feat: add pre check for deleting busi_group (#2346) 2024-12-09 20:32:46 +08:00
ning
fdd3d14871 docs: change default db type to sqlite 2024-12-06 21:04:10 +08:00
Yening Qin
e890034c19 feat: auto init db (#2345)
Co-authored-by: CRISPpp <78430796+CRISPpp@users.noreply.github.com>
2024-12-06 20:32:17 +08:00
Yening Qin
3aaab9e6ad fix: event prom eval interval (#2343) 2024-12-06 20:24:49 +08:00
CRISPpp
7f7d707cfc fix: role_operation abnormal count (#2338) 2024-12-06 16:31:47 +08:00
Xu Bin
98402e9f8a fix: quotation mark for alert rule var (#2339) 2024-12-06 16:07:47 +08:00
Xu Bin
017094fd78 fix: var support for aggregate function (#2334) 2024-12-06 11:57:51 +08:00
Yening Qin
8b6b896362 feat: redis support miniredis type (#2337)
Co-authored-by: CRISPpp <78430796+CRISPpp@users.noreply.github.com>
2024-12-06 10:46:05 +08:00
ning
acaa00cfb6 refactor: migrate add more log 2024-12-05 17:55:27 +08:00
flashbo
87f3d8595d fix: targets filter logic (#2333) 2024-12-05 14:31:57 +08:00
flashbo
42791a374d feat: targets support sorting by time (#2331) 2024-12-05 14:20:30 +08:00
kongfei605
3855c25805 chore: update dashboards for mongodb (#2332) 2024-12-04 16:19:21 +08:00
Xu Bin
10ec0ccbd1 fix: alert rule cron eval (#2330) 2024-12-03 16:58:30 +08:00
flashbo
94cf304222 refactor: improve target bind bg logic (#2327) 2024-12-03 15:00:25 +08:00
ning
994de4635a docs: update n9e.sql 2024-12-02 20:18:32 +08:00
ning
9a0013a406 docs: update n9e.sql 2024-12-02 09:16:50 +08:00
CRISPpp
6dcd5dd01e docs: complete initialization n9e.sql (#2325) 2024-11-29 18:52:00 +08:00
ning
70126e3aec refactor: sync.map clear 2024-11-29 18:23:45 +08:00
ning
767482d358 refactor: optimize prom query 2024-11-28 15:58:31 +08:00
YangHgRi
9a46106cc0 refactor: specify parameter type in function to improve type safety and clarity (#2317) 2024-11-26 20:55:49 +08:00
Yening Qin
da9ea67cee feat: alert rule annotation support prom query template func (#2314)
Co-authored-by: Xu Bin <140785332+Reditiny@users.noreply.github.com>
2024-11-22 16:58:00 +08:00
6666walnut
c13ecd780b fix: get busi-groups api err (#2313)
Co-authored-by: wangjing17 <wangjing17@foundersc.com>
2024-11-22 16:55:32 +08:00
ning
cab37c796a refactor: target_busi_group set utf8mb4_general_ci 2024-11-22 13:28:36 +08:00
ning
078578772b refactor: change builtin component logo type 2024-11-21 20:22:37 +08:00
Yening Qin
31883ec844 refactor: alert rule support cron (#2309)
Co-authored-by: Xu Bin <140785332+Reditiny@users.noreply.github.com>
2024-11-21 13:14:19 +08:00
ning
6100cd084a refactor: event recovery notify 2024-11-21 11:00:30 +08:00
Yening Qin
b82e260d65 feat: alert rule support var (#2307)
Co-authored-by: Xu Bin <140785332+Reditiny@users.noreply.github.com>
2024-11-20 20:34:33 +08:00
ning
3983386af3 fix: get notify-record api err 2024-11-20 20:02:38 +08:00
Ulric Qin
83f2054062 lock version of prom 2024-11-20 17:49:21 +08:00
Ulric Qin
83e0b3cb98 Merge branch 'main' of github.com:ccfos/nightingale 2024-11-20 17:23:29 +08:00
Ulric Qin
f6bfa17e2e update init sql 2024-11-20 17:23:17 +08:00
flashbo
3d8019b738 refactor: event notify record (#2296) 2024-11-19 20:25:17 +08:00
Ulric Qin
ee1be71be6 Merge branch 'main' of github.com:ccfos/nightingale 2024-11-19 19:47:14 +08:00
Ulric Qin
7f2fb459bb update mysql and redis dashboard 2024-11-19 19:47:00 +08:00
ning
fde6a9c75e refactor: change is_recovered type 2024-11-19 19:23:21 +08:00
Ulric Qin
a2b506e263 add input.redis in docker-compose 2024-11-19 17:28:56 +08:00
Ulric Qin
30024a4951 add redis dashboard 2024-11-19 17:26:33 +08:00
Ulric Qin
2c3996812a remove global labels: source=categraf 2024-11-19 17:24:10 +08:00
Ulric Qin
51d35900f2 add input.mysql in docker-compose/etc-categraf 2024-11-19 17:05:46 +08:00
Ulric Qin
852fd2ea6e add field is_recovered when call ibex 2024-11-19 16:49:28 +08:00
Ulric Qin
e1a57217ab update mysql dashboard 2024-11-19 11:28:22 +08:00
Ulric Qin
1e7dad1a67 Merge branch 'main' of github.com:ccfos/nightingale 2024-11-19 11:26:40 +08:00
Ulric Qin
534e40ad63 add mysql dashboard 2024-11-19 11:26:27 +08:00
CRISPpp
15daa3826c feat: add console log with n9e address and root username/passwd when init root (#2302) 2024-11-19 10:31:41 +08:00
ning
d5efb5b6d4 update go.mod 2024-11-19 10:29:39 +08:00
ning
7ebd776881 docs: update doris dashboard tpl 2024-11-18 20:03:29 +08:00
Ulric Qin
0e5cda1cee support proxy when call center 2024-11-18 16:04:15 +08:00
Ulric Qin
64dad19377 Merge branch 'main' of github.com:ccfos/nightingale 2024-11-18 16:01:12 +08:00
Ulric Qin
48f199f8f5 sender support ProxyFromEnvironment 2024-11-18 16:00:56 +08:00
Yening Qin
f7e4df7415 refactor: self monitor metric (#2285)
Co-authored-by: Xu Bin <140785332+Reditiny@users.noreply.github.com>
2024-11-18 11:46:34 +08:00
ning
37fe01ab54 docs: update workflows 2024-11-15 17:45:17 +08:00
ning
cbfe661bce docs: update go version in mod 2024-11-15 17:35:54 +08:00
Yening Qin
890c12f0d4 feat: alert rule query add unit (#2299)
Co-authored-by: Xu Bin <140785332+Reditiny@users.noreply.github.com>
2024-11-15 17:09:41 +08:00
Yening Qin
643c6c997c fix: proxy parse url (#2297) 2024-11-15 16:37:01 +08:00
robin
b201836b40 fix:create user info for notify_tpl (#2292) 2024-11-15 11:57:23 +08:00
robin
b5eced1540 docs: add doris template (#2260) 2024-11-14 22:50:45 +08:00
Yening Qin
a13004eab7 feat: allow override global webhook (#2257)
Co-authored-by: flashbo <36443248+lwb0214@users.noreply.github.com>
2024-11-14 22:35:23 +08:00
Yening Qin
a0c56548e5 refactor: migrate label (#2293) 2024-11-14 22:25:20 +08:00
ning
e3d97386a8 refactor: dash tpl uuid 2024-11-14 22:14:18 +08:00
ning
051b0ca045 Merge branch 'main' of github.com:ccfos/nightingale 2024-11-14 19:18:20 +08:00
ning
2941ced011 fix: import builtin board 2024-11-14 19:15:02 +08:00
Ulric Qin
97d6908edd fix mongodb dashboard 2024-11-14 18:28:52 +08:00
710leo
c7117b9461 fix: proxy api parse url 2024-11-13 23:00:49 +08:00
Yening Qin
78417b1d5b refactor: optimize rule datasource set (#2288)
Co-authored-by: Xu Bin <140785332+Reditiny@users.noreply.github.com>
2024-11-13 20:28:50 +08:00
Yening Qin
79f3404810 refactor event notify (#2287) 2024-11-13 19:50:11 +08:00
ning
81e51c60eb refactor: subscribe add check 2024-11-12 20:26:56 +08:00
shardingHe
af9cd55ca5 docs: add metrics config for oracle (#2276)
Co-authored-by: shardingHe <wangzihe@flashcat.cloud>
2024-11-12 13:59:35 +08:00
710leo
d4afdb2b6e refactor: change log 2024-11-06 22:34:30 +08:00
flashbo
2befc8b0f1 refactor: migrate bg label (#2269) 2024-11-06 21:48:29 +08:00
Yening Qin
14fd2eb26d refactor: update tdengine query (#2270) 2024-11-06 20:27:21 +08:00
ning
0a938518d7 refactor: target_busi_group table name 2024-11-06 13:00:35 +08:00
ning
0eed5afa7e refactor: update target_busi_group character 2024-11-05 14:46:41 +08:00
Yening Qin
f82eaf0a1f refactor: optimize tdentine (#2262) 2024-11-04 17:33:18 +08:00
ning
f03278d68d refactor: append tags 2024-11-04 16:43:39 +08:00
shardingHe
7d1e143f60 docs: sync configurations for bind & ldap (#2253)
Co-authored-by: shardingHe <wangzihe@flashcat.cloud>
2024-11-02 16:49:49 +08:00
ning
078a0c7b1c refactor: prom query log 2024-11-01 15:28:23 +08:00
flashbo
d9cac65a18 refactor: improve prom_rule import (#2251) 2024-10-30 14:28:00 +08:00
ning
dd025ca87c refactor: migrate db and host_miss tag append 2024-10-30 14:20:16 +08:00
ning
04734b8940 Merge branch 'main' of github.com:ccfos/nightingale 2024-10-29 12:09:50 +08:00
ning
bf7bcf4196 docs: update notify tpl 2024-10-29 12:09:26 +08:00
ulricqin
16195abb89 Update docker-compose.yaml 2024-10-29 12:08:40 +08:00
ning
3f4891d65d refactor: event queue push 2024-10-28 20:51:21 +08:00
Yening Qin
102549c6a1 refactor: webhook send event (#2248)
Co-authored-by: Xu Bin <140785332+Reditiny@users.noreply.github.com>
2024-10-28 20:33:29 +08:00
Yening Qin
5213b1d7f1 refactor: es update config (#2247)
Co-authored-by: flashbo <36443248+lwb0214@users.noreply.github.com>
2024-10-28 20:32:45 +08:00
Yening Qin
24de97fb1e refactor: update default engine name (#2245) 2024-10-28 15:50:52 +08:00
ning
9c2cf679e0 refactor: center set default engine_name 2024-10-28 13:37:55 +08:00
Yening Qin
2aa4941010 refactor: optimize recover notify(#2242)
Co-authored-by: Xu Bin <140785332+Reditiny@users.noreply.github.com>
2024-10-25 16:53:44 +08:00
flashbo
a812f14442 refactor: record notify for callback (#2231) 2024-10-25 16:50:12 +08:00
flashbo
4fb7e8e2b5 refactor: fill group names in target (#2241) 2024-10-25 16:30:09 +08:00
ulricqin
113ad67104 Update README.md 2024-10-25 12:10:28 +08:00
flashbo
49d843540a refactor: add ExtraInfoMap in alert event (#2240) 2024-10-25 11:03:56 +08:00
Yening Qin
21f0e3310f fix: event relabel when target_label is blank (#2228)
Co-authored-by: Xu Bin <140785332+Reditiny@users.noreply.github.com>
2024-10-24 14:09:41 +08:00
ulricqin
31b3434e87 Update README.md 2024-10-22 14:19:33 +08:00
ning
2576a0f815 fix: edge get all configs 2024-10-21 19:30:13 +08:00
ning
0ac4bc7421 docs: update linux dashboard tpl 2024-10-21 18:07:52 +08:00
ning
95e6ea98f4 refactor: prom client query api add retry 2024-10-21 17:57:31 +08:00
ning
dc60c74c0d docs: update automq dashboard tpl 2024-10-21 16:50:36 +08:00
ning
a15adc196d docs: update linux dashboard tpl 2024-10-21 16:35:53 +08:00
ning
f89ef04e85 refactor: optimize code robustness 2024-10-21 14:54:48 +08:00
Yening Qin
f55cd9b32e feat: config access log in web (#2227) 2024-10-21 12:11:19 +08:00
Xu Bin
305a898f8b feat: alert recover ckeck (#2226) 2024-10-21 12:07:54 +08:00
Yening Qin
60c31d8eb2 feat: support query set opration (#2225) 2024-10-20 21:18:12 +08:00
ning
7da49a8c68 refactor: update go.mod 2024-10-20 14:04:31 +08:00
flashbo
65b1410b09 refactor: support output logs to one file (#2209) 2024-10-20 14:02:44 +08:00
ning
3901671c0e docs: update n9e.sql 2024-10-18 15:24:33 +08:00
Xu Bin
9c02937e81 refactor: alert mute retain (#2223) 2024-10-18 12:08:31 +08:00
flashbo
0a255ee33a fix: unbind bgids when delete target (#2219) 2024-10-16 10:00:08 +08:00
Xu Bin
8dc198b4b1 fix: smtp update (#2213) 2024-10-12 11:37:14 +08:00
Yening Qin
9696f63a71 rename tpl name 2024-10-11 16:23:57 +08:00
Xu Bin
03f56f73b4 feat: ldap support multi basecn (#2198) 2024-10-08 16:06:21 +08:00
Ulric Qin
7b415c91af update qrcode 2024-10-08 15:40:34 +08:00
flashbo
2abf089444 feat: rule list add user nickname (#2201) 2024-10-08 15:25:25 +08:00
mt
e504dab359 fix: update router_alert_cur_event.go (#2210) 2024-10-03 00:27:31 +08:00
710leo
989ed62e8d refactor: update GetAnomalyPoint 2024-09-29 19:34:25 +08:00
nl594
b7197d10eb docs: add new ipmi dashboard (#2204)
* add new ipmi dashboard

* Update IPMI_by_prometheus.json

---------

Co-authored-by: niulong <niulong@xylink.com>
Co-authored-by: Yening Qin <710leo@gmail.com>
2024-09-29 13:24:56 +08:00
Xu Bin
f4de256388 refactor: target delete hook (#2202) 2024-09-27 15:43:57 +08:00
Xu Bin
3f5126923f feat: get build payload by UUID (#2203) 2024-09-27 15:43:18 +08:00
flashbo
5d3e70bc4c refactor: datasouce support force save (#2200) 2024-09-27 14:40:48 +08:00
710leo
bb2c5202ad Merge branch 'main' of github.com:ccfos/nightingale 2024-09-27 14:26:48 +08:00
710leo
3acf3d7bf9 refactor: migrate target bg 2024-09-27 14:26:35 +08:00
shardingHe
a79810b15d add deployment & statefulset dashboard (#2196)
Co-authored-by: shardingHe <wangzihe@flashcat.cloud>
2024-09-26 17:38:47 +08:00
710leo
f61cb532f8 Merge branch 'main' of github.com:ccfos/nightingale 2024-09-26 15:49:28 +08:00
710leo
34a5a752f4 refactor: update aconf check 2024-09-26 15:49:14 +08:00
Ulric Qin
9be3deeebd update wechat qrcode 2024-09-26 10:14:16 +08:00
710leo
2ceed84120 fix: host alert host filter by busigroup 2024-09-25 15:07:54 +08:00
710leo
8fbe257090 docs: update i18n 2024-09-24 16:27:51 +08:00
710leo
ae35d780c6 refactor: update busigroup del api 2024-09-24 15:49:14 +08:00
710leo
4d2cdfce53 optimize target fill group 2024-09-24 15:29:51 +08:00
710leo
a0e4d0d46e refactor: target bind api 2024-09-24 15:20:21 +08:00
710leo
dd07d04e2f refactor: update target api 2024-09-24 14:37:27 +08:00
710leo
61203e8b75 feat: add boards api 2024-09-24 10:27:43 +08:00
710leo
f24bc53c94 refactor: update target bind group api 2024-09-23 13:13:09 +08:00
710leo
ef6abe3fdc refactor: update target bind api 2024-09-22 23:00:32 +08:00
710leo
461361d3d0 fix: heartbeat api auth check for n9e-edge 2024-09-22 21:05:31 +08:00
710leo
52b3afbd97 fix: recovery event tags map split 2024-09-22 19:14:24 +08:00
710leo
652439bb85 Merge branch 'main' of github.com:ccfos/nightingale 2024-09-22 00:33:40 +08:00
710leo
6f0c13d4e7 fix: edge target cache 2024-09-22 00:33:28 +08:00
ulricqin
c9f46bad02 Remove duplicate fields UseTLS 2024-09-21 20:44:02 +08:00
710leo
75146f3626 docs: add target_busi_group sql 2024-09-20 18:14:15 +08:00
710leo
50aafbd73d refactor: update target query 2024-09-20 18:09:07 +08:00
710leo
b975cb3c9d refactor: update append_labels 2024-09-20 16:24:47 +08:00
flashbo
11deb4ba26 feat: host bind muti group (#2185) 2024-09-19 20:32:08 +08:00
flashbo
ec927297d6 feat:support query alert event by rule id (#2179) 2024-09-19 11:04:14 +08:00
Yening Qin
f476d7cd63 fix: incorrect content in feishucard when sending a large number of messages (#2180) 2024-09-18 18:00:13 +08:00
ulricqin
410f3bbceb Update README.md wechat qrcode 2024-09-18 08:13:42 +08:00
cui fliter
2ad53d6862 refactor: make uids in NotifyTarget (#2169) 2024-09-13 19:26:18 +08:00
710leo
fc392e4af1 docs: update linux metrics tpl 2024-09-13 19:10:33 +08:00
fangpsh
9c83c7881a docs: update oom_kill alert rule tpl (#2170)
Co-authored-by: fangpsh <fangpsh@zego.im>
2024-09-13 19:07:08 +08:00
flashbo
f1259d1dff refactor: alert rule callback url dedup (#2165) 2024-09-13 16:24:04 +08:00
Yening Qin
d9d59b3205 fix: recording rule update (#2168) 2024-09-13 16:20:48 +08:00
Ulric Qin
d11cfb0278 Merge branch 'main' of github.com:ccfos/nightingale 2024-09-09 11:49:37 +08:00
Ulric Qin
5adcfc6eaa update README 2024-09-09 11:49:26 +08:00
710leo
037152ad72 refactor: update alert-cur-event api 2024-09-03 18:17:28 +08:00
Ulric Qin
2de304d4f2 move sqlite.sql to docker dir 2024-09-03 17:51:35 +08:00
Ulric Qin
03c56d048f modify column trigger_value to text 2024-09-03 17:50:13 +08:00
Ulric Qin
1cddb4eca0 Merge branch 'main' of github.com:ccfos/nightingale 2024-09-03 17:46:57 +08:00
Ulric Qin
2dc033944d bugfix InitBuiltinPayloads 2024-09-03 17:46:43 +08:00
flashbo
63e6c78e71 feat: targets support multi idents query (#2119) 2024-09-03 15:32:05 +08:00
Ulric Qin
e1f04eebe7 update README 2024-08-30 17:58:49 +08:00
Yening Qin
ce17e09f66 feat: notify event add target info (#2137)
* fix: tpl center update (#2125)
* put target into alert cur event (#2128)

---------

Co-authored-by: Xu Bin <140785332+Reditiny@users.noreply.github.com>
Co-authored-by: flashbo <36443248+lwb0214@users.noreply.github.com>
2024-08-30 16:28:31 +08:00
710leo
c98c1d3b90 docs: update sql 2024-08-30 16:19:50 +08:00
710leo
ae3218e6d5 fix: target query api 2024-08-30 11:27:16 +08:00
Yening Qin
7497cc0f28 refactor: update alert rule clone api (#2126) 2024-08-28 20:15:25 +08:00
710leo
96c4cc7c98 refactor: import prom rule api 2024-08-28 15:54:39 +08:00
710leo
1f7314f6b4 refactor: sub rule event enable run notify script 2024-08-27 15:48:09 +08:00
yangkaa
86d478a0d4 fix: update notify template failed (#2117)
Signed-off-by: yangk <yangk@goodrain.com>
2024-08-27 11:26:05 +08:00
710leo
b45023630f merge main 2024-08-27 11:14:58 +08:00
710leo
2177049487 refactor: update target tag api 2024-08-27 11:14:20 +08:00
shardingHe
d3d1e7019f docs: update jvm dashboards (SpringBoot Actuator) (#2121) 2024-08-27 10:46:02 +08:00
710leo
f2ad0b9594 refactor: update targets api 2024-08-26 20:34:04 +08:00
Yening Qin
9c79233b3c feat: target add host tags (#2120)
* update target tags (#2091)

---------

Co-authored-by: flashbo <36443248+lwb0214@users.noreply.github.com>
2024-08-26 19:41:38 +08:00
710leo
9ea5de1257 refactor: users get api 2024-08-26 11:56:57 +08:00
ning
3ec97665ac refactor: set e.FirstTriggerTime 2024-08-22 20:18:19 +08:00
ning
bb4eeca2ab code refactor 2024-08-22 15:38:47 +08:00
ning
cc6a5be27f Merge branch 'main' of github.com:ccfos/nightingale 2024-08-22 15:02:32 +08:00
ning
630df8a954 fix: recover event when prom_for_duration is 0 2024-08-22 15:02:26 +08:00
ning
e28ab6368b fix: recover event when prom_for_duration is 0 2024-08-22 14:30:30 +08:00
ulricqin
751c78be4b Update a-n9e.sql 2024-08-22 10:01:04 +08:00
Xu Bin
5311bf90d5 feat: trigger set support operation (#2107) 2024-08-21 20:16:31 +08:00
ning
c464689c6a Merge branch 'main' of github.com:ccfos/nightingale 2024-08-21 19:56:10 +08:00
ning
442426be38 fix: event first trigger time 2024-08-21 19:55:56 +08:00
Yening Qin
9a28139d43 refactor: hostmeta add config (#2112) 2024-08-20 15:26:13 +08:00
710leo
25b768188f refactor: matchTag add strings.TrimSpace 2024-08-19 21:46:22 +08:00
ning
b794b62960 refactor: RecoverAlertCurEventFromDb 2024-08-19 17:33:40 +08:00
flashbo
d7e00a5a49 refactor: import promethues alert rule (improve api) (#2104) 2024-08-16 11:23:53 +08:00
Xu Bin
19e6cfe7d2 feat: generic are supported in alert rule calculation formulas (#2097) 2024-08-15 17:26:24 +08:00
shardingHe
63baa7b6f3 docs: remove process-exporter alerts & dashboards (#2102)
Co-authored-by: shardingHe <wangzihe@flashcat.cloud>
2024-08-15 16:57:41 +08:00
Vicla
407fc90677 update readme_en (#2099) 2024-08-14 12:20:11 +08:00
Dan218
7da4c99d92 feat: Provide Target.AfterFind to automatically calculated fields for Target (#2073) 2024-08-13 19:16:40 +08:00
Xu Bin
6b46e7e83f feat: support clone rules by idents (#2095) 2024-08-13 19:12:31 +08:00
ning
514ccd5f90 Merge branch 'main' of github.com:ccfos/nightingale 2024-08-13 15:11:52 +08:00
ning
4565b80717 fix: edge delete event 2024-08-13 15:11:40 +08:00
Ulric Qin
2bac6588c4 Merge branch 'main' of github.com:ccfos/nightingale 2024-08-13 11:25:42 +08:00
Ulric Qin
fc293cb01c use node_uname_info as dashboard var filter 2024-08-13 11:25:30 +08:00
ning
73f9548242 Merge branch 'main' of github.com:ccfos/nightingale 2024-08-13 10:59:39 +08:00
ning
7c91e51c08 fix: edge record notify 2024-08-13 10:59:17 +08:00
qinguoyi
a4867c406d fix:get configcache return nil need exit (#2094) 2024-08-12 19:52:22 +08:00
qinguoyi
bfea83ae75 fix: alert_cur_event return unmarshal json err (#2090) 2024-08-12 13:06:58 +08:00
Yening Qin
7a2832c377 fix: process recover duration (#2092) 2024-08-12 13:04:08 +08:00
ning
3f6c54a712 refactor: subscribe not run ibex and script 2024-08-09 17:42:48 +08:00
Yening Qin
1bb590ce6d feat: support record event notify detail (#2088)
* feat: record alert notification (#2045)

* record notification

---------

Co-authored-by: wenbo <bupt.lwb@gmail.com>
Co-authored-by: wenbo <1027758873@qq.com>
2024-08-09 17:06:49 +08:00
ning
656326458f refactor: event add task tpl name 2024-08-08 22:41:51 +08:00
Yening Qin
c6ab3ad2b3 feat: alert rule support import promethues alert rule (#2080) (#2085)
Co-authored-by: flashbo <36443248+lwb0214@users.noreply.github.com>
2024-08-07 16:05:10 +08:00
Yening Qin
d050cf72e9 Update elasticsearch_by_categraf.json 2024-08-06 16:42:49 +08:00
ning
084cc1893e docs: update migrate sql 2024-08-06 14:54:40 +08:00
Yening Qin
cd01123b59 feat: alert rule support add task tpl (#2079) 2024-08-05 17:54:23 +08:00
ning
23ce84d41c refactor: optimize event relabel process 2024-08-05 11:43:21 +08:00
Yening Qin
4764cc2419 feat: alert rule batch update support annotations (#2074)
* feat: batch update annotation (#2072)

* fix: annotations_del

---------

Co-authored-by: Xu Bin <140785332+Reditiny@users.noreply.github.com>
2024-08-02 13:05:11 +08:00
ning
da66401576 docs: update tpl doc 2024-08-02 11:17:53 +08:00
ning
0024c9d99c docs: add migrate sql 2024-08-01 18:56:16 +08:00
flashbo
96d3b48f10 feat: target add os type (#2071) 2024-08-01 17:08:59 +08:00
Yening Qin
6a0e7a810f refactor: webhook notify support batch send events (#2070) 2024-08-01 15:25:39 +08:00
238 changed files with 31540 additions and 5563 deletions

View File

@@ -5,7 +5,7 @@ on:
tags:
- 'v*'
env:
GO_VERSION: 1.18
GO_VERSION: 1.23
jobs:
goreleaser:

3
.gitignore vendored
View File

@@ -9,6 +9,7 @@
*.o
*.a
*.so
*.db
*.sw[po]
*.tar.gz
*.[568vq]
@@ -64,3 +65,5 @@ queries.active
/n9e-*
n9e.sql
!/datasource

View File

@@ -29,15 +29,16 @@
## 夜莺 Nightingale 是什么
夜莺监控是一款开源云原生观测分析工具,采用 All-in-One 的设计理念,集数据采集、可视化、监控告警、数据分析于一体,与云原生生态紧密集成,提供开箱即用的企业级监控分析和告警能力。夜莺于 2020 年 3 月 20 日,在 github 上发布 v1 版本,已累计迭代 100 多个版本。
夜莺监控是一款开源云原生观测分析工具,采用 All-in-One 的设计理念,集数据采集、可视化、监控告警、数据分析于一体,与云原生生态紧密集成,提供开箱即用的企业级监控分析和告警能力。夜莺于 2020 年 3 月 20 日,在 GitHub 上发布 v1 版本,已累计迭代 100 多个版本。
夜莺最初由滴滴开发和开源,并于 2022 年 5 月 11 日捐赠予中国计算机学会开源发展委员会CCF ODC为 CCF ODC 成立后接受捐赠的第一个开源项目。夜莺的核心研发团队,也是 Open-Falcon 项目原核心研发人员,从 2014 年Open-Falcon 是 2014 年开源)算起来,也有 10 年了,只为把监控这个事情做好。
## 快速开始
- 👉[文档中心](https://flashcat.cloud/docs/) | [下载中心](https://flashcat.cloud/download/nightingale/)
- ❤️[报告 Bug](https://github.com/ccfos/nightingale/issues/new?assignees=&labels=&projects=&template=question.yml)
- ℹ️为了提供更快速的访问体验,上述文档和下载站点托管于 [FlashcatCloud](https://flashcat.cloud)
- 👉 [文档中心](https://flashcat.cloud/docs/) | [下载中心](https://flashcat.cloud/download/nightingale/)
- ❤️ [报告 Bug](https://github.com/ccfos/nightingale/issues/new?assignees=&labels=&projects=&template=question.yml)
- 为了提供更快速的访问体验,上述文档和下载站点托管于 [FlashcatCloud](https://flashcat.cloud)
- 💡 前后端代码分离,前端代码仓库:[https://github.com/n9e/fe](https://github.com/n9e/fe)
## 功能特点
@@ -50,6 +51,11 @@
## 截图演示
你可以在页面的右上角,切换语言和主题,目前我们支持英语、简体中文、繁体中文。
![语言切换](https://download.flashcat.cloud/ulric/n9e-switch-i18n.png)
即时查询,类似 Prometheus 内置的查询分析页面,做 ad-hoc 查询,夜莺做了一些 UI 优化,同时提供了一些内置 promql 指标,让不太了解 promql 的用户也可以快速查询。
![即时查询](https://download.flashcat.cloud/ulric/20240513103305.png)
@@ -83,15 +89,16 @@
- 报告Bug优先推荐提交[夜莺GitHub Issue](https://github.com/ccfos/nightingale/issues/new?assignees=&labels=kind%2Fbug&projects=&template=bug_report.yml)
- 推荐完整浏览[夜莺文档站点](https://flashcat.cloud/docs/content/flashcat-monitor/nightingale-v7/introduction/),了解更多信息
- 推荐搜索关注夜莺公众号,第一时间获取社区动态:`夜莺监控Nightingale`
- 日常问题交流推荐加入[知识星球](https://download.flashcat.cloud/ulric/20240319095409.png),也可以加我微信 `picobyte`,备注:`夜莺加群-<公司>-<姓名>` 拉入微信群,不过研发人员主要是关注 github issue 和星球,微信群关注较少
- 日常问题交流
- QQ群730841964
- [加入微信群](https://download.flashcat.cloud/ulric/20241022141621.png),如果二维码过期了,可以联系我(我的微信:`picobyte`)拉群,备注: `夜莺互助群`
## 广受关注
[![Stargazers over time](https://api.star-history.com/svg?repos=ccfos/nightingale&type=Date)](https://star-history.com/#ccfos/nightingale&Date)
## 社区共建
- ❇️请阅读浏览[夜莺开源项目和社区治理架构草案](./doc/community-governance.md),真诚欢迎每一位用户、开发者、公司以及组织,使用夜莺监控、积极反馈 Bug、提交功能需求、分享最佳实践共建专业、活跃的夜莺开源社区。
- 夜莺贡献者❤️
- ❇️ 请阅读浏览[夜莺开源项目和社区治理架构草案](./doc/community-governance.md),真诚欢迎每一位用户、开发者、公司以及组织,使用夜莺监控、积极反馈 Bug、提交功能需求、分享最佳实践共建专业、活跃的夜莺开源社区。
- ❤️ 夜莺贡献者
<a href="https://github.com/ccfos/nightingale/graphs/contributors">
<img src="https://contrib.rocks/image?repo=ccfos/nightingale" />
</a>

View File

@@ -1,104 +1,113 @@
<p align="center">
<a href="https://github.com/ccfos/nightingale">
<img src="doc/img/Nightingale_L_V.png" alt="nightingale - cloud native monitoring" width="240" /></a>
<img src="doc/img/Nightingale_L_V.png" alt="nightingale - cloud native monitoring" width="100" /></a>
</p>
<p align="center">
<b>Open-source Alert Management Expert, an Integrated Observability Platform</b>
</p>
<p align="center">
<img alt="GitHub latest release" src="https://img.shields.io/github/v/release/ccfos/nightingale"/>
<a href="https://n9e.github.io">
<a href="https://flashcat.cloud/docs/">
<img alt="Docs" src="https://img.shields.io/badge/docs-get%20started-brightgreen"/></a>
<a href="https://hub.docker.com/u/flashcatcloud">
<img alt="Docker pulls" src="https://img.shields.io/docker/pulls/flashcatcloud/nightingale"/></a>
<img alt="GitHub Repo stars" src="https://img.shields.io/github/stars/ccfos/nightingale">
<img alt="GitHub Repo issues" src="https://img.shields.io/github/issues/ccfos/nightingale">
<img alt="GitHub Repo issues closed" src="https://img.shields.io/github/issues-closed/ccfos/nightingale">
<img alt="GitHub forks" src="https://img.shields.io/github/forks/ccfos/nightingale">
<a href="https://github.com/ccfos/nightingale/graphs/contributors">
<img alt="GitHub contributors" src="https://img.shields.io/github/contributors-anon/ccfos/nightingale"/></a>
<img alt="GitHub Repo stars" src="https://img.shields.io/github/stars/ccfos/nightingale">
<img alt="GitHub forks" src="https://img.shields.io/github/forks/ccfos/nightingale">
<br/><img alt="GitHub Repo issues" src="https://img.shields.io/github/issues/ccfos/nightingale">
<img alt="GitHub Repo issues closed" src="https://img.shields.io/github/issues-closed/ccfos/nightingale">
<img alt="GitHub latest release" src="https://img.shields.io/github/v/release/ccfos/nightingale"/>
<img alt="License" src="https://img.shields.io/badge/license-Apache--2.0-blue"/>
<a href="https://n9e-talk.slack.com/">
<img alt="GitHub contributors" src="https://img.shields.io/badge/join%20slack-%23n9e-brightgreen.svg"/></a>
<img alt="License" src="https://img.shields.io/badge/license-Apache--2.0-blue"/>
</p>
<p align="center">
An open-source cloud-native monitoring system that is <b>all-in-one</b> <br/>
<b>Out-of-the-box</b>, it integrates data collection, visualization, and monitoring alert <br/>
We recommend upgrading your <b>Prometheus + AlertManager + Grafana</b> combination to Nightingale!
</p>
[English](./README_en.md) | [中文](./README.md)
## What is Nightingale
## Highlighted Features
Nightingale aims to combine the advantages of Prometheus and Grafana. It manages alert rules and visualizes metrics, logs, traces in a beautiful WebUI.
- **Out-of-the-box**
- Supports multiple deployment methods such as **Docker, Helm Chart, and cloud services**, integrates data collection, monitoring, and alerting into one system, and comes with various monitoring dashboards, quick views, and alert rule templates. **It greatly reduces the construction cost, learning cost, and usage cost of cloud-native monitoring systems**.
- **Professional Alerting**
- Provides visual alert configuration and management, supports various alert rules, offers the ability to configure silence and subscription rules, supports multiple alert delivery channels, and has features such as alert self-healing and event management.
- **Cloud-Native**
- Quickly builds an enterprise-level cloud-native monitoring system through a turnkey approach, supports multiple collectors such as [Categraf](https://github.com/flashcatcloud/categraf), Telegraf, and Grafana-agent, supports multiple data sources such as Prometheus, VictoriaMetrics, M3DB, ElasticSearch, and Jaeger, and is compatible with importing Grafana dashboards. **It seamlessly integrates with the cloud-native ecosystem**.
- **High Performance and High Availability**
- Due to the multi-data-source management engine of Nightingale and its excellent architecture design, and utilizing a high-performance time-series database, it can handle data collection, storage, and alert analysis scenarios with billions of time-series data, saving a lot of costs.
- Nightingale components can be horizontally scaled with no single point of failure. It has been deployed in thousands of enterprises and tested in harsh production practices. Many leading Internet companies have used Nightingale for cluster machines with hundreds of nodes, processing billions of time-series data.
- **Flexible Extension and Centralized Management**
- Nightingale can be deployed on a 1-core 1G cloud host, deployed in a cluster of hundreds of machines, or run in Kubernetes. Time-series databases, alert engines, and other components can also be decentralized to various data centers and regions, balancing edge deployment with centralized management. **It solves the problem of data fragmentation and lack of unified views**.
Originally developed and open-sourced by Didi, Nightingale was donated to the China Computer Federation Open Source Development Committee (CCF ODC) on May 11, 2022, becoming the first open-source project accepted by the CCF ODC after its establishment.
#### If you are using Prometheus and have one or more of the following requirement scenarios, it is recommended that you upgrade to Nightingale:
## Quick Start
- Multiple systems such as Prometheus, Alertmanager, Grafana, etc. are fragmented and lack a unified view and cannot be used out of the box;
- The way to manage Prometheus and Alertmanager by modifying configuration files has a big learning curve and is difficult to collaborate;
- Too much data to scale-up your Prometheus cluster;
- Multiple Prometheus clusters running in production environments, which faced high management and usage costs;
- 👉 [Documentation](https://flashcat.cloud/docs/) | [Download](https://flashcat.cloud/download/nightingale/)
- ❤️ [Report a Bug](https://github.com/ccfos/nightingale/issues/new?assignees=&labels=&projects=&template=question.yml)
- For faster access, the above documentation and download sites are hosted on [FlashcatCloud](https://flashcat.cloud).
#### If you are using Zabbix and have the following scenarios, it is recommended that you upgrade to Nightingale:
## Features
- Monitoring too much data and wanting a better scalable solution;
- A high learning curve and a desire for better efficiency of collaborative use in a multi-person, multi-team model;
- Microservice and cloud-native architectures with variable monitoring data lifecycles and high monitoring data dimension bases, which are not easily adaptable to the Zabbix data model;
- **Integration with Multiple Time-Series Databases:** Supports integration with various time-series databases such as Prometheus, VictoriaMetrics, Thanos, Mimir, M3DB, and TDengine, enabling unified alert management.
- **Advanced Alerting Capabilities:** Comes with built-in support for multiple alerting rules, extensible to common notification channels. It also supports alert suppression, silencing, subscription, self-healing, and alert event management.
- **High-Performance Visualization Engine:** Offers various chart styles with numerous built-in dashboard templates and the ability to import Grafana templates. Ready to use with a business-friendly open-source license.
- **Support for Common Collectors:** Compatible with [Categraf](https://flashcat.cloud/product/categraf), Telegraf, Grafana-agent, Datadog-agent, and various exporters as collectors—there's no data that can't be monitored.
- **Seamless Integration with [Flashduty](https://flashcat.cloud/product/flashcat-duty/):** Enables alert aggregation, acknowledgment, escalation, scheduling, and IM integration, ensuring no alerts are missed, reducing unnecessary interruptions, and enhancing efficient collaboration.
#### If you are using [open-falcon](https://github.com/open-falcon/falcon-plus), we recommend you to upgrade to Nightingale
- For more information about open-falcon and Nightingale, please refer to read [Ten features and trends of cloud-native monitoring](https://mp.weixin.qq.com/s?__biz=MzkzNjI5OTM5Nw==&mid=2247483738&idx=1&sn=e8bdbb974a2cd003c1abcc2b5405dd18&chksm=c2a19fb0f5d616a63185cd79277a79a6b80118ef2185890d0683d2bb20451bd9303c78d083c5#rd)。
## Getting Started
[https://n9e.github.io/](https://n9e.github.io/)
## Screenshots
https://user-images.githubusercontent.com/792850/216888712-2565fcea-9df5-47bd-a49e-d60af9bd76e8.mp4
You can switch languages and themes in the top right corner. We now support English, Simplified Chinese, and Traditional Chinese.
![18n switch](https://download.flashcat.cloud/ulric/n9e-switch-i18n.png)
### Instant Query
Similar to the built-in query analysis page in Prometheus, Nightingale offers an ad-hoc query feature with UI enhancements. It also provides built-in PromQL metrics, allowing users unfamiliar with PromQL to quickly perform queries.
![Instant Query](https://download.flashcat.cloud/ulric/20240513103305.png)
### Metric View
Alternatively, you can use the Metric View to access data. With this feature, Instant Query becomes less necessary, as it caters more to advanced users. Regular users can easily perform queries using the Metric View.
![Metric View](https://download.flashcat.cloud/ulric/20240513103530.png)
### Built-in Dashboards
Nightingale includes commonly used dashboards that can be imported and used directly. You can also import Grafana dashboards, although compatibility is limited to basic Grafana charts. If youre accustomed to Grafana, its recommended to continue using it for visualization, with Nightingale serving as an alerting engine.
![Built-in Dashboards](https://download.flashcat.cloud/ulric/20240513103628.png)
### Built-in Alert Rules
In addition to the built-in dashboards, Nightingale also comes with numerous alert rules that are ready to use out of the box.
![Built-in Alert Rules](https://download.flashcat.cloud/ulric/20240513103825.png)
## Architecture
<img src="doc/img/arch-product.png" width="600">
In most community scenarios, Nightingale is primarily used as an alert engine, integrating with multiple time-series databases to unify alert rule management. Grafana remains the preferred tool for visualization. As an alert engine, the product architecture of Nightingale is as follows:
Nightingale monitoring can receive monitoring data reported by various collectors (such as [Categraf](https://github.com/flashcatcloud/categraf) , telegraf, grafana-agent, Prometheus, etc.) and write them to various popular time-series databases (such as Prometheus, M3DB, VictoriaMetrics, Thanos, TDEngine, etc.). It provides configuration capabilities for alert rules, silence rules, and subscription rules, as well as the ability to view monitoring data. It also provides automatic alarm self-healing mechanisms (such as automatically calling back to a webhook address or executing a script after an alarm is triggered), and the ability to store and manage historical alarm events and view them in groups.
![Product Architecture](https://download.flashcat.cloud/ulric/20240221152601.png)
If the performance of a standalone time-series database (such as Prometheus) has bottlenecks or poor disaster recovery, we recommend using [VictoriaMetrics](https://github.com/VictoriaMetrics/VictoriaMetrics). The VictoriaMetrics architecture is relatively simple, has excellent performance, and is easy to deploy and maintain. The architecture diagram is as shown above. For more detailed documentation on VictoriaMetrics, please refer to its [official website](https://victoriametrics.com/).
For certain edge data centers with poor network connectivity to the central Nightingale server, we offer a distributed deployment mode for the alert engine. In this mode, even if the network is disconnected, the alerting functionality remains unaffected.
**We welcome you to participate in the Nightingale open-source project and community in various ways, including but not limited to**
- Adding and improving documentation => [n9e.github.io](https://n9e.github.io/)
- Sharing your best practices and experience in using Nightingale monitoring => [Article sharing]((https://n9e.github.io/docs/prologue/share/))
- Submitting product suggestions => [github issue](https://github.com/ccfos/nightingale/issues/new?assignees=&labels=kind%2Ffeature&template=enhancement.md)
- Submitting code to make Nightingale monitoring faster, more stable, and easier to use => [github pull request](https://github.com/didi/nightingale/pulls)
![Edge Deployment Mode](https://download.flashcat.cloud/ulric/20240222102119.png)
**Respecting, recognizing, and recording the work of every contributor** is the first guiding principle of the Nightingale open-source community. We advocate effective questioning, which not only respects the developer's time but also contributes to the accumulation of knowledge in the entire community
- Before asking a question, please first refer to the [FAQ](https://www.gitlink.org.cn/ccfos/nightingale/wiki/faq)
- We use [GitHub Discussions](https://github.com/ccfos/nightingale/discussions) as the communication forum. You can search and ask questions here.
- We also recommend that you join ours [Slack channel](https://n9e-talk.slack.com/) to exchange experiences with other Nightingale users.
## Communication Channels
## Who is using Nightingale
You can register your usage and share your experience by posting on **[Who is Using Nightingale](https://github.com/ccfos/nightingale/issues/897)**.
- **Report Bugs:** It is highly recommended to submit issues via the [Nightingale GitHub Issue tracker](https://github.com/ccfos/nightingale/issues/new?assignees=&labels=kind%2Fbug&projects=&template=bug_report.yml).
- **Documentation:** For more information, we recommend thoroughly browsing the [Nightingale Documentation Site](https://flashcat.cloud/docs/content/flashcat-monitor/nightingale-v7/introduction/).
## Stargazers over time
[![Stargazers over time](https://starchart.cc/ccfos/nightingale.svg)](https://starchart.cc/ccfos/nightingale)
## Contributors
[![Stargazers over time](https://api.star-history.com/svg?repos=ccfos/nightingale&type=Date)](https://star-history.com/#ccfos/nightingale&Date)
## Community Co-Building
- ❇️ Please read the [Nightingale Open Source Project and Community Governance Draft](./doc/community-governance.md). We sincerely welcome every user, developer, company, and organization to use Nightingale, actively report bugs, submit feature requests, share best practices, and help build a professional and active open-source community.
- ❤️ Nightingale Contributors
<a href="https://github.com/ccfos/nightingale/graphs/contributors">
<img src="https://contrib.rocks/image?repo=ccfos/nightingale" />
</a>
## License
[Apache License V2.0](https://github.com/didi/nightingale/blob/main/LICENSE)
- [Apache License V2.0](https://github.com/didi/nightingale/blob/main/LICENSE)

View File

@@ -32,6 +32,7 @@ type Alerting struct {
Timeout int64
TemplatesDir string
NotifyConcurrency int
WebhookBatchSend bool
}
type CallPlugin struct {
@@ -59,10 +60,6 @@ func (a *Alert) PreCheck(configDir string) {
a.Heartbeat.Interval = 1000
}
if a.Heartbeat.EngineName == "" {
a.Heartbeat.EngineName = "default"
}
if a.EngineDelay == 0 {
a.EngineDelay = 30
}

View File

@@ -4,6 +4,8 @@ import (
"context"
"fmt"
"github.com/ccfos/nightingale/v6/dscache"
"github.com/ccfos/nightingale/v6/alert/aconf"
"github.com/ccfos/nightingale/v6/alert/astats"
"github.com/ccfos/nightingale/v6/alert/dispatch"
@@ -21,12 +23,11 @@ import (
"github.com/ccfos/nightingale/v6/pkg/ctx"
"github.com/ccfos/nightingale/v6/pkg/httpx"
"github.com/ccfos/nightingale/v6/pkg/logx"
"github.com/ccfos/nightingale/v6/pkg/macros"
"github.com/ccfos/nightingale/v6/prom"
"github.com/ccfos/nightingale/v6/pushgw/pconf"
"github.com/ccfos/nightingale/v6/pushgw/writer"
"github.com/ccfos/nightingale/v6/storage"
"github.com/ccfos/nightingale/v6/tdengine"
"github.com/flashcatcloud/ibex/src/cmd/ibex"
)
@@ -62,15 +63,19 @@ func Initialize(configDir string, cryptoKey string) (func(), error) {
userCache := memsto.NewUserCache(ctx, syncStats)
userGroupCache := memsto.NewUserGroupCache(ctx, syncStats)
taskTplsCache := memsto.NewTaskTplCache(ctx)
configCvalCache := memsto.NewCvalCache(ctx, syncStats)
promClients := prom.NewPromClient(ctx)
tdengineClients := tdengine.NewTdengineClient(ctx, config.Alert.Heartbeat)
dispatch.InitRegisterQueryFunc(promClients)
externalProcessors := process.NewExternalProcessors()
Start(config.Alert, config.Pushgw, syncStats, alertStats, externalProcessors, targetCache, busiGroupCache, alertMuteCache, alertRuleCache, notifyConfigCache, taskTplsCache, dsCache, ctx, promClients, tdengineClients, userCache, userGroupCache)
macros.RegisterMacro(macros.MacroInVain)
dscache.Init(ctx, false)
Start(config.Alert, config.Pushgw, syncStats, alertStats, externalProcessors, targetCache, busiGroupCache, alertMuteCache, alertRuleCache, notifyConfigCache, taskTplsCache, dsCache, ctx, promClients, userCache, userGroupCache)
r := httpx.GinEngine(config.Global.RunMode, config.HTTP)
r := httpx.GinEngine(config.Global.RunMode, config.HTTP,
configCvalCache.PrintBodyPaths, configCvalCache.PrintAccessLog)
rt := router.New(config.HTTP, config.Alert, alertMuteCache, targetCache, busiGroupCache, alertStats, ctx, externalProcessors)
if config.Ibex.Enable {
@@ -90,7 +95,7 @@ func Initialize(configDir string, cryptoKey string) (func(), error) {
func Start(alertc aconf.Alert, pushgwc pconf.Pushgw, syncStats *memsto.Stats, alertStats *astats.Stats, externalProcessors *process.ExternalProcessorsType, targetCache *memsto.TargetCacheType, busiGroupCache *memsto.BusiGroupCacheType,
alertMuteCache *memsto.AlertMuteCacheType, alertRuleCache *memsto.AlertRuleCacheType, notifyConfigCache *memsto.NotifyConfigCacheType, taskTplsCache *memsto.TaskTplCache, datasourceCache *memsto.DatasourceCacheType, ctx *ctx.Context,
promClients *prom.PromClientMap, tdendgineClients *tdengine.TdengineClientMap, userCache *memsto.UserCacheType, userGroupCache *memsto.UserGroupCacheType) {
promClients *prom.PromClientMap, userCache *memsto.UserCacheType, userGroupCache *memsto.UserGroupCacheType) {
alertSubscribeCache := memsto.NewAlertSubscribeCache(ctx, syncStats)
recordingRuleCache := memsto.NewRecordingRuleCache(ctx, syncStats)
targetsOfAlertRulesCache := memsto.NewTargetOfAlertRuleCache(ctx, alertc.Heartbeat.EngineName, syncStats)
@@ -100,10 +105,10 @@ func Start(alertc aconf.Alert, pushgwc pconf.Pushgw, syncStats *memsto.Stats, al
naming := naming.NewNaming(ctx, alertc.Heartbeat, alertStats)
writers := writer.NewWriters(pushgwc)
record.NewScheduler(alertc, recordingRuleCache, promClients, writers, alertStats)
record.NewScheduler(alertc, recordingRuleCache, promClients, writers, alertStats, datasourceCache)
eval.NewScheduler(alertc, externalProcessors, alertRuleCache, targetCache, targetsOfAlertRulesCache,
busiGroupCache, alertMuteCache, datasourceCache, promClients, tdendgineClients, naming, ctx, alertStats)
busiGroupCache, alertMuteCache, datasourceCache, promClients, naming, ctx, alertStats)
dp := dispatch.NewDispatch(alertRuleCache, userCache, userGroupCache, alertSubscribeCache, targetCache, notifyConfigCache, taskTplsCache, alertc.Alerting, ctx, alertStats)
consumer := dispatch.NewConsumer(alertc.Alerting, ctx, dp, promClients)
@@ -112,5 +117,5 @@ func Start(alertc aconf.Alert, pushgwc pconf.Pushgw, syncStats *memsto.Stats, al
go consumer.LoopConsume()
go queue.ReportQueueSize(alertStats)
go sender.InitEmailSender(notifyConfigCache)
go sender.InitEmailSender(ctx, notifyConfigCache)
}

View File

@@ -38,7 +38,7 @@ func NewSyncStats() *Stats {
Subsystem: subsystem,
Name: "rule_eval_error_total",
Help: "Number of rule eval error.",
}, []string{"datasource", "stage"})
}, []string{"datasource", "stage", "busi_group", "rule_id"})
CounterQueryDataErrorTotal := prometheus.NewCounterVec(prometheus.CounterOpts{
Namespace: namespace,

View File

@@ -2,6 +2,7 @@ package common
import (
"fmt"
"strings"
"github.com/ccfos/nightingale/v6/models"
)
@@ -34,9 +35,9 @@ func MatchGroupsName(groupName string, groupFilter []models.TagFilter) bool {
func matchTag(value string, filter models.TagFilter) bool {
switch filter.Func {
case "==":
return filter.Value == value
return strings.TrimSpace(filter.Value) == strings.TrimSpace(value)
case "!=":
return filter.Value != value
return strings.TrimSpace(filter.Value) != strings.TrimSpace(value)
case "in":
_, has := filter.Vset[value]
return has

View File

@@ -1,6 +1,7 @@
package dispatch
import (
"context"
"encoding/json"
"fmt"
"strings"
@@ -13,8 +14,10 @@ import (
"github.com/ccfos/nightingale/v6/pkg/ctx"
"github.com/ccfos/nightingale/v6/pkg/poster"
promsdk "github.com/ccfos/nightingale/v6/pkg/prom"
"github.com/ccfos/nightingale/v6/pkg/tplx"
"github.com/ccfos/nightingale/v6/prom"
"github.com/prometheus/common/model"
"github.com/toolkits/pkg/concurrent/semaphore"
"github.com/toolkits/pkg/logger"
)
@@ -27,6 +30,18 @@ type Consumer struct {
promClients *prom.PromClientMap
}
func InitRegisterQueryFunc(promClients *prom.PromClientMap) {
tplx.RegisterQueryFunc(func(datasourceID int64, promql string) model.Value {
if promClients.IsNil(datasourceID) {
return nil
}
readerClient := promClients.GetCli(datasourceID)
value, _, _ := readerClient.Query(context.Background(), promql, time.Now())
return value
})
}
// 创建一个 Consumer 实例
func NewConsumer(alerting aconf.Alerting, ctx *ctx.Context, dispatch *Dispatch, promClients *prom.PromClientMap) *Consumer {
return &Consumer{
@@ -113,7 +128,7 @@ func (e *Consumer) persist(event *models.AlertCurEvent) {
event.Id, err = poster.PostByUrlsWithResp[int64](e.ctx, "/v1/n9e/event-persist", event)
if err != nil {
logger.Errorf("event:%+v persist err:%v", event, err)
e.dispatch.Astats.CounterRuleEvalErrorTotal.WithLabelValues(fmt.Sprintf("%v", event.DatasourceId), "persist_event").Inc()
e.dispatch.Astats.CounterRuleEvalErrorTotal.WithLabelValues(fmt.Sprintf("%v", event.DatasourceId), "persist_event", event.GroupName, fmt.Sprintf("%v", event.RuleId)).Inc()
}
return
}
@@ -121,7 +136,7 @@ func (e *Consumer) persist(event *models.AlertCurEvent) {
err := models.EventPersist(e.ctx, event)
if err != nil {
logger.Errorf("event%+v persist err:%v", event, err)
e.dispatch.Astats.CounterRuleEvalErrorTotal.WithLabelValues(fmt.Sprintf("%v", event.DatasourceId), "persist_event").Inc()
e.dispatch.Astats.CounterRuleEvalErrorTotal.WithLabelValues(fmt.Sprintf("%v", event.DatasourceId), "persist_event", event.GroupName, fmt.Sprintf("%v", event.RuleId)).Inc()
}
}
@@ -169,7 +184,7 @@ func (e *Consumer) queryRecoveryVal(event *models.AlertCurEvent) {
logger.Errorf("rule_eval:%s promql:%s, warnings:%v", getKey(event), promql, warnings)
}
anomalyPoints := common.ConvertAnomalyPoints(value)
anomalyPoints := models.ConvertAnomalyPoints(value)
if len(anomalyPoints) == 0 {
logger.Warningf("rule_eval:%s promql:%s, result is empty", getKey(event), promql)
event.AnnotationsJSON["recovery_promql_error"] = fmt.Sprintf("promql:%s error:%s", promql, "result is empty")

View File

@@ -139,6 +139,12 @@ func (e *Dispatch) HandleEventNotify(event *models.AlertCurEvent, isSubscribe bo
if rule == nil {
return
}
if e.blockEventNotify(rule, event) {
logger.Infof("block event notify: rule_id:%d event:%+v", rule.Id, event)
return
}
fillUsers(event, e.userCache, e.userGroupCache)
var (
@@ -167,7 +173,7 @@ func (e *Dispatch) HandleEventNotify(event *models.AlertCurEvent, isSubscribe bo
}
// 处理事件发送,这里用一个goroutine处理一个event的所有发送事件
go e.Send(rule, event, notifyTarget)
go e.Send(rule, event, notifyTarget, isSubscribe)
// 如果是不是订阅规则出现的event, 则需要处理订阅规则的event
if !isSubscribe {
@@ -175,6 +181,25 @@ func (e *Dispatch) HandleEventNotify(event *models.AlertCurEvent, isSubscribe bo
}
}
func (e *Dispatch) blockEventNotify(rule *models.AlertRule, event *models.AlertCurEvent) bool {
ruleType := rule.GetRuleType()
// 若为机器则先看机器是否删除
if ruleType == models.HOST {
host, ok := e.targetCache.Get(event.TagsMap["ident"])
if !ok || host == nil {
return true
}
}
// 恢复通知,检测规则配置是否改变
// if event.IsRecovered && event.RuleHash != rule.Hash() {
// return true
// }
return false
}
func (e *Dispatch) handleSubs(event *models.AlertCurEvent) {
// handle alert subscribes
subscribes := make([]*models.AlertSubscribe, 0)
@@ -238,11 +263,12 @@ func (e *Dispatch) handleSub(sub *models.AlertSubscribe, event models.AlertCurEv
e.HandleEventNotify(&event, true)
}
func (e *Dispatch) Send(rule *models.AlertRule, event *models.AlertCurEvent, notifyTarget *NotifyTarget) {
func (e *Dispatch) Send(rule *models.AlertRule, event *models.AlertCurEvent, notifyTarget *NotifyTarget, isSubscribe bool) {
needSend := e.BeforeSenderHook(event)
if needSend {
for channel, uids := range notifyTarget.ToChannelUserMap() {
msgCtx := sender.BuildMessageContext(rule, []*models.AlertCurEvent{event}, uids, e.userCache, e.Astats)
msgCtx := sender.BuildMessageContext(e.ctx, rule, []*models.AlertCurEvent{event},
uids, e.userCache, e.Astats)
e.RwLock.RLock()
s := e.Senders[channel]
e.RwLock.RUnlock()
@@ -265,22 +291,40 @@ func (e *Dispatch) Send(rule *models.AlertRule, event *models.AlertCurEvent, not
e.SendCallbacks(rule, notifyTarget, event)
// handle global webhooks
sender.SendWebhooks(notifyTarget.ToWebhookList(), event, e.Astats)
if !event.OverrideGlobalWebhook() {
if e.alerting.WebhookBatchSend {
sender.BatchSendWebhooks(e.ctx, notifyTarget.ToWebhookMap(), event, e.Astats)
} else {
sender.SingleSendWebhooks(e.ctx, notifyTarget.ToWebhookMap(), event, e.Astats)
}
}
// handle plugin call
go sender.MayPluginNotify(e.genNoticeBytes(event), e.notifyConfigCache.GetNotifyScript(), e.Astats)
go sender.MayPluginNotify(e.ctx, e.genNoticeBytes(event), e.notifyConfigCache.
GetNotifyScript(), e.Astats, event)
if !isSubscribe {
// handle ibex callbacks
e.HandleIbex(rule, event)
}
}
func (e *Dispatch) SendCallbacks(rule *models.AlertRule, notifyTarget *NotifyTarget, event *models.AlertCurEvent) {
uids := notifyTarget.ToUidList()
urls := notifyTarget.ToCallbackList()
whMap := notifyTarget.ToWebhookMap()
ogw := event.OverrideGlobalWebhook()
for _, urlStr := range urls {
if len(urlStr) == 0 {
continue
}
cbCtx := sender.BuildCallBackContext(e.ctx, urlStr, rule, []*models.AlertCurEvent{event}, uids, e.userCache, e.Astats)
cbCtx := sender.BuildCallBackContext(e.ctx, urlStr, rule, []*models.AlertCurEvent{event}, uids, e.userCache, e.alerting.WebhookBatchSend, e.Astats)
if wh, ok := whMap[cbCtx.CallBackURL]; !ogw && ok && wh.Enable {
logger.Debugf("SendCallbacks: webhook[%s] is in global conf.", cbCtx.CallBackURL)
continue
}
if strings.HasPrefix(urlStr, "${ibex}") {
e.CallBacks[models.IbexDomain].CallBack(cbCtx)
@@ -318,6 +362,30 @@ func (e *Dispatch) SendCallbacks(rule *models.AlertRule, notifyTarget *NotifyTar
}
}
func (e *Dispatch) HandleIbex(rule *models.AlertRule, event *models.AlertCurEvent) {
// 解析 RuleConfig 字段
var ruleConfig struct {
TaskTpls []*models.Tpl `json:"task_tpls"`
}
json.Unmarshal([]byte(rule.RuleConfig), &ruleConfig)
for _, t := range ruleConfig.TaskTpls {
if t.TplId == 0 {
continue
}
if len(t.Host) == 0 {
sender.CallIbex(e.ctx, t.TplId, event.TargetIdent,
e.taskTplsCache, e.targetCache, e.userCache, event)
continue
}
for _, host := range t.Host {
sender.CallIbex(e.ctx, t.TplId, host,
e.taskTplsCache, e.targetCache, e.userCache, event)
}
}
}
type Notice struct {
Event *models.AlertCurEvent `json:"event"`
Tpls map[string]string `json:"tpls"`

View File

@@ -76,16 +76,12 @@ func (s *NotifyTarget) ToCallbackList() []string {
return callbacks
}
func (s *NotifyTarget) ToWebhookList() []*models.Webhook {
webhooks := make([]*models.Webhook, 0, len(s.webhooks))
for _, wh := range s.webhooks {
webhooks = append(webhooks, wh)
}
return webhooks
func (s *NotifyTarget) ToWebhookMap() map[string]*models.Webhook {
return s.webhooks
}
func (s *NotifyTarget) ToUidList() []int64 {
uids := make([]int64, len(s.userMap))
uids := make([]int64, 0, len(s.userMap))
for uid, _ := range s.userMap {
uids = append(uids, uid)
}

View File

@@ -13,8 +13,6 @@ import (
"github.com/ccfos/nightingale/v6/memsto"
"github.com/ccfos/nightingale/v6/pkg/ctx"
"github.com/ccfos/nightingale/v6/prom"
"github.com/ccfos/nightingale/v6/tdengine"
"github.com/toolkits/pkg/logger"
)
@@ -33,8 +31,7 @@ type Scheduler struct {
alertMuteCache *memsto.AlertMuteCacheType
datasourceCache *memsto.DatasourceCacheType
promClients *prom.PromClientMap
tdengineClients *tdengine.TdengineClientMap
promClients *prom.PromClientMap
naming *naming.Naming
@@ -45,7 +42,7 @@ type Scheduler struct {
func NewScheduler(aconf aconf.Alert, externalProcessors *process.ExternalProcessorsType, arc *memsto.AlertRuleCacheType,
targetCache *memsto.TargetCacheType, toarc *memsto.TargetsOfAlertRuleCacheType,
busiGroupCache *memsto.BusiGroupCacheType, alertMuteCache *memsto.AlertMuteCacheType, datasourceCache *memsto.DatasourceCacheType,
promClients *prom.PromClientMap, tdengineClients *tdengine.TdengineClientMap, naming *naming.Naming, ctx *ctx.Context, stats *astats.Stats) *Scheduler {
promClients *prom.PromClientMap, naming *naming.Naming, ctx *ctx.Context, stats *astats.Stats) *Scheduler {
scheduler := &Scheduler{
aconf: aconf,
alertRules: make(map[string]*AlertRuleWorker),
@@ -59,9 +56,8 @@ func NewScheduler(aconf aconf.Alert, externalProcessors *process.ExternalProcess
alertMuteCache: alertMuteCache,
datasourceCache: datasourceCache,
promClients: promClients,
tdengineClients: tdengineClients,
naming: naming,
promClients: promClients,
naming: naming,
ctx: ctx,
stats: stats,
@@ -95,9 +91,8 @@ func (s *Scheduler) syncAlertRules() {
}
ruleType := rule.GetRuleType()
if rule.IsPrometheusRule() || rule.IsLokiRule() || rule.IsTdengineRule() {
datasourceIds := s.promClients.Hit(rule.DatasourceIdsJson)
datasourceIds = append(datasourceIds, s.tdengineClients.Hit(rule.DatasourceIdsJson)...)
if rule.IsPrometheusRule() || rule.IsLokiRule() || rule.IsTdengineRule() || rule.IsClickHouseRule() || rule.IsElasticSearch() {
datasourceIds := s.datasourceCache.GetIDsByDsCateAndQueries(rule.Cate, rule.DatasourceQueries)
for _, dsId := range datasourceIds {
if !naming.DatasourceHashRing.IsHit(strconv.FormatInt(dsId, 10), fmt.Sprintf("%d", rule.Id), s.aconf.Heartbeat.Endpoint) {
continue
@@ -119,7 +114,7 @@ func (s *Scheduler) syncAlertRules() {
}
processor := process.NewProcessor(s.aconf.Heartbeat.EngineName, rule, dsId, s.alertRuleCache, s.targetCache, s.targetsOfAlertRuleCache, s.busiGroupCache, s.alertMuteCache, s.datasourceCache, s.ctx, s.stats)
alertRule := NewAlertRuleWorker(rule, dsId, processor, s.promClients, s.tdengineClients, s.ctx)
alertRule := NewAlertRuleWorker(rule, dsId, processor, s.promClients, s.ctx)
alertRuleWorkers[alertRule.Hash()] = alertRule
}
} else if rule.IsHostRule() {
@@ -128,12 +123,13 @@ func (s *Scheduler) syncAlertRules() {
continue
}
processor := process.NewProcessor(s.aconf.Heartbeat.EngineName, rule, 0, s.alertRuleCache, s.targetCache, s.targetsOfAlertRuleCache, s.busiGroupCache, s.alertMuteCache, s.datasourceCache, s.ctx, s.stats)
alertRule := NewAlertRuleWorker(rule, 0, processor, s.promClients, s.tdengineClients, s.ctx)
alertRule := NewAlertRuleWorker(rule, 0, processor, s.promClients, s.ctx)
alertRuleWorkers[alertRule.Hash()] = alertRule
} else {
// 如果 rule 不是通过 prometheus engine 来告警的,则创建为 externalRule
// if rule is not processed by prometheus engine, create it as externalRule
for _, dsId := range rule.DatasourceIdsJson {
dsIds := s.datasourceCache.GetIDsByDsCateAndQueries(rule.Cate, rule.DatasourceQueries)
for _, dsId := range dsIds {
ds := s.datasourceCache.GetById(dsId)
if ds == nil {
logger.Debugf("datasource %d not found", dsId)
@@ -153,6 +149,7 @@ func (s *Scheduler) syncAlertRules() {
for hash, rule := range alertRuleWorkers {
if _, has := s.alertRules[hash]; !has {
rule.Prepare()
time.Sleep(time.Duration(20) * time.Millisecond)
rule.Start()
s.alertRules[hash] = rule
}

File diff suppressed because it is too large Load Diff

458
alert/eval/eval_test.go Normal file
View File

@@ -0,0 +1,458 @@
package eval
import (
"reflect"
"testing"
"golang.org/x/exp/slices"
)
var (
reHashTagIndex1 = map[uint64][][]uint64{
1: {
{1, 2}, {3, 4},
},
2: {
{5, 6}, {7, 8},
},
}
reHashTagIndex2 = map[uint64][][]uint64{
1: {
{9, 10}, {11, 12},
},
3: {
{13, 14}, {15, 16},
},
}
seriesTagIndex1 = map[uint64][]uint64{
1: {1, 2, 3, 4},
2: {5, 6, 7, 8},
}
seriesTagIndex2 = map[uint64][]uint64{
1: {9, 10, 11, 12},
3: {13, 14, 15, 16},
}
)
func Test_originalJoin(t *testing.T) {
type args struct {
seriesTagIndex1 map[uint64][]uint64
seriesTagIndex2 map[uint64][]uint64
}
tests := []struct {
name string
args args
want map[uint64][]uint64
}{
{
name: "original join",
args: args{
seriesTagIndex1: map[uint64][]uint64{
1: {1, 2, 3, 4},
2: {5, 6, 7, 8},
},
seriesTagIndex2: map[uint64][]uint64{
1: {9, 10, 11, 12},
3: {13, 14, 15, 16},
},
},
want: map[uint64][]uint64{
1: {1, 2, 3, 4, 9, 10, 11, 12},
2: {5, 6, 7, 8},
3: {13, 14, 15, 16},
},
},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
if got := originalJoin(tt.args.seriesTagIndex1, tt.args.seriesTagIndex2); !reflect.DeepEqual(got, tt.want) {
t.Errorf("originalJoin() = %v, want %v", got, tt.want)
}
})
}
}
func Test_exclude(t *testing.T) {
type args struct {
reHashTagIndex1 map[uint64][][]uint64
reHashTagIndex2 map[uint64][][]uint64
}
tests := []struct {
name string
args args
want map[uint64][]uint64
}{
{
name: "left exclude",
args: args{
reHashTagIndex1: reHashTagIndex1,
reHashTagIndex2: reHashTagIndex2,
},
want: map[uint64][]uint64{
0: {5, 6},
1: {7, 8},
},
},
{
name: "right exclude",
args: args{
reHashTagIndex1: reHashTagIndex2,
reHashTagIndex2: reHashTagIndex1,
},
want: map[uint64][]uint64{
3: {13, 14},
4: {15, 16},
},
},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
if got := exclude(tt.args.reHashTagIndex1, tt.args.reHashTagIndex2); !allValueDeepEqual(flatten(got), tt.want) {
t.Errorf("exclude() = %v, want %v", got, tt.want)
}
})
}
}
func Test_noneJoin(t *testing.T) {
type args struct {
seriesTagIndex1 map[uint64][]uint64
seriesTagIndex2 map[uint64][]uint64
}
tests := []struct {
name string
args args
want map[uint64][]uint64
}{
{
name: "none join, direct splicing",
args: args{
seriesTagIndex1: seriesTagIndex1,
seriesTagIndex2: seriesTagIndex2,
},
want: map[uint64][]uint64{
0: {1, 2, 3, 4},
1: {5, 6, 7, 8},
2: {9, 10, 11, 12},
3: {13, 14, 15, 16},
},
},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
if got := noneJoin(tt.args.seriesTagIndex1, tt.args.seriesTagIndex2); !allValueDeepEqual(got, tt.want) {
t.Errorf("noneJoin() = %v, want %v", got, tt.want)
}
})
}
}
func Test_cartesianJoin(t *testing.T) {
type args struct {
seriesTagIndex1 map[uint64][]uint64
seriesTagIndex2 map[uint64][]uint64
}
tests := []struct {
name string
args args
want map[uint64][]uint64
}{
{
name: "cartesian join",
args: args{
seriesTagIndex1: seriesTagIndex1,
seriesTagIndex2: seriesTagIndex2,
},
want: map[uint64][]uint64{
0: {1, 2, 3, 4, 9, 10, 11, 12},
1: {5, 6, 7, 8, 9, 10, 11, 12},
2: {5, 6, 7, 8, 13, 14, 15, 16},
3: {1, 2, 3, 4, 13, 14, 15, 16},
},
},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
if got := cartesianJoin(tt.args.seriesTagIndex1, tt.args.seriesTagIndex2); !allValueDeepEqual(got, tt.want) {
t.Errorf("cartesianJoin() = %v, want %v", got, tt.want)
}
})
}
}
func Test_onJoin(t *testing.T) {
type args struct {
reHashTagIndex1 map[uint64][][]uint64
reHashTagIndex2 map[uint64][][]uint64
joinType JoinType
}
tests := []struct {
name string
args args
want map[uint64][]uint64
}{
{
name: "left join",
args: args{
reHashTagIndex1: reHashTagIndex1,
reHashTagIndex2: reHashTagIndex2,
joinType: Left,
},
want: map[uint64][]uint64{
1: {1, 2, 9, 10},
2: {3, 4, 9, 10},
3: {1, 2, 11, 12},
4: {3, 4, 11, 12},
5: {5, 6},
6: {7, 8},
},
},
{
name: "right join",
args: args{
reHashTagIndex1: reHashTagIndex2,
reHashTagIndex2: reHashTagIndex1,
joinType: Right,
},
want: map[uint64][]uint64{
1: {1, 2, 9, 10},
2: {3, 4, 9, 10},
3: {1, 2, 11, 12},
4: {3, 4, 11, 12},
5: {13, 14},
6: {15, 16},
},
},
{
name: "inner join",
args: args{
reHashTagIndex1: reHashTagIndex1,
reHashTagIndex2: reHashTagIndex2,
joinType: Inner,
},
want: map[uint64][]uint64{
1: {1, 2, 9, 10},
2: {3, 4, 9, 10},
3: {1, 2, 11, 12},
4: {3, 4, 11, 12},
},
},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
if got := onJoin(tt.args.reHashTagIndex1, tt.args.reHashTagIndex2, tt.args.joinType); !allValueDeepEqual(flatten(got), tt.want) {
t.Errorf("onJoin() = %v, want %v", got, tt.want)
}
})
}
}
// allValueDeepEqual 判断 map 的 value 是否相同,不考虑 key
func allValueDeepEqual(got, want map[uint64][]uint64) bool {
if len(got) != len(want) {
return false
}
for _, v1 := range got {
curEqual := false
slices.Sort(v1)
for _, v2 := range want {
slices.Sort(v2)
if reflect.DeepEqual(v1, v2) {
curEqual = true
break
}
}
if !curEqual {
return false
}
}
return true
}
// allValueDeepEqualOmitOrder 判断两个字符串切片是否相等,不考虑顺序
func allValueDeepEqualOmitOrder(got, want []string) bool {
if len(got) != len(want) {
return false
}
slices.Sort(got)
slices.Sort(want)
for i := range got {
if got[i] != want[i] {
return false
}
}
return true
}
func Test_removeVal(t *testing.T) {
type args struct {
promql string
}
tests := []struct {
name string
args args
want string
}{
// TODO: Add test cases.
{
name: "removeVal1",
args: args{
promql: "mem{test1=\"$test1\",test2=\"$test2\",test3=\"$test3\"} > $val",
},
want: "mem{} > $val",
},
{
name: "removeVal2",
args: args{
promql: "mem{test1=\"test1\",test2=\"$test2\",test3=\"$test3\"} > $val",
},
want: "mem{test1=\"test1\"} > $val",
},
{
name: "removeVal3",
args: args{
promql: "mem{test1=\"$test1\",test2=\"test2\",test3=\"$test3\"} > $val",
},
want: "mem{test2=\"test2\"} > $val",
},
{
name: "removeVal4",
args: args{
promql: "mem{test1=\"$test1\",test2=\"$test2\",test3=\"test3\"} > $val",
},
want: "mem{test3=\"test3\"} > $val",
},
{
name: "removeVal5",
args: args{
promql: "mem{test1=\"$test1\",test2=\"test2\",test3=\"test3\"} > $val",
},
want: "mem{test2=\"test2\",test3=\"test3\"} > $val",
},
{
name: "removeVal6",
args: args{
promql: "mem{test1=\"test1\",test2=\"$test2\",test3=\"test3\"} > $val",
},
want: "mem{test1=\"test1\",test3=\"test3\"} > $val",
},
{
name: "removeVal7",
args: args{
promql: "mem{test1=\"test1\",test2=\"test2\",test3='$test3'} > $val",
},
want: "mem{test1=\"test1\",test2=\"test2\"} > $val",
},
{
name: "removeVal8",
args: args{
promql: "mem{test1=\"test1\",test2=\"test2\",test3=\"test3\"} > $val",
},
want: "mem{test1=\"test1\",test2=\"test2\",test3=\"test3\"} > $val",
},
{
name: "removeVal9",
args: args{
promql: "mem{test1=\"$test1\",test2=\"test2\"} > $val1 and mem{test3=\"test3\",test4=\"test4\"} > $val2",
},
want: "mem{test2=\"test2\"} > $val1 and mem{test3=\"test3\",test4=\"test4\"} > $val2",
},
{
name: "removeVal10",
args: args{
promql: "mem{test1=\"test1\",test2='$test2'} > $val1 and mem{test3=\"test3\",test4=\"test4\"} > $val2",
},
want: "mem{test1=\"test1\"} > $val1 and mem{test3=\"test3\",test4=\"test4\"} > $val2",
},
{
name: "removeVal11",
args: args{
promql: "mem{test1='test1',test2=\"test2\"} > $val1 and mem{test3=\"$test3\",test4=\"test4\"} > $val2",
},
want: "mem{test1='test1',test2=\"test2\"} > $val1 and mem{test4=\"test4\"} > $val2",
},
{
name: "removeVal12",
args: args{
promql: "mem{test1=\"test1\",test2=\"test2\"} > $val1 and mem{test3=\"test3\",test4=\"$test4\"} > $val2",
},
want: "mem{test1=\"test1\",test2=\"test2\"} > $val1 and mem{test3=\"test3\"} > $val2",
},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
if got := removeVal(tt.args.promql); got != tt.want {
t.Errorf("removeVal() = %v, want %v", got, tt.want)
}
})
}
}
func TestExtractVarMapping(t *testing.T) {
tests := []struct {
name string
promql string
want map[string]string
}{
{
name: "单个花括号单个变量",
promql: `mem_used_percent{host="$my_host"} > $val`,
want: map[string]string{"my_host": "host"},
},
{
name: "单个花括号多个变量",
promql: `mem_used_percent{host="$my_host",region="$region",env="prod"} > $val`,
want: map[string]string{"my_host": "host", "region": "region"},
},
{
name: "多个花括号多个变量",
promql: `sum(rate(mem_used_percent{host="$my_host"})) by (instance) + avg(node_load1{region="$region"}) > $val`,
want: map[string]string{"my_host": "host", "region": "region"},
},
{
name: "相同变量出现多次",
promql: `sum(rate(mem_used_percent{host="$my_host"})) + avg(node_load1{host="$my_host"}) > $val`,
want: map[string]string{"my_host": "host"},
},
{
name: "没有变量",
promql: `mem_used_percent{host="localhost",region="cn"} > 80`,
want: map[string]string{},
},
{
name: "没有花括号",
promql: `80 > $val`,
want: map[string]string{},
},
{
name: "格式不规范的标签",
promql: `mem_used_percent{host=$my_host,region = $region} > $val`,
want: map[string]string{"my_host": "host", "region": "region"},
},
{
name: "空花括号",
promql: `mem_used_percent{} > $val`,
want: map[string]string{},
},
{
name: "不完整的花括号",
promql: `mem_used_percent{host="$my_host"`,
want: map[string]string{},
},
{
name: "复杂表达式",
promql: `sum(rate(http_requests_total{handler="$handler",code="$code"}[5m])) by (handler) / sum(rate(http_requests_total{handler="$handler"}[5m])) by (handler) * 100 > $threshold`,
want: map[string]string{"handler": "handler", "code": "code"},
},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
got := ExtractVarMapping(tt.promql)
if !reflect.DeepEqual(got, tt.want) {
t.Errorf("ExtractVarMapping() = %v, want %v", got, tt.want)
}
})
}
}

View File

@@ -114,7 +114,7 @@ func BgNotMatchMuteStrategy(rule *models.AlertRule, event *models.AlertCurEvent,
target, exists := targetCache.Get(ident)
// 对于包含ident的告警事件check一下ident所属bg和rule所属bg是否相同
// 如果告警规则选择了只在本BG生效那其他BG的机器就不能因此规则产生告警
if exists && target.GroupId != rule.GroupId {
if exists && !target.MatchGroupId(rule.GroupId) {
logger.Debugf("[%s] mute: rule_eval:%d cluster:%s", "BgNotMatchMuteStrategy", rule.Id, event.Cluster)
return true
}

View File

@@ -2,6 +2,7 @@ package process
import (
"bytes"
"encoding/json"
"fmt"
"html/template"
"sort"
@@ -21,6 +22,7 @@ import (
"github.com/ccfos/nightingale/v6/pushgw/writer"
"github.com/prometheus/prometheus/prompb"
"github.com/robfig/cron/v3"
"github.com/toolkits/pkg/logger"
"github.com/toolkits/pkg/str"
)
@@ -53,10 +55,11 @@ type Processor struct {
datasourceId int64
EngineName string
rule *models.AlertRule
fires *AlertCurEventMap
pendings *AlertCurEventMap
inhibit bool
rule *models.AlertRule
fires *AlertCurEventMap
pendings *AlertCurEventMap
pendingsUseByRecover *AlertCurEventMap
inhibit bool
tagsMap map[string]string
tagsArr []string
@@ -77,6 +80,9 @@ type Processor struct {
HandleFireEventHook HandleEventFunc
HandleRecoverEventHook HandleEventFunc
EventMuteHook EventMuteHookFunc
ScheduleEntry cron.Entry
PromEvalInterval int
}
func (p *Processor) Key() string {
@@ -88,9 +94,9 @@ func (p *Processor) DatasourceId() int64 {
}
func (p *Processor) Hash() string {
return str.MD5(fmt.Sprintf("%d_%d_%s_%d",
return str.MD5(fmt.Sprintf("%d_%s_%s_%d",
p.rule.Id,
p.rule.PromEvalInterval,
p.rule.CronPattern,
p.rule.RuleConfig,
p.datasourceId,
))
@@ -125,7 +131,7 @@ func NewProcessor(engineName string, rule *models.AlertRule, datasourceId int64,
return p
}
func (p *Processor) Handle(anomalyPoints []common.AnomalyPoint, from string, inhibit bool) {
func (p *Processor) Handle(anomalyPoints []models.AnomalyPoint, from string, inhibit bool) {
// 有可能rule的一些配置已经发生变化比如告警接收人、callbacks等
// 这些信息的修改是不会引起worker restart的但是确实会影响告警处理逻辑
// 所以这里直接从memsto.AlertRuleCache中获取并覆盖
@@ -133,10 +139,13 @@ func (p *Processor) Handle(anomalyPoints []common.AnomalyPoint, from string, inh
cachedRule := p.alertRuleCache.Get(p.rule.Id)
if cachedRule == nil {
logger.Errorf("rule not found %+v", anomalyPoints)
p.Stats.CounterRuleEvalErrorTotal.WithLabelValues(fmt.Sprintf("%v", p.DatasourceId()), "handle_event").Inc()
p.Stats.CounterRuleEvalErrorTotal.WithLabelValues(fmt.Sprintf("%v", p.DatasourceId()), "handle_event", p.BusiGroupCache.GetNameByBusiGroupId(p.rule.GroupId), fmt.Sprintf("%v", p.rule.Id)).Inc()
return
}
// 在 rule 变化之前取到 ruleHash
ruleHash := p.rule.Hash()
p.rule = cachedRule
now := time.Now().Unix()
alertingKeys := map[string]struct{}{}
@@ -144,7 +153,7 @@ func (p *Processor) Handle(anomalyPoints []common.AnomalyPoint, from string, inh
// 根据 event 的 tag 将 events 分组,处理告警抑制的情况
eventsMap := make(map[string][]*models.AlertCurEvent)
for _, anomalyPoint := range anomalyPoints {
event := p.BuildEvent(anomalyPoint, from, now)
event := p.BuildEvent(anomalyPoint, from, now, ruleHash)
// 如果 event 被 mute 了,本质也是 fire 的状态,这里无论如何都添加到 alertingKeys 中,防止 fire 的事件自动恢复了
hash := event.Hash
alertingKeys[hash] = struct{}{}
@@ -169,10 +178,12 @@ func (p *Processor) Handle(anomalyPoints []common.AnomalyPoint, from string, inh
p.handleEvent(events)
}
p.HandleRecover(alertingKeys, now, inhibit)
if from == "inner" {
p.HandleRecover(alertingKeys, now, inhibit)
}
}
func (p *Processor) BuildEvent(anomalyPoint common.AnomalyPoint, from string, now int64) *models.AlertCurEvent {
func (p *Processor) BuildEvent(anomalyPoint models.AnomalyPoint, from string, now int64, ruleHash string) *models.AlertCurEvent {
p.fillTags(anomalyPoint)
p.mayHandleIdent()
hash := Hash(p.rule.Id, p.datasourceId, anomalyPoint)
@@ -198,18 +209,43 @@ func (p *Processor) BuildEvent(anomalyPoint common.AnomalyPoint, from string, no
event.TargetNote = p.targetNote
event.TriggerValue = anomalyPoint.ReadableValue()
event.TriggerValues = anomalyPoint.Values
event.TriggerValuesJson = models.EventTriggerValues{ValuesWithUnit: anomalyPoint.ValuesUnit}
event.TagsJSON = p.tagsArr
event.Tags = strings.Join(p.tagsArr, ",,")
event.IsRecovered = false
event.Callbacks = p.rule.Callbacks
event.CallbacksJSON = p.rule.CallbacksJSON
event.Annotations = p.rule.Annotations
event.AnnotationsJSON = make(map[string]string)
event.RuleConfig = p.rule.RuleConfig
event.RuleConfigJson = p.rule.RuleConfigJson
event.Severity = anomalyPoint.Severity
event.ExtraConfig = p.rule.ExtraConfigJSON
event.PromQl = anomalyPoint.Query
event.RecoverConfig = anomalyPoint.RecoverConfig
event.RuleHash = ruleHash
if anomalyPoint.TriggerType == models.TriggerTypeNodata {
event.TriggerValue = "nodata"
ruleConfig := models.RuleQuery{}
json.Unmarshal([]byte(p.rule.RuleConfig), &ruleConfig)
ruleConfig.TriggerType = anomalyPoint.TriggerType
b, _ := json.Marshal(ruleConfig)
event.RuleConfig = string(b)
}
if err := json.Unmarshal([]byte(p.rule.Annotations), &event.AnnotationsJSON); err != nil {
event.AnnotationsJSON = make(map[string]string) // 解析失败时使用空 map
logger.Warningf("unmarshal annotations json failed: %v, rule: %d", err, p.rule.Id)
}
if p.target != "" {
if pt, exist := p.TargetCache.Get(p.target); exist {
pt.GroupNames = p.BusiGroupCache.GetNamesByBusiGroupIds(pt.GroupIds)
event.Target = pt
} else {
logger.Infof("Target[ident: %s] doesn't exist in cache.", p.target)
}
}
if event.TriggerValues != "" && strings.Count(event.TriggerValues, "$") > 1 {
// TriggerValues 有多个变量,将多个变量都放到 TriggerValue 中
@@ -232,17 +268,17 @@ func Relabel(rule *models.AlertRule, event *models.AlertCurEvent) {
return
}
if len(rule.EventRelabelConfig) == 0 {
return
}
// need to keep the original label
event.OriginalTags = event.Tags
event.OriginalTagsJSON = make([]string, len(event.TagsJSON))
labels := make([]prompb.Label, len(event.TagsJSON))
for i, tag := range event.TagsJSON {
label := strings.Split(tag, "=")
if len(label) != 2 {
logger.Errorf("event%+v relabel: the label length is not 2:%v", event, label)
continue
}
label := strings.SplitN(tag, "=", 2)
event.OriginalTagsJSON[i] = tag
labels[i] = prompb.Label{Name: label[0], Value: label[1]}
}
@@ -281,7 +317,7 @@ func (p *Processor) HandleRecover(alertingKeys map[string]struct{}, now int64, i
}
hashArr := make([]string, 0, len(alertingKeys))
for hash := range p.fires.GetAll() {
for hash, _ := range p.fires.GetAll() {
if _, has := alertingKeys[hash]; has {
continue
}
@@ -300,7 +336,7 @@ func (p *Processor) HandleRecoverEvent(hashArr []string, now int64, inhibit bool
if !inhibit {
for _, hash := range hashArr {
p.RecoverSingle(hash, now, nil)
p.RecoverSingle(false, hash, now, nil)
}
return
}
@@ -328,11 +364,11 @@ func (p *Processor) HandleRecoverEvent(hashArr []string, now int64, inhibit bool
}
for _, event := range eventMap {
p.RecoverSingle(event.Hash, now, nil)
p.RecoverSingle(false, event.Hash, now, nil)
}
}
func (p *Processor) RecoverSingle(hash string, now int64, value *string, values ...string) {
func (p *Processor) RecoverSingle(byRecover bool, hash string, now int64, value *string, values ...string) {
cachedRule := p.rule
if cachedRule == nil {
return
@@ -342,11 +378,28 @@ func (p *Processor) RecoverSingle(hash string, now int64, value *string, values
if !has {
return
}
// 如果配置了留观时长,就不能立马恢复了
if cachedRule.RecoverDuration > 0 && now-event.LastEvalTime < cachedRule.RecoverDuration {
if cachedRule.RecoverDuration > 0 {
lastPendingEvent, has := p.pendingsUseByRecover.Get(hash)
if !has {
// 说明没有产生过异常点,就不需要恢复了
logger.Debugf("rule_eval:%s event:%v do not has pending event, not recover", p.Key(), event)
return
}
if now-lastPendingEvent.LastEvalTime < cachedRule.RecoverDuration {
logger.Debugf("rule_eval:%s event:%v not recover", p.Key(), event)
return
}
}
// 如果设置了恢复条件,则不能在此处恢复,必须依靠 recoverPoint 来恢复
if event.RecoverConfig.JudgeType != models.Origin && !byRecover {
logger.Debugf("rule_eval:%s event:%v not recover", p.Key(), event)
return
}
if value != nil {
event.TriggerValue = *value
if len(values) > 0 {
@@ -358,6 +411,7 @@ func (p *Processor) RecoverSingle(hash string, now int64, value *string, values
// 我确实无法分辨是prom中有值但是未满足阈值所以没返回还是prom中确实丢了一些点导致没有数据可以返回尴尬
p.fires.Delete(hash)
p.pendings.Delete(hash)
p.pendingsUseByRecover.Delete(hash)
// 可能是因为调整了promql才恢复的所以事件里边要体现最新的promql否则用户会比较困惑
// 当然其实rule的各个字段都可能发生变化了都更新一下吧
@@ -377,6 +431,14 @@ func (p *Processor) handleEvent(events []*models.AlertCurEvent) {
if event == nil {
continue
}
if _, has := p.pendingsUseByRecover.Get(event.Hash); has {
p.pendingsUseByRecover.UpdateLastEvalTime(event.Hash, event.LastEvalTime)
} else {
p.pendingsUseByRecover.Set(event.Hash, event)
}
event.PromEvalInterval = p.PromEvalInterval
if p.rule.PromForDuration == 0 {
fireEvents = append(fireEvents, event)
if severity > event.Severity {
@@ -385,7 +447,7 @@ func (p *Processor) handleEvent(events []*models.AlertCurEvent) {
continue
}
var preTriggerTime int64
var preTriggerTime int64 // 第一个 pending event 的触发时间
preEvent, has := p.pendings.Get(event.Hash)
if has {
p.pendings.UpdateLastEvalTime(event.Hash, event.LastEvalTime)
@@ -471,22 +533,24 @@ func (p *Processor) pushEventToQueue(e *models.AlertCurEvent) {
dispatch.LogEvent(e, "push_queue")
if !queue.EventQueue.PushFront(e) {
logger.Warningf("event_push_queue: queue is full, event:%+v", e)
p.Stats.CounterRuleEvalErrorTotal.WithLabelValues(fmt.Sprintf("%v", p.DatasourceId()), "push_event_queue").Inc()
p.Stats.CounterRuleEvalErrorTotal.WithLabelValues(fmt.Sprintf("%v", p.DatasourceId()), "push_event_queue", p.BusiGroupCache.GetNameByBusiGroupId(p.rule.GroupId), fmt.Sprintf("%v", p.rule.Id)).Inc()
}
}
func (p *Processor) RecoverAlertCurEventFromDb() {
p.pendings = NewAlertCurEventMap(nil)
p.pendingsUseByRecover = NewAlertCurEventMap(nil)
curEvents, err := models.AlertCurEventGetByRuleIdAndDsId(p.ctx, p.rule.Id, p.datasourceId)
if err != nil {
logger.Errorf("recover event from db for rule:%s failed, err:%s", p.Key(), err)
p.Stats.CounterRuleEvalErrorTotal.WithLabelValues(fmt.Sprintf("%v", p.DatasourceId()), "get_recover_event").Inc()
p.Stats.CounterRuleEvalErrorTotal.WithLabelValues(fmt.Sprintf("%v", p.DatasourceId()), "get_recover_event", p.BusiGroupCache.GetNameByBusiGroupId(p.rule.GroupId), fmt.Sprintf("%v", p.rule.Id)).Inc()
p.fires = NewAlertCurEventMap(nil)
return
}
fireMap := make(map[string]*models.AlertCurEvent)
pendingsUseByRecoverMap := make(map[string]*models.AlertCurEvent)
for _, event := range curEvents {
if event.Cate == models.HOST {
target, exists := p.TargetCache.Get(event.TargetIdent)
@@ -497,13 +561,24 @@ func (p *Processor) RecoverAlertCurEventFromDb() {
}
event.DB2Mem()
target, exists := p.TargetCache.Get(event.TargetIdent)
if exists {
target.GroupNames = p.BusiGroupCache.GetNamesByBusiGroupIds(target.GroupIds)
event.Target = target
}
fireMap[event.Hash] = event
e := *event
pendingsUseByRecoverMap[event.Hash] = &e
}
p.fires = NewAlertCurEventMap(fireMap)
// 修改告警规则,或者进程重启之后,需要重新加载 pendingsUseByRecover
p.pendingsUseByRecover = NewAlertCurEventMap(pendingsUseByRecoverMap)
}
func (p *Processor) fillTags(anomalyPoint common.AnomalyPoint) {
func (p *Processor) fillTags(anomalyPoint models.AnomalyPoint) {
// handle series tags
tagsMap := make(map[string]string)
for label, value := range anomalyPoint.Labels {
@@ -576,6 +651,7 @@ func (p *Processor) mayHandleGroup() {
func (p *Processor) DeleteProcessEvent(hash string) {
p.fires.Delete(hash)
p.pendings.Delete(hash)
p.pendingsUseByRecover.Delete(hash)
}
func labelMapToArr(m map[string]string) []string {
@@ -592,10 +668,10 @@ func labelMapToArr(m map[string]string) []string {
return labelStrings
}
func Hash(ruleId, datasourceId int64, vector common.AnomalyPoint) string {
func Hash(ruleId, datasourceId int64, vector models.AnomalyPoint) string {
return str.MD5(fmt.Sprintf("%d_%s_%d_%d_%s", ruleId, vector.Labels.String(), datasourceId, vector.Severity, vector.Query))
}
func TagHash(vector common.AnomalyPoint) string {
func TagHash(vector models.AnomalyPoint) string {
return str.MD5(vector.Labels.String())
}

View File

@@ -56,11 +56,12 @@ func (rrc *RecordRuleContext) Key() string {
}
func (rrc *RecordRuleContext) Hash() string {
return str.MD5(fmt.Sprintf("%d_%s_%s_%d",
return str.MD5(fmt.Sprintf("%d_%s_%s_%d_%s",
rrc.rule.Id,
rrc.rule.CronPattern,
rrc.rule.PromQl,
rrc.datasourceId,
rrc.rule.AppendTags,
))
}

View File

@@ -26,9 +26,11 @@ type Scheduler struct {
writers *writer.WritersType
stats *astats.Stats
datasourceCache *memsto.DatasourceCacheType
}
func NewScheduler(aconf aconf.Alert, rrc *memsto.RecordingRuleCacheType, promClients *prom.PromClientMap, writers *writer.WritersType, stats *astats.Stats) *Scheduler {
func NewScheduler(aconf aconf.Alert, rrc *memsto.RecordingRuleCacheType, promClients *prom.PromClientMap, writers *writer.WritersType, stats *astats.Stats, datasourceCache *memsto.DatasourceCacheType) *Scheduler {
scheduler := &Scheduler{
aconf: aconf,
recordRules: make(map[string]*RecordRuleContext),
@@ -39,6 +41,8 @@ func NewScheduler(aconf aconf.Alert, rrc *memsto.RecordingRuleCacheType, promCli
writers: writers,
stats: stats,
datasourceCache: datasourceCache,
}
go scheduler.LoopSyncRules(context.Background())
@@ -67,7 +71,7 @@ func (s *Scheduler) syncRecordRules() {
continue
}
datasourceIds := s.promClients.Hit(rule.DatasourceIdsJson)
datasourceIds := s.datasourceCache.GetIDsByDsCateAndQueries("prometheus", rule.DatasourceQueries)
for _, dsId := range datasourceIds {
if !naming.DatasourceHashRing.IsHit(strconv.FormatInt(dsId, 10), fmt.Sprintf("%d", rule.Id), s.aconf.Heartbeat.Endpoint) {
continue

View File

@@ -6,7 +6,6 @@ import (
"strings"
"time"
"github.com/ccfos/nightingale/v6/alert/common"
"github.com/ccfos/nightingale/v6/alert/dispatch"
"github.com/ccfos/nightingale/v6/alert/mute"
"github.com/ccfos/nightingale/v6/alert/naming"
@@ -34,7 +33,7 @@ func (rt *Router) pushEventToQueue(c *gin.Context) {
continue
}
arr := strings.Split(pair, "=")
arr := strings.SplitN(pair, "=", 2)
if len(arr) != 2 {
continue
}
@@ -92,7 +91,7 @@ func (rt *Router) eventPersist(c *gin.Context) {
type eventForm struct {
Alert bool `json:"alert"`
AnomalyPoints []common.AnomalyPoint `json:"vectors"`
AnomalyPoints []models.AnomalyPoint `json:"vectors"`
RuleId int64 `json:"rule_id"`
DatasourceId int64 `json:"datasource_id"`
Inhibit bool `json:"inhibit"`
@@ -129,7 +128,7 @@ func (rt *Router) makeEvent(c *gin.Context) {
} else {
for _, vector := range events[i].AnomalyPoints {
readableString := vector.ReadableValue()
go ruleWorker.RecoverSingle(process.Hash(events[i].RuleId, events[i].DatasourceId, vector), vector.Timestamp, &readableString)
go ruleWorker.RecoverSingle(false, process.Hash(events[i].RuleId, events[i].DatasourceId, vector), vector.Timestamp, &readableString)
}
}
}

View File

@@ -29,13 +29,14 @@ type (
Rule *models.AlertRule
Events []*models.AlertCurEvent
Stats *astats.Stats
BatchSend bool
}
DefaultCallBacker struct{}
)
func BuildCallBackContext(ctx *ctx.Context, callBackURL string, rule *models.AlertRule, events []*models.AlertCurEvent,
uids []int64, userCache *memsto.UserCacheType, stats *astats.Stats) CallBackContext {
uids []int64, userCache *memsto.UserCacheType, batchSend bool, stats *astats.Stats) CallBackContext {
users := userCache.GetByUserIds(uids)
newCallBackUrl, _ := events[0].ParseURL(callBackURL)
@@ -45,6 +46,7 @@ func BuildCallBackContext(ctx *ctx.Context, callBackURL string, rule *models.Ale
Rule: rule,
Events: events,
Users: users,
BatchSend: batchSend,
Stats: stats,
}
}
@@ -112,31 +114,96 @@ func (c *DefaultCallBacker) CallBack(ctx CallBackContext) {
event := ctx.Events[0]
ctx.Stats.AlertNotifyTotal.WithLabelValues("rule_callback").Inc()
resp, code, err := poster.PostJSON(ctx.CallBackURL, 5*time.Second, event, 3)
if err != nil {
logger.Errorf("event_callback_fail(rule_id=%d url=%s), event:%+v, resp: %s, err: %v, code: %d",
event.RuleId, ctx.CallBackURL, event, string(resp), err, code)
ctx.Stats.AlertNotifyErrorTotal.WithLabelValues("rule_callback").Inc()
} else {
logger.Infof("event_callback_succ(rule_id=%d url=%s), event:%+v, resp: %s, code: %d",
event.RuleId, ctx.CallBackURL, event, string(resp), code)
if ctx.BatchSend {
webhookConf := &models.Webhook{
Type: models.RuleCallback,
Enable: true,
Url: ctx.CallBackURL,
Timeout: 5,
RetryCount: 3,
RetryInterval: 10,
Batch: 1000,
}
PushCallbackEvent(ctx.Ctx, webhookConf, event, ctx.Stats)
return
}
doSendAndRecord(ctx.Ctx, ctx.CallBackURL, ctx.CallBackURL, event, "callback", ctx.Stats, ctx.Events)
}
func doSendAndRecord(ctx *ctx.Context, url, token string, body interface{}, channel string,
stats *astats.Stats, events []*models.AlertCurEvent) {
res, err := doSend(url, body, channel, stats)
NotifyRecord(ctx, events, channel, token, res, err)
}
func NotifyRecord(ctx *ctx.Context, evts []*models.AlertCurEvent, channel, target, res string, err error) {
// 一个通知可能对应多个 event都需要记录
notis := make([]*models.NotificaitonRecord, 0, len(evts))
for _, evt := range evts {
noti := models.NewNotificationRecord(evt, channel, target)
if err != nil {
noti.SetStatus(models.NotiStatusFailure)
noti.SetDetails(err.Error())
} else if res != "" {
noti.SetDetails(string(res))
}
notis = append(notis, noti)
}
if !ctx.IsCenter {
_, err := poster.PostByUrlsWithResp[[]int64](ctx, "/v1/n9e/notify-record", notis)
if err != nil {
logger.Errorf("add notis:%v failed, err: %v", notis, err)
}
return
}
if err := models.DB(ctx).CreateInBatches(notis, 100).Error; err != nil {
logger.Errorf("add notis:%v failed, err: %v", notis, err)
}
}
func doSend(url string, body interface{}, channel string, stats *astats.Stats) {
func doSend(url string, body interface{}, channel string, stats *astats.Stats) (string, error) {
stats.AlertNotifyTotal.WithLabelValues(channel).Inc()
res, code, err := poster.PostJSON(url, time.Second*5, body, 3)
if err != nil {
logger.Errorf("%s_sender: result=fail url=%s code=%d error=%v req:%v response=%s", channel, url, code, err, body, string(res))
stats.AlertNotifyErrorTotal.WithLabelValues(channel).Inc()
} else {
logger.Infof("%s_sender: result=succ url=%s code=%d req:%v response=%s", channel, url, code, body, string(res))
return "", err
}
logger.Infof("%s_sender: result=succ url=%s code=%d req:%v response=%s", channel, url, code, body, string(res))
return string(res), nil
}
type TaskCreateReply struct {
Err string `json:"err"`
Dat int64 `json:"dat"` // task.id
}
func PushCallbackEvent(ctx *ctx.Context, webhook *models.Webhook, event *models.AlertCurEvent, stats *astats.Stats) {
CallbackEventQueueLock.RLock()
queue := CallbackEventQueue[webhook.Url]
CallbackEventQueueLock.RUnlock()
if queue == nil {
queue = &WebhookQueue{
eventQueue: NewSafeEventQueue(QueueMaxSize),
closeCh: make(chan struct{}),
}
CallbackEventQueueLock.Lock()
CallbackEventQueue[webhook.Url] = queue
CallbackEventQueueLock.Unlock()
StartConsumer(ctx, queue, webhook.Batch, webhook, stats)
}
succ := queue.eventQueue.Push(event)
if !succ {
logger.Warningf("Write channel(%s) full, current channel size: %d event:%v", webhook.Url, queue.eventQueue.Len(), event)
}
}

View File

@@ -1,9 +1,10 @@
package sender
import (
"github.com/ccfos/nightingale/v6/models"
"html/template"
"strings"
"github.com/ccfos/nightingale/v6/models"
)
type dingtalkMarkdown struct {
@@ -35,13 +36,13 @@ func (ds *DingtalkSender) Send(ctx MessageContext) {
return
}
urls, ats := ds.extract(ctx.Users)
urls, ats, tokens := ds.extract(ctx.Users)
if len(urls) == 0 {
return
}
message := BuildTplMessage(models.Dingtalk, ds.tpl, ctx.Events)
for _, url := range urls {
for i, url := range urls {
var body dingtalk
// NoAt in url
if strings.Contains(url, "noat=1") {
@@ -66,7 +67,7 @@ func (ds *DingtalkSender) Send(ctx MessageContext) {
}
}
doSend(url, body, models.Dingtalk, ctx.Stats)
doSendAndRecord(ctx.Ctx, url, tokens[i], body, models.Dingtalk, ctx.Stats, ctx.Events)
}
}
@@ -96,15 +97,14 @@ func (ds *DingtalkSender) CallBack(ctx CallBackContext) {
body.Markdown.Text = message
}
doSend(ctx.CallBackURL, body, models.Dingtalk, ctx.Stats)
ctx.Stats.AlertNotifyTotal.WithLabelValues("rule_callback").Inc()
doSendAndRecord(ctx.Ctx, ctx.CallBackURL, ctx.CallBackURL, body, "callback", ctx.Stats, ctx.Events)
}
// extract urls and ats from Users
func (ds *DingtalkSender) extract(users []*models.User) ([]string, []string) {
func (ds *DingtalkSender) extract(users []*models.User) ([]string, []string, []string) {
urls := make([]string, 0, len(users))
ats := make([]string, 0, len(users))
tokens := make([]string, 0, len(users))
for _, user := range users {
if user.Phone != "" {
@@ -116,7 +116,8 @@ func (ds *DingtalkSender) extract(users []*models.User) ([]string, []string) {
url = "https://oapi.dingtalk.com/robot/send?access_token=" + token
}
urls = append(urls, url)
tokens = append(tokens, token)
}
}
return urls, ats
return urls, ats, tokens
}

View File

@@ -9,13 +9,14 @@ import (
"github.com/ccfos/nightingale/v6/alert/aconf"
"github.com/ccfos/nightingale/v6/memsto"
"github.com/ccfos/nightingale/v6/models"
"github.com/ccfos/nightingale/v6/pkg/ctx"
"github.com/toolkits/pkg/logger"
"gopkg.in/gomail.v2"
)
var mailch chan *gomail.Message
var mailch chan *EmailContext
type EmailSender struct {
subjectTpl *template.Template
@@ -23,6 +24,11 @@ type EmailSender struct {
smtp aconf.SMTPConfig
}
type EmailContext struct {
events []*models.AlertCurEvent
mail *gomail.Message
}
func (es *EmailSender) Send(ctx MessageContext) {
if len(ctx.Users) == 0 || len(ctx.Events) == 0 {
return
@@ -36,7 +42,7 @@ func (es *EmailSender) Send(ctx MessageContext) {
subject = ctx.Events[0].RuleName
}
content := BuildTplMessage(models.Email, es.contentTpl, ctx.Events)
es.WriteEmail(subject, content, tos)
es.WriteEmail(subject, content, tos, ctx.Events)
ctx.Stats.AlertNotifyTotal.WithLabelValues(models.Email).Add(float64(len(tos)))
}
@@ -73,7 +79,7 @@ func SendEmail(subject, content string, tos []string, stmp aconf.SMTPConfig) err
return nil
}
func (es *EmailSender) WriteEmail(subject, content string, tos []string) {
func (es *EmailSender) WriteEmail(subject, content string, tos []string, events []*models.AlertCurEvent) {
m := gomail.NewMessage()
m.SetHeader("From", es.smtp.From)
@@ -81,7 +87,7 @@ func (es *EmailSender) WriteEmail(subject, content string, tos []string) {
m.SetHeader("Subject", subject)
m.SetBody("text/html", content)
mailch <- m
mailch <- &EmailContext{events, m}
}
func dialSmtp(d *gomail.Dialer) gomail.SendCloser {
@@ -104,22 +110,22 @@ func dialSmtp(d *gomail.Dialer) gomail.SendCloser {
var mailQuit = make(chan struct{})
func RestartEmailSender(smtp aconf.SMTPConfig) {
func RestartEmailSender(ctx *ctx.Context, smtp aconf.SMTPConfig) {
// Notify internal start exit
mailQuit <- struct{}{}
startEmailSender(smtp)
startEmailSender(ctx, smtp)
}
var smtpConfig aconf.SMTPConfig
func InitEmailSender(ncc *memsto.NotifyConfigCacheType) {
mailch = make(chan *gomail.Message, 100000)
go updateSmtp(ncc)
func InitEmailSender(ctx *ctx.Context, ncc *memsto.NotifyConfigCacheType) {
mailch = make(chan *EmailContext, 100000)
go updateSmtp(ctx, ncc)
smtpConfig = ncc.GetSMTP()
startEmailSender(smtpConfig)
go startEmailSender(ctx, smtpConfig)
}
func updateSmtp(ncc *memsto.NotifyConfigCacheType) {
func updateSmtp(ctx *ctx.Context, ncc *memsto.NotifyConfigCacheType) {
for {
time.Sleep(1 * time.Minute)
smtp := ncc.GetSMTP()
@@ -127,15 +133,16 @@ func updateSmtp(ncc *memsto.NotifyConfigCacheType) {
smtpConfig.Pass != smtp.Pass || smtpConfig.User != smtp.User || smtpConfig.Port != smtp.Port ||
smtpConfig.InsecureSkipVerify != smtp.InsecureSkipVerify { //diff
smtpConfig = smtp
RestartEmailSender(smtp)
RestartEmailSender(ctx, smtp)
}
}
}
func startEmailSender(smtp aconf.SMTPConfig) {
func startEmailSender(ctx *ctx.Context, smtp aconf.SMTPConfig) {
conf := smtp
if conf.Host == "" || conf.Port == 0 {
logger.Warning("SMTP configurations invalid")
<-mailQuit
return
}
logger.Infof("start email sender... conf.Host:%+v,conf.Port:%+v", conf.Host, conf.Port)
@@ -167,7 +174,8 @@ func startEmailSender(smtp aconf.SMTPConfig) {
}
open = true
}
if err := gomail.Send(s, m); err != nil {
var err error
if err = gomail.Send(s, m.mail); err != nil {
logger.Errorf("email_sender: failed to send: %s", err)
// close and retry
@@ -184,11 +192,20 @@ func startEmailSender(smtp aconf.SMTPConfig) {
}
open = true
if err := gomail.Send(s, m); err != nil {
if err = gomail.Send(s, m.mail); err != nil {
logger.Errorf("email_sender: failed to retry send: %s", err)
}
} else {
logger.Infof("email_sender: result=succ subject=%v to=%v", m.GetHeader("Subject"), m.GetHeader("To"))
logger.Infof("email_sender: result=succ subject=%v to=%v",
m.mail.GetHeader("Subject"), m.mail.GetHeader("To"))
}
for _, to := range m.mail.GetHeader("To") {
msg := ""
if err == nil {
msg = "ok"
}
NotifyRecord(ctx, m.events, models.Email, to, msg, err)
}
size++

View File

@@ -54,17 +54,16 @@ func (fs *FeishuSender) CallBack(ctx CallBackContext) {
},
}
doSend(ctx.CallBackURL, body, models.Feishu, ctx.Stats)
ctx.Stats.AlertNotifyTotal.WithLabelValues("rule_callback").Inc()
doSendAndRecord(ctx.Ctx, ctx.CallBackURL, ctx.CallBackURL, body, "callback", ctx.Stats, ctx.Events)
}
func (fs *FeishuSender) Send(ctx MessageContext) {
if len(ctx.Users) == 0 || len(ctx.Events) == 0 {
return
}
urls, ats := fs.extract(ctx.Users)
urls, ats, tokens := fs.extract(ctx.Users)
message := BuildTplMessage(models.Feishu, fs.tpl, ctx.Events)
for _, url := range urls {
for i, url := range urls {
body := feishu{
Msgtype: "text",
Content: feishuContent{
@@ -77,13 +76,14 @@ func (fs *FeishuSender) Send(ctx MessageContext) {
IsAtAll: false,
}
}
doSend(url, body, models.Feishu, ctx.Stats)
doSendAndRecord(ctx.Ctx, url, tokens[i], body, models.Feishu, ctx.Stats, ctx.Events)
}
}
func (fs *FeishuSender) extract(users []*models.User) ([]string, []string) {
func (fs *FeishuSender) extract(users []*models.User) ([]string, []string, []string) {
urls := make([]string, 0, len(users))
ats := make([]string, 0, len(users))
tokens := make([]string, 0, len(users))
for _, user := range users {
if user.Phone != "" {
@@ -95,7 +95,8 @@ func (fs *FeishuSender) extract(users []*models.User) ([]string, []string) {
url = "https://open.feishu.cn/open-apis/bot/v2/hook/" + token
}
urls = append(urls, url)
tokens = append(tokens, token)
}
}
return urls, ats
return urls, ats, tokens
}

View File

@@ -56,8 +56,8 @@ const (
Triggered = "triggered"
)
var (
body = feishuCard{
func createFeishuCardBody() feishuCard {
return feishuCard{
feishu: feishu{Msgtype: "interactive"},
Card: Cards{
Config: Conf{
@@ -90,7 +90,7 @@ var (
},
},
}
)
}
func (fs *FeishuCardSender) CallBack(ctx CallBackContext) {
if len(ctx.Events) == 0 || len(ctx.CallBackURL) == 0 {
@@ -121,6 +121,7 @@ func (fs *FeishuCardSender) CallBack(ctx CallBackContext) {
}
SendTitle := fmt.Sprintf("🔔 %s", ctx.Events[0].RuleName)
body := createFeishuCardBody()
body.Card.Header.Title.Content = SendTitle
body.Card.Header.Template = color
body.Card.Elements[0].Text.Content = message
@@ -134,14 +135,14 @@ func (fs *FeishuCardSender) CallBack(ctx CallBackContext) {
}
parsedURL.RawQuery = ""
doSend(parsedURL.String(), body, models.FeishuCard, ctx.Stats)
doSendAndRecord(ctx.Ctx, parsedURL.String(), parsedURL.String(), body, "callback", ctx.Stats, ctx.Events)
}
func (fs *FeishuCardSender) Send(ctx MessageContext) {
if len(ctx.Users) == 0 || len(ctx.Events) == 0 {
return
}
urls, _ := fs.extract(ctx.Users)
urls, tokens := fs.extract(ctx.Users)
message := BuildTplMessage(models.FeishuCard, fs.tpl, ctx.Events)
color := "red"
lowerUnicode := strings.ToLower(message)
@@ -152,18 +153,19 @@ func (fs *FeishuCardSender) Send(ctx MessageContext) {
}
SendTitle := fmt.Sprintf("🔔 %s", ctx.Events[0].RuleName)
body := createFeishuCardBody()
body.Card.Header.Title.Content = SendTitle
body.Card.Header.Template = color
body.Card.Elements[0].Text.Content = message
body.Card.Elements[2].Elements[0].Content = SendTitle
for _, url := range urls {
doSend(url, body, models.FeishuCard, ctx.Stats)
for i, url := range urls {
doSendAndRecord(ctx.Ctx, url, tokens[i], body, models.FeishuCard, ctx.Stats, ctx.Events)
}
}
func (fs *FeishuCardSender) extract(users []*models.User) ([]string, []string) {
urls := make([]string, 0, len(users))
ats := make([]string, 0)
tokens := make([]string, 0, len(users))
for i := range users {
if token, has := users[i].ExtractToken(models.FeishuCard); has {
url := token
@@ -171,7 +173,8 @@ func (fs *FeishuCardSender) extract(users []*models.User) ([]string, []string) {
url = "https://open.feishu.cn/open-apis/bot/v2/hook/" + strings.TrimSpace(token)
}
urls = append(urls, url)
tokens = append(tokens, token)
}
}
return urls, ats
return urls, tokens
}

View File

@@ -77,15 +77,20 @@ func (c *IbexCallBacker) handleIbex(ctx *ctx.Context, url string, event *models.
return
}
tpl := c.taskTplCache.Get(id)
CallIbex(ctx, id, host, c.taskTplCache, c.targetCache, c.userCache, event)
}
func CallIbex(ctx *ctx.Context, id int64, host string,
taskTplCache *memsto.TaskTplCache, targetCache *memsto.TargetCacheType,
userCache *memsto.UserCacheType, event *models.AlertCurEvent) {
tpl := taskTplCache.Get(id)
if tpl == nil {
logger.Errorf("event_callback_ibex: no such tpl(%d)", id)
return
}
// check perm
// tpl.GroupId - host - account 三元组校验权限
can, err := canDoIbex(tpl.UpdateBy, tpl, host, c.targetCache, c.userCache)
can, err := canDoIbex(tpl.UpdateBy, tpl, host, targetCache, userCache)
if err != nil {
logger.Errorf("event_callback_ibex: check perm fail: %v", err)
return
@@ -103,7 +108,7 @@ func (c *IbexCallBacker) handleIbex(ctx *ctx.Context, url string, event *models.
continue
}
arr := strings.Split(pair, "=")
arr := strings.SplitN(pair, "=", 2)
if len(arr) != 2 {
continue
}
@@ -113,6 +118,7 @@ func (c *IbexCallBacker) handleIbex(ctx *ctx.Context, url string, event *models.
// 附加告警级别 告警触发值标签
tagsMap["alert_severity"] = strconv.Itoa(event.Severity)
tagsMap["alert_trigger_value"] = event.TriggerValue
tagsMap["is_recovered"] = strconv.FormatBool(event.IsRecovered)
tags, err := json.Marshal(tagsMap)
if err != nil {
@@ -176,10 +182,15 @@ func canDoIbex(username string, tpl *models.TaskTpl, host string, targetCache *m
return false, nil
}
return target.GroupId == tpl.GroupId, nil
return target.MatchGroupId(tpl.GroupId), nil
}
func TaskAdd(f models.TaskForm, authUser string, isCenter bool) (int64, error) {
if storage.Cache == nil {
logger.Warning("event_callback_ibex: redis cache is nil")
return 0, fmt.Errorf("redis cache is nil")
}
hosts := cleanHosts(f.Hosts)
if len(hosts) == 0 {
return 0, fmt.Errorf("arg(hosts) empty")

View File

@@ -27,29 +27,29 @@ func (lk *LarkSender) CallBack(ctx CallBackContext) {
},
}
doSend(ctx.CallBackURL, body, models.Lark, ctx.Stats)
ctx.Stats.AlertNotifyTotal.WithLabelValues("rule_callback").Inc()
doSendAndRecord(ctx.Ctx, ctx.CallBackURL, ctx.CallBackURL, body, "callback", ctx.Stats, ctx.Events)
}
func (lk *LarkSender) Send(ctx MessageContext) {
if len(ctx.Users) == 0 || len(ctx.Events) == 0 {
return
}
urls := lk.extract(ctx.Users)
urls, tokens := lk.extract(ctx.Users)
message := BuildTplMessage(models.Lark, lk.tpl, ctx.Events)
for _, url := range urls {
for i, url := range urls {
body := feishu{
Msgtype: "text",
Content: feishuContent{
Text: message,
},
}
doSend(url, body, models.Lark, ctx.Stats)
doSendAndRecord(ctx.Ctx, url, tokens[i], body, models.Lark, ctx.Stats, ctx.Events)
}
}
func (lk *LarkSender) extract(users []*models.User) []string {
func (lk *LarkSender) extract(users []*models.User) ([]string, []string) {
urls := make([]string, 0, len(users))
tokens := make([]string, 0, len(users))
for _, user := range users {
if token, has := user.ExtractToken(models.Lark); has {
@@ -58,7 +58,8 @@ func (lk *LarkSender) extract(users []*models.User) []string {
url = "https://open.larksuite.com/open-apis/bot/v2/hook/" + token
}
urls = append(urls, url)
tokens = append(tokens, token)
}
}
return urls
return urls, tokens
}

View File

@@ -42,6 +42,7 @@ func (fs *LarkCardSender) CallBack(ctx CallBackContext) {
}
SendTitle := fmt.Sprintf("🔔 %s", ctx.Events[0].RuleName)
body := createFeishuCardBody()
body.Card.Header.Title.Content = SendTitle
body.Card.Header.Template = color
body.Card.Elements[0].Text.Content = message
@@ -55,14 +56,14 @@ func (fs *LarkCardSender) CallBack(ctx CallBackContext) {
}
parsedURL.RawQuery = ""
doSend(parsedURL.String(), body, models.LarkCard, ctx.Stats)
doSendAndRecord(ctx.Ctx, ctx.CallBackURL, ctx.CallBackURL, body, "callback", ctx.Stats, ctx.Events)
}
func (fs *LarkCardSender) Send(ctx MessageContext) {
if len(ctx.Users) == 0 || len(ctx.Events) == 0 {
return
}
urls, _ := fs.extract(ctx.Users)
urls, tokens := fs.extract(ctx.Users)
message := BuildTplMessage(models.LarkCard, fs.tpl, ctx.Events)
color := "red"
lowerUnicode := strings.ToLower(message)
@@ -73,18 +74,19 @@ func (fs *LarkCardSender) Send(ctx MessageContext) {
}
SendTitle := fmt.Sprintf("🔔 %s", ctx.Events[0].RuleName)
body := createFeishuCardBody()
body.Card.Header.Title.Content = SendTitle
body.Card.Header.Template = color
body.Card.Elements[0].Text.Content = message
body.Card.Elements[2].Elements[0].Content = SendTitle
for _, url := range urls {
doSend(url, body, models.LarkCard, ctx.Stats)
for i, url := range urls {
doSendAndRecord(ctx.Ctx, url, tokens[i], body, models.LarkCard, ctx.Stats, ctx.Events)
}
}
func (fs *LarkCardSender) extract(users []*models.User) ([]string, []string) {
urls := make([]string, 0, len(users))
ats := make([]string, 0)
tokens := make([]string, 0)
for i := range users {
if token, has := users[i].ExtractToken(models.Lark); has {
url := token
@@ -92,7 +94,8 @@ func (fs *LarkCardSender) extract(users []*models.User) ([]string, []string) {
url = "https://open.larksuite.com/open-apis/bot/v2/hook/" + strings.TrimSpace(token)
}
urls = append(urls, url)
tokens = append(tokens, token)
}
}
return urls, ats
return urls, tokens
}

View File

@@ -7,6 +7,7 @@ import (
"github.com/ccfos/nightingale/v6/alert/astats"
"github.com/ccfos/nightingale/v6/models"
"github.com/ccfos/nightingale/v6/pkg/ctx"
"github.com/toolkits/pkg/logger"
)
@@ -38,11 +39,11 @@ func (ms *MmSender) Send(ctx MessageContext) {
}
message := BuildTplMessage(models.Mm, ms.tpl, ctx.Events)
SendMM(MatterMostMessage{
SendMM(ctx.Ctx, MatterMostMessage{
Text: message,
Tokens: urls,
Stats: ctx.Stats,
})
}, ctx.Events, models.Mm)
}
func (ms *MmSender) CallBack(ctx CallBackContext) {
@@ -51,13 +52,11 @@ func (ms *MmSender) CallBack(ctx CallBackContext) {
}
message := BuildTplMessage(models.Mm, ms.tpl, ctx.Events)
SendMM(MatterMostMessage{
SendMM(ctx.Ctx, MatterMostMessage{
Text: message,
Tokens: []string{ctx.CallBackURL},
Stats: ctx.Stats,
})
ctx.Stats.AlertNotifyTotal.WithLabelValues("rule_callback").Inc()
}, ctx.Events, "callback")
}
func (ms *MmSender) extract(users []*models.User) []string {
@@ -70,11 +69,12 @@ func (ms *MmSender) extract(users []*models.User) []string {
return tokens
}
func SendMM(message MatterMostMessage) {
func SendMM(ctx *ctx.Context, message MatterMostMessage, events []*models.AlertCurEvent, channel string) {
for i := 0; i < len(message.Tokens); i++ {
u, err := url.Parse(message.Tokens[i])
if err != nil {
logger.Errorf("mm_sender: failed to parse error=%v", err)
NotifyRecord(ctx, events, channel, message.Tokens[i], "", err)
continue
}
@@ -103,7 +103,7 @@ func SendMM(message MatterMostMessage) {
Username: username,
Text: txt + message.Text,
}
doSend(ur, body, models.Mm, message.Stats)
doSendAndRecord(ctx, ur, message.Tokens[i], body, channel, message.Stats, events)
}
}
}

View File

@@ -2,26 +2,30 @@ package sender
import (
"bytes"
"fmt"
"os"
"os/exec"
"time"
"github.com/ccfos/nightingale/v6/alert/astats"
"github.com/ccfos/nightingale/v6/models"
"github.com/ccfos/nightingale/v6/pkg/ctx"
"github.com/toolkits/pkg/file"
"github.com/toolkits/pkg/logger"
"github.com/toolkits/pkg/sys"
)
func MayPluginNotify(noticeBytes []byte, notifyScript models.NotifyScript, stats *astats.Stats) {
func MayPluginNotify(ctx *ctx.Context, noticeBytes []byte, notifyScript models.NotifyScript,
stats *astats.Stats, event *models.AlertCurEvent) {
if len(noticeBytes) == 0 {
return
}
alertingCallScript(noticeBytes, notifyScript, stats)
alertingCallScript(ctx, noticeBytes, notifyScript, stats, event)
}
func alertingCallScript(stdinBytes []byte, notifyScript models.NotifyScript, stats *astats.Stats) {
func alertingCallScript(ctx *ctx.Context, stdinBytes []byte, notifyScript models.NotifyScript,
stats *astats.Stats, event *models.AlertCurEvent) {
// not enable or no notify.py? do nothing
config := notifyScript
if !config.Enable || config.Content == "" {
@@ -81,6 +85,7 @@ func alertingCallScript(stdinBytes []byte, notifyScript models.NotifyScript, sta
}
err, isTimeout := sys.WrapTimeout(cmd, time.Duration(config.Timeout)*time.Second)
NotifyRecord(ctx, []*models.AlertCurEvent{event}, channel, cmd.String(), "", buildErr(err, isTimeout))
if isTimeout {
if err == nil {
@@ -102,3 +107,11 @@ func alertingCallScript(stdinBytes []byte, notifyScript models.NotifyScript, sta
logger.Infof("event_script_notify_ok: exec %s output: %s", fpath, buf.String())
}
func buildErr(err error, isTimeout bool) error {
if err == nil && !isTimeout {
return nil
} else {
return fmt.Errorf("is_timeout: %v, err: %v", isTimeout, err)
}
}

View File

@@ -8,6 +8,7 @@ import (
"github.com/ccfos/nightingale/v6/alert/astats"
"github.com/ccfos/nightingale/v6/memsto"
"github.com/ccfos/nightingale/v6/models"
"github.com/ccfos/nightingale/v6/pkg/ctx"
)
type (
@@ -22,6 +23,7 @@ type (
Rule *models.AlertRule
Events []*models.AlertCurEvent
Stats *astats.Stats
Ctx *ctx.Context
}
)
@@ -49,13 +51,15 @@ func NewSender(key string, tpls map[string]*template.Template, smtp ...aconf.SMT
return nil
}
func BuildMessageContext(rule *models.AlertRule, events []*models.AlertCurEvent, uids []int64, userCache *memsto.UserCacheType, stats *astats.Stats) MessageContext {
func BuildMessageContext(ctx *ctx.Context, rule *models.AlertRule, events []*models.AlertCurEvent,
uids []int64, userCache *memsto.UserCacheType, stats *astats.Stats) MessageContext {
users := userCache.GetByUserIds(uids)
return MessageContext{
Rule: rule,
Events: events,
Users: users,
Stats: stats,
Ctx: ctx,
}
}

View File

@@ -1,11 +1,13 @@
package sender
import (
"errors"
"html/template"
"strings"
"github.com/ccfos/nightingale/v6/alert/astats"
"github.com/ccfos/nightingale/v6/models"
"github.com/ccfos/nightingale/v6/pkg/ctx"
"github.com/toolkits/pkg/logger"
)
@@ -35,13 +37,11 @@ func (ts *TelegramSender) CallBack(ctx CallBackContext) {
}
message := BuildTplMessage(models.Telegram, ts.tpl, ctx.Events)
SendTelegram(TelegramMessage{
SendTelegram(ctx.Ctx, TelegramMessage{
Text: message,
Tokens: []string{ctx.CallBackURL},
Stats: ctx.Stats,
})
ctx.Stats.AlertNotifyTotal.WithLabelValues("rule_callback").Inc()
}, ctx.Events, "callback")
}
func (ts *TelegramSender) Send(ctx MessageContext) {
@@ -51,11 +51,11 @@ func (ts *TelegramSender) Send(ctx MessageContext) {
tokens := ts.extract(ctx.Users)
message := BuildTplMessage(models.Telegram, ts.tpl, ctx.Events)
SendTelegram(TelegramMessage{
SendTelegram(ctx.Ctx, TelegramMessage{
Text: message,
Tokens: tokens,
Stats: ctx.Stats,
})
}, ctx.Events, models.Telegram)
}
func (ts *TelegramSender) extract(users []*models.User) []string {
@@ -68,10 +68,11 @@ func (ts *TelegramSender) extract(users []*models.User) []string {
return tokens
}
func SendTelegram(message TelegramMessage) {
func SendTelegram(ctx *ctx.Context, message TelegramMessage, events []*models.AlertCurEvent, channel string) {
for i := 0; i < len(message.Tokens); i++ {
if !strings.Contains(message.Tokens[i], "/") && !strings.HasPrefix(message.Tokens[i], "https://") {
logger.Errorf("telegram_sender: result=fail invalid token=%s", message.Tokens[i])
NotifyRecord(ctx, events, channel, message.Tokens[i], "", errors.New("invalid token"))
continue
}
var url string
@@ -92,6 +93,6 @@ func SendTelegram(message TelegramMessage) {
Text: message.Text,
}
doSend(url, body, models.Telegram, message.Stats)
doSendAndRecord(ctx, url, message.Tokens[i], body, channel, message.Stats, events)
}
}

View File

@@ -4,33 +4,41 @@ import (
"bytes"
"crypto/tls"
"encoding/json"
"fmt"
"io"
"net/http"
"sync"
"time"
"github.com/ccfos/nightingale/v6/alert/astats"
"github.com/ccfos/nightingale/v6/models"
"github.com/ccfos/nightingale/v6/pkg/ctx"
"github.com/toolkits/pkg/logger"
)
func sendWebhook(webhook *models.Webhook, event *models.AlertCurEvent, stats *astats.Stats) bool {
func sendWebhook(webhook *models.Webhook, event interface{}, stats *astats.Stats) (bool, string, error) {
channel := "webhook"
if webhook.Type == models.RuleCallback {
channel = "callback"
}
conf := webhook
if conf.Url == "" || !conf.Enable {
return false
return false, "", nil
}
bs, err := json.Marshal(event)
if err != nil {
logger.Errorf("alertingWebhook failed to marshal event:%+v err:%v", event, err)
return false
logger.Errorf("%s alertingWebhook failed to marshal event:%+v err:%v", channel, event, err)
return false, "", err
}
bf := bytes.NewBuffer(bs)
req, err := http.NewRequest("POST", conf.Url, bf)
if err != nil {
logger.Warningf("alertingWebhook failed to new reques event:%+v err:%v", event, err)
return true
logger.Warningf("%s alertingWebhook failed to new reques event:%s err:%v", channel, string(bs), err)
return true, "", err
}
req.Header.Set("Content-Type", "application/json")
@@ -51,42 +59,48 @@ func sendWebhook(webhook *models.Webhook, event *models.AlertCurEvent, stats *as
if webhook != nil {
insecureSkipVerify = webhook.SkipVerify
}
client := http.Client{
Timeout: time.Duration(conf.Timeout) * time.Second,
Transport: &http.Transport{
TLSClientConfig: &tls.Config{InsecureSkipVerify: insecureSkipVerify},
},
if conf.Client == nil {
logger.Warningf("event_%s, event:%s, url: [%s], error: [%s]", channel, string(bs), conf.Url, "client is nil")
conf.Client = &http.Client{
Timeout: time.Duration(conf.Timeout) * time.Second,
Transport: &http.Transport{
TLSClientConfig: &tls.Config{InsecureSkipVerify: insecureSkipVerify},
},
}
}
stats.AlertNotifyTotal.WithLabelValues("webhook").Inc()
stats.AlertNotifyTotal.WithLabelValues(channel).Inc()
var resp *http.Response
resp, err = client.Do(req)
var body []byte
resp, err = conf.Client.Do(req)
if err != nil {
stats.AlertNotifyErrorTotal.WithLabelValues("webhook").Inc()
logger.Errorf("event_webhook_fail, ruleId: [%d], eventId: [%d], event:%+v, url: [%s], error: [%s]", event.RuleId, event.Id, event, conf.Url, err)
return true
stats.AlertNotifyErrorTotal.WithLabelValues(channel).Inc()
logger.Errorf("event_%s_fail, event:%s, url: [%s], error: [%s]", channel, string(bs), conf.Url, err)
return true, "", err
}
var body []byte
if resp.Body != nil {
defer resp.Body.Close()
body, _ = io.ReadAll(resp.Body)
}
if resp.StatusCode == 429 {
logger.Errorf("event_webhook_fail, url: %s, response code: %d, body: %s event:%+v", conf.Url, resp.StatusCode, string(body), event)
return true
logger.Errorf("event_%s_fail, url: %s, response code: %d, body: %s event:%s", channel, conf.Url, resp.StatusCode, string(body), string(bs))
return true, string(body), fmt.Errorf("status code is 429")
}
logger.Debugf("event_webhook_succ, url: %s, response code: %d, body: %s event:%+v", conf.Url, resp.StatusCode, string(body), event)
return false
logger.Debugf("event_%s_succ, url: %s, response code: %d, body: %s event:%s", channel, conf.Url, resp.StatusCode, string(body), string(bs))
return false, string(body), nil
}
func SendWebhooks(webhooks []*models.Webhook, event *models.AlertCurEvent, stats *astats.Stats) {
func SingleSendWebhooks(ctx *ctx.Context, webhooks map[string]*models.Webhook, event *models.AlertCurEvent, stats *astats.Stats) {
for _, conf := range webhooks {
retryCount := 0
for retryCount < 3 {
needRetry := sendWebhook(conf, event, stats)
needRetry, res, err := sendWebhook(conf, event, stats)
NotifyRecord(ctx, []*models.AlertCurEvent{event}, "webhook", conf.Url, res, err)
if !needRetry {
break
}
@@ -95,3 +109,74 @@ func SendWebhooks(webhooks []*models.Webhook, event *models.AlertCurEvent, stats
}
}
}
func BatchSendWebhooks(ctx *ctx.Context, webhooks map[string]*models.Webhook, event *models.AlertCurEvent, stats *astats.Stats) {
for _, conf := range webhooks {
logger.Infof("push event:%+v to queue:%v", event, conf)
PushEvent(ctx, conf, event, stats)
}
}
var EventQueue = make(map[string]*WebhookQueue)
var CallbackEventQueue = make(map[string]*WebhookQueue)
var CallbackEventQueueLock sync.RWMutex
var EventQueueLock sync.RWMutex
const QueueMaxSize = 100000
type WebhookQueue struct {
eventQueue *SafeEventQueue
closeCh chan struct{}
}
func PushEvent(ctx *ctx.Context, webhook *models.Webhook, event *models.AlertCurEvent, stats *astats.Stats) {
EventQueueLock.RLock()
queue := EventQueue[webhook.Url]
EventQueueLock.RUnlock()
if queue == nil {
queue = &WebhookQueue{
eventQueue: NewSafeEventQueue(QueueMaxSize),
closeCh: make(chan struct{}),
}
EventQueueLock.Lock()
EventQueue[webhook.Url] = queue
EventQueueLock.Unlock()
StartConsumer(ctx, queue, webhook.Batch, webhook, stats)
}
succ := queue.eventQueue.Push(event)
if !succ {
stats.AlertNotifyErrorTotal.WithLabelValues("push_event_queue").Inc()
logger.Warningf("Write channel(%s) full, current channel size: %d event:%v", webhook.Url, queue.eventQueue.Len(), event)
}
}
func StartConsumer(ctx *ctx.Context, queue *WebhookQueue, popSize int, webhook *models.Webhook, stats *astats.Stats) {
for {
select {
case <-queue.closeCh:
logger.Infof("event queue:%v closed", queue)
return
default:
events := queue.eventQueue.PopN(popSize)
if len(events) == 0 {
time.Sleep(time.Millisecond * 400)
continue
}
retryCount := 0
for retryCount < webhook.RetryCount {
needRetry, res, err := sendWebhook(webhook, events, stats)
go NotifyRecord(ctx, events, "webhook", webhook.Url, res, err)
if !needRetry {
break
}
retryCount++
time.Sleep(time.Second * time.Duration(webhook.RetryInterval) * time.Duration(retryCount))
}
}
}
}

View File

@@ -0,0 +1,109 @@
package sender
import (
"container/list"
"sync"
"github.com/ccfos/nightingale/v6/models"
)
type SafeEventQueue struct {
lock sync.RWMutex
maxSize int
queueHigh *list.List
queueMiddle *list.List
queueLow *list.List
}
const (
High = 1
Middle = 2
Low = 3
)
func NewSafeEventQueue(maxSize int) *SafeEventQueue {
return &SafeEventQueue{
maxSize: maxSize,
lock: sync.RWMutex{},
queueHigh: list.New(),
queueMiddle: list.New(),
queueLow: list.New(),
}
}
func (spq *SafeEventQueue) Len() int {
spq.lock.RLock()
defer spq.lock.RUnlock()
return spq.queueHigh.Len() + spq.queueMiddle.Len() + spq.queueLow.Len()
}
// len 无锁读取长度,不要在本文件外调用
func (spq *SafeEventQueue) len() int {
return spq.queueHigh.Len() + spq.queueMiddle.Len() + spq.queueLow.Len()
}
func (spq *SafeEventQueue) Push(event *models.AlertCurEvent) bool {
spq.lock.Lock()
defer spq.lock.Unlock()
for spq.len() > spq.maxSize {
return false
}
switch event.Severity {
case High:
spq.queueHigh.PushBack(event)
case Middle:
spq.queueMiddle.PushBack(event)
case Low:
spq.queueLow.PushBack(event)
default:
return false
}
return true
}
// pop 无锁弹出事件,不要在本文件外调用
func (spq *SafeEventQueue) pop() *models.AlertCurEvent {
if spq.len() == 0 {
return nil
}
var elem interface{}
if spq.queueHigh.Len() > 0 {
elem = spq.queueHigh.Remove(spq.queueHigh.Front())
} else if spq.queueMiddle.Len() > 0 {
elem = spq.queueMiddle.Remove(spq.queueMiddle.Front())
} else {
elem = spq.queueLow.Remove(spq.queueLow.Front())
}
event, ok := elem.(*models.AlertCurEvent)
if !ok {
return nil
}
return event
}
func (spq *SafeEventQueue) Pop() *models.AlertCurEvent {
spq.lock.Lock()
defer spq.lock.Unlock()
return spq.pop()
}
func (spq *SafeEventQueue) PopN(n int) []*models.AlertCurEvent {
spq.lock.Lock()
defer spq.lock.Unlock()
events := make([]*models.AlertCurEvent, 0, n)
count := 0
for count < n && spq.len() > 0 {
event := spq.pop()
if event != nil {
events = append(events, event)
}
count++
}
return events
}

View File

@@ -0,0 +1,157 @@
package sender
import (
"sync"
"testing"
"time"
"github.com/ccfos/nightingale/v6/models"
"github.com/stretchr/testify/assert"
)
func TestSafePriorityQueue_ConcurrentPushPop(t *testing.T) {
spq := NewSafeEventQueue(100000)
var wg sync.WaitGroup
numGoroutines := 100
numEvents := 1000
// 并发 Push
wg.Add(numGoroutines)
for i := 0; i < numGoroutines; i++ {
go func(goroutineID int) {
defer wg.Done()
for j := 0; j < numEvents; j++ {
event := &models.AlertCurEvent{
Severity: goroutineID%3 + 1,
TriggerTime: time.Now().UnixNano(),
}
spq.Push(event)
}
}(i)
}
wg.Wait()
// 检查队列长度是否正确
expectedLen := numGoroutines * numEvents
assert.Equal(t, expectedLen, spq.Len(), "Queue length mismatch after concurrent pushes")
// 并发 Pop
wg.Add(numGoroutines)
for i := 0; i < numGoroutines; i++ {
go func() {
defer wg.Done()
for {
event := spq.Pop()
if event == nil {
return
}
}
}()
}
wg.Wait()
// 最终队列应该为空
assert.Equal(t, 0, spq.Len(), "Queue should be empty after concurrent pops")
}
func TestSafePriorityQueue_ConcurrentPopMax(t *testing.T) {
spq := NewSafeEventQueue(100000)
// 添加初始数据
for i := 0; i < 1000; i++ {
spq.Push(&models.AlertCurEvent{
Severity: i%3 + 1,
TriggerTime: time.Now().UnixNano(),
})
}
var wg sync.WaitGroup
numGoroutines := 10
popMax := 100
// 并发 PopN
wg.Add(numGoroutines)
for i := 0; i < numGoroutines; i++ {
go func() {
defer wg.Done()
events := spq.PopN(popMax)
assert.LessOrEqual(t, len(events), popMax, "PopN exceeded maximum")
}()
}
wg.Wait()
// 检查队列长度是否正确
expectedRemaining := 1000 - (numGoroutines * popMax)
if expectedRemaining < 0 {
expectedRemaining = 0
}
assert.Equal(t, expectedRemaining, spq.Len(), "Queue length mismatch after concurrent PopN")
}
func TestSafePriorityQueue_ConcurrentPushPopWithDifferentSeverities(t *testing.T) {
spq := NewSafeEventQueue(100000)
var wg sync.WaitGroup
numGoroutines := 50
numEvents := 500
// 并发 Push 不同优先级的事件
wg.Add(numGoroutines)
for i := 0; i < numGoroutines; i++ {
go func(goroutineID int) {
defer wg.Done()
for j := 0; j < numEvents; j++ {
event := &models.AlertCurEvent{
Severity: goroutineID%3 + 1, // 模拟不同的 Severity
TriggerTime: time.Now().UnixNano(),
}
spq.Push(event)
}
}(i)
}
wg.Wait()
// 检查队列长度是否正确
expectedLen := numGoroutines * numEvents
assert.Equal(t, expectedLen, spq.Len(), "Queue length mismatch after concurrent pushes")
// 检查事件的顺序是否按照优先级排列
var lastEvent *models.AlertCurEvent
for spq.Len() > 0 {
event := spq.Pop()
if lastEvent != nil {
assert.LessOrEqual(t, lastEvent.Severity, event.Severity, "Events are not in correct priority order")
}
lastEvent = event
}
}
func TestSafePriorityQueue_ExceedMaxSize(t *testing.T) {
spq := NewSafeEventQueue(5)
// 插入超过最大容量的事件
for i := 0; i < 10; i++ {
spq.Push(&models.AlertCurEvent{
Severity: i % 3,
TriggerTime: int64(i),
})
}
// 验证队列的长度是否不超过 maxSize
assert.LessOrEqual(t, spq.Len(), spq.maxSize)
// 验证队列中剩余事件的内容
expectedEvents := 5
if spq.Len() < 5 {
expectedEvents = spq.Len()
}
// 检查最后存入的事件是否是按优先级排序
for i := 0; i < expectedEvents; i++ {
event := spq.Pop()
if event != nil {
assert.LessOrEqual(t, event.Severity, 2)
}
}
}

View File

@@ -0,0 +1,111 @@
package sender
import (
"container/list"
"sync"
"github.com/ccfos/nightingale/v6/models"
)
type SafeList struct {
sync.RWMutex
L *list.List
}
func NewSafeList() *SafeList {
return &SafeList{L: list.New()}
}
func (sl *SafeList) PushFront(v interface{}) *list.Element {
sl.Lock()
e := sl.L.PushFront(v)
sl.Unlock()
return e
}
func (sl *SafeList) PushFrontBatch(vs []interface{}) {
sl.Lock()
for _, item := range vs {
sl.L.PushFront(item)
}
sl.Unlock()
}
func (sl *SafeList) PopBack(max int) []*models.AlertCurEvent {
sl.Lock()
count := sl.L.Len()
if count == 0 {
sl.Unlock()
return []*models.AlertCurEvent{}
}
if count > max {
count = max
}
items := make([]*models.AlertCurEvent, 0, count)
for i := 0; i < count; i++ {
item := sl.L.Remove(sl.L.Back())
sample, ok := item.(*models.AlertCurEvent)
if ok {
items = append(items, sample)
}
}
sl.Unlock()
return items
}
func (sl *SafeList) RemoveAll() {
sl.Lock()
sl.L.Init()
sl.Unlock()
}
func (sl *SafeList) Len() int {
sl.RLock()
size := sl.L.Len()
sl.RUnlock()
return size
}
// SafeList with Limited Size
type SafeListLimited struct {
maxSize int
SL *SafeList
}
func NewSafeListLimited(maxSize int) *SafeListLimited {
return &SafeListLimited{SL: NewSafeList(), maxSize: maxSize}
}
func (sll *SafeListLimited) PopBack(max int) []*models.AlertCurEvent {
return sll.SL.PopBack(max)
}
func (sll *SafeListLimited) PushFront(v interface{}) bool {
if sll.SL.Len() >= sll.maxSize {
return false
}
sll.SL.PushFront(v)
return true
}
func (sll *SafeListLimited) PushFrontBatch(vs []interface{}) bool {
if sll.SL.Len() >= sll.maxSize {
return false
}
sll.SL.PushFrontBatch(vs)
return true
}
func (sll *SafeListLimited) RemoveAll() {
sll.SL.RemoveAll()
}
func (sll *SafeListLimited) Len() int {
return sll.SL.Len()
}

View File

@@ -37,29 +37,29 @@ func (ws *WecomSender) CallBack(ctx CallBackContext) {
},
}
doSend(ctx.CallBackURL, body, models.Wecom, ctx.Stats)
ctx.Stats.AlertNotifyTotal.WithLabelValues("rule_callback").Inc()
doSendAndRecord(ctx.Ctx, ctx.CallBackURL, ctx.CallBackURL, body, "callback", ctx.Stats, ctx.Events)
}
func (ws *WecomSender) Send(ctx MessageContext) {
if len(ctx.Users) == 0 || len(ctx.Events) == 0 {
return
}
urls := ws.extract(ctx.Users)
urls, tokens := ws.extract(ctx.Users)
message := BuildTplMessage(models.Wecom, ws.tpl, ctx.Events)
for _, url := range urls {
for i, url := range urls {
body := wecom{
Msgtype: "markdown",
Markdown: wecomMarkdown{
Content: message,
},
}
doSend(url, body, models.Wecom, ctx.Stats)
doSendAndRecord(ctx.Ctx, url, tokens[i], body, models.Wecom, ctx.Stats, ctx.Events)
}
}
func (ws *WecomSender) extract(users []*models.User) []string {
func (ws *WecomSender) extract(users []*models.User) ([]string, []string) {
urls := make([]string, 0, len(users))
tokens := make([]string, 0, len(users))
for _, user := range users {
if token, has := user.ExtractToken(models.Wecom); has {
url := token
@@ -67,7 +67,8 @@ func (ws *WecomSender) extract(users []*models.User) []string {
url = "https://qyapi.weixin.qq.com/cgi-bin/webhook/send?key=" + token
}
urls = append(urls, url)
tokens = append(tokens, token)
}
}
return urls
return urls, tokens
}

View File

@@ -13,6 +13,8 @@ type Center struct {
UseFileAssets bool
FlashDuty FlashDuty
EventHistoryGroupView bool
CleanNotifyRecordDay int
MigrateBusiGroupLabel bool
}
type Plugin struct {

View File

@@ -4,8 +4,11 @@ import (
"context"
"fmt"
"github.com/ccfos/nightingale/v6/dscache"
"github.com/ccfos/nightingale/v6/alert"
"github.com/ccfos/nightingale/v6/alert/astats"
"github.com/ccfos/nightingale/v6/alert/dispatch"
"github.com/ccfos/nightingale/v6/alert/process"
alertrt "github.com/ccfos/nightingale/v6/alert/router"
"github.com/ccfos/nightingale/v6/center/cconf"
@@ -16,6 +19,7 @@ import (
centerrt "github.com/ccfos/nightingale/v6/center/router"
"github.com/ccfos/nightingale/v6/center/sso"
"github.com/ccfos/nightingale/v6/conf"
"github.com/ccfos/nightingale/v6/cron"
"github.com/ccfos/nightingale/v6/dumper"
"github.com/ccfos/nightingale/v6/memsto"
"github.com/ccfos/nightingale/v6/models"
@@ -25,14 +29,13 @@ import (
"github.com/ccfos/nightingale/v6/pkg/httpx"
"github.com/ccfos/nightingale/v6/pkg/i18nx"
"github.com/ccfos/nightingale/v6/pkg/logx"
"github.com/ccfos/nightingale/v6/pkg/macros"
"github.com/ccfos/nightingale/v6/pkg/version"
"github.com/ccfos/nightingale/v6/prom"
"github.com/ccfos/nightingale/v6/pushgw/idents"
pushgwrt "github.com/ccfos/nightingale/v6/pushgw/router"
"github.com/ccfos/nightingale/v6/pushgw/writer"
"github.com/ccfos/nightingale/v6/storage"
"github.com/ccfos/nightingale/v6/tdengine"
"github.com/flashcatcloud/ibex/src/cmd/ibex"
)
@@ -47,6 +50,10 @@ func Initialize(configDir string, cryptoKey string) (func(), error) {
cconf.MergeOperationConf()
if config.Alert.Heartbeat.EngineName == "" {
config.Alert.Heartbeat.EngineName = "default"
}
logxClean, err := logx.Init(config.Log)
if err != nil {
return nil, err
@@ -62,7 +69,7 @@ func Initialize(configDir string, cryptoKey string) (func(), error) {
}
ctx := ctx.NewContext(context.Background(), db, true)
migrate.Migrate(db)
models.InitRoot(ctx)
isRootInit := models.InitRoot(ctx)
config.HTTP.JWTAuth.SigningKey = models.InitJWTSigningKey(ctx)
@@ -71,7 +78,7 @@ func Initialize(configDir string, cryptoKey string) (func(), error) {
return nil, err
}
integration.Init(ctx, config.Center.BuiltinIntegrationsDir)
go integration.Init(ctx, config.Center.BuiltinIntegrationsDir)
var redis storage.Redis
redis, err = storage.NewRedis(config.Redis)
if err != nil {
@@ -94,24 +101,38 @@ func Initialize(configDir string, cryptoKey string) (func(), error) {
userCache := memsto.NewUserCache(ctx, syncStats)
userGroupCache := memsto.NewUserGroupCache(ctx, syncStats)
taskTplCache := memsto.NewTaskTplCache(ctx)
configCvalCache := memsto.NewCvalCache(ctx, syncStats)
sso := sso.Init(config.Center, ctx, configCache)
promClients := prom.NewPromClient(ctx)
tdengineClients := tdengine.NewTdengineClient(ctx, config.Alert.Heartbeat)
dispatch.InitRegisterQueryFunc(promClients)
externalProcessors := process.NewExternalProcessors()
alert.Start(config.Alert, config.Pushgw, syncStats, alertStats, externalProcessors, targetCache, busiGroupCache, alertMuteCache, alertRuleCache, notifyConfigCache, taskTplCache, dsCache, ctx, promClients, tdengineClients, userCache, userGroupCache)
macros.RegisterMacro(macros.MacroInVain)
dscache.Init(ctx, false)
alert.Start(config.Alert, config.Pushgw, syncStats, alertStats, externalProcessors, targetCache, busiGroupCache, alertMuteCache, alertRuleCache, notifyConfigCache, taskTplCache, dsCache, ctx, promClients, userCache, userGroupCache)
writers := writer.NewWriters(config.Pushgw)
go version.GetGithubVersion()
go cron.CleanNotifyRecord(ctx, config.Center.CleanNotifyRecordDay)
alertrtRouter := alertrt.New(config.HTTP, config.Alert, alertMuteCache, targetCache, busiGroupCache, alertStats, ctx, externalProcessors)
centerRouter := centerrt.New(config.HTTP, config.Center, config.Alert, config.Ibex, cconf.Operations, dsCache, notifyConfigCache, promClients, tdengineClients,
centerRouter := centerrt.New(config.HTTP, config.Center, config.Alert, config.Ibex,
cconf.Operations, dsCache, notifyConfigCache, promClients,
redis, sso, ctx, metas, idents, targetCache, userCache, userGroupCache)
pushgwRouter := pushgwrt.New(config.HTTP, config.Pushgw, config.Alert, targetCache, busiGroupCache, idents, metas, writers, ctx)
r := httpx.GinEngine(config.Global.RunMode, config.HTTP)
go func() {
if config.Center.MigrateBusiGroupLabel || models.CanMigrateBg(ctx) {
models.MigrateBg(ctx, pushgwRouter.Pushgw.BusiGroupLabelKey)
}
}()
r := httpx.GinEngine(config.Global.RunMode, config.HTTP, configCvalCache.PrintBodyPaths, configCvalCache.PrintAccessLog)
centerRouter.Config(r)
alertrtRouter.Config(r)
@@ -125,6 +146,11 @@ func Initialize(configDir string, cryptoKey string) (func(), error) {
httpClean := httpx.Init(config.HTTP, r)
fmt.Printf("please view n9e at http://%v:%v\n", config.Alert.Heartbeat.IP, config.HTTP.Port)
if isRootInit {
fmt.Println("username/password: root/root.2020")
}
return func() {
logxClean()
httpClean()

View File

@@ -16,6 +16,12 @@ import (
const SYSTEM = "system"
func Init(ctx *ctx.Context, builtinIntegrationsDir string) {
err := models.InitBuiltinPayloads(ctx)
if err != nil {
logger.Warning("init old builtinPayloads fail ", err)
return
}
fp := builtinIntegrationsDir
if fp == "" {
fp = path.Join(runner.Cwd, "integrations")
@@ -92,6 +98,7 @@ func Init(ctx *ctx.Context, builtinIntegrationsDir string) {
logger.Warning("update builtin component fail ", old, err)
}
}
component.ID = old.ID
}
// delete uuid is emtpy
@@ -106,6 +113,12 @@ func Init(ctx *ctx.Context, builtinIntegrationsDir string) {
logger.Warning("delete builtin metrics fail ", err)
}
// 删除 uuid%1000 不为 0 uuid > 1000000000000000000 且 type 为 dashboard 的记录
err = models.DB(ctx).Exec("delete from builtin_payloads where uuid%1000 != 0 and uuid > 1000000000000000000 and type = 'dashboard' and updated_by = 'system'").Error
if err != nil {
logger.Warning("delete builtin payloads fail ", err)
}
// alerts
files, err = file.FilesUnder(componentDir + "/alerts")
if err == nil && len(files) > 0 {
@@ -141,13 +154,13 @@ func Init(ctx *ctx.Context, builtinIntegrationsDir string) {
cate := strings.Replace(f, ".json", "", -1)
builtinAlert := models.BuiltinPayload{
Component: component.Ident,
Type: "alert",
Cate: cate,
Name: alert.Name,
Tags: alert.AppendTags,
Content: string(content),
UUID: alert.UUID,
ComponentID: component.ID,
Type: "alert",
Cate: cate,
Name: alert.Name,
Tags: alert.AppendTags,
Content: string(content),
UUID: alert.UUID,
}
old, err := models.BuiltinPayloadGet(ctx, "uuid = ?", alert.UUID)
@@ -165,6 +178,7 @@ func Init(ctx *ctx.Context, builtinIntegrationsDir string) {
}
if old.UpdatedBy == SYSTEM {
old.ComponentID = component.ID
old.Content = string(content)
old.Name = alert.Name
old.Tags = alert.AppendTags
@@ -210,7 +224,8 @@ func Init(ctx *ctx.Context, builtinIntegrationsDir string) {
}
if dashboard.UUID == 0 {
dashboard.UUID = time.Now().UnixNano()
time.Sleep(time.Microsecond)
dashboard.UUID = time.Now().UnixMicro()
// 补全文件中的 uuid
bs, err = json.MarshalIndent(dashboard, "", " ")
if err != nil {
@@ -231,13 +246,13 @@ func Init(ctx *ctx.Context, builtinIntegrationsDir string) {
}
builtinDashboard := models.BuiltinPayload{
Component: component.Ident,
Type: "dashboard",
Cate: "",
Name: dashboard.Name,
Tags: dashboard.Tags,
Content: string(content),
UUID: dashboard.UUID,
ComponentID: component.ID,
Type: "dashboard",
Cate: "",
Name: dashboard.Name,
Tags: dashboard.Tags,
Content: string(content),
UUID: dashboard.UUID,
}
old, err := models.BuiltinPayloadGet(ctx, "uuid = ?", dashboard.UUID)
@@ -255,6 +270,7 @@ func Init(ctx *ctx.Context, builtinIntegrationsDir string) {
}
if old.UpdatedBy == SYSTEM {
old.ComponentID = component.ID
old.Content = string(content)
old.Name = dashboard.Name
old.Tags = dashboard.Tags

View File

@@ -92,7 +92,6 @@ func (s *Set) updateMeta(items map[string]models.HostMeta) {
func (s *Set) updateTargets(m map[string]models.HostMeta) error {
if s.redis == nil {
logger.Warningf("redis is nil")
return nil
}

View File

@@ -16,6 +16,7 @@ import (
"github.com/ccfos/nightingale/v6/conf"
_ "github.com/ccfos/nightingale/v6/front/statik"
"github.com/ccfos/nightingale/v6/memsto"
"github.com/ccfos/nightingale/v6/models"
"github.com/ccfos/nightingale/v6/pkg/aop"
"github.com/ccfos/nightingale/v6/pkg/ctx"
"github.com/ccfos/nightingale/v6/pkg/httpx"
@@ -23,7 +24,6 @@ import (
"github.com/ccfos/nightingale/v6/prom"
"github.com/ccfos/nightingale/v6/pushgw/idents"
"github.com/ccfos/nightingale/v6/storage"
"github.com/ccfos/nightingale/v6/tdengine"
"github.com/gin-gonic/gin"
"github.com/rakyll/statik/fs"
@@ -41,7 +41,6 @@ type Router struct {
DatasourceCache *memsto.DatasourceCacheType
NotifyConfigCache *memsto.NotifyConfigCacheType
PromClients *prom.PromClientMap
TdendgineClients *tdengine.TdengineClientMap
Redis storage.Redis
MetaSet *metas.Set
IdentSet *idents.Set
@@ -51,9 +50,14 @@ type Router struct {
UserGroupCache *memsto.UserGroupCacheType
Ctx *ctx.Context
HeartbeatHook HeartbeatHookFunc
TargetDeleteHook models.TargetDeleteHookFunc
}
func New(httpConfig httpx.Config, center cconf.Center, alert aconf.Alert, ibex conf.Ibex, operations cconf.Operation, ds *memsto.DatasourceCacheType, ncc *memsto.NotifyConfigCacheType, pc *prom.PromClientMap, tdendgineClients *tdengine.TdengineClientMap, redis storage.Redis, sso *sso.SsoClient, ctx *ctx.Context, metaSet *metas.Set, idents *idents.Set, tc *memsto.TargetCacheType, uc *memsto.UserCacheType, ugc *memsto.UserGroupCacheType) *Router {
func New(httpConfig httpx.Config, center cconf.Center, alert aconf.Alert, ibex conf.Ibex,
operations cconf.Operation, ds *memsto.DatasourceCacheType, ncc *memsto.NotifyConfigCacheType,
pc *prom.PromClientMap, redis storage.Redis,
sso *sso.SsoClient, ctx *ctx.Context, metaSet *metas.Set, idents *idents.Set,
tc *memsto.TargetCacheType, uc *memsto.UserCacheType, ugc *memsto.UserGroupCacheType) *Router {
return &Router{
HTTP: httpConfig,
Center: center,
@@ -63,7 +67,6 @@ func New(httpConfig httpx.Config, center cconf.Center, alert aconf.Alert, ibex c
DatasourceCache: ds,
NotifyConfigCache: ncc,
PromClients: pc,
TdendgineClients: tdendgineClients,
Redis: redis,
MetaSet: metaSet,
IdentSet: idents,
@@ -73,9 +76,14 @@ func New(httpConfig httpx.Config, center cconf.Center, alert aconf.Alert, ibex c
UserGroupCache: ugc,
Ctx: ctx,
HeartbeatHook: func(ident string) map[string]interface{} { return nil },
TargetDeleteHook: emptyDeleteHook,
}
}
func emptyDeleteHook(ctx *ctx.Context, idents []string) error {
return nil
}
func stat() gin.HandlerFunc {
return func(c *gin.Context) {
start := time.Now()
@@ -173,26 +181,53 @@ func (rt *Router) Config(r *gin.Engine) {
pages.POST("/query-range-batch", rt.promBatchQueryRange)
pages.POST("/query-instant-batch", rt.promBatchQueryInstant)
pages.GET("/datasource/brief", rt.datasourceBriefs)
pages.POST("/datasource/query", rt.datasourceQuery)
pages.POST("/ds-query", rt.QueryData)
pages.POST("/logs-query", rt.QueryLog)
pages.POST("/logs-query", rt.QueryLogV2)
pages.POST("/tdengine-databases", rt.tdengineDatabases)
pages.POST("/tdengine-tables", rt.tdengineTables)
pages.POST("/tdengine-columns", rt.tdengineColumns)
pages.POST("/log-query-batch", rt.QueryLogBatch)
// 数据库元数据接口
pages.POST("/db-databases", rt.ShowDatabases)
pages.POST("/db-tables", rt.ShowTables)
pages.POST("/db-desc-table", rt.DescribeTable)
// es 专用接口
pages.POST("/indices", rt.auth(), rt.user(), rt.QueryIndices)
pages.POST("/es-variable", rt.auth(), rt.user(), rt.QueryESVariable)
pages.POST("/fields", rt.auth(), rt.user(), rt.QueryFields)
pages.POST("/log-query", rt.auth(), rt.user(), rt.QueryLog)
} else {
pages.Any("/proxy/:id/*url", rt.auth(), rt.dsProxy)
pages.POST("/query-range-batch", rt.auth(), rt.promBatchQueryRange)
pages.POST("/query-instant-batch", rt.auth(), rt.promBatchQueryInstant)
pages.GET("/datasource/brief", rt.auth(), rt.user(), rt.datasourceBriefs)
pages.POST("/datasource/query", rt.auth(), rt.user(), rt.datasourceQuery)
pages.POST("/ds-query", rt.auth(), rt.QueryData)
pages.POST("/logs-query", rt.auth(), rt.QueryLog)
pages.POST("/logs-query", rt.auth(), rt.QueryLogV2)
pages.POST("/tdengine-databases", rt.auth(), rt.tdengineDatabases)
pages.POST("/tdengine-tables", rt.auth(), rt.tdengineTables)
pages.POST("/tdengine-columns", rt.auth(), rt.tdengineColumns)
pages.POST("/log-query-batch", rt.auth(), rt.user(), rt.QueryLogBatch)
// 数据库元数据接口
pages.POST("/db-databases", rt.auth(), rt.user(), rt.ShowDatabases)
pages.POST("/db-tables", rt.auth(), rt.user(), rt.ShowTables)
pages.POST("/db-desc-table", rt.auth(), rt.user(), rt.DescribeTable)
// es 专用接口
pages.POST("/indices", rt.auth(), rt.user(), rt.QueryIndices)
pages.POST("/es-variable", rt.QueryESVariable)
pages.POST("/fields", rt.QueryFields)
pages.POST("/log-query", rt.QueryLog)
}
pages.GET("/sql-template", rt.QuerySqlTemplate)
@@ -267,6 +302,7 @@ func (rt *Router) Config(r *gin.Engine) {
pages.DELETE("/busi-group/:id/members", rt.auth(), rt.user(), rt.perm("/busi-groups/put"), rt.bgrw(), rt.busiGroupMemberDel)
pages.DELETE("/busi-group/:id", rt.auth(), rt.user(), rt.perm("/busi-groups/del"), rt.bgrw(), rt.busiGroupDel)
pages.GET("/busi-group/:id/perm/:perm", rt.auth(), rt.user(), rt.checkBusiGroupPerm)
pages.GET("/busi-groups/tags", rt.auth(), rt.user(), rt.busiGroupsGetTags)
pages.GET("/targets", rt.auth(), rt.user(), rt.targetGets)
pages.GET("/target/extra-meta", rt.auth(), rt.user(), rt.targetExtendInfoByIdent)
@@ -276,7 +312,7 @@ func (rt *Router) Config(r *gin.Engine) {
pages.POST("/targets/tags", rt.auth(), rt.user(), rt.perm("/targets/put"), rt.targetBindTagsByFE)
pages.DELETE("/targets/tags", rt.auth(), rt.user(), rt.perm("/targets/put"), rt.targetUnbindTagsByFE)
pages.PUT("/targets/note", rt.auth(), rt.user(), rt.perm("/targets/put"), rt.targetUpdateNote)
pages.PUT("/targets/bgid", rt.auth(), rt.user(), rt.perm("/targets/put"), rt.targetUpdateBgid)
pages.PUT("/targets/bgids", rt.auth(), rt.user(), rt.perm("/targets/put"), rt.targetBindBgids)
pages.POST("/builtin-cate-favorite", rt.auth(), rt.user(), rt.builtinCateFavoriteAdd)
pages.DELETE("/builtin-cate-favorite/:name", rt.auth(), rt.user(), rt.builtinCateFavoriteDel)
@@ -297,6 +333,7 @@ func (rt *Router) Config(r *gin.Engine) {
pages.POST("/busi-group/:id/board/:bid/clone", rt.auth(), rt.user(), rt.perm("/dashboards/add"), rt.bgrw(), rt.boardClone)
pages.POST("/busi-groups/boards/clones", rt.auth(), rt.user(), rt.perm("/dashboards/add"), rt.boardBatchClone)
pages.GET("/boards", rt.auth(), rt.user(), rt.boardGetsByBids)
pages.GET("/board/:bid", rt.boardGet)
pages.GET("/board/:bid/pure", rt.boardPureGet)
pages.PUT("/board/:bid", rt.auth(), rt.user(), rt.perm("/dashboards/put"), rt.boardPut)
@@ -307,6 +344,11 @@ func (rt *Router) Config(r *gin.Engine) {
pages.GET("/share-charts", rt.chartShareGets)
pages.POST("/share-charts", rt.auth(), rt.chartShareAdd)
pages.POST("/dashboard-annotations", rt.auth(), rt.user(), rt.perm("/dashboards/put"), rt.dashAnnotationAdd)
pages.GET("/dashboard-annotations", rt.dashAnnotationGets)
pages.PUT("/dashboard-annotation/:id", rt.auth(), rt.user(), rt.perm("/dashboards/put"), rt.dashAnnotationPut)
pages.DELETE("/dashboard-annotation/:id", rt.auth(), rt.user(), rt.perm("/dashboards/del"), rt.dashAnnotationDel)
// pages.GET("/alert-rules/builtin/alerts-cates", rt.auth(), rt.user(), rt.builtinAlertCateGets)
// pages.GET("/alert-rules/builtin/list", rt.auth(), rt.user(), rt.builtinAlertRules)
pages.GET("/alert-rules/callbacks", rt.auth(), rt.user(), rt.alertRuleCallbacks)
@@ -315,6 +357,8 @@ func (rt *Router) Config(r *gin.Engine) {
pages.GET("/busi-group/:id/alert-rules", rt.auth(), rt.user(), rt.perm("/alert-rules"), rt.alertRuleGets)
pages.POST("/busi-group/:id/alert-rules", rt.auth(), rt.user(), rt.perm("/alert-rules/add"), rt.bgrw(), rt.alertRuleAddByFE)
pages.POST("/busi-group/:id/alert-rules/import", rt.auth(), rt.user(), rt.perm("/alert-rules/add"), rt.bgrw(), rt.alertRuleAddByImport)
pages.POST("/busi-group/:id/alert-rules/import-prom-rule", rt.auth(),
rt.user(), rt.perm("/alert-rules/add"), rt.bgrw(), rt.alertRuleAddByImportPromRule)
pages.DELETE("/busi-group/:id/alert-rules", rt.auth(), rt.user(), rt.perm("/alert-rules/del"), rt.bgrw(), rt.alertRuleDel)
pages.PUT("/busi-group/:id/alert-rules/fields", rt.auth(), rt.user(), rt.perm("/alert-rules/put"), rt.bgrw(), rt.alertRulePutFields)
pages.PUT("/busi-group/:id/alert-rule/:arid", rt.auth(), rt.user(), rt.perm("/alert-rules/put"), rt.alertRulePutByFE)
@@ -322,6 +366,7 @@ func (rt *Router) Config(r *gin.Engine) {
pages.GET("/alert-rule/:arid/pure", rt.auth(), rt.user(), rt.perm("/alert-rules"), rt.alertRulePureGet)
pages.PUT("/busi-group/alert-rule/validate", rt.auth(), rt.user(), rt.perm("/alert-rules/put"), rt.alertRuleValidation)
pages.POST("/relabel-test", rt.auth(), rt.user(), rt.relabelTest)
pages.POST("/busi-group/:id/alert-rules/clone", rt.auth(), rt.user(), rt.perm("/alert-rules/add"), rt.bgrw(), rt.cloneToMachine)
pages.GET("/busi-groups/recording-rules", rt.auth(), rt.user(), rt.perm("/recording-rules"), rt.recordingRuleGetsByGids)
pages.GET("/busi-group/:id/recording-rules", rt.auth(), rt.user(), rt.perm("/recording-rules"), rt.recordingRuleGets)
@@ -350,9 +395,11 @@ func (rt *Router) Config(r *gin.Engine) {
if rt.Center.AnonymousAccess.AlertDetail {
pages.GET("/alert-cur-event/:eid", rt.alertCurEventGet)
pages.GET("/alert-his-event/:eid", rt.alertHisEventGet)
pages.GET("/event-notify-records/:eid", rt.notificationRecordList)
} else {
pages.GET("/alert-cur-event/:eid", rt.auth(), rt.user(), rt.alertCurEventGet)
pages.GET("/alert-his-event/:eid", rt.auth(), rt.user(), rt.alertHisEventGet)
pages.GET("/event-notify-records/:eid", rt.auth(), rt.user(), rt.notificationRecordList)
}
// card logic
@@ -458,6 +505,8 @@ func (rt *Router) Config(r *gin.Engine) {
pages.GET("/builtin-payload/:id", rt.auth(), rt.user(), rt.perm("/built-in-components"), rt.builtinPayloadGet)
pages.PUT("/builtin-payloads", rt.auth(), rt.user(), rt.perm("/built-in-components/put"), rt.builtinPayloadsPut)
pages.DELETE("/builtin-payloads", rt.auth(), rt.user(), rt.perm("/built-in-components/del"), rt.builtinPayloadsDel)
pages.GET("/builtin-payload", rt.auth(), rt.user(), rt.builtinPayloadsGetByUUIDOrID)
}
r.GET("/api/n9e/versions", func(c *gin.Context) {
@@ -500,6 +549,8 @@ func (rt *Router) Config(r *gin.Engine) {
service.PUT("/targets/note", rt.targetUpdateNoteByService)
service.PUT("/targets/bgid", rt.targetUpdateBgidByService)
service.POST("/targets-of-host-query", rt.targetsOfHostQuery)
service.POST("/alert-rules", rt.alertRuleAddByService)
service.POST("/alert-rule-add", rt.alertRuleAddOneByService)
service.DELETE("/alert-rules", rt.alertRuleDelByService)
@@ -534,6 +585,7 @@ func (rt *Router) Config(r *gin.Engine) {
service.GET("/config/:id", rt.configGet)
service.GET("/configs", rt.configsGet)
service.GET("/config", rt.configGetByKey)
service.GET("/all-configs", rt.configGetAll)
service.PUT("/configs", rt.configsPut)
service.POST("/configs", rt.configsPost)
service.DELETE("/configs", rt.configsDel)
@@ -551,6 +603,11 @@ func (rt *Router) Config(r *gin.Engine) {
service.GET("/targets-of-alert-rule", rt.targetsOfAlertRule)
service.POST("/notify-record", rt.notificationRecordAdd)
service.GET("/alert-cur-events-del-by-hash", rt.alertCurEventDelByHash)
service.POST("/center/heartbeat", rt.heartbeat)
}
}

View File

@@ -65,7 +65,8 @@ func (rt *Router) alertCurEventsCard(c *gin.Context) {
ginx.Dangerous(err)
// 最多获取50000个获取太多也没啥意义
list, err := models.AlertCurEventGets(rt.Ctx, prods, bgids, stime, etime, severity, dsIds, cates, query, 50000, 0)
list, err := models.AlertCurEventsGet(rt.Ctx, prods, bgids, stime, etime, severity, dsIds,
cates, 0, query, 50000, 0)
ginx.Dangerous(err)
cardmap := make(map[string]*AlertCard)
@@ -162,13 +163,17 @@ func (rt *Router) alertCurEventsList(c *gin.Context) {
cates = strings.Split(cate, ",")
}
ruleId := ginx.QueryInt64(c, "rid", 0)
bgids, err := GetBusinessGroupIds(c, rt.Ctx, rt.Center.EventHistoryGroupView)
ginx.Dangerous(err)
total, err := models.AlertCurEventTotal(rt.Ctx, prods, bgids, stime, etime, severity, dsIds, cates, query)
total, err := models.AlertCurEventTotal(rt.Ctx, prods, bgids, stime, etime, severity, dsIds,
cates, ruleId, query)
ginx.Dangerous(err)
list, err := models.AlertCurEventGets(rt.Ctx, prods, bgids, stime, etime, severity, dsIds, cates, query, limit, ginx.Offset(c, limit))
list, err := models.AlertCurEventsGet(rt.Ctx, prods, bgids, stime, etime, severity, dsIds,
cates, ruleId, query, limit, ginx.Offset(c, limit))
ginx.Dangerous(err)
cache := make(map[int64]*models.UserGroup)
@@ -201,7 +206,9 @@ func (rt *Router) checkCurEventBusiGroupRWPermission(c *gin.Context, ids []int64
for i := 0; i < len(ids); i++ {
event, err := models.AlertCurEventGetById(rt.Ctx, ids[i])
ginx.Dangerous(err)
if event == nil {
continue
}
if _, has := set[event.GroupId]; !has {
rt.bgrwCheck(c, event.GroupId)
set[event.GroupId] = struct{}{}
@@ -222,6 +229,12 @@ func (rt *Router) alertCurEventGet(c *gin.Context) {
rt.bgroCheck(c, event.GroupId)
}
ruleConfig, needReset := models.FillRuleConfigTplName(rt.Ctx, event.RuleConfig)
if needReset {
event.RuleConfigJson = ruleConfig
}
event.LastEvalTime = event.TriggerTime
ginx.NewRender(c).Data(event, nil)
}
@@ -229,3 +242,8 @@ func (rt *Router) alertCurEventsStatistics(c *gin.Context) {
ginx.NewRender(c).Data(models.AlertCurEventStatistics(rt.Ctx, time.Now()), nil)
}
func (rt *Router) alertCurEventDelByHash(c *gin.Context) {
hash := ginx.QueryStr(c, "hash")
ginx.NewRender(c).Message(models.AlertCurEventDelByHash(rt.Ctx, hash))
}

View File

@@ -54,13 +54,17 @@ func (rt *Router) alertHisEventsList(c *gin.Context) {
cates = strings.Split(cate, ",")
}
ruleId := ginx.QueryInt64(c, "rid", 0)
bgids, err := GetBusinessGroupIds(c, rt.Ctx, rt.Center.EventHistoryGroupView)
ginx.Dangerous(err)
total, err := models.AlertHisEventTotal(rt.Ctx, prods, bgids, stime, etime, severity, recovered, dsIds, cates, query)
total, err := models.AlertHisEventTotal(rt.Ctx, prods, bgids, stime, etime, severity,
recovered, dsIds, cates, ruleId, query)
ginx.Dangerous(err)
list, err := models.AlertHisEventGets(rt.Ctx, prods, bgids, stime, etime, severity, recovered, dsIds, cates, query, limit, ginx.Offset(c, limit))
list, err := models.AlertHisEventGets(rt.Ctx, prods, bgids, stime, etime, severity, recovered,
dsIds, cates, ruleId, query, limit, ginx.Offset(c, limit))
ginx.Dangerous(err)
cache := make(map[int64]*models.UserGroup)
@@ -87,6 +91,11 @@ func (rt *Router) alertHisEventGet(c *gin.Context) {
rt.bgroCheck(c, event.GroupId)
}
ruleConfig, needReset := models.FillRuleConfigTplName(rt.Ctx, event.RuleConfig)
if needReset {
event.RuleConfigJson = ruleConfig
}
ginx.NewRender(c).Data(event, err)
}

View File

@@ -1,17 +1,22 @@
package router
import (
"encoding/json"
"fmt"
"net/http"
"regexp"
"strconv"
"strings"
"time"
"gopkg.in/yaml.v2"
"github.com/ccfos/nightingale/v6/models"
"github.com/ccfos/nightingale/v6/pushgw/pconf"
"github.com/ccfos/nightingale/v6/pushgw/writer"
"github.com/gin-gonic/gin"
"github.com/jinzhu/copier"
"github.com/prometheus/prometheus/prompb"
"github.com/toolkits/pkg/ginx"
"github.com/toolkits/pkg/i18n"
@@ -32,6 +37,18 @@ func (rt *Router) alertRuleGets(c *gin.Context) {
ginx.NewRender(c).Data(ars, err)
}
func getAlertCueEventTimeRange(c *gin.Context) (stime, etime int64) {
stime = ginx.QueryInt64(c, "stime", 0)
etime = ginx.QueryInt64(c, "etime", 0)
if etime == 0 {
etime = time.Now().Unix()
}
if stime == 0 || stime >= etime {
stime = etime - 30*24*int64(time.Hour.Seconds())
}
return
}
func (rt *Router) alertRuleGetsByGids(c *gin.Context) {
gids := str.IdsInt64(ginx.QueryStr(c, "gids", ""), ",")
if len(gids) > 0 {
@@ -55,9 +72,35 @@ func (rt *Router) alertRuleGetsByGids(c *gin.Context) {
ars, err := models.AlertRuleGetsByBGIds(rt.Ctx, gids)
if err == nil {
cache := make(map[int64]*models.UserGroup)
rids := make([]int64, 0, len(ars))
names := make([]string, 0, len(ars))
for i := 0; i < len(ars); i++ {
ars[i].FillNotifyGroups(rt.Ctx, cache)
ars[i].FillSeverities()
if len(ars[i].DatasourceQueries) != 0 {
ars[i].DatasourceIdsJson = rt.DatasourceCache.GetIDsByDsCateAndQueries(ars[i].Cate, ars[i].DatasourceQueries)
}
rids = append(rids, ars[i].Id)
names = append(names, ars[i].UpdateBy)
}
stime, etime := getAlertCueEventTimeRange(c)
cnt := models.AlertCurEventCountByRuleId(rt.Ctx, rids, stime, etime)
if cnt != nil {
for i := 0; i < len(ars); i++ {
ars[i].CurEventCount = cnt[ars[i].Id]
}
}
users := models.UserMapGet(rt.Ctx, "username in (?)", names)
if users != nil {
for i := 0; i < len(ars); i++ {
if user, exist := users[ars[i].UpdateBy]; exist {
ars[i].UpdateByNickname = user.Nickname
}
}
}
}
ginx.NewRender(c).Data(ars, err)
@@ -85,6 +128,10 @@ func (rt *Router) alertRulesGetByService(c *gin.Context) {
cache := make(map[int64]*models.UserGroup)
for i := 0; i < len(ars); i++ {
ars[i].FillNotifyGroups(rt.Ctx, cache)
if len(ars[i].DatasourceQueries) != 0 {
ars[i].DatasourceIdsJson = rt.DatasourceCache.GetIDsByDsCateAndQueries(ars[i].Cate, ars[i].DatasourceQueries)
}
}
}
ginx.NewRender(c).Data(ars, err)
@@ -119,12 +166,48 @@ func (rt *Router) alertRuleAddByImport(c *gin.Context) {
ginx.Bomb(http.StatusBadRequest, "input json is empty")
}
for i := range lst {
if len(lst[i].DatasourceQueries) == 0 {
lst[i].DatasourceQueries = []models.DatasourceQuery{
models.DataSourceQueryAll,
}
}
}
bgid := ginx.UrlParamInt64(c, "id")
reterr := rt.alertRuleAdd(lst, username, bgid, c.GetHeader("X-Language"))
ginx.NewRender(c).Data(reterr, nil)
}
type promRuleForm struct {
Payload string `json:"payload" binding:"required"`
DatasourceQueries []models.DatasourceQuery `json:"datasource_queries" binding:"required"`
Disabled int `json:"disabled" binding:"gte=0,lte=1"`
}
func (rt *Router) alertRuleAddByImportPromRule(c *gin.Context) {
var f promRuleForm
ginx.Dangerous(c.BindJSON(&f))
var pr struct {
Groups []models.PromRuleGroup `yaml:"groups"`
}
err := yaml.Unmarshal([]byte(f.Payload), &pr)
if err != nil {
ginx.Bomb(http.StatusBadRequest, "invalid yaml format, please use the example format. err: %v", err)
}
if len(pr.Groups) == 0 {
ginx.Bomb(http.StatusBadRequest, "input yaml is empty")
}
lst := models.DealPromGroup(pr.Groups, f.DatasourceQueries, f.Disabled)
username := c.MustGet("username").(string)
bgid := ginx.UrlParamInt64(c, "id")
ginx.NewRender(c).Data(rt.alertRuleAdd(lst, username, bgid, c.GetHeader("X-Language")), nil)
}
func (rt *Router) alertRuleAddByService(c *gin.Context) {
var lst []models.AlertRule
ginx.BindJSON(c, &lst)
@@ -275,6 +358,43 @@ func (rt *Router) alertRulePutFields(c *gin.Context) {
continue
}
if f.Action == "update_triggers" {
if triggers, has := f.Fields["triggers"]; has {
originRule := ar.RuleConfigJson.(map[string]interface{})
originRule["triggers"] = triggers
b, err := json.Marshal(originRule)
ginx.Dangerous(err)
ginx.Dangerous(ar.UpdateFieldsMap(rt.Ctx, map[string]interface{}{"rule_config": string(b)}))
continue
}
}
if f.Action == "annotations_add" {
if annotations, has := f.Fields["annotations"]; has {
annotationsMap := annotations.(map[string]interface{})
for k, v := range annotationsMap {
ar.AnnotationsJSON[k] = v.(string)
}
b, err := json.Marshal(ar.AnnotationsJSON)
ginx.Dangerous(err)
ginx.Dangerous(ar.UpdateFieldsMap(rt.Ctx, map[string]interface{}{"annotations": string(b)}))
continue
}
}
if f.Action == "annotations_del" {
if annotations, has := f.Fields["annotations"]; has {
annotationsKeys := annotations.(map[string]interface{})
for key := range annotationsKeys {
delete(ar.AnnotationsJSON, key)
}
b, err := json.Marshal(ar.AnnotationsJSON)
ginx.Dangerous(err)
ginx.Dangerous(ar.UpdateFieldsMap(rt.Ctx, map[string]interface{}{"annotations": string(b)}))
continue
}
}
if f.Action == "callback_add" {
// 增加一个 callback 地址
if callbacks, has := f.Fields["callbacks"]; has {
@@ -295,6 +415,16 @@ func (rt *Router) alertRulePutFields(c *gin.Context) {
}
}
if f.Action == "datasource_change" {
// 修改数据源
if datasourceQueries, has := f.Fields["datasource_queries"]; has {
bytes, err := json.Marshal(datasourceQueries)
ginx.Dangerous(err)
ginx.Dangerous(ar.UpdateFieldsMap(rt.Ctx, map[string]interface{}{"datasource_queries": bytes}))
continue
}
}
for k, v := range f.Fields {
ginx.Dangerous(ar.UpdateColumn(rt.Ctx, k, v))
}
@@ -314,6 +444,10 @@ func (rt *Router) alertRuleGet(c *gin.Context) {
return
}
if len(ar.DatasourceQueries) != 0 {
ar.DatasourceIdsJson = rt.DatasourceCache.GetIDsByDsCateAndQueries(ar.Cate, ar.DatasourceQueries)
}
err = ar.FillNotifyGroups(rt.Ctx, make(map[int64]*models.UserGroup))
ginx.Dangerous(err)
@@ -422,7 +556,7 @@ func (rt *Router) relabelTest(c *gin.Context) {
labels := make([]prompb.Label, len(f.Tags))
for i, tag := range f.Tags {
label := strings.Split(tag, "=")
label := strings.SplitN(tag, "=", 2)
if len(label) != 2 {
ginx.Bomb(http.StatusBadRequest, "tag:%s format error", tag)
}
@@ -453,3 +587,91 @@ func (rt *Router) relabelTest(c *gin.Context) {
ginx.NewRender(c).Data(tags, nil)
}
type identListForm struct {
Ids []int64 `json:"ids"`
IdentList []string `json:"ident_list"`
}
func containsIdentOperator(s string) bool {
pattern := `ident\s*(!=|!~|=~)`
matched, err := regexp.MatchString(pattern, s)
if err != nil {
return false
}
return matched
}
func (rt *Router) cloneToMachine(c *gin.Context) {
var f identListForm
ginx.BindJSON(c, &f)
if len(f.IdentList) == 0 {
ginx.Bomb(http.StatusBadRequest, "ident_list is empty")
}
alertRules, err := models.AlertRuleGetsByIds(rt.Ctx, f.Ids)
ginx.Dangerous(err)
re := regexp.MustCompile(`ident\s*=\s*\\".*?\\"`)
user := c.MustGet("username").(string)
now := time.Now().Unix()
newRules := make([]*models.AlertRule, 0)
reterr := make(map[string]map[string]string)
for i := range alertRules {
errMsg := make(map[string]string)
if alertRules[i].Cate != "prometheus" {
errMsg["all"] = "Only Prometheus rule can be cloned to machines"
reterr[alertRules[i].Name] = errMsg
continue
}
if containsIdentOperator(alertRules[i].RuleConfig) {
errMsg["all"] = "promql is missing ident"
reterr[alertRules[i].Name] = errMsg
continue
}
for j := range f.IdentList {
alertRules[i].RuleConfig = re.ReplaceAllString(alertRules[i].RuleConfig, fmt.Sprintf(`ident=\"%s\"`, f.IdentList[j]))
newRule := &models.AlertRule{}
if err := copier.Copy(newRule, alertRules[i]); err != nil {
errMsg[f.IdentList[j]] = fmt.Sprintf("fail to clone rule, err: %s", err)
continue
}
newRule.Id = 0
newRule.Name = alertRules[i].Name + "_" + f.IdentList[j]
newRule.CreateBy = user
newRule.UpdateBy = user
newRule.UpdateAt = now
newRule.CreateAt = now
newRule.RuleConfig = alertRules[i].RuleConfig
exist, err := models.AlertRuleExists(rt.Ctx, 0, newRule.GroupId, newRule.Name)
if err != nil {
errMsg[f.IdentList[j]] = err.Error()
continue
}
if exist {
errMsg[f.IdentList[j]] = fmt.Sprintf("rule already exists, ruleName: %s", newRule.Name)
continue
}
newRules = append(newRules, newRule)
}
if len(errMsg) > 0 {
reterr[alertRules[i].Name] = errMsg
}
}
ginx.NewRender(c).Data(reterr, models.InsertAlertRule(rt.Ctx, newRules))
}

View File

@@ -94,6 +94,14 @@ func (rt *Router) boardGet(c *gin.Context) {
ginx.NewRender(c).Data(board, nil)
}
// 根据 bids 参数,获取多个 board
func (rt *Router) boardGetsByBids(c *gin.Context) {
bids := str.IdsInt64(ginx.QueryStr(c, "bids", ""), ",")
boards, err := models.BoardGetsByBids(rt.Ctx, bids)
ginx.Dangerous(err)
ginx.NewRender(c).Data(boards, err)
}
func (rt *Router) boardPureGet(c *gin.Context) {
board, err := models.BoardGetByID(rt.Ctx, ginx.UrlParamInt64(c, "bid"))
ginx.Dangerous(err)

View File

@@ -4,10 +4,15 @@ import (
"net/http"
"github.com/ccfos/nightingale/v6/models"
"github.com/ccfos/nightingale/v6/pkg/ctx"
"github.com/gin-gonic/gin"
"github.com/toolkits/pkg/ginx"
"gorm.io/gorm"
)
const SYSTEM = "system"
func (rt *Router) builtinComponentsAdd(c *gin.Context) {
var lst []models.BuiltinComponent
ginx.BindJSON(c, &lst)
@@ -31,8 +36,9 @@ func (rt *Router) builtinComponentsAdd(c *gin.Context) {
func (rt *Router) builtinComponentsGets(c *gin.Context) {
query := ginx.QueryStr(c, "query", "")
disabled := ginx.QueryInt(c, "disabled", -1)
bc, err := models.BuiltinComponentGets(rt.Ctx, query)
bc, err := models.BuiltinComponentGets(rt.Ctx, query, disabled)
ginx.Dangerous(err)
ginx.NewRender(c).Data(bc, nil)
@@ -50,10 +56,31 @@ func (rt *Router) builtinComponentsPut(c *gin.Context) {
return
}
if bc.CreatedBy == SYSTEM {
req.Ident = bc.Ident
}
username := Username(c)
req.UpdatedBy = username
ginx.NewRender(c).Message(bc.Update(rt.Ctx, req))
err = models.DB(rt.Ctx).Transaction(func(tx *gorm.DB) error {
tCtx := &ctx.Context{
DB: tx,
}
txErr := models.BuiltinMetricBatchUpdateColumn(tCtx, "typ", bc.Ident, req.Ident, req.UpdatedBy)
if txErr != nil {
return txErr
}
txErr = bc.Update(tCtx, req)
if txErr != nil {
return txErr
}
return nil
})
ginx.NewRender(c).Message(err)
}
func (rt *Router) builtinComponentsDel(c *gin.Context) {

View File

@@ -102,9 +102,29 @@ func (rt *Router) builtinMetricsDefaultTypes(c *gin.Context) {
func (rt *Router) builtinMetricsTypes(c *gin.Context) {
collector := ginx.QueryStr(c, "collector", "")
query := ginx.QueryStr(c, "query", "")
disabled := ginx.QueryInt(c, "disabled", -1)
lang := c.GetHeader("X-Language")
ginx.NewRender(c).Data(models.BuiltinMetricTypes(rt.Ctx, lang, collector, query))
metricTypeList, err := models.BuiltinMetricTypes(rt.Ctx, lang, collector, query)
ginx.Dangerous(err)
componentList, err := models.BuiltinComponentGets(rt.Ctx, "", disabled)
ginx.Dangerous(err)
// 创建一个 map 来存储 componentList 中的类型
componentTypes := make(map[string]struct{})
for _, comp := range componentList {
componentTypes[comp.Ident] = struct{}{}
}
filteredMetricTypeList := make([]string, 0)
for _, metricType := range metricTypeList {
if _, exists := componentTypes[metricType]; exists {
filteredMetricTypeList = append(filteredMetricTypeList, metricType)
}
}
ginx.NewRender(c).Data(filteredMetricTypeList, nil)
}
func (rt *Router) builtinMetricsCollectors(c *gin.Context) {

View File

@@ -6,6 +6,7 @@ import (
"strings"
"time"
"github.com/BurntSushi/toml"
"github.com/ccfos/nightingale/v6/models"
"github.com/gin-gonic/gin"
"github.com/toolkits/pkg/ginx"
@@ -42,7 +43,7 @@ func (rt *Router) builtinPayloadsAdd(c *gin.Context) {
for _, rule := range alertRules {
if rule.UUID == 0 {
rule.UUID = time.Now().UnixNano()
rule.UUID = time.Now().UnixMicro()
}
contentBytes, err := json.Marshal(rule)
@@ -52,15 +53,15 @@ func (rt *Router) builtinPayloadsAdd(c *gin.Context) {
}
bp := models.BuiltinPayload{
Type: lst[i].Type,
Component: lst[i].Component,
Cate: lst[i].Cate,
Name: rule.Name,
Tags: rule.AppendTags,
UUID: rule.UUID,
Content: string(contentBytes),
CreatedBy: username,
UpdatedBy: username,
Type: lst[i].Type,
ComponentID: lst[i].ComponentID,
Cate: lst[i].Cate,
Name: rule.Name,
Tags: rule.AppendTags,
UUID: rule.UUID,
Content: string(contentBytes),
CreatedBy: username,
UpdatedBy: username,
}
if err := bp.Add(rt.Ctx, username); err != nil {
@@ -77,19 +78,25 @@ func (rt *Router) builtinPayloadsAdd(c *gin.Context) {
}
if alertRule.UUID == 0 {
alertRule.UUID = time.Now().UnixNano()
alertRule.UUID = time.Now().UnixMicro()
}
contentBytes, err := json.Marshal(alertRule)
if err != nil {
reterr[alertRule.Name] = err.Error()
continue
}
bp := models.BuiltinPayload{
Type: lst[i].Type,
Component: lst[i].Component,
Cate: lst[i].Cate,
Name: alertRule.Name,
Tags: alertRule.AppendTags,
UUID: alertRule.UUID,
Content: lst[i].Content,
CreatedBy: username,
UpdatedBy: username,
Type: lst[i].Type,
ComponentID: lst[i].ComponentID,
Cate: lst[i].Cate,
Name: alertRule.Name,
Tags: alertRule.AppendTags,
UUID: alertRule.UUID,
Content: string(contentBytes),
CreatedBy: username,
UpdatedBy: username,
}
if err := bp.Add(rt.Ctx, username); err != nil {
@@ -105,7 +112,7 @@ func (rt *Router) builtinPayloadsAdd(c *gin.Context) {
for _, dashboard := range dashboards {
if dashboard.UUID == 0 {
dashboard.UUID = time.Now().UnixNano()
dashboard.UUID = time.Now().UnixMicro()
}
contentBytes, err := json.Marshal(dashboard)
@@ -115,15 +122,15 @@ func (rt *Router) builtinPayloadsAdd(c *gin.Context) {
}
bp := models.BuiltinPayload{
Type: lst[i].Type,
Component: lst[i].Component,
Cate: lst[i].Cate,
Name: dashboard.Name,
Tags: dashboard.Tags,
UUID: dashboard.UUID,
Content: string(contentBytes),
CreatedBy: username,
UpdatedBy: username,
Type: lst[i].Type,
ComponentID: lst[i].ComponentID,
Cate: lst[i].Cate,
Name: dashboard.Name,
Tags: dashboard.Tags,
UUID: dashboard.UUID,
Content: string(contentBytes),
CreatedBy: username,
UpdatedBy: username,
}
if err := bp.Add(rt.Ctx, username); err != nil {
@@ -140,25 +147,39 @@ func (rt *Router) builtinPayloadsAdd(c *gin.Context) {
}
if dashboard.UUID == 0 {
dashboard.UUID = time.Now().UnixNano()
dashboard.UUID = time.Now().UnixMicro()
}
contentBytes, err := json.Marshal(dashboard)
if err != nil {
reterr[dashboard.Name] = err.Error()
continue
}
bp := models.BuiltinPayload{
Type: lst[i].Type,
Component: lst[i].Component,
Cate: lst[i].Cate,
Name: dashboard.Name,
Tags: dashboard.Tags,
UUID: dashboard.UUID,
Content: lst[i].Content,
CreatedBy: username,
UpdatedBy: username,
Type: lst[i].Type,
ComponentID: lst[i].ComponentID,
Cate: lst[i].Cate,
Name: dashboard.Name,
Tags: dashboard.Tags,
UUID: dashboard.UUID,
Content: string(contentBytes),
CreatedBy: username,
UpdatedBy: username,
}
if err := bp.Add(rt.Ctx, username); err != nil {
reterr[bp.Name] = i18n.Sprintf(c.GetHeader("X-Language"), err.Error())
}
} else {
if lst[i].Type == "collect" {
c := make(map[string]interface{})
if _, err := toml.Decode(lst[i].Content, &c); err != nil {
reterr[lst[i].Name] = err.Error()
continue
}
}
if err := lst[i].Add(rt.Ctx, username); err != nil {
reterr[lst[i].Name] = i18n.Sprintf(c.GetHeader("X-Language"), err.Error())
}
@@ -171,19 +192,20 @@ func (rt *Router) builtinPayloadsAdd(c *gin.Context) {
func (rt *Router) builtinPayloadsGets(c *gin.Context) {
typ := ginx.QueryStr(c, "type", "")
component := ginx.QueryStr(c, "component", "")
ComponentID := ginx.QueryInt64(c, "component_id", 0)
cate := ginx.QueryStr(c, "cate", "")
query := ginx.QueryStr(c, "query", "")
lst, err := models.BuiltinPayloadGets(rt.Ctx, typ, component, cate, query)
lst, err := models.BuiltinPayloadGets(rt.Ctx, uint64(ComponentID), typ, cate, query)
ginx.NewRender(c).Data(lst, err)
}
func (rt *Router) builtinPayloadcatesGet(c *gin.Context) {
typ := ginx.QueryStr(c, "type", "")
component := ginx.QueryStr(c, "component", "")
ComponentID := ginx.QueryInt64(c, "component_id", 0)
cates, err := models.BuiltinPayloadCates(rt.Ctx, typ, component)
cates, err := models.BuiltinPayloadCates(rt.Ctx, typ, uint64(ComponentID))
ginx.NewRender(c).Data(cates, err)
}
@@ -229,6 +251,11 @@ func (rt *Router) builtinPayloadsPut(c *gin.Context) {
req.Name = dashboard.Name
req.Tags = dashboard.Tags
} else if req.Type == "collect" {
c := make(map[string]interface{})
if _, err := toml.Decode(req.Content, &c); err != nil {
ginx.Bomb(http.StatusBadRequest, err.Error())
}
}
username := Username(c)
@@ -245,3 +272,15 @@ func (rt *Router) builtinPayloadsDel(c *gin.Context) {
ginx.NewRender(c).Message(models.BuiltinPayloadDels(rt.Ctx, req.Ids))
}
func (rt *Router) builtinPayloadsGetByUUIDOrID(c *gin.Context) {
uuid := ginx.QueryInt64(c, "uuid", 0)
// 优先以 uuid 为准
if uuid != 0 {
ginx.NewRender(c).Data(models.BuiltinPayloadGet(rt.Ctx, "uuid = ?", uuid))
return
}
id := ginx.QueryInt64(c, "id", 0)
ginx.NewRender(c).Data(models.BuiltinPayloadGet(rt.Ctx, "id = ?", id))
}

View File

@@ -140,3 +140,12 @@ func (rt *Router) busiGroupGet(c *gin.Context) {
ginx.Dangerous(bg.FillUserGroups(rt.Ctx))
ginx.NewRender(c).Data(bg, nil)
}
func (rt *Router) busiGroupsGetTags(c *gin.Context) {
bgids := str.IdsInt64(ginx.QueryStr(c, "gids", ""), ",")
targetIdents, err := models.TargetIndentsGetByBgids(rt.Ctx, bgids)
ginx.Dangerous(err)
tags, err := models.TargetGetTags(rt.Ctx, targetIdents, true, "busigroup")
ginx.Dangerous(err)
ginx.NewRender(c).Data(tags, nil)
}

View File

@@ -24,6 +24,11 @@ func (rt *Router) configGet(c *gin.Context) {
ginx.NewRender(c).Data(configs, err)
}
func (rt *Router) configGetAll(c *gin.Context) {
config, err := models.ConfigsGetAll(rt.Ctx)
ginx.NewRender(c).Data(config, err)
}
func (rt *Router) configGetByKey(c *gin.Context) {
config, err := models.ConfigsGet(rt.Ctx, ginx.QueryStr(c, "key"))
ginx.NewRender(c).Data(config, err)

View File

@@ -0,0 +1,99 @@
package router
import (
"fmt"
"net/http"
"time"
"github.com/ccfos/nightingale/v6/models"
"github.com/ccfos/nightingale/v6/pkg/ctx"
"github.com/gin-gonic/gin"
"github.com/toolkits/pkg/ginx"
)
func checkAnnotationPermission(c *gin.Context, ctx *ctx.Context, dashboardId int64) {
dashboard, err := models.BoardGetByID(ctx, dashboardId)
if err != nil {
ginx.Bomb(http.StatusInternalServerError, "failed to get dashboard: %v", err)
}
if dashboard == nil {
ginx.Bomb(http.StatusNotFound, "dashboard not found")
}
bg := BusiGroup(ctx, dashboard.GroupId)
me := c.MustGet("user").(*models.User)
can, err := me.CanDoBusiGroup(ctx, bg, "rw")
ginx.Dangerous(err)
if !can {
ginx.Bomb(http.StatusForbidden, "forbidden")
}
}
func (rt *Router) dashAnnotationAdd(c *gin.Context) {
var f models.DashAnnotation
ginx.BindJSON(c, &f)
username := c.MustGet("username").(string)
now := time.Now().Unix()
checkAnnotationPermission(c, rt.Ctx, f.DashboardId)
f.CreateBy = username
f.CreateAt = now
f.UpdateBy = username
f.UpdateAt = now
ginx.NewRender(c).Data(f.Id, f.Add(rt.Ctx))
}
func (rt *Router) dashAnnotationGets(c *gin.Context) {
dashboardId := ginx.QueryInt64(c, "dashboard_id")
from := ginx.QueryInt64(c, "from")
to := ginx.QueryInt64(c, "to")
limit := ginx.QueryInt(c, "limit", 100)
lst, err := models.DashAnnotationGets(rt.Ctx, dashboardId, from, to, limit)
ginx.NewRender(c).Data(lst, err)
}
func (rt *Router) dashAnnotationPut(c *gin.Context) {
var f models.DashAnnotation
ginx.BindJSON(c, &f)
id := ginx.UrlParamInt64(c, "id")
annotation, err := getAnnotationById(rt.Ctx, id)
ginx.Dangerous(err)
checkAnnotationPermission(c, rt.Ctx, annotation.DashboardId)
f.Id = id
f.UpdateAt = time.Now().Unix()
f.UpdateBy = c.MustGet("username").(string)
ginx.NewRender(c).Message(f.Update(rt.Ctx))
}
func (rt *Router) dashAnnotationDel(c *gin.Context) {
id := ginx.UrlParamInt64(c, "id")
annotation, err := getAnnotationById(rt.Ctx, id)
ginx.Dangerous(err)
checkAnnotationPermission(c, rt.Ctx, annotation.DashboardId)
ginx.NewRender(c).Message(models.DashAnnotationDel(rt.Ctx, id))
}
// 可以提取获取注释的通用方法
func getAnnotationById(ctx *ctx.Context, id int64) (*models.DashAnnotation, error) {
annotation, err := models.DashAnnotationGet(ctx, "id=?", id)
if err != nil {
return nil, err
}
if annotation == nil {
return nil, fmt.Errorf("annotation not found")
}
return annotation, nil
}

View File

@@ -92,10 +92,14 @@ func (rt *Router) datasourceUpsert(c *gin.Context) {
var err error
var count int64
err = DatasourceCheck(req)
if err != nil {
Dangerous(c, err)
return
if !req.ForceSave {
if req.PluginType == models.PROMETHEUS || req.PluginType == models.LOKI || req.PluginType == models.TDENGINE {
err = DatasourceCheck(req)
if err != nil {
Dangerous(c, err)
return
}
}
}
if req.Id == 0 {
@@ -120,12 +124,14 @@ func (rt *Router) datasourceUpsert(c *gin.Context) {
}
func DatasourceCheck(ds models.Datasource) error {
if ds.HTTPJson.Url == "" {
return fmt.Errorf("url is empty")
}
if ds.PluginType == models.PROMETHEUS || ds.PluginType == models.LOKI || ds.PluginType == models.TDENGINE {
if ds.HTTPJson.Url == "" {
return fmt.Errorf("url is empty")
}
if !strings.HasPrefix(ds.HTTPJson.Url, "http") {
return fmt.Errorf("url must start with http or https")
if !strings.HasPrefix(ds.HTTPJson.Url, "http") {
return fmt.Errorf("url must start with http or https")
}
}
client := &http.Client{
@@ -136,11 +142,11 @@ func DatasourceCheck(ds models.Datasource) error {
},
}
fullURL := ds.HTTPJson.Url
req, err := http.NewRequest("GET", fullURL, nil)
var fullURL string
req, err := ds.HTTPJson.NewReq(&fullURL)
if err != nil {
logger.Errorf("Error creating request: %v", err)
return fmt.Errorf("request url:%s failed", fullURL)
return fmt.Errorf("request urls:%v failed", ds.HTTPJson.GetUrls())
}
if ds.PluginType == models.PROMETHEUS {
@@ -247,3 +253,37 @@ func (rt *Router) getDatasourceIds(c *gin.Context) {
ginx.NewRender(c).Data(datasourceIds, err)
}
type datasourceQueryForm struct {
Cate string `json:"datasource_cate"`
DatasourceQueries []models.DatasourceQuery `json:"datasource_queries"`
}
type datasourceQueryResp struct {
ID int64 `json:"id"`
Name string `json:"name"`
}
func (rt *Router) datasourceQuery(c *gin.Context) {
var dsf datasourceQueryForm
ginx.BindJSON(c, &dsf)
datasources, err := models.GetDatasourcesGetsByTypes(rt.Ctx, []string{dsf.Cate})
ginx.Dangerous(err)
nameToID := make(map[string]int64)
IDToName := make(map[int64]string)
for _, ds := range datasources {
nameToID[ds.Name] = ds.Id
IDToName[ds.Id] = ds.Name
}
ids := models.GetDatasourceIDsByDatasourceQueries(dsf.DatasourceQueries, IDToName, nameToID)
var req []datasourceQueryResp
for _, id := range ids {
req = append(req, datasourceQueryResp{
ID: id,
Name: IDToName[id],
})
}
ginx.NewRender(c).Data(req, err)
}

View File

@@ -0,0 +1,101 @@
package router
import (
"context"
"github.com/ccfos/nightingale/v6/dscache"
"github.com/ccfos/nightingale/v6/dskit/types"
"github.com/ccfos/nightingale/v6/models"
"github.com/gin-gonic/gin"
"github.com/toolkits/pkg/ginx"
"github.com/toolkits/pkg/logger"
)
func (rt *Router) ShowDatabases(c *gin.Context) {
var f models.QueryParam
ginx.BindJSON(c, &f)
plug, exists := dscache.DsCache.Get(f.Cate, f.DatasourceId)
if !exists {
logger.Warningf("cluster:%d not exists", f.DatasourceId)
ginx.Bomb(200, "cluster not exists")
}
var databases []string
var err error
type DatabaseShower interface {
ShowDatabases(context.Context) ([]string, error)
}
switch plug.(type) {
case DatabaseShower:
databases, err = plug.(DatabaseShower).ShowDatabases(c.Request.Context())
ginx.Dangerous(err)
default:
ginx.Bomb(200, "datasource not exists")
}
if len(databases) == 0 {
databases = make([]string, 0)
}
ginx.NewRender(c).Data(databases, nil)
}
func (rt *Router) ShowTables(c *gin.Context) {
var f models.QueryParam
ginx.BindJSON(c, &f)
plug, exists := dscache.DsCache.Get(f.Cate, f.DatasourceId)
if !exists {
logger.Warningf("cluster:%d not exists", f.DatasourceId)
ginx.Bomb(200, "cluster not exists")
}
// 只接受一个入参
tables := make([]string, 0)
var err error
type TableShower interface {
ShowTables(ctx context.Context, database string) ([]string, error)
}
switch plug.(type) {
case TableShower:
if len(f.Querys) > 0 {
database, ok := f.Querys[0].(string)
if ok {
tables, err = plug.(TableShower).ShowTables(c.Request.Context(), database)
}
}
default:
ginx.Bomb(200, "datasource not exists")
}
ginx.NewRender(c).Data(tables, err)
}
func (rt *Router) DescribeTable(c *gin.Context) {
var f models.QueryParam
ginx.BindJSON(c, &f)
plug, exists := dscache.DsCache.Get(f.Cate, f.DatasourceId)
if !exists {
logger.Warningf("cluster:%d not exists", f.DatasourceId)
ginx.Bomb(200, "cluster not exists")
}
// 只接受一个入参
columns := make([]*types.ColumnProperty, 0)
var err error
type TableDescriber interface {
DescribeTable(context.Context, interface{}) ([]*types.ColumnProperty, error)
}
switch plug.(type) {
case TableDescriber:
client := plug.(TableDescriber)
if len(f.Querys) > 0 {
columns, err = client.DescribeTable(c.Request.Context(), f.Querys[0])
}
default:
ginx.Bomb(200, "datasource not exists")
}
ginx.NewRender(c).Data(columns, err)
}

View File

@@ -0,0 +1,77 @@
package router
import (
"github.com/ccfos/nightingale/v6/datasource/es"
"github.com/ccfos/nightingale/v6/dscache"
"github.com/gin-gonic/gin"
"github.com/toolkits/pkg/ginx"
"github.com/toolkits/pkg/logger"
)
type IndexReq struct {
Cate string `json:"cate"`
DatasourceId int64 `json:"datasource_id"`
Index string `json:"index"`
}
type FieldValueReq struct {
Cate string `json:"cate"`
DatasourceId int64 `json:"datasource_id"`
Index string `json:"index"`
Query FieldObj `json:"query"`
}
type FieldObj struct {
Find string `json:"find"`
Field string `json:"field"`
Query string `json:"query"`
}
func (rt *Router) QueryIndices(c *gin.Context) {
var f IndexReq
ginx.BindJSON(c, &f)
plug, exists := dscache.DsCache.Get(f.Cate, f.DatasourceId)
if !exists {
logger.Warningf("cluster:%d not exists", f.DatasourceId)
ginx.Bomb(200, "cluster not exists")
}
indices, err := plug.(*es.Elasticsearch).QueryIndices()
ginx.Dangerous(err)
ginx.NewRender(c).Data(indices, nil)
}
func (rt *Router) QueryFields(c *gin.Context) {
var f IndexReq
ginx.BindJSON(c, &f)
plug, exists := dscache.DsCache.Get(f.Cate, f.DatasourceId)
if !exists {
logger.Warningf("cluster:%d not exists", f.DatasourceId)
ginx.Bomb(200, "cluster not exists")
}
fields, err := plug.(*es.Elasticsearch).QueryFields([]string{f.Index})
ginx.Dangerous(err)
ginx.NewRender(c).Data(fields, nil)
}
func (rt *Router) QueryESVariable(c *gin.Context) {
var f FieldValueReq
ginx.BindJSON(c, &f)
plug, exists := dscache.DsCache.Get(f.Cate, f.DatasourceId)
if !exists {
logger.Warningf("cluster:%d not exists", f.DatasourceId)
ginx.Bomb(200, "cluster not exists")
}
fields, err := plug.(*es.Elasticsearch).QueryFieldValue([]string{f.Index}, f.Query.Field, f.Query.Query)
ginx.Dangerous(err)
ginx.NewRender(c).Data(fields, nil)
}

View File

@@ -45,6 +45,10 @@ func (rt *Router) statistic(c *gin.Context) {
statistics, err = models.ConfigsUserVariableStatistics(rt.Ctx)
ginx.NewRender(c).Data(statistics, err)
return
case "cval":
statistics, err = models.ConfigCvalStatistics(rt.Ctx)
ginx.NewRender(c).Data(statistics, err)
return
default:
ginx.Bomb(http.StatusBadRequest, "invalid name")
}
@@ -65,6 +69,23 @@ func queryDatasourceIds(c *gin.Context) []int64 {
return ids
}
func queryStrListField(c *gin.Context, fieldName string, sep ...string) []string {
str := ginx.QueryStr(c, fieldName, "")
if str == "" {
return nil
}
lst := []string{str}
for _, s := range sep {
var newLst []string
for _, str := range lst {
newLst = append(newLst, strings.Split(str, s)...)
}
lst = newLst
}
return lst
}
type idsForm struct {
Ids []int64 `json:"ids"`
IsSyncToFlashDuty bool `json:"is_sync_to_flashduty"`

View File

@@ -3,8 +3,10 @@ package router
import (
"compress/gzip"
"encoding/json"
"fmt"
"errors"
"io/ioutil"
"sort"
"strconv"
"strings"
"time"
@@ -57,7 +59,7 @@ func HandleHeartbeat(c *gin.Context, ctx *ctx.Context, engineName string, metaSe
}
if req.Hostname == "" {
return req, fmt.Errorf("hostname is required", 400)
return req, errors.New("hostname is required")
}
// maybe from pushgw
@@ -79,55 +81,122 @@ func HandleHeartbeat(c *gin.Context, ctx *ctx.Context, engineName string, metaSe
identSet.MSet(items)
if target, has := targetCache.Get(req.Hostname); has && target != nil {
gid := ginx.QueryInt64(c, "gid", 0)
gidsStr := ginx.QueryStr(c, "gid", "")
overwriteGids := ginx.QueryBool(c, "overwrite_gids", false)
hostIp := strings.TrimSpace(req.HostIp)
gids := strings.Split(gidsStr, ",")
field := make(map[string]interface{})
if gid != 0 && gid != target.GroupId {
field["group_id"] = gid
if overwriteGids {
groupIds := make([]int64, 0)
for i := range gids {
if gids[i] == "" {
continue
}
groupId, err := strconv.ParseInt(gids[i], 10, 64)
if err != nil {
logger.Warningf("update target:%s group ids failed, err: %v", req.Hostname, err)
continue
}
groupIds = append(groupIds, groupId)
}
err := models.TargetOverrideBgids(ctx, []string{target.Ident}, groupIds, nil)
if err != nil {
logger.Warningf("update target:%s group ids failed, err: %v", target.Ident, err)
}
} else if gidsStr != "" {
for i := range gids {
groupId, err := strconv.ParseInt(gids[i], 10, 64)
if err != nil {
logger.Warningf("update target:%s group ids failed, err: %v", req.Hostname, err)
continue
}
if !target.MatchGroupId(groupId) {
err := models.TargetBindBgids(ctx, []string{target.Ident}, []int64{groupId}, nil)
if err != nil {
logger.Warningf("update target:%s group ids failed, err: %v", target.Ident, err)
}
}
}
}
newTarget := models.Target{}
targetNeedUpdate := false
if hostIp != "" && hostIp != target.HostIp {
field["host_ip"] = hostIp
newTarget.HostIp = hostIp
targetNeedUpdate = true
}
tagsMap := target.GetTagsMap()
tagNeedUpdate := false
for k, v := range req.GlobalLabels {
hostTagsMap := target.GetHostTagsMap()
hostTagNeedUpdate := false
if len(hostTagsMap) != len(req.GlobalLabels) {
hostTagNeedUpdate = true
} else {
for k, v := range req.GlobalLabels {
if v == "" {
continue
}
if tagv, ok := hostTagsMap[k]; !ok || tagv != v {
hostTagNeedUpdate = true
break
}
}
}
if hostTagNeedUpdate {
lst := []string{}
for k, v := range req.GlobalLabels {
lst = append(lst, k+"="+v)
}
sort.Strings(lst)
newTarget.HostTags = lst
targetNeedUpdate = true
}
userTagsMap := target.GetTagsMap()
userTagNeedUpdate := false
userTags := []string{}
for k, v := range userTagsMap {
if v == "" {
continue
}
if tagv, ok := tagsMap[k]; !ok || tagv != v {
tagNeedUpdate = true
tagsMap[k] = v
if _, ok := req.GlobalLabels[k]; !ok {
userTags = append(userTags, k+"="+v)
} else { // 该key在hostTags中已经存在
userTagNeedUpdate = true
}
}
if tagNeedUpdate {
lst := []string{}
for k, v := range tagsMap {
lst = append(lst, k+"="+v)
}
labels := strings.Join(lst, " ") + " "
field["tags"] = labels
if userTagNeedUpdate {
newTarget.Tags = strings.Join(userTags, " ") + " "
targetNeedUpdate = true
}
if req.EngineName != "" && req.EngineName != target.EngineName {
field["engine_name"] = req.EngineName
newTarget.EngineName = req.EngineName
targetNeedUpdate = true
}
if req.AgentVersion != "" && req.AgentVersion != target.AgentVersion {
field["agent_version"] = req.AgentVersion
newTarget.AgentVersion = req.AgentVersion
targetNeedUpdate = true
}
if len(field) > 0 {
err := target.UpdateFieldsMap(ctx, field)
if req.OS != "" && req.OS != target.OS {
newTarget.OS = req.OS
targetNeedUpdate = true
}
if targetNeedUpdate {
err := models.DB(ctx).Model(&target).Updates(newTarget).Error
if err != nil {
logger.Errorf("update target fields failed, err: %v", err)
}
}
logger.Debugf("heartbeat field:%+v target: %v", field, *target)
logger.Debugf("heartbeat field:%+v target: %v", newTarget, *target)
}
return req, nil

View File

@@ -50,7 +50,8 @@ func (rt *Router) alertMuteGets(c *gin.Context) {
prods := strings.Fields(ginx.QueryStr(c, "prods", ""))
bgid := ginx.QueryInt64(c, "bgid", -1)
query := ginx.QueryStr(c, "query", "")
lst, err := models.AlertMuteGets(rt.Ctx, prods, bgid, query)
disabled := ginx.QueryInt(c, "disabled", -1)
lst, err := models.AlertMuteGets(rt.Ctx, prods, bgid, disabled, query)
ginx.NewRender(c).Data(lst, err)
}

View File

@@ -130,9 +130,9 @@ func (rt *Router) User() gin.HandlerFunc {
func (rt *Router) user() gin.HandlerFunc {
return func(c *gin.Context) {
userid := c.MustGet("userid").(int64)
username := c.MustGet("username").(string)
user, err := models.UserGetById(rt.Ctx, userid)
user, err := models.UserGetByUsername(rt.Ctx, username)
if err != nil {
ginx.Bomb(http.StatusUnauthorized, "unauthorized")
}

View File

@@ -0,0 +1,212 @@
package router
import (
"strings"
"github.com/ccfos/nightingale/v6/models"
"github.com/ccfos/nightingale/v6/pkg/ctx"
"github.com/gin-gonic/gin"
"github.com/toolkits/pkg/ginx"
"github.com/toolkits/pkg/logger"
)
type NotificationResponse struct {
SubRules []SubRule `json:"sub_rules"`
Notifies map[string][]Record `json:"notifies"`
}
type SubRule struct {
SubID int64 `json:"sub_id"`
Notifies map[string][]Record `json:"notifies"`
}
type Notify struct {
Channel string `json:"channel"`
Records []Record `json:"records"`
}
type Record struct {
Target string `json:"target"`
Username string `json:"username"`
Status int `json:"status"`
Detail string `json:"detail"`
}
// notificationRecordAdd
func (rt *Router) notificationRecordAdd(c *gin.Context) {
var req []*models.NotificaitonRecord
ginx.BindJSON(c, &req)
err := models.DB(rt.Ctx).CreateInBatches(req, 100).Error
var ids []int64
if err == nil {
ids = make([]int64, len(req))
for i, noti := range req {
ids[i] = noti.Id
}
}
ginx.NewRender(c).Data(ids, err)
}
func (rt *Router) notificationRecordList(c *gin.Context) {
eid := ginx.UrlParamInt64(c, "eid")
lst, err := models.NotificaitonRecordsGetByEventId(rt.Ctx, eid)
ginx.Dangerous(err)
response := buildNotificationResponse(rt.Ctx, lst)
ginx.NewRender(c).Data(response, nil)
}
func buildNotificationResponse(ctx *ctx.Context, nl []*models.NotificaitonRecord) NotificationResponse {
response := NotificationResponse{
SubRules: []SubRule{},
Notifies: make(map[string][]Record),
}
subRuleMap := make(map[int64]*SubRule)
// Collect all group IDs
groupIdSet := make(map[int64]struct{})
// map[SubId]map[Channel]map[Target]index
filter := make(map[int64]map[string]map[string]int)
for i, n := range nl {
// 对相同的 channel-target 进行合并
for _, gid := range n.GetGroupIds(ctx) {
groupIdSet[gid] = struct{}{}
}
if _, exists := filter[n.SubId]; !exists {
filter[n.SubId] = make(map[string]map[string]int)
}
if _, exists := filter[n.SubId][n.Channel]; !exists {
filter[n.SubId][n.Channel] = make(map[string]int)
}
idx, exists := filter[n.SubId][n.Channel][n.Target]
if !exists {
filter[n.SubId][n.Channel][n.Target] = i
} else {
if nl[idx].Status < n.Status {
nl[idx].Status = n.Status
}
nl[idx].Details = nl[idx].Details + ", " + n.Details
nl[i] = nil
}
}
// Fill usernames only once
usernameByTarget := fillUserNames(ctx, groupIdSet)
for _, n := range nl {
if n == nil {
continue
}
m := usernameByTarget[n.Target]
usernames := make([]string, 0, len(m))
for k := range m {
usernames = append(usernames, k)
}
if !checkChannel(n.Channel) {
// Hide sensitive information
n.Target = replaceLastEightChars(n.Target)
}
record := Record{
Target: n.Target,
Status: n.Status,
Detail: n.Details,
}
record.Username = strings.Join(usernames, ",")
if n.SubId > 0 {
// Handle SubRules
subRule, ok := subRuleMap[n.SubId]
if !ok {
newSubRule := &SubRule{
SubID: n.SubId,
}
newSubRule.Notifies = make(map[string][]Record)
newSubRule.Notifies[n.Channel] = []Record{record}
subRuleMap[n.SubId] = newSubRule
} else {
if _, exists := subRule.Notifies[n.Channel]; !exists {
subRule.Notifies[n.Channel] = []Record{record}
} else {
subRule.Notifies[n.Channel] = append(subRule.Notifies[n.Channel], record)
}
}
continue
}
if response.Notifies == nil {
response.Notifies = make(map[string][]Record)
}
if _, exists := response.Notifies[n.Channel]; !exists {
response.Notifies[n.Channel] = []Record{record}
} else {
response.Notifies[n.Channel] = append(response.Notifies[n.Channel], record)
}
}
for _, subRule := range subRuleMap {
response.SubRules = append(response.SubRules, *subRule)
}
return response
}
// check channel is one of the following: tx-sms, tx-voice, ali-sms, ali-voice, email, script
func checkChannel(channel string) bool {
switch channel {
case "tx-sms", "tx-voice", "ali-sms", "ali-voice", "email", "script":
return true
}
return false
}
func replaceLastEightChars(s string) string {
if len(s) <= 8 {
return strings.Repeat("*", len(s))
}
return s[:len(s)-8] + strings.Repeat("*", 8)
}
func fillUserNames(ctx *ctx.Context, groupIdSet map[int64]struct{}) map[string]map[string]struct{} {
userNameByTarget := make(map[string]map[string]struct{})
gids := make([]int64, 0, len(groupIdSet))
for gid := range groupIdSet {
gids = append(gids, gid)
}
users, err := models.UsersGetByGroupIds(ctx, gids)
if err != nil {
logger.Errorf("UsersGetByGroupIds failed, err: %v", err)
return userNameByTarget
}
for _, user := range users {
logger.Warningf("user: %s", user.Username)
for _, ch := range models.DefaultChannels {
target, exist := user.ExtractToken(ch)
if exist {
if _, ok := userNameByTarget[target]; !ok {
userNameByTarget[target] = make(map[string]struct{})
}
userNameByTarget[target][user.Username] = struct{}{}
}
}
}
return userNameByTarget
}

View File

@@ -182,7 +182,7 @@ func (rt *Router) notifyConfigPut(c *gin.Context) {
smtp, errSmtp := SmtpValidate(text)
ginx.Dangerous(errSmtp)
go sender.RestartEmailSender(smtp)
go sender.RestartEmailSender(rt.Ctx, smtp)
}
ginx.NewRender(c).Message(nil)

View File

@@ -68,7 +68,7 @@ func (rt *Router) notifyTplUpdate(c *gin.Context) {
}
// get the count of the same channel and name but different id
count, err := models.Count(models.DB(rt.Ctx).Model(&models.NotifyTpl{}).Where("channel = ? or name = ? and id <> ?", f.Channel, f.Name, f.Id))
count, err := models.Count(models.DB(rt.Ctx).Model(&models.NotifyTpl{}).Where("(channel = ? or name = ?) and id <> ?", f.Channel, f.Name, f.Id))
ginx.Dangerous(err)
if count != 0 {
ginx.Bomb(200, "Refuse to create duplicate channel or name")
@@ -138,7 +138,7 @@ func (rt *Router) notifyTplPreview(c *gin.Context) {
continue
}
arr := strings.Split(pair, "=")
arr := strings.SplitN(pair, "=", 2)
if len(arr) != 2 {
continue
}
@@ -161,7 +161,11 @@ func (rt *Router) notifyTplPreview(c *gin.Context) {
func (rt *Router) notifyTplAdd(c *gin.Context) {
var f models.NotifyTpl
ginx.BindJSON(c, &f)
f.Channel = strings.TrimSpace(f.Channel)
user := c.MustGet("user").(*models.User)
f.CreateBy = user.Username
f.Channel = strings.TrimSpace(f.Channel)
ginx.Dangerous(templateValidate(f))
count, err := models.Count(models.DB(rt.Ctx).Model(&models.NotifyTpl{}).Where("channel = ? or name = ?", f.Channel, f.Name))
@@ -169,6 +173,8 @@ func (rt *Router) notifyTplAdd(c *gin.Context) {
if count != 0 {
ginx.Bomb(200, "Refuse to create duplicate channel(unique)")
}
f.CreateAt = time.Now().Unix()
ginx.NewRender(c).Message(f.Create(rt.Ctx))
}

View File

@@ -7,7 +7,6 @@ import (
"net"
"net/http"
"net/http/httputil"
"net/url"
"strings"
"sync"
"time"
@@ -112,9 +111,9 @@ func (rt *Router) dsProxy(c *gin.Context) {
return
}
target, err := url.Parse(ds.HTTPJson.Url)
target, err := ds.HTTPJson.ParseUrl()
if err != nil {
c.String(http.StatusInternalServerError, "invalid url: %s", ds.HTTPJson.Url)
c.String(http.StatusInternalServerError, "invalid urls: %s", ds.HTTPJson.GetUrls())
return
}

View File

@@ -0,0 +1,181 @@
package router
import (
"fmt"
"sort"
"github.com/ccfos/nightingale/v6/dscache"
"github.com/ccfos/nightingale/v6/models"
"github.com/gin-gonic/gin"
"github.com/toolkits/pkg/ginx"
"github.com/toolkits/pkg/logger"
)
func CheckDsPerm(c *gin.Context, dsId int64, cate string, q interface{}) bool {
// todo: 后续需要根据 cate 判断是否需要权限
return true
}
type QueryFrom struct {
Queries []Query `json:"queries"`
Exps []Exp `json:"exps"`
}
type Query struct {
Ref string `json:"ref"`
Did int64 `json:"ds_id"`
DsCate string `json:"ds_cate"`
Query interface{} `json:"query"`
}
type Exp struct {
Exp string `json:"exp"`
Ref string `json:"ref"`
}
type LogResp struct {
Total int64 `json:"total"`
List []interface{} `json:"list"`
}
func (rt *Router) QueryLogBatch(c *gin.Context) {
var f QueryFrom
ginx.BindJSON(c, &f)
var resp LogResp
var errMsg string
for _, q := range f.Queries {
if !rt.Center.AnonymousAccess.PromQuerier && !CheckDsPerm(c, q.Did, q.DsCate, q) {
ginx.Bomb(200, "no permission")
}
plug, exists := dscache.DsCache.Get(q.DsCate, q.Did)
if !exists {
logger.Warningf("cluster:%d not exists query:%+v", q.Did, q)
ginx.Bomb(200, "cluster not exists")
}
data, total, err := plug.QueryLog(c.Request.Context(), q.Query)
if err != nil {
errMsg += fmt.Sprintf("query data error: %v query:%v\n ", err, q)
logger.Warningf("query data error: %v query:%v", err, q)
continue
}
m := make(map[string]interface{})
m["ref"] = q.Ref
m["ds_id"] = q.Did
m["ds_cate"] = q.DsCate
m["data"] = data
resp.List = append(resp.List, m)
resp.Total += total
}
if errMsg != "" || len(resp.List) == 0 {
ginx.Bomb(200, errMsg)
}
ginx.NewRender(c).Data(resp, nil)
}
func (rt *Router) QueryData(c *gin.Context) {
var f models.QueryParam
ginx.BindJSON(c, &f)
var resp []models.DataResp
var err error
for _, q := range f.Querys {
if !rt.Center.AnonymousAccess.PromQuerier && !CheckDsPerm(c, f.DatasourceId, f.Cate, q) {
ginx.Bomb(403, "no permission")
}
plug, exists := dscache.DsCache.Get(f.Cate, f.DatasourceId)
if !exists {
logger.Warningf("cluster:%d not exists", f.DatasourceId)
ginx.Bomb(200, "cluster not exists")
}
var datas []models.DataResp
datas, err = plug.QueryData(c.Request.Context(), q)
if err != nil {
logger.Warningf("query data error: req:%+v err:%v", q, err)
ginx.Bomb(200, "err:%v", err)
}
logger.Debugf("query data: req:%+v resp:%+v", q, datas)
resp = append(resp, datas...)
}
// 面向API的统一处理
// 按照 .Metric 排序
// 确保仪表盘中相同图例的曲线颜色相同
if len(resp) > 1 {
sort.Slice(resp, func(i, j int) bool {
if resp[i].Metric != nil && resp[j].Metric != nil {
return resp[i].Metric.String() < resp[j].Metric.String()
}
return false
})
}
ginx.NewRender(c).Data(resp, err)
}
func (rt *Router) QueryLogV2(c *gin.Context) {
var f models.QueryParam
ginx.BindJSON(c, &f)
var resp LogResp
var errMsg string
for _, q := range f.Querys {
if !rt.Center.AnonymousAccess.PromQuerier && !CheckDsPerm(c, f.DatasourceId, f.Cate, q) {
ginx.Bomb(200, "no permission")
}
plug, exists := dscache.DsCache.Get(f.Cate, f.DatasourceId)
if !exists {
logger.Warningf("cluster:%d not exists query:%+v", f.DatasourceId, f)
ginx.Bomb(200, "cluster not exists")
}
data, total, err := plug.QueryLog(c.Request.Context(), q)
if err != nil {
errMsg += fmt.Sprintf("query data error: %v query:%v\n ", err, q)
logger.Warningf("query data error: %v query:%v", err, q)
continue
}
resp.List = append(resp.List, data...)
resp.Total += total
}
if errMsg != "" || len(resp.List) == 0 {
ginx.Bomb(200, errMsg)
}
ginx.NewRender(c).Data(resp, nil)
}
func (rt *Router) QueryLog(c *gin.Context) {
var f models.QueryParam
ginx.BindJSON(c, &f)
var resp []interface{}
for _, q := range f.Querys {
if !rt.Center.AnonymousAccess.PromQuerier && !CheckDsPerm(c, f.DatasourceId, f.Cate, q) {
ginx.Bomb(200, "no permission")
}
plug, exists := dscache.DsCache.Get("elasticsearch", f.DatasourceId)
if !exists {
logger.Warningf("cluster:%d not exists", f.DatasourceId)
ginx.Bomb(200, "cluster not exists")
}
data, _, err := plug.QueryLog(c.Request.Context(), q)
if err != nil {
logger.Warningf("query data error: %v", err)
ginx.Bomb(200, "err:%v", err)
continue
}
resp = append(resp, data...)
}
ginx.NewRender(c).Data(resp, nil)
}

View File

@@ -3,8 +3,6 @@ package router
import (
"encoding/json"
"net/http"
"strconv"
"strings"
"time"
"github.com/ccfos/nightingale/v6/models"
@@ -74,6 +72,14 @@ func (rt *Router) recordingRuleAddByFE(c *gin.Context) {
ginx.Bomb(http.StatusBadRequest, "input json is empty")
}
for i := range lst {
if len(lst[i].DatasourceQueries) == 0 {
lst[i].DatasourceQueries = []models.DatasourceQuery{
models.DataSourceQueryAll,
}
}
}
bgid := ginx.UrlParamInt64(c, "id")
reterr := make(map[string]string)
for i := 0; i < count; i++ {
@@ -137,23 +143,10 @@ func (rt *Router) recordingRulePutFields(c *gin.Context) {
f.Fields["update_by"] = c.MustGet("username").(string)
f.Fields["update_at"] = time.Now().Unix()
if _, ok := f.Fields["datasource_ids"]; ok {
// datasource_ids = "1 2 3"
idsStr := strings.Fields(f.Fields["datasource_ids"].(string))
ids := make([]int64, 0)
for _, idStr := range idsStr {
id, err := strconv.ParseInt(idStr, 10, 64)
if err != nil {
ginx.Bomb(http.StatusBadRequest, "datasource_ids error")
}
ids = append(ids, id)
}
bs, err := json.Marshal(ids)
if err != nil {
ginx.Bomb(http.StatusBadRequest, "datasource_ids error")
}
f.Fields["datasource_ids"] = string(bs)
if datasourceQueries, ok := f.Fields["datasource_queries"]; ok {
bytes, err := json.Marshal(datasourceQueries)
ginx.Dangerous(err)
f.Fields["datasource_queries"] = string(bytes)
}
for i := 0; i < len(f.Ids); i++ {

View File

@@ -9,6 +9,7 @@ import (
"time"
"github.com/ccfos/nightingale/v6/models"
"github.com/ccfos/nightingale/v6/pkg/ctx"
"github.com/ccfos/nightingale/v6/storage"
"github.com/gin-gonic/gin"
@@ -52,6 +53,8 @@ func (rt *Router) targetGets(c *gin.Context) {
order := ginx.QueryStr(c, "order", "ident")
desc := ginx.QueryBool(c, "desc", false)
hosts := queryStrListField(c, "hosts", ",", " ", "\n")
var err error
if len(bgids) == 0 {
user := c.MustGet("user").(*models.User)
@@ -65,11 +68,13 @@ func (rt *Router) targetGets(c *gin.Context) {
bgids = append(bgids, 0)
}
}
options := []models.BuildTargetWhereOption{
models.BuildTargetWhereWithBgids(bgids),
models.BuildTargetWhereWithDsIds(dsIds),
models.BuildTargetWhereWithQuery(query),
models.BuildTargetWhereWithDowntime(downtime),
models.BuildTargetWhereWithHosts(hosts),
}
total, err := models.TargetTotal(rt.Ctx, options...)
ginx.Dangerous(err)
@@ -78,6 +83,13 @@ func (rt *Router) targetGets(c *gin.Context) {
ginx.Offset(c, limit), order, desc, options...)
ginx.Dangerous(err)
tgs, err := models.TargetBusiGroupsGetAll(rt.Ctx)
ginx.Dangerous(err)
for _, t := range list {
t.GroupIds = tgs[t.Ident]
}
if err == nil {
now := time.Now()
cache := make(map[int64]*models.BusiGroup)
@@ -157,7 +169,8 @@ func (rt *Router) targetGetsByService(c *gin.Context) {
func (rt *Router) targetGetTags(c *gin.Context) {
idents := ginx.QueryStr(c, "idents", "")
idents = strings.ReplaceAll(idents, ",", " ")
lst, err := models.TargetGetTags(rt.Ctx, strings.Fields(idents))
ignoreHostTag := ginx.QueryBool(c, "ignore_host_tag", false)
lst, err := models.TargetGetTags(rt.Ctx, strings.Fields(idents), ignoreHostTag, "")
ginx.NewRender(c).Data(lst, err)
}
@@ -262,7 +275,7 @@ func (rt *Router) validateTags(tags []string) error {
func (rt *Router) addTagsToTarget(target *models.Target, tags []string) error {
for _, tag := range tags {
tagKey := strings.Split(tag, "=")[0]
if strings.Contains(target.Tags, tagKey+"=") {
if _, exist := target.TagsMap[tagKey]; exist {
return fmt.Errorf("duplicate tagkey(%s)", tagKey)
}
}
@@ -379,8 +392,31 @@ type targetBgidForm struct {
Bgid int64 `json:"bgid"`
}
func (rt *Router) targetUpdateBgid(c *gin.Context) {
var f targetBgidForm
type targetBgidsForm struct {
Idents []string `json:"idents" binding:"required_without=HostIps"`
HostIps []string `json:"host_ips" binding:"required_without=Idents"`
Bgids []int64 `json:"bgids"`
Tags []string `json:"tags"`
Action string `json:"action"` // add del reset
}
func haveNeverGroupedIdent(ctx *ctx.Context, idents []string) (bool, error) {
for _, ident := range idents {
bgids, err := models.TargetGroupIdsGetByIdent(ctx, ident)
if err != nil {
return false, err
}
if len(bgids) <= 0 {
return true, nil
}
}
return false, nil
}
func (rt *Router) targetBindBgids(c *gin.Context) {
var f targetBgidsForm
var err error
var failedResults = make(map[string]string)
ginx.BindJSON(c, &f)
@@ -396,35 +432,24 @@ func (rt *Router) targetUpdateBgid(c *gin.Context) {
}
user := c.MustGet("user").(*models.User)
if user.IsAdmin() {
ginx.NewRender(c).Data(failedResults, models.TargetUpdateBgid(rt.Ctx, f.Idents, f.Bgid, false))
return
}
if f.Bgid > 0 {
// 把要操作的机器分成两部分一部分是bgid为0需要管理员分配另一部分bgid>0说明是业务组内部想调整
// 比如原来分配给didiyun的机器didiyun的管理员想把部分机器调整到didiyun-ceph下
// 对于调整的这种情况当前登录用户要对这批机器有操作权限同时还要对目标BG有操作权限
orphans, err := models.IdentsFilter(rt.Ctx, f.Idents, "group_id = ?", 0)
if !user.IsAdmin() {
// 普通用户,检查用户是否有权限操作所有请求的业务组
existing, _, err := models.SeparateTargetIdents(rt.Ctx, f.Idents)
ginx.Dangerous(err)
rt.checkTargetPerm(c, existing)
// 机器里边存在未归组的登录用户就需要是admin
if len(orphans) > 0 && !user.IsAdmin() {
can, err := user.CheckPerm(rt.Ctx, "/targets/bind")
var groupIds []int64
if f.Action == "reset" {
// 如果是复写,则需要检查用户是否有权限操作机器之前的业务组
bgids, err := models.TargetGroupIdsGetByIdents(rt.Ctx, f.Idents)
ginx.Dangerous(err)
if !can {
ginx.Bomb(http.StatusForbidden, "No permission. Only admin can assign BG")
}
groupIds = append(groupIds, bgids...)
}
groupIds = append(groupIds, f.Bgids...)
reBelongs, err := models.IdentsFilter(rt.Ctx, f.Idents, "group_id > ?", 0)
ginx.Dangerous(err)
if len(reBelongs) > 0 {
// 对于这些要重新分配的机器操作者要对这些机器本身有权限同时要对目标bgid有权限
rt.checkTargetPerm(c, f.Idents)
bg := BusiGroup(rt.Ctx, f.Bgid)
for _, bgid := range groupIds {
bg := BusiGroup(rt.Ctx, bgid)
can, err := user.CanDoBusiGroup(rt.Ctx, bg, "rw")
ginx.Dangerous(err)
@@ -432,14 +457,28 @@ func (rt *Router) targetUpdateBgid(c *gin.Context) {
ginx.Bomb(http.StatusForbidden, "No permission. You are not admin of BG(%s)", bg.Name)
}
}
} else if f.Bgid == 0 {
// 退还机器
rt.checkTargetPerm(c, f.Idents)
} else {
ginx.Bomb(http.StatusBadRequest, "invalid bgid")
isNeverGrouped, checkErr := haveNeverGroupedIdent(rt.Ctx, f.Idents)
ginx.Dangerous(checkErr)
if isNeverGrouped {
can, err := user.CheckPerm(rt.Ctx, "/targets/bind")
ginx.Dangerous(err)
if !can {
ginx.Bomb(http.StatusForbidden, "No permission. Only admin can assign BG")
}
}
}
ginx.NewRender(c).Data(failedResults, models.TargetUpdateBgid(rt.Ctx, f.Idents, f.Bgid, false))
switch f.Action {
case "add":
ginx.NewRender(c).Data(failedResults, models.TargetBindBgids(rt.Ctx, f.Idents, f.Bgids, f.Tags))
case "del":
ginx.NewRender(c).Data(failedResults, models.TargetUnbindBgids(rt.Ctx, f.Idents, f.Bgids))
case "reset":
ginx.NewRender(c).Data(failedResults, models.TargetOverrideBgids(rt.Ctx, f.Idents, f.Bgids, f.Tags))
default:
ginx.Bomb(http.StatusBadRequest, "invalid action")
}
}
func (rt *Router) targetUpdateBgidByService(c *gin.Context) {
@@ -458,7 +497,7 @@ func (rt *Router) targetUpdateBgidByService(c *gin.Context) {
ginx.Bomb(http.StatusBadRequest, err.Error())
}
ginx.NewRender(c).Data(failedResults, models.TargetUpdateBgid(rt.Ctx, f.Idents, f.Bgid, false))
ginx.NewRender(c).Data(failedResults, models.TargetOverrideBgids(rt.Ctx, f.Idents, []int64{f.Bgid}, nil))
}
type identsForm struct {
@@ -482,7 +521,7 @@ func (rt *Router) targetDel(c *gin.Context) {
ginx.Bomb(http.StatusBadRequest, err.Error())
}
ginx.NewRender(c).Data(failedResults, models.TargetDel(rt.Ctx, f.Idents))
ginx.NewRender(c).Data(failedResults, models.TargetDel(rt.Ctx, f.Idents, rt.TargetDeleteHook))
}
func (rt *Router) targetDelByService(c *gin.Context) {
@@ -501,7 +540,7 @@ func (rt *Router) targetDelByService(c *gin.Context) {
ginx.Bomb(http.StatusBadRequest, err.Error())
}
ginx.NewRender(c).Data(failedResults, models.TargetDel(rt.Ctx, f.Idents))
ginx.NewRender(c).Data(failedResults, models.TargetDel(rt.Ctx, f.Idents, rt.TargetDeleteHook))
}
func (rt *Router) checkTargetPerm(c *gin.Context, idents []string) {
@@ -531,3 +570,18 @@ func (rt *Router) targetsOfAlertRule(c *gin.Context) {
ginx.NewRender(c).Data(ret, err)
}
func (rt *Router) targetsOfHostQuery(c *gin.Context) {
var queries []models.HostQuery
ginx.BindJSON(c, &queries)
hostsQuery := models.GetHostsQuery(queries)
session := models.TargetFilterQueryBuild(rt.Ctx, hostsQuery, 0, 0)
var lst []*models.Target
err := session.Find(&lst).Error
if err != nil {
ginx.Bomb(http.StatusInternalServerError, err.Error())
}
ginx.NewRender(c).Data(lst, nil)
}

View File

@@ -196,6 +196,13 @@ func (rt *Router) taskTplDel(c *gin.Context) {
return
}
ids, err := models.GetAlertRuleIdsByTaskId(rt.Ctx, tid)
ginx.Dangerous(err)
if len(ids) > 0 {
ginx.NewRender(c).Message("can't del this task tpl, used by alert rule ids(%v) ", ids)
return
}
ginx.NewRender(c).Message(tpl.Del(rt.Ctx))
}

View File

@@ -1,14 +1,14 @@
package router
import (
"net/http"
"fmt"
"github.com/ccfos/nightingale/v6/center/cconf"
"github.com/ccfos/nightingale/v6/datasource/tdengine"
"github.com/ccfos/nightingale/v6/dscache"
"github.com/ccfos/nightingale/v6/models"
"github.com/gin-gonic/gin"
"github.com/toolkits/pkg/ginx"
"github.com/toolkits/pkg/logger"
"net/http"
)
type databasesQueryForm struct {
@@ -20,13 +20,13 @@ func (rt *Router) tdengineDatabases(c *gin.Context) {
var f databasesQueryForm
ginx.BindJSON(c, &f)
tdClient := rt.TdendgineClients.GetCli(f.DatasourceId)
if tdClient == nil {
datasource, hit := dscache.DsCache.Get(f.Cate, f.DatasourceId)
if _, ok := datasource.(*tdengine.TDengine); !hit || !ok {
ginx.NewRender(c, http.StatusNotFound).Message("No such datasource")
return
}
databases, err := tdClient.GetDatabases()
databases, err := datasource.(*tdengine.TDengine).ShowDatabases(rt.Ctx.Ctx)
ginx.NewRender(c).Data(databases, err)
}
@@ -37,18 +37,29 @@ type tablesQueryForm struct {
IsStable bool `json:"is_stable"`
}
type Column struct {
Name string `json:"name"`
Type string `json:"type"`
Size int `json:"size"`
}
// get tdengine tables
func (rt *Router) tdengineTables(c *gin.Context) {
var f tablesQueryForm
ginx.BindJSON(c, &f)
tdClient := rt.TdendgineClients.GetCli(f.DatasourceId)
if tdClient == nil {
datasource, hit := dscache.DsCache.Get(f.Cate, f.DatasourceId)
if _, ok := datasource.(*tdengine.TDengine); !hit || !ok {
ginx.NewRender(c, http.StatusNotFound).Message("No such datasource")
return
}
tables, err := tdClient.GetTables(f.Database, f.IsStable)
database := fmt.Sprintf("%s.tables", f.Database)
if f.IsStable {
database = fmt.Sprintf("%s.stables", f.Database)
}
tables, err := datasource.(*tdengine.TDengine).ShowTables(rt.Ctx.Ctx, database)
ginx.NewRender(c).Data(tables, err)
}
@@ -59,50 +70,31 @@ type columnsQueryForm struct {
Table string `json:"table"`
}
// get tdengine columns
func (rt *Router) tdengineColumns(c *gin.Context) {
var f columnsQueryForm
ginx.BindJSON(c, &f)
tdClient := rt.TdendgineClients.GetCli(f.DatasourceId)
if tdClient == nil {
datasource, hit := dscache.DsCache.Get(f.Cate, f.DatasourceId)
if _, ok := datasource.(*tdengine.TDengine); !hit || !ok {
ginx.NewRender(c, http.StatusNotFound).Message("No such datasource")
return
}
columns, err := tdClient.GetColumns(f.Database, f.Table)
ginx.NewRender(c).Data(columns, err)
}
func (rt *Router) QueryData(c *gin.Context) {
var f models.QueryParam
ginx.BindJSON(c, &f)
var resp []models.DataResp
var err error
tdClient := rt.TdendgineClients.GetCli(f.DatasourceId)
for _, q := range f.Querys {
datas, err := tdClient.Query(q)
ginx.Dangerous(err)
resp = append(resp, datas...)
query := map[string]string{
"database": f.Database,
"table": f.Table,
}
ginx.NewRender(c).Data(resp, err)
}
func (rt *Router) QueryLog(c *gin.Context) {
var f models.QueryParam
ginx.BindJSON(c, &f)
tdClient := rt.TdendgineClients.GetCli(f.DatasourceId)
if len(f.Querys) == 0 {
ginx.Bomb(200, "querys is empty")
return
columns, err := datasource.(*tdengine.TDengine).DescribeTable(rt.Ctx.Ctx, query)
// 对齐前端,后续可以将 tdEngine 的查数据的接口都统一
tdColumns := make([]Column, len(columns))
for i, column := range columns {
tdColumns[i] = Column{
Name: column.Field,
Type: column.Type,
}
}
data, err := tdClient.QueryLog(f.Querys[0])
logger.Debugf("tdengine query:%s result: %+v", f.Querys[0], data)
ginx.NewRender(c).Data(data, err)
ginx.NewRender(c).Data(tdColumns, err)
}
// query sql template

View File

@@ -48,7 +48,7 @@ func (rt *Router) userGets(c *gin.Context) {
order := ginx.QueryStr(c, "order", "username")
desc := ginx.QueryBool(c, "desc", false)
rt.UserCache.UpdateUsersLastActiveTime()
go rt.UserCache.UpdateUsersLastActiveTime()
total, err := models.UserTotal(rt.Ctx, query, stime, etime)
ginx.Dangerous(err)

View File

@@ -1,6 +1,7 @@
package sso
import (
"fmt"
"log"
"time"
@@ -145,7 +146,7 @@ func Init(center cconf.Center, ctx *ctx.Context, configCache *memsto.ConfigCache
}
}
if configCache == nil {
logger.Error("configCache is nil, sso initialization failed")
log.Fatalln(fmt.Errorf("configCache is nil, sso initialization failed"))
}
ssoClient.configCache = configCache
userVariableMap := configCache.Get()

View File

@@ -33,7 +33,7 @@ type ClusterOptions struct {
MaxIdleConnsPerHost int
}
func Parse(fpath string, configPtr interface{}) error {
func Parse(fpath string, configPtr *Config) error {
var (
tBuf []byte
)

View File

@@ -5,8 +5,11 @@ import (
"errors"
"fmt"
"github.com/ccfos/nightingale/v6/dscache"
"github.com/ccfos/nightingale/v6/alert"
"github.com/ccfos/nightingale/v6/alert/astats"
"github.com/ccfos/nightingale/v6/alert/dispatch"
"github.com/ccfos/nightingale/v6/alert/process"
alertrt "github.com/ccfos/nightingale/v6/alert/router"
"github.com/ccfos/nightingale/v6/center/metas"
@@ -16,13 +19,12 @@ import (
"github.com/ccfos/nightingale/v6/pkg/ctx"
"github.com/ccfos/nightingale/v6/pkg/httpx"
"github.com/ccfos/nightingale/v6/pkg/logx"
"github.com/ccfos/nightingale/v6/pkg/macros"
"github.com/ccfos/nightingale/v6/prom"
"github.com/ccfos/nightingale/v6/pushgw/idents"
pushgwrt "github.com/ccfos/nightingale/v6/pushgw/router"
"github.com/ccfos/nightingale/v6/pushgw/writer"
"github.com/ccfos/nightingale/v6/storage"
"github.com/ccfos/nightingale/v6/tdengine"
"github.com/flashcatcloud/ibex/src/cmd/ibex"
)
@@ -52,12 +54,16 @@ func Initialize(configDir string, cryptoKey string) (func(), error) {
targetCache := memsto.NewTargetCache(ctx, syncStats, redis)
busiGroupCache := memsto.NewBusiGroupCache(ctx, syncStats)
configCvalCache := memsto.NewCvalCache(ctx, syncStats)
idents := idents.New(ctx, redis)
metas := metas.New(redis)
writers := writer.NewWriters(config.Pushgw)
pushgwRouter := pushgwrt.New(config.HTTP, config.Pushgw, config.Alert, targetCache, busiGroupCache, idents, metas, writers, ctx)
r := httpx.GinEngine(config.Global.RunMode, config.HTTP)
r := httpx.GinEngine(config.Global.RunMode, config.HTTP, configCvalCache.PrintBodyPaths, configCvalCache.PrintAccessLog)
pushgwRouter.Config(r)
macros.RegisterMacro(macros.MacroInVain)
dscache.Init(ctx, false)
if !config.Alert.Disable {
configCache := memsto.NewConfigCache(ctx, syncStats, nil, "")
@@ -71,11 +77,13 @@ func Initialize(configDir string, cryptoKey string) (func(), error) {
taskTplsCache := memsto.NewTaskTplCache(ctx)
promClients := prom.NewPromClient(ctx)
tdengineClients := tdengine.NewTdengineClient(ctx, config.Alert.Heartbeat)
dispatch.InitRegisterQueryFunc(promClients)
externalProcessors := process.NewExternalProcessors()
alert.Start(config.Alert, config.Pushgw, syncStats, alertStats, externalProcessors, targetCache, busiGroupCache, alertMuteCache,
alertRuleCache, notifyConfigCache, taskTplsCache, dsCache, ctx, promClients, tdengineClients, userCache, userGroupCache)
alertRuleCache, notifyConfigCache, taskTplsCache, dsCache, ctx, promClients, userCache, userGroupCache)
alertrtRouter := alertrt.New(config.HTTP, config.Alert, alertMuteCache, targetCache, busiGroupCache, alertStats, ctx, externalProcessors)

View File

@@ -0,0 +1,41 @@
package cron
import (
"time"
"github.com/ccfos/nightingale/v6/models"
"github.com/ccfos/nightingale/v6/pkg/ctx"
"github.com/robfig/cron/v3"
"github.com/toolkits/pkg/logger"
)
func cleanNotifyRecord(ctx *ctx.Context, day int) {
lastWeek := time.Now().Unix() - 86400*int64(day)
err := models.DB(ctx).Model(&models.NotificaitonRecord{}).Where("created_at < ?", lastWeek).Delete(&models.NotificaitonRecord{}).Error
if err != nil {
logger.Errorf("Failed to clean notify record: %v", err)
}
}
// 每天凌晨1点执行清理任务
func CleanNotifyRecord(ctx *ctx.Context, day int) {
c := cron.New()
if day < 1 {
day = 7
}
// 使用cron表达式设置每天凌晨1点执行
_, err := c.AddFunc("0 1 * * *", func() {
cleanNotifyRecord(ctx, day)
})
if err != nil {
logger.Errorf("Failed to add clean notify record cron job: %v", err)
return
}
// 启动cron任务
c.Start()
}

241
datasource/ck/clickhouse.go Normal file
View File

@@ -0,0 +1,241 @@
package ck
import (
"context"
"fmt"
"strings"
"github.com/ccfos/nightingale/v6/datasource"
ck "github.com/ccfos/nightingale/v6/dskit/clickhouse"
"github.com/ccfos/nightingale/v6/models"
"github.com/ccfos/nightingale/v6/pkg/macros"
"github.com/mitchellh/mapstructure"
"github.com/toolkits/pkg/logger"
)
const (
CKType = "ck"
TimeFieldFormatEpochMilli = "epoch_millis"
TimeFieldFormatEpochSecond = "epoch_second"
DefaultLimit = 500
)
var (
ckPrivBanned = []string{
"INSERT",
"CREATE",
"DROP",
"DELETE",
"UPDATE",
"ALL",
}
ckBannedOp = map[string]struct{}{
"CREATE": {},
"INSERT": {},
"ALTER": {},
"REVOKE": {},
"DROP": {},
"RENAME": {},
"ATTACH": {},
"DETACH": {},
"OPTIMIZE": {},
"TRUNCATE": {},
"SET": {},
}
)
func init() {
datasource.RegisterDatasource(CKType, new(Clickhouse))
}
type CKShard struct {
Addr string `json:"ck.addr" mapstructure:"ck.addr"`
User string `json:"ck.user" mapstructure:"ck.user"`
Password string `json:"ck.password" mapstructure:"ck.password"`
Database string `json:"ck.db" mapstructure:"ck.db"`
IsEncrypted bool `json:"ck.is_encrypt" mapstructure:"ck.is_encrypt"`
}
type QueryParam struct {
Limit int `json:"limit" mapstructure:"limit"`
Sql string `json:"sql" mapstructure:"sql"`
Ref string `json:"ref" mapstructure:"ref"`
From int64 `json:"from" mapstructure:"from"`
To int64 `json:"to" mapstructure:"to"`
TimeField string `json:"time_field" mapstructure:"time_field"`
TimeFormat string `json:"time_format" mapstructure:"time_format"`
Keys datasource.Keys `json:"keys" mapstructure:"keys"`
Database string `json:"database" mapstructure:"database"`
Table string `json:"table" mapstructure:"table"`
}
type Clickhouse struct {
ck.Clickhouse `json:",inline" mapstructure:",squash"`
}
func (c *Clickhouse) Init(settings map[string]interface{}) (datasource.Datasource, error) {
newest := new(Clickhouse)
err := mapstructure.Decode(settings, newest)
return newest, err
}
func (c *Clickhouse) InitClient() error {
return c.InitCli()
}
func (c *Clickhouse) Validate(ctx context.Context) error {
if len(c.Nodes) == 0 {
return fmt.Errorf("ck shard is invalid, please check datasource setting")
}
addr := c.Nodes[0]
if len(strings.Trim(c.User, " ")) == 0 {
return fmt.Errorf("ck shard user is invalid, please check datasource setting")
}
if len(strings.Trim(addr, " ")) == 0 {
return fmt.Errorf("ck shard addr is invalid, please check datasource setting")
}
// if len(strings.Trim(shard.Password, " ")) == 0 {
// return fmt.Errorf("ck shard password is empty, please check datasource setting or set password for user")
// }
return nil
}
// Equal compares whether two objects are the same, used for caching
func (c *Clickhouse) Equal(p datasource.Datasource) bool {
plg, ok := p.(*Clickhouse)
if !ok {
logger.Errorf("unexpected plugin type, expected is ck")
return false
}
// only compare first shard
if len(c.Nodes) == 0 {
logger.Errorf("ck shard is empty")
return false
}
addr := c.Nodes[0]
if len(plg.Nodes) == 0 {
logger.Errorf("new ck plugin obj shard is empty")
return false
}
newAddr := plg.Nodes[0]
if c.User != plg.User {
return false
}
if addr != newAddr {
return false
}
if c.Password != plg.Password {
return false
}
return true
}
func (c *Clickhouse) MakeLogQuery(ctx context.Context, query interface{}, eventTags []string, start, end int64) (interface{}, error) {
return nil, nil
}
func (c *Clickhouse) MakeTSQuery(ctx context.Context, query interface{}, eventTags []string, start, end int64) (interface{}, error) {
return nil, nil
}
func (c *Clickhouse) QueryMapData(ctx context.Context, query interface{}) ([]map[string]string, error) {
return nil, nil
}
func (c *Clickhouse) QueryData(ctx context.Context, query interface{}) ([]models.DataResp, error) {
ckQueryParam := new(ck.QueryParam)
if err := mapstructure.Decode(query, ckQueryParam); err != nil {
return nil, err
}
if strings.Contains(ckQueryParam.Sql, "$__") {
var err error
ckQueryParam.Sql, err = macros.Macro(ckQueryParam.Sql, ckQueryParam.From, ckQueryParam.To)
if err != nil {
return nil, err
}
}
if ckQueryParam.Keys.ValueKey == "" {
return nil, fmt.Errorf("valueKey is required")
}
rows, err := c.QueryTimeseries(ctx, ckQueryParam)
if err != nil {
logger.Warningf("query:%+v get data err:%v", ckQueryParam, err)
return nil, err
}
if err != nil {
logger.Warningf("query:%+v get data err:%v", ckQueryParam, err)
return []models.DataResp{}, err
}
data := make([]models.DataResp, 0)
for i := range rows {
data = append(data, models.DataResp{
Ref: ckQueryParam.Ref,
Metric: rows[i].Metric,
Values: rows[i].Values,
})
}
return data, nil
}
func (c *Clickhouse) QueryLog(ctx context.Context, query interface{}) ([]interface{}, int64, error) {
ckQueryParam := new(QueryParam)
if err := mapstructure.Decode(query, ckQueryParam); err != nil {
return nil, 0, err
}
if strings.Contains(ckQueryParam.Sql, "$__") {
var err error
ckQueryParam.Sql, err = macros.Macro(ckQueryParam.Sql, ckQueryParam.From, ckQueryParam.To)
if err != nil {
return nil, 0, err
}
}
rows, err := c.Query(ctx, ckQueryParam)
if err != nil {
logger.Warningf("query:%+v get data err:%v", ckQueryParam, err)
return nil, 0, err
}
limit := getLimit(len(rows), ckQueryParam.Limit)
logs := make([]interface{}, 0)
for i := 0; i < limit; i++ {
logs = append(logs, rows[i])
}
return logs, int64(limit), nil
}
func getLimit(rowLen, pLimit int) int {
limit := DefaultLimit
if pLimit > 0 {
limit = pLimit
}
if rowLen > limit {
return limit
}
return rowLen
}

View File

@@ -0,0 +1,603 @@
package eslike
import (
"context"
"encoding/json"
"fmt"
"strconv"
"strings"
"time"
"github.com/araddon/dateparse"
"github.com/bitly/go-simplejson"
"github.com/ccfos/nightingale/v6/models"
"github.com/mitchellh/mapstructure"
"github.com/olivere/elastic/v7"
"github.com/prometheus/common/model"
"github.com/toolkits/pkg/logger"
)
type Query struct {
Ref string `json:"ref" mapstructure:"ref"`
Index string `json:"index" mapstructure:"index"`
Filter string `json:"filter" mapstructure:"filter"`
MetricAggr MetricAggr `json:"value" mapstructure:"value"`
GroupBy []GroupBy `json:"group_by" mapstructure:"group_by"`
DateField string `json:"date_field" mapstructure:"date_field"`
Interval int64 `json:"interval" mapstructure:"interval"`
Start int64 `json:"start" mapstructure:"start"`
End int64 `json:"end" mapstructure:"end"`
P int `json:"page" mapstructure:"page"` // 页码
Limit int `json:"limit" mapstructure:"limit"` // 每页个数
Ascending bool `json:"ascending" mapstructure:"ascending"` // 按照DataField排序
Timeout int `json:"timeout" mapstructure:"timeout"`
MaxShard int `json:"max_shard" mapstructure:"max_shard"`
}
type MetricAggr struct {
Field string `json:"field" mapstructure:"field"`
Func string `json:"func" mapstructure:"func"`
Ref string `json:"ref" mapstructure:"ref"` // 变量名A B C
}
type GroupBy struct {
Cate GroupByCate `json:"cate" mapstructure:"cate"` // 分组类型
Field string `json:"field" mapstructure:"field"`
MinDocCount int64 `json:"min_doc_count" mapstructure:"min_doc_count"`
Order string `json:"order" mapstructure:"order"`
OrderBy string `json:"order_by" mapstructure:"order_by"`
Size int `json:"size" mapstructure:"size"`
Params []Param `json:"params" mapstructure:"params"` // 类型是 filter 时使用
Interval int64 `json:"interval" mapstructure:"interval"` // 分组间隔
}
type SearchFunc func(ctx context.Context, indices []string, source interface{}, timeout int, maxShard int) (*elastic.SearchResult, error)
type QueryFieldsFunc func(indices []string) ([]string, error)
// 分组类型
type GroupByCate string
const (
Filters GroupByCate = "filters"
Histgram GroupByCate = "histgram"
Terms GroupByCate = "terms"
)
// 参数
type Param struct {
Alias string `json:"alias,omitempty"` // 别名a=b的形式filter 特有
Query string `json:"query,omitempty"` // 查询条件filter 特有
}
type MetricPtr struct {
Data map[string][][]float64
}
func IterGetMap(m, ret map[string]interface{}, prefixKey string) {
for k, v := range m {
switch v.(type) {
case map[string]interface{}:
var key string
if prefixKey != "" {
key = fmt.Sprintf("%s.%s", prefixKey, k)
} else {
key = k
}
IterGetMap(v.(map[string]interface{}), ret, key)
default:
ret[prefixKey+"."+k] = []interface{}{v}
}
}
}
func TransferData(metric, ref string, m map[string][][]float64) []models.DataResp {
var datas []models.DataResp
for k, v := range m {
data := models.DataResp{
Ref: ref,
Metric: make(model.Metric),
Labels: k,
Values: v,
}
data.Metric["__name__"] = model.LabelValue(metric)
labels := strings.Split(k, "--")
for _, label := range labels {
arr := strings.SplitN(label, "=", 2)
if len(arr) == 2 {
data.Metric[model.LabelName(arr[0])] = model.LabelValue(arr[1])
}
}
datas = append(datas, data)
}
for i := 0; i < len(datas); i++ {
for k, v := range datas[i].Metric {
if k == "__name__" {
datas[i].Metric[k] = model.LabelValue(ref) + "_" + v
}
}
}
return datas
}
func GetQueryString(filter string, q *elastic.RangeQuery) *elastic.BoolQuery {
var queryString *elastic.BoolQuery
if filter != "" {
if strings.Contains(filter, ":") || strings.Contains(filter, "AND") || strings.Contains(filter, "OR") || strings.Contains(filter, "NOT") {
queryString = elastic.NewBoolQuery().Must(elastic.NewQueryStringQuery(filter)).Filter(q)
} else {
queryString = elastic.NewBoolQuery().Filter(elastic.NewMultiMatchQuery(filter).Lenient(true).Type("phrase")).Filter(q)
}
} else {
queryString = elastic.NewBoolQuery().Should(q)
}
return queryString
}
func getUnixTs(timeStr string) int64 {
ts, err := strconv.ParseInt(timeStr, 10, 64)
if err == nil {
return ts
}
parsedTime, err := dateparse.ParseAny(timeStr)
if err != nil {
logger.Error("failed to ParseAny: ", err)
return 0
}
return parsedTime.UnixMilli()
}
func GetBuckts(labelKey string, keys []string, arr []interface{}, metrics *MetricPtr, labels string, ts int64, f string) {
var err error
bucketsKey := ""
if len(keys) > 0 {
bucketsKey = keys[0]
}
newlabels := ""
for i := 0; i < len(arr); i++ {
tmp := arr[i].(map[string]interface{})
keyAsString, getTs := tmp["key_as_string"]
if getTs {
ts = getUnixTs(keyAsString.(string))
}
keyValue := tmp["key"]
switch keyValue.(type) {
case json.Number, string:
if !getTs {
if labels != "" {
newlabels = fmt.Sprintf("%s--%s=%v", labels, labelKey, keyValue)
} else {
newlabels = fmt.Sprintf("%s=%v", labelKey, keyValue)
}
}
default:
continue
}
var finalValue float64
if len(keys) == 0 { // 计算 doc_count 的情况
count := tmp["doc_count"]
finalValue, err = count.(json.Number).Float64()
if err != nil {
logger.Warningf("labelKey:%s get value error:%v", labelKey, err)
}
newValues := []float64{float64(ts / 1000), finalValue}
metrics.Data[newlabels] = append(metrics.Data[newlabels], newValues)
continue
}
innerBuckets, exists := tmp[bucketsKey]
if !exists {
continue
}
nextBucketsArr, exists := innerBuckets.(map[string]interface{})["buckets"]
if exists {
if len(keys[1:]) >= 1 {
GetBuckts(bucketsKey, keys[1:], nextBucketsArr.([]interface{}), metrics, newlabels, ts, f)
} else {
GetBuckts(bucketsKey, []string{}, nextBucketsArr.([]interface{}), metrics, newlabels, ts, f)
}
} else {
// doc_count
if f == "count" || f == "nodata" {
count := tmp["doc_count"]
finalValue, err = count.(json.Number).Float64()
if err != nil {
logger.Warningf("get %v value error:%v", count, err)
}
} else {
values, exists := innerBuckets.(map[string]interface{})["value"]
if exists {
switch values.(type) {
case json.Number:
value, err := values.(json.Number).Float64()
if err != nil {
logger.Warningf("labelKey:%s get value error:%v", labelKey, err)
}
finalValue = value
}
} else {
switch values.(type) {
case map[string]interface{}:
var err error
values := innerBuckets.(map[string]interface{})["values"]
for _, v := range values.(map[string]interface{}) {
finalValue, err = v.(json.Number).Float64()
if err != nil {
logger.Warningf("labelKey:%s get value error:%v", labelKey, err)
}
}
default:
values := innerBuckets.(map[string]interface{})["values"]
for _, v := range values.(map[string]interface{}) {
// Todo 修复 v is nil 导致 panic 情况
finalValue, err = v.(json.Number).Float64()
if err != nil {
logger.Warningf("labelKey:%s get value error:%v", labelKey, err)
}
}
}
}
}
if _, exists := metrics.Data[newlabels]; !exists {
metrics.Data[newlabels] = [][]float64{}
}
newValues := []float64{float64(ts / 1000), finalValue}
metrics.Data[newlabels] = append(metrics.Data[newlabels], newValues)
}
}
}
func MakeLogQuery(ctx context.Context, query interface{}, eventTags []string, start, end int64) (interface{}, error) {
param := new(Query)
if err := mapstructure.Decode(query, param); err != nil {
return nil, err
}
for i := 0; i < len(eventTags); i++ {
eventTags[i] = strings.Replace(eventTags[i], "=", ":", 1)
}
if len(eventTags) > 0 {
if param.Filter == "" {
param.Filter = strings.Join(eventTags, " AND ")
} else {
param.Filter = param.Filter + " AND " + strings.Join(eventTags, " AND ")
}
}
param.Start = start
param.End = end
return param, nil
}
func MakeTSQuery(ctx context.Context, query interface{}, eventTags []string, start, end int64) (interface{}, error) {
param := new(Query)
if err := mapstructure.Decode(query, param); err != nil {
return nil, err
}
for i := 0; i < len(eventTags); i++ {
eventTags[i] = strings.Replace(eventTags[i], "=", ":", 1)
}
if len(eventTags) > 0 {
if param.Filter == "" {
param.Filter = strings.Join(eventTags, " AND ")
} else {
param.Filter = param.Filter + " AND " + strings.Join(eventTags, " AND ")
}
}
param.Start = start
param.End = end
return param, nil
}
func QueryData(ctx context.Context, queryParam interface{}, cliTimeout int64, version string, search SearchFunc) ([]models.DataResp, error) {
param := new(Query)
if err := mapstructure.Decode(queryParam, param); err != nil {
return nil, err
}
if param.Timeout == 0 {
param.Timeout = int(cliTimeout) / 1000
}
if param.Interval == 0 {
param.Interval = 60
}
if param.MaxShard < 1 {
param.MaxShard = 5
}
if param.DateField == "" {
param.DateField = "@timestamp"
}
indexArr := strings.Split(param.Index, ",")
q := elastic.NewRangeQuery(param.DateField)
now := time.Now().Unix()
var start, end int64
if param.End != 0 && param.Start != 0 {
end = param.End - param.End%param.Interval
start = param.Start - param.Start%param.Interval
} else {
end = now
start = end - param.Interval
}
delay, ok := ctx.Value("delay").(int64)
if ok && delay != 0 {
end = end - delay
start = start - delay
}
q.Gte(time.Unix(start, 0).UnixMilli())
q.Lte(time.Unix(end, 0).UnixMilli())
q.Format("epoch_millis")
field := param.MetricAggr.Field
groupBys := param.GroupBy
queryString := GetQueryString(param.Filter, q)
var aggr elastic.Aggregation
switch param.MetricAggr.Func {
case "avg":
aggr = elastic.NewAvgAggregation().Field(field)
case "max":
aggr = elastic.NewMaxAggregation().Field(field)
case "min":
aggr = elastic.NewMinAggregation().Field(field)
case "sum":
aggr = elastic.NewSumAggregation().Field(field)
case "count":
aggr = elastic.NewValueCountAggregation().Field(field)
case "p90":
aggr = elastic.NewPercentilesAggregation().Percentiles(90).Field(field)
case "p95":
aggr = elastic.NewPercentilesAggregation().Percentiles(95).Field(field)
case "p99":
aggr = elastic.NewPercentilesAggregation().Percentiles(99).Field(field)
case "median":
aggr = elastic.NewPercentilesAggregation().Percentiles(50).Field(field)
default:
return nil, fmt.Errorf("func %s not support", param.MetricAggr.Func)
}
tsAggr := elastic.NewDateHistogramAggregation().
Field(param.DateField).
MinDocCount(1)
if strings.HasPrefix(version, "7") {
tsAggr.FixedInterval(fmt.Sprintf("%ds", param.Interval))
} else {
// 兼容 7.0 以下的版本
// OpenSearch 也使用这个字段
tsAggr.Interval(fmt.Sprintf("%ds", param.Interval))
}
// group by
var groupByAggregation elastic.Aggregation
if len(groupBys) > 0 {
groupBy := groupBys[0]
if groupBy.MinDocCount == 0 {
groupBy.MinDocCount = 1
}
if groupBy.Size == 0 {
groupBy.Size = 300
}
switch groupBy.Cate {
case Terms:
if param.MetricAggr.Func != "count" {
groupByAggregation = elastic.NewTermsAggregation().Field(groupBy.Field).SubAggregation(field, aggr).OrderByKeyDesc().Size(groupBy.Size).MinDocCount(int(groupBy.MinDocCount))
} else {
groupByAggregation = elastic.NewTermsAggregation().Field(groupBy.Field).OrderByKeyDesc().Size(groupBy.Size).MinDocCount(int(groupBy.MinDocCount))
}
case Histgram:
if param.MetricAggr.Func != "count" {
groupByAggregation = elastic.NewHistogramAggregation().Field(groupBy.Field).Interval(float64(groupBy.Interval)).SubAggregation(field, aggr)
} else {
groupByAggregation = elastic.NewHistogramAggregation().Field(groupBy.Field).Interval(float64(groupBy.Interval))
}
case Filters:
for _, filterParam := range groupBy.Params {
if param.MetricAggr.Func != "count" {
groupByAggregation = elastic.NewFilterAggregation().Filter(elastic.NewTermQuery(filterParam.Query, "true")).SubAggregation(field, aggr)
} else {
groupByAggregation = elastic.NewFilterAggregation().Filter(elastic.NewTermQuery(filterParam.Query, "true"))
}
}
}
for i := 1; i < len(groupBys); i++ {
groupBy := groupBys[i]
if groupBy.MinDocCount == 0 {
groupBy.MinDocCount = 1
}
if groupBy.Size == 0 {
groupBy.Size = 300
}
switch groupBy.Cate {
case Terms:
groupByAggregation = elastic.NewTermsAggregation().Field(groupBy.Field).SubAggregation(groupBys[i-1].Field, groupByAggregation).OrderByKeyDesc().Size(groupBy.Size).MinDocCount(int(groupBy.MinDocCount))
case Histgram:
groupByAggregation = elastic.NewHistogramAggregation().Field(groupBy.Field).Interval(float64(groupBy.Interval)).SubAggregation(groupBys[i-1].Field, groupByAggregation)
case Filters:
for _, filterParam := range groupBy.Params {
groupByAggregation = elastic.NewFilterAggregation().Filter(elastic.NewTermQuery(filterParam.Query, "true")).SubAggregation(groupBys[i-1].Field, groupByAggregation)
}
}
}
tsAggr.SubAggregation(groupBys[len(groupBys)-1].Field, groupByAggregation)
} else if param.MetricAggr.Func != "count" {
tsAggr.SubAggregation(field, aggr)
}
source, _ := queryString.Source()
b, _ := json.Marshal(source)
logger.Debugf("query_data q:%+v tsAggr:%+v query_string:%s", param, tsAggr, string(b))
searchSource := elastic.NewSearchSource().
Query(queryString).
Aggregation("ts", tsAggr)
searchSourceString, err := searchSource.Source()
if err != nil {
logger.Warningf("query_data searchSource:%s to string error:%v", searchSourceString, err)
}
jsonSearchSource, err := json.Marshal(searchSourceString)
if err != nil {
logger.Warningf("query_data searchSource:%s to json error:%v", searchSourceString, err)
}
result, err := search(ctx, indexArr, searchSource, param.Timeout, param.MaxShard)
if err != nil {
logger.Warningf("query_data searchSource:%s query_data error:%v", searchSourceString, err)
return nil, err
}
logger.Debugf("query_data searchSource:%s resp:%s", string(jsonSearchSource), string(result.Aggregations["ts"]))
js, err := simplejson.NewJson(result.Aggregations["ts"])
if err != nil {
return nil, err
}
bucketsData, err := js.Get("buckets").Array()
if err != nil {
return nil, err
}
var keys []string
for i := len(groupBys) - 1; i >= 0; i-- {
keys = append(keys, groupBys[i].Field)
}
if param.MetricAggr.Func != "count" {
keys = append(keys, field)
}
metrics := &MetricPtr{Data: make(map[string][][]float64)}
GetBuckts("", keys, bucketsData, metrics, "", 0, param.MetricAggr.Func)
return TransferData(fmt.Sprintf("%s_%s", field, param.MetricAggr.Func), param.Ref, metrics.Data), nil
}
func HitFilter(typ string) bool {
switch typ {
case "keyword", "date", "long", "integer", "short", "byte", "double", "float", "half_float", "scaled_float", "unsigned_long":
return false
default:
return true
}
}
func QueryLog(ctx context.Context, queryParam interface{}, timeout int64, version string, maxShard int, search SearchFunc) ([]interface{}, int64, error) {
param := new(Query)
if err := mapstructure.Decode(queryParam, param); err != nil {
return nil, 0, err
}
if param.Timeout == 0 {
param.Timeout = int(timeout)
}
indexArr := strings.Split(param.Index, ",")
now := time.Now().Unix()
var start, end int64
if param.End != 0 && param.Start != 0 {
end = param.End - param.End%param.Interval
start = param.Start - param.Start%param.Interval
} else {
end = now
start = end - param.Interval
}
q := elastic.NewRangeQuery(param.DateField)
q.Gte(time.Unix(start, 0).UnixMilli())
q.Lte(time.Unix(end, 0).UnixMilli())
q.Format("epoch_millis")
queryString := GetQueryString(param.Filter, q)
if param.Limit <= 0 {
param.Limit = 10
}
if param.MaxShard < 1 {
param.MaxShard = maxShard
}
source := elastic.NewSearchSource().
TrackTotalHits(true).
Query(queryString).
From(param.P).
Size(param.Limit).
Sort(param.DateField, param.Ascending)
result, err := search(ctx, indexArr, source, param.Timeout, param.MaxShard)
if err != nil {
logger.Warningf("query data error:%v", err)
return nil, 0, err
}
total := result.TotalHits()
var ret []interface{}
b, _ := json.Marshal(source)
logger.Debugf("query data result query source:%s len:%d total:%d", string(b), len(result.Hits.Hits), total)
resultBytes, _ := json.Marshal(result)
logger.Debugf("query data result query source:%s result:%s", string(b), string(resultBytes))
if strings.HasPrefix(version, "6") {
for i := 0; i < len(result.Hits.Hits); i++ {
var x map[string]interface{}
err := json.Unmarshal(result.Hits.Hits[i].Source, &x)
if err != nil {
logger.Warningf("Unmarshal soruce error:%v", err)
continue
}
if result.Hits.Hits[i].Fields == nil {
result.Hits.Hits[i].Fields = make(map[string]interface{})
}
IterGetMap(x, result.Hits.Hits[i].Fields, "")
ret = append(ret, result.Hits.Hits[i])
}
} else {
for _, hit := range result.Hits.Hits {
ret = append(ret, hit)
}
}
return ret, total, nil
}

114
datasource/datasource.go Normal file
View File

@@ -0,0 +1,114 @@
package datasource
import (
"context"
"fmt"
"strings"
"github.com/ccfos/nightingale/v6/models"
)
type DatasourceType struct {
Id int64 `json:"id"`
Category string `json:"category"`
PluginType string `json:"type"`
PluginTypeName string `json:"type_name"`
}
type Keys struct {
ValueKey string `json:"valueKey" mapstructure:"valueKey"` // 多个用空格分隔
LabelKey string `json:"labelKey" mapstructure:"labelKey"` // 多个用空格分隔
TimeKey string `json:"timeKey" mapstructure:"timeKey"`
TimeFormat string `json:"timeFormat" mapstructure:"timeFormat"`
}
var DatasourceTypes map[int64]DatasourceType
func init() {
DatasourceTypes = make(map[int64]DatasourceType)
DatasourceTypes[1] = DatasourceType{
Id: 1,
Category: "timeseries",
PluginType: "prometheus",
PluginTypeName: "Prometheus Like",
}
DatasourceTypes[2] = DatasourceType{
Id: 2,
Category: "logging",
PluginType: "elasticsearch",
PluginTypeName: "Elasticsearch",
}
DatasourceTypes[3] = DatasourceType{
Id: 3,
Category: "logging",
PluginType: "aliyun-sls",
PluginTypeName: "SLS",
}
DatasourceTypes[4] = DatasourceType{
Id: 4,
Category: "timeseries",
PluginType: "ck",
PluginTypeName: "ClickHouse",
}
}
type NewDatasrouceFn func(settings map[string]interface{}) (Datasource, error)
var datasourceRegister = map[string]NewDatasrouceFn{}
type Datasource interface {
Init(settings map[string]interface{}) (Datasource, error) // 初始化配置
InitClient() error // 初始化客户端
Validate(ctx context.Context) error // 参数验证
Equal(p Datasource) bool // 验证是否相等
MakeLogQuery(ctx context.Context, query interface{}, eventTags []string, start, end int64) (interface{}, error)
MakeTSQuery(ctx context.Context, query interface{}, eventTags []string, start, end int64) (interface{}, error)
QueryData(ctx context.Context, query interface{}) ([]models.DataResp, error)
QueryLog(ctx context.Context, query interface{}) ([]interface{}, int64, error)
// 在生成告警事件时,会调用该方法,用于获取额外的数据
QueryMapData(ctx context.Context, query interface{}) ([]map[string]string, error)
}
func RegisterDatasource(typ string, p Datasource) {
if _, found := datasourceRegister[typ]; found {
return
}
datasourceRegister[typ] = p.Init
}
func GetDatasourceByType(typ string, settings map[string]interface{}) (Datasource, error) {
typ = strings.ReplaceAll(typ, ".logging", "")
fn, found := datasourceRegister[typ]
if !found {
return nil, fmt.Errorf("plugin type %s not found", typ)
}
plug, err := fn(settings)
if err != nil {
return nil, err
}
return plug, nil
}
type DatasourceInfo struct {
Id int64 `json:"id"`
Name string `json:"name"`
Description string `json:"description"`
ClusterName string `json:"cluster_name"`
Category string `json:"category"`
PluginId int64 `json:"plugin_id"`
Type string `json:"plugin_type"`
PluginTypeName string `json:"plugin_type_name"`
Settings map[string]interface{} `json:"settings"`
HTTPJson models.HTTP `json:"http"`
AuthJson models.Auth `json:"auth"`
Status string `json:"status"`
CreatedAt int64 `json:"created_at"`
UpdatedAt int64 `json:"updated_at"`
IsDefault bool `json:"is_default"`
}

405
datasource/es/es.go Normal file
View File

@@ -0,0 +1,405 @@
package es
import (
"context"
"encoding/json"
"fmt"
"net"
"net/http"
"net/url"
"reflect"
"sort"
"strings"
"time"
"github.com/ccfos/nightingale/v6/datasource"
"github.com/ccfos/nightingale/v6/datasource/commons/eslike"
"github.com/ccfos/nightingale/v6/models"
"github.com/ccfos/nightingale/v6/pkg/tlsx"
"github.com/mitchellh/mapstructure"
"github.com/olivere/elastic/v7"
"github.com/toolkits/pkg/logger"
)
const (
ESType = "elasticsearch"
)
type Elasticsearch struct {
Addr string `json:"es.addr" mapstructure:"es.addr"`
Nodes []string `json:"es.nodes" mapstructure:"es.nodes"`
Timeout int64 `json:"es.timeout" mapstructure:"es.timeout"` // millis
Basic BasicAuth `json:"es.basic" mapstructure:"es.basic"`
TLS TLS `json:"es.tls" mapstructure:"es.tls"`
Version string `json:"es.version" mapstructure:"es.version"`
Headers map[string]string `json:"es.headers" mapstructure:"es.headers"`
MinInterval int `json:"es.min_interval" mapstructure:"es.min_interval"` // seconds
MaxShard int `json:"es.max_shard" mapstructure:"es.max_shard"`
ClusterName string `json:"es.cluster_name" mapstructure:"es.cluster_name"`
EnableWrite bool `json:"es.enable_write" mapstructure:"es.enable_write"` // 允许写操作
Client *elastic.Client `json:"es.client" mapstructure:"es.client"`
}
type TLS struct {
SkipTlsVerify bool `json:"es.tls.skip_tls_verify" mapstructure:"es.tls.skip_tls_verify"`
}
type BasicAuth struct {
Enable bool `json:"es.auth.enable" mapstructure:"es.auth.enable"`
Username string `json:"es.user" mapstructure:"es.user"`
Password string `json:"es.password" mapstructure:"es.password"`
}
func init() {
datasource.RegisterDatasource(ESType, new(Elasticsearch))
}
func (e *Elasticsearch) Init(settings map[string]interface{}) (datasource.Datasource, error) {
newest := new(Elasticsearch)
err := mapstructure.Decode(settings, newest)
return newest, err
}
func (e *Elasticsearch) InitClient() error {
transport := &http.Transport{
Proxy: http.ProxyFromEnvironment,
DialContext: (&net.Dialer{
Timeout: time.Duration(e.Timeout) * time.Millisecond,
}).DialContext,
ResponseHeaderTimeout: time.Duration(e.Timeout) * time.Millisecond,
}
if len(e.Nodes) > 0 {
e.Addr = e.Nodes[0]
}
if strings.Contains(e.Addr, "https") {
tlsConfig := tlsx.ClientConfig{
InsecureSkipVerify: e.TLS.SkipTlsVerify,
UseTLS: true,
}
cfg, err := tlsConfig.TLSConfig()
if err != nil {
return err
}
transport.TLSClientConfig = cfg
}
var err error
options := []elastic.ClientOptionFunc{
elastic.SetURL(e.Nodes...),
}
if e.Basic.Username != "" {
options = append(options, elastic.SetBasicAuth(e.Basic.Username, e.Basic.Password))
}
headers := http.Header{}
for k, v := range e.Headers {
headers[k] = []string{v}
}
options = append(options, elastic.SetHeaders(headers))
options = append(options, elastic.SetHttpClient(&http.Client{Transport: transport}))
options = append(options, elastic.SetSniff(false))
options = append(options, elastic.SetHealthcheck(false))
e.Client, err = elastic.NewClient(options...)
return err
}
func (e *Elasticsearch) Equal(other datasource.Datasource) bool {
sort.Strings(e.Nodes)
sort.Strings(other.(*Elasticsearch).Nodes)
if strings.Join(e.Nodes, ",") != strings.Join(other.(*Elasticsearch).Nodes, ",") {
return false
}
if e.Basic.Username != other.(*Elasticsearch).Basic.Username {
return false
}
if e.Basic.Password != other.(*Elasticsearch).Basic.Password {
return false
}
if e.TLS.SkipTlsVerify != other.(*Elasticsearch).TLS.SkipTlsVerify {
return false
}
if e.EnableWrite != other.(*Elasticsearch).EnableWrite {
return false
}
if !reflect.DeepEqual(e.Headers, other.(*Elasticsearch).Headers) {
return false
}
return true
}
func (e *Elasticsearch) Validate(ctx context.Context) (err error) {
if len(e.Nodes) == 0 {
return fmt.Errorf("need a valid addr")
}
for _, addr := range e.Nodes {
_, err = url.Parse(addr)
if err != nil {
return fmt.Errorf("parse addr error: %v", err)
}
}
if e.Basic.Enable && (len(e.Basic.Username) == 0 || len(e.Basic.Password) == 0) {
return fmt.Errorf("need a valid user, password")
}
if e.MaxShard == 0 {
e.MaxShard = 5
}
if e.MinInterval < 10 {
e.MinInterval = 10
}
if e.Timeout == 0 {
e.Timeout = 60000
}
if !strings.HasPrefix(e.Version, "6") && !strings.HasPrefix(e.Version, "7") {
return fmt.Errorf("version must be 6.0+ or 7.0+")
}
return nil
}
func (e *Elasticsearch) MakeLogQuery(ctx context.Context, query interface{}, eventTags []string, start, end int64) (interface{}, error) {
return eslike.MakeLogQuery(ctx, query, eventTags, start, end)
}
func (e *Elasticsearch) MakeTSQuery(ctx context.Context, query interface{}, eventTags []string, start, end int64) (interface{}, error) {
return eslike.MakeTSQuery(ctx, query, eventTags, start, end)
}
func (e *Elasticsearch) QueryData(ctx context.Context, queryParam interface{}) ([]models.DataResp, error) {
search := func(ctx context.Context, indices []string, source interface{}, timeout int, maxShard int) (*elastic.SearchResult, error) {
return e.Client.Search().
Index(indices...).
Source(source).
Timeout(fmt.Sprintf("%ds", timeout)).
MaxConcurrentShardRequests(maxShard).
Do(ctx)
}
return eslike.QueryData(ctx, queryParam, e.Timeout, e.Version, search)
}
func (e *Elasticsearch) QueryIndices() ([]string, error) {
result, err := e.Client.IndexNames()
return result, err
}
func (e *Elasticsearch) QueryFields(indexs []string) ([]string, error) {
var fields []string
result, err := elastic.NewGetFieldMappingService(e.Client).Index(indexs...).Do(context.Background())
if err != nil {
return fields, err
}
fieldMap := make(map[string]struct{})
for _, indexMap := range result {
if m, exists := indexMap.(map[string]interface{})["mappings"]; exists {
for k, v := range m.(map[string]interface{}) {
// 兼容 es6 版本
if k == "doc" && strings.HasPrefix(e.Version, "6") {
// if k == "doc" {
for kk, vv := range v.(map[string]interface{}) {
typ := getFieldType(kk, vv.(map[string]interface{}))
if eslike.HitFilter(typ) {
continue
}
if _, exsits := fieldMap[kk]; !exsits {
fieldMap[kk] = struct{}{}
fields = append(fields, kk)
}
}
} else {
// es7 版本
typ := getFieldType(k, v.(map[string]interface{}))
if eslike.HitFilter(typ) {
continue
}
if _, exsits := fieldMap[k]; !exsits {
fieldMap[k] = struct{}{}
fields = append(fields, k)
}
}
}
}
}
sort.Strings(fields)
return fields, nil
}
func (e *Elasticsearch) QueryLog(ctx context.Context, queryParam interface{}) ([]interface{}, int64, error) {
search := func(ctx context.Context, indices []string, source interface{}, timeout int, maxShard int) (*elastic.SearchResult, error) {
// 应该是之前为了获取 fields 字段,做的这个兼容
// fields, err := e.QueryFields(indices)
// if err != nil {
// logger.Warningf("query data error:%v", err)
// return nil, err
// }
// if source != nil && strings.HasPrefix(e.Version, "7") {
// source = source.(*elastic.SearchSource).DocvalueFields(fields...)
// }
return e.Client.Search().
Index(indices...).
MaxConcurrentShardRequests(maxShard).
Source(source).
Timeout(fmt.Sprintf("%ds", timeout)).
Do(ctx)
}
return eslike.QueryLog(ctx, queryParam, e.Timeout, e.Version, e.MaxShard, search)
}
func (e *Elasticsearch) QueryFieldValue(indexs []string, field string, query string) ([]string, error) {
var values []string
search := e.Client.Search().
Index(indexs...).
Size(0)
if query != "" {
search = search.Query(elastic.NewBoolQuery().Must(elastic.NewQueryStringQuery(query)))
}
search = search.Aggregation("distinct", elastic.NewTermsAggregation().Field(field).Size(10000))
result, err := search.Do(context.Background())
if err != nil {
return values, err
}
agg, found := result.Aggregations.Terms("distinct")
if !found {
return values, nil
}
for _, bucket := range agg.Buckets {
values = append(values, bucket.Key.(string))
}
return values, nil
}
func (e *Elasticsearch) Test(ctx context.Context) (err error) {
err = e.Validate(ctx)
if err != nil {
return err
}
if e.Addr == "" {
return fmt.Errorf("addr is invalid")
}
if e.Version == "7.10+" {
options := []elastic.ClientOptionFunc{
elastic.SetURL(e.Addr),
}
if e.Basic.Enable {
options = append(options, elastic.SetBasicAuth(e.Basic.Username, e.Basic.Password))
}
client, err := elastic.NewClient(options...)
if err != nil {
return fmt.Errorf("config is invalid:%v", err)
}
_, err = client.ElasticsearchVersion(e.Addr)
if err != nil {
return fmt.Errorf("config is invalid:%v", err)
}
} else {
return fmt.Errorf("version must be 7.10+")
}
return nil
}
func getFieldType(key string, m map[string]interface{}) string {
if innerMap, exists := m["mapping"]; exists {
if innerM, exists := innerMap.(map[string]interface{})[key]; exists {
if typ, exists := innerM.(map[string]interface{})["type"]; exists {
return typ.(string)
}
} else {
arr := strings.Split(key, ".")
if innerM, exists := innerMap.(map[string]interface{})[arr[len(arr)-1]]; exists {
if typ, exists := innerM.(map[string]interface{})["type"]; exists {
return typ.(string)
}
}
}
}
return ""
}
func (e *Elasticsearch) QueryMapData(ctx context.Context, query interface{}) ([]map[string]string, error) {
search := func(ctx context.Context, indices []string, source interface{}, timeout int, maxShard int) (*elastic.SearchResult, error) {
return e.Client.Search().
Index(indices...).
Source(source).
Timeout(fmt.Sprintf("%ds", timeout)).
Do(ctx)
}
param := new(eslike.Query)
if err := mapstructure.Decode(query, param); err != nil {
return nil, err
}
// 扩大查询范围, 解决上一次查询消耗时间太多,导致本次查询时间范围起止时间,滞后问题
param.Interval += 30
res, _, err := eslike.QueryLog(ctx, param, e.Timeout, e.Version, e.MaxShard, search)
if err != nil {
return nil, err
}
var result []map[string]string
for _, item := range res {
logger.Debugf("query:%v item:%v", query, item)
if itemMap, ok := item.(*elastic.SearchHit); ok {
mItem := make(map[string]string)
// 遍历 fields 字段的每个键值对
sourceMap := make(map[string]interface{})
err := json.Unmarshal(itemMap.Source, &sourceMap)
if err != nil {
logger.Warningf("unmarshal source%s error:%v", string(itemMap.Source), err)
continue
}
for k, v := range sourceMap {
mItem[k] = fmt.Sprintf("%v", v)
}
// 将处理好的 map 添加到 m 切片中
result = append(result, mItem)
// 只取第一条数据
break
}
}
return result, nil
}

15
datasource/prom/prom.go Normal file
View File

@@ -0,0 +1,15 @@
package prom
type Prometheus struct {
PrometheusAddr string `json:"prometheus.addr"`
PrometheusBasic struct {
PrometheusUser string `json:"prometheus.user"`
PrometheusPass string `json:"prometheus.password"`
} `json:"prometheus.basic"`
Headers map[string]string `json:"prometheus.headers"`
PrometheusTimeout int64 `json:"prometheus.timeout"`
ClusterName string `json:"prometheus.cluster_name"`
WriteAddr string `json:"prometheus.write_addr"`
TsdbType string `json:"prometheus.tsdb_type"`
InternalAddr string `json:"prometheus.internal_addr"`
}

View File

@@ -0,0 +1,460 @@
package tdengine
import (
"context"
"encoding/json"
"fmt"
"reflect"
"strconv"
"strings"
"time"
"github.com/prometheus/common/model"
"github.com/toolkits/pkg/logger"
"github.com/ccfos/nightingale/v6/datasource"
td "github.com/ccfos/nightingale/v6/dskit/tdengine"
"github.com/ccfos/nightingale/v6/models"
"github.com/mitchellh/mapstructure"
)
const (
TDEngineType = "tdengine"
)
type TDengine struct {
td.Tdengine `json:",inline" mapstructure:",squash"`
}
type TdengineQuery struct {
From string `json:"from"`
Interval int64 `json:"interval"`
Keys Keys `json:"keys"`
Query string `json:"query"` // 查询条件
Ref string `json:"ref"` // 变量
To string `json:"to"`
}
type Keys struct {
LabelKey string `json:"labelKey"` // 多个用空格分隔
MetricKey string `json:"metricKey"` // 多个用空格分隔
TimeFormat string `json:"timeFormat"`
}
func init() {
datasource.RegisterDatasource(TDEngineType, new(TDengine))
}
func (td *TDengine) Init(settings map[string]interface{}) (datasource.Datasource, error) {
newest := new(TDengine)
err := mapstructure.Decode(settings, newest)
return newest, err
}
func (td *TDengine) InitClient() error {
td.InitCli()
return nil
}
func (td *TDengine) Equal(other datasource.Datasource) bool {
otherTD, ok := other.(*TDengine)
if !ok {
return false
}
if td.Addr != otherTD.Addr {
return false
}
if td.Basic != nil && otherTD.Basic != nil {
if td.Basic.User != otherTD.Basic.User {
return false
}
if td.Basic.Password != otherTD.Basic.Password {
return false
}
}
if td.Token != otherTD.Token {
return false
}
if td.Timeout != otherTD.Timeout {
return false
}
if td.DialTimeout != otherTD.DialTimeout {
return false
}
if td.MaxIdleConnsPerHost != otherTD.MaxIdleConnsPerHost {
return false
}
if len(td.Headers) != len(otherTD.Headers) {
return false
}
for k, v := range td.Headers {
if otherV, ok := otherTD.Headers[k]; !ok || v != otherV {
return false
}
}
return true
}
func (td *TDengine) Validate(ctx context.Context) (err error) {
return nil
}
func (td *TDengine) MakeLogQuery(ctx context.Context, query interface{}, eventTags []string, start, end int64) (interface{}, error) {
return nil, nil
}
func (td *TDengine) MakeTSQuery(ctx context.Context, query interface{}, eventTags []string, start, end int64) (interface{}, error) {
return nil, nil
}
func (td *TDengine) QueryData(ctx context.Context, queryParam interface{}) ([]models.DataResp, error) {
return td.Query(queryParam, 0)
}
func (td *TDengine) QueryLog(ctx context.Context, queryParam interface{}) ([]interface{}, int64, error) {
b, err := json.Marshal(queryParam)
if err != nil {
return nil, 0, err
}
var q TdengineQuery
err = json.Unmarshal(b, &q)
if err != nil {
return nil, 0, err
}
if q.Interval == 0 {
q.Interval = 60
}
if q.From == "" {
// 2023-09-21T05:37:30.000Z format
to := time.Now().Unix()
q.To = time.Unix(to, 0).UTC().Format(time.RFC3339)
from := to - q.Interval
q.From = time.Unix(from, 0).UTC().Format(time.RFC3339)
}
replacements := map[string]string{
"$from": fmt.Sprintf("'%s'", q.From),
"$to": fmt.Sprintf("'%s'", q.To),
"$interval": fmt.Sprintf("%ds", q.Interval),
}
for key, val := range replacements {
q.Query = strings.ReplaceAll(q.Query, key, val)
}
if !strings.Contains(q.Query, "limit") {
q.Query = q.Query + " limit 200"
}
data, err := td.QueryTable(q.Query)
if err != nil {
return nil, 0, err
}
return ConvertToTable(data), int64(len(data.Data)), nil
}
func (td *TDengine) QueryMapData(ctx context.Context, query interface{}) ([]map[string]string, error) {
return nil, nil
}
func (td *TDengine) Query(query interface{}, delay ...int) ([]models.DataResp, error) {
b, err := json.Marshal(query)
if err != nil {
return nil, err
}
var q TdengineQuery
err = json.Unmarshal(b, &q)
if err != nil {
return nil, err
}
if q.Interval == 0 {
q.Interval = 60
}
delaySec := 0
if len(delay) > 0 {
delaySec = delay[0]
}
if q.From == "" {
// 2023-09-21T05:37:30.000Z format
to := time.Now().Unix() - int64(delaySec)
q.To = time.Unix(to, 0).UTC().Format(time.RFC3339)
from := to - q.Interval
q.From = time.Unix(from, 0).UTC().Format(time.RFC3339)
}
replacements := map[string]string{
"$from": fmt.Sprintf("'%s'", q.From),
"$to": fmt.Sprintf("'%s'", q.To),
"$interval": fmt.Sprintf("%ds", q.Interval),
}
for key, val := range replacements {
q.Query = strings.ReplaceAll(q.Query, key, val)
}
data, err := td.QueryTable(q.Query)
if err != nil {
return nil, err
}
logger.Debugf("tdengine query:%s result: %+v", q.Query, data)
return ConvertToTStData(data, q.Keys, q.Ref)
}
func ConvertToTStData(src td.APIResponse, key Keys, ref string) ([]models.DataResp, error) {
metricIdxMap := make(map[string]int)
labelIdxMap := make(map[string]int)
metricMap := make(map[string]struct{})
if key.MetricKey != "" {
metricList := strings.Split(key.MetricKey, " ")
for _, metric := range metricList {
metricMap[metric] = struct{}{}
}
}
labelMap := make(map[string]string)
if key.LabelKey != "" {
labelList := strings.Split(key.LabelKey, " ")
for _, label := range labelList {
labelMap[label] = label
}
}
tsIdx := -1
for colIndex, colData := range src.ColumnMeta {
colName := colData[0].(string)
var colType string
// 处理v2版本数字类型和v3版本字符串类型
switch t := colData[1].(type) {
case float64:
// v2版本数字类型映射
switch int(t) {
case 1:
colType = "BOOL"
case 2:
colType = "TINYINT"
case 3:
colType = "SMALLINT"
case 4:
colType = "INT"
case 5:
colType = "BIGINT"
case 6:
colType = "FLOAT"
case 7:
colType = "DOUBLE"
case 8:
colType = "BINARY"
case 9:
colType = "TIMESTAMP"
case 10:
colType = "NCHAR"
default:
colType = "UNKNOWN"
}
case string:
// v3版本直接使用字符串类型
colType = t
default:
logger.Warningf("unexpected column type format: %v", colData[1])
continue
}
switch colType {
case "TIMESTAMP":
tsIdx = colIndex
case "BIGINT", "INT", "INT UNSIGNED", "BIGINT UNSIGNED", "FLOAT", "DOUBLE",
"SMALLINT", "SMALLINT UNSIGNED", "TINYINT", "TINYINT UNSIGNED", "BOOL":
if len(metricMap) > 0 {
if _, ok := metricMap[colName]; !ok {
continue
}
metricIdxMap[colName] = colIndex
} else {
metricIdxMap[colName] = colIndex
}
default:
if len(labelMap) > 0 {
if _, ok := labelMap[colName]; !ok {
continue
}
labelIdxMap[colName] = colIndex
} else {
labelIdxMap[colName] = colIndex
}
}
}
if tsIdx == -1 {
return nil, fmt.Errorf("timestamp column not found, please check your query")
}
var result []models.DataResp
m := make(map[string]*models.DataResp)
for _, row := range src.Data {
for metricName, metricIdx := range metricIdxMap {
value, err := interfaceToFloat64(row[metricIdx])
if err != nil {
logger.Warningf("parse %v value failed: %v", row, err)
continue
}
metric := make(model.Metric)
for labelName, labelIdx := range labelIdxMap {
metric[model.LabelName(labelName)] = model.LabelValue(row[labelIdx].(string))
}
metric[model.MetricNameLabel] = model.LabelValue(metricName)
// transfer 2022-06-29T05:52:16.603Z to unix timestamp
t, err := parseTimeString(row[tsIdx].(string))
if err != nil {
logger.Warningf("parse %v timestamp failed: %v", row, err)
continue
}
timestamp := t.Unix()
if _, ok := m[metric.String()]; !ok {
m[metric.String()] = &models.DataResp{
Metric: metric,
Values: [][]float64{
{float64(timestamp), value},
},
}
} else {
m[metric.String()].Values = append(m[metric.String()].Values, []float64{float64(timestamp), value})
}
}
}
for _, v := range m {
v.Ref = ref
result = append(result, *v)
}
return result, nil
}
func interfaceToFloat64(input interface{}) (float64, error) {
// Check for the kind of the value first
if input == nil {
return 0, fmt.Errorf("unsupported type: %T", input)
}
kind := reflect.TypeOf(input).Kind()
switch kind {
case reflect.Float64:
return input.(float64), nil
case reflect.Float32:
return float64(input.(float32)), nil
case reflect.Int, reflect.Int32, reflect.Int64, reflect.Int8, reflect.Int16:
return float64(reflect.ValueOf(input).Int()), nil
case reflect.Uint, reflect.Uint32, reflect.Uint64, reflect.Uint8, reflect.Uint16:
return float64(reflect.ValueOf(input).Uint()), nil
case reflect.String:
return strconv.ParseFloat(input.(string), 64)
case reflect.Bool:
if input.(bool) {
return 1.0, nil
}
return 0.0, nil
default:
return 0, fmt.Errorf("unsupported type: %T", input)
}
}
func parseTimeString(ts string) (time.Time, error) {
// 尝试不同的时间格式
formats := []string{
// 标准格式
time.Layout, // "01/02 03:04:05PM '06 -0700"
time.ANSIC, // "Mon Jan _2 15:04:05 2006"
time.UnixDate, // "Mon Jan _2 15:04:05 MST 2006"
time.RubyDate, // "Mon Jan 02 15:04:05 -0700 2006"
time.RFC822, // "02 Jan 06 15:04 MST"
time.RFC822Z, // "02 Jan 06 15:04 -0700"
time.RFC850, // "Monday, 02-Jan-06 15:04:05 MST"
time.RFC1123, // "Mon, 02 Jan 2006 15:04:05 MST"
time.RFC1123Z, // "Mon, 02 Jan 2006 15:04:05 -0700"
time.RFC3339, // "2006-01-02T15:04:05Z07:00"
time.RFC3339Nano, // "2006-01-02T15:04:05.999999999Z07:00"
time.Kitchen, // "3:04PM"
// 实用时间戳格式
time.Stamp, // "Jan _2 15:04:05"
time.StampMilli, // "Jan _2 15:04:05.000"
time.StampMicro, // "Jan _2 15:04:05.000000"
time.StampNano, // "Jan _2 15:04:05.000000000"
time.DateTime, // "2006-01-02 15:04:05"
time.DateOnly, // "2006-01-02"
time.TimeOnly, // "15:04:05"
// 常用自定义格式
"2006-01-02T15:04:05", // 无时区的ISO格式
"2006-01-02T15:04:05.000Z",
"2006-01-02T15:04:05Z",
"2006-01-02 15:04:05.999999999", // 纳秒
"2006-01-02 15:04:05.999999", // 微秒
"2006-01-02 15:04:05.999", // 毫秒
"2006/01/02",
"20060102",
"01/02/2006",
"2006年01月02日",
"2006年01月02日 15:04:05",
}
var lastErr error
for _, format := range formats {
t, err := time.Parse(format, ts)
if err == nil {
return t, nil
}
lastErr = err
}
// 尝试解析 Unix 时间戳
if timestamp, err := strconv.ParseInt(ts, 10, 64); err == nil {
switch len(ts) {
case 10: // 秒
return time.Unix(timestamp, 0), nil
case 13: // 毫秒
return time.Unix(timestamp/1000, (timestamp%1000)*1000000), nil
case 16: // 微秒
return time.Unix(timestamp/1000000, (timestamp%1000000)*1000), nil
case 19: // 纳秒
return time.Unix(timestamp/1000000000, timestamp%1000000000), nil
}
}
return time.Time{}, fmt.Errorf("failed to parse time with any format: %v", lastErr)
}
func ConvertToTable(src td.APIResponse) []interface{} {
var resp []interface{}
for i := range src.Data {
cur := make(map[string]interface{})
for j := range src.Data[i] {
cur[src.ColumnMeta[j][0].(string)] = src.Data[i][j]
}
resp = append(resp, cur)
}
return resp
}

View File

@@ -1,5 +1,3 @@
version: "3.7"
networks:
nightingale:
driver: bridge

View File

@@ -19,8 +19,8 @@ precision = "ms"
# global collect interval
interval = 15
[global.labels]
source="categraf"
# [global.labels]
# source="categraf"
# region = "shanghai"
# env = "localhost"

View File

@@ -0,0 +1,42 @@
[[instances]]
address = "mysql:3306"
username = "root"
password = "1234"
# # set tls=custom to enable tls
# parameters = "tls=false"
# extra_status_metrics = true
# extra_innodb_metrics = false
# gather_processlist_processes_by_state = false
# gather_processlist_processes_by_user = false
# gather_schema_size = true
# gather_table_size = false
# gather_system_table_size = false
# gather_slave_status = true
# # timeout
# timeout_seconds = 3
# # interval = global.interval * interval_times
# interval_times = 1
# important! use global unique string to specify instance
labels = { instance="docker-compose-mysql" }
## Optional TLS Config
# use_tls = false
# tls_min_version = "1.2"
# tls_ca = "/etc/categraf/ca.pem"
# tls_cert = "/etc/categraf/cert.pem"
# tls_key = "/etc/categraf/key.pem"
## Use TLS but skip chain & host verification
# insecure_skip_verify = true
#[[instances.queries]]
# mesurement = "lock_wait"
# metric_fields = [ "total" ]
# timeout = "3s"
# request = '''
#SELECT count(*) as total FROM information_schema.innodb_trx WHERE trx_state='LOCK WAIT'
#'''

View File

@@ -0,0 +1,37 @@
[[instances]]
address = "redis:6379"
username = ""
password = ""
# pool_size = 2
## 是否开启slowlog 收集
# gather_slowlog = true
## 最多收集少条slowlog
# slowlog_max_len = 100
## 收集距离现在多少秒以内的slowlog
## 注意插件的采集周期,该参数不要小于采集周期否则会有slowlog查不到
# slowlog_time_window=30
# 指标
# redis_slow_log{ident=dev-01 client_addr=127.0.0.1:56364 client_name= cmd="info ALL" log_id=983} 74 (单位微秒)
# # Optional. Specify redis commands to retrieve values
# commands = [
# {command = ["get", "sample-key1"], metric = "custom_metric_name1"},
# {command = ["get", "sample-key2"], metric = "custom_metric_name2"}
# ]
# # interval = global.interval * interval_times
# interval_times = 1
# important! use global unique string to specify instance
labels = { instance="docker-compose-redis" }
## Optional TLS Config
# use_tls = false
# tls_min_version = "1.2"
# tls_ca = "/etc/categraf/ca.pem"
# tls_cert = "/etc/categraf/cert.pem"
# tls_key = "/etc/categraf/key.pem"
## Use TLS but skip chain & host verification
# insecure_skip_verify = true

View File

@@ -50,7 +50,7 @@ Enable = true
# user001 = "ccc26da7b9aba533cbb263a36c07dcc5"
[HTTP.APIForService]
Enable = true
Enable = false
[HTTP.APIForService.BasicAuth]
user001 = "ccc26da7b9aba533cbb263a36c07dcc5"

View File

@@ -50,7 +50,7 @@ Enable = true
# user001 = "ccc26da7b9aba533cbb263a36c07dcc5"
[HTTP.APIForService]
Enable = true
Enable = false
[HTTP.APIForService.BasicAuth]
user001 = "ccc26da7b9aba533cbb263a36c07dcc5"

View File

@@ -25,7 +25,7 @@ services:
network_mode: host
prometheus:
image: prom/prometheus
image: prom/prometheus:v2.55.1
container_name: prometheus
hostname: prometheus
restart: always

View File

@@ -50,7 +50,7 @@ Enable = true
# user001 = "ccc26da7b9aba533cbb263a36c07dcc5"
[HTTP.APIForService]
Enable = true
Enable = false
[HTTP.APIForService.BasicAuth]
user001 = "ccc26da7b9aba533cbb263a36c07dcc5"

View File

@@ -561,7 +561,7 @@ CREATE TABLE alert_cur_event (
target_note varchar(191) not null default '' ,
first_trigger_time bigint,
trigger_time bigint not null,
trigger_value varchar(255) not null,
trigger_value varchar(2048) not null,
annotations text not null ,
rule_config text not null ,
tags varchar(1024) not null default '' ,
@@ -621,7 +621,7 @@ CREATE TABLE alert_his_event (
target_note varchar(191) not null default '' ,
first_trigger_time bigint,
trigger_time bigint not null,
trigger_value varchar(255) not null,
trigger_value varchar(2048) not null,
recover_time bigint not null default 0,
last_eval_time bigint not null default 0 ,
tags varchar(1024) not null default '' ,
@@ -790,6 +790,7 @@ CREATE TABLE es_index_pattern (
time_field varchar(128) not null default '@timestamp',
allow_hide_system_indices smallint not null default 0,
fields_format varchar(4096) not null default '',
cross_cluster_enabled int not null default 0,
create_at bigint default '0',
create_by varchar(64) default '',
update_at bigint default '0',
@@ -859,6 +860,7 @@ CREATE TABLE builtin_components (
ident VARCHAR(191) NOT NULL,
logo VARCHAR(191) NOT NULL,
readme TEXT NOT NULL,
disabled INT NOT NULL DEFAULT 0,
created_at BIGINT NOT NULL DEFAULT 0,
created_by VARCHAR(191) NOT NULL DEFAULT '',
updated_at BIGINT NOT NULL DEFAULT 0,
@@ -885,4 +887,20 @@ CREATE TABLE builtin_payloads (
CREATE INDEX idx_component ON builtin_payloads (component);
CREATE INDEX idx_builtin_payloads_name ON builtin_payloads (name);
CREATE INDEX idx_cate ON builtin_payloads (cate);
CREATE INDEX idx_type ON builtin_payloads (type);
CREATE INDEX idx_type ON builtin_payloads (type);
CREATE TABLE dash_annotation (
id bigserial PRIMARY KEY,
dashboard_id bigint not null,
panel_id varchar(191) not null,
tags text,
description text,
config text,
time_start bigint not null default 0,
time_end bigint not null default 0,
create_at bigint not null default 0,
create_by varchar(64) not null default '',
update_at bigint not null default 0,
update_by varchar(64) not null default ''
);

View File

@@ -50,7 +50,7 @@ Enable = true
# user001 = "ccc26da7b9aba533cbb263a36c07dcc5"
[HTTP.APIForService]
Enable = true
Enable = false
[HTTP.APIForService.BasicAuth]
user001 = "ccc26da7b9aba533cbb263a36c07dcc5"

File diff suppressed because it is too large Load Diff

View File

@@ -1,3 +1,10 @@
GRANT ALL ON *.* TO 'root'@'127.0.0.1' IDENTIFIED BY '1234';
GRANT ALL ON *.* TO 'root'@'localhost' IDENTIFIED BY '1234';
GRANT ALL ON *.* TO 'root'@'%' IDENTIFIED BY '1234';
CREATE USER IF NOT EXISTS 'root'@'127.0.0.1' IDENTIFIED BY '1234';
GRANT ALL PRIVILEGES ON *.* TO 'root'@'127.0.0.1' WITH GRANT OPTION;
CREATE USER IF NOT EXISTS 'root'@'localhost' IDENTIFIED BY '1234';
GRANT ALL PRIVILEGES ON *.* TO 'root'@'localhost' WITH GRANT OPTION;
CREATE USER IF NOT EXISTS 'root'@'%' IDENTIFIED BY '1234';
GRANT ALL PRIVILEGES ON *.* TO 'root'@'%' WITH GRANT OPTION;
FLUSH PRIVILEGES;

View File

@@ -83,4 +83,68 @@ ALTER TABLE recording_rule ADD COLUMN cron_pattern VARCHAR(255) DEFAULT '' COMME
/* v7.0.0-beta.14 */
ALTER TABLE alert_cur_event ADD COLUMN original_tags TEXT COMMENT 'labels key=val,,k2=v2';
ALTER TABLE alert_his_event ADD COLUMN original_tags TEXT COMMENT 'labels key=val,,k2=v2';
ALTER TABLE alert_his_event ADD COLUMN original_tags TEXT COMMENT 'labels key=val,,k2=v2';
/* v7.1.0 */
ALTER TABLE target ADD COLUMN os VARCHAR(31) DEFAULT '' COMMENT 'os type';
/* v7.2.0 */
CREATE TABLE notification_record (
`id` BIGINT PRIMARY KEY AUTO_INCREMENT,
`event_id` BIGINT NOT NULL,
`sub_id` BIGINT NOT NULL,
`channel` VARCHAR(255) NOT NULL,
`status` TINYINT NOT NULL DEFAULT 0,
`target` VARCHAR(1024) NOT NULL,
`details` VARCHAR(2048),
`created_at` BIGINT NOT NULL,
INDEX idx_evt (event_id)
) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4;
/* v7.3.0 2024-08-26 */
ALTER TABLE `target` ADD COLUMN `host_tags` TEXT COMMENT 'global labels set in conf file';
/* v7.3.4 2024-08-28 */
ALTER TABLE `builtin_payloads` ADD COLUMN `component_id` bigint(20) NOT NULL DEFAULT 0 COMMENT 'component_id';
/* v7.4.0 2024-09-20 */
CREATE TABLE `target_busi_group` (
`id` bigint NOT NULL AUTO_INCREMENT,
`target_ident` varchar(191) NOT NULL,
`group_id` bigint NOT NULL,
`update_at` bigint NOT NULL,
PRIMARY KEY (`id`),
UNIQUE KEY `idx_target_group` (`target_ident`,`group_id`)
) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4;
/* v7.7.2 2024-12-02 */
ALTER TABLE alert_subscribe MODIFY COLUMN rule_ids varchar(1024);
ALTER TABLE alert_subscribe MODIFY COLUMN busi_groups varchar(4096);
/* v8.0.0-beta.1 2024-12-13 */
ALTER TABLE `alert_rule` ADD COLUMN `cron_pattern` VARCHAR(64);
ALTER TABLE `builtin_components` MODIFY COLUMN `logo` mediumtext COMMENT '''logo of component''';
/* v8.0.0-beta.2 2024-12-26 */
ALTER TABLE `es_index_pattern` ADD COLUMN `cross_cluster_enabled` int not null default 0;
/* v8.0.0-beta.3 2024-01-03 */
ALTER TABLE `builtin_components` ADD COLUMN `disabled` INT NOT NULL DEFAULT 0 COMMENT 'is disabled or not';
CREATE TABLE `dash_annotation` (
`id` bigint unsigned not null auto_increment,
`dashboard_id` bigint not null comment 'dashboard id',
`panel_id` varchar(191) not null comment 'panel id',
`tags` text comment 'tags array json string',
`description` text comment 'annotation description',
`config` text comment 'annotation config',
`time_start` bigint not null default 0 comment 'start timestamp',
`time_end` bigint not null default 0 comment 'end timestamp',
`create_at` bigint not null default 0 comment 'create time',
`create_by` varchar(64) not null default '' comment 'creator',
`update_at` bigint not null default 0 comment 'update time',
`update_by` varchar(64) not null default '' comment 'updater',
PRIMARY KEY (`id`),
KEY `idx_dashboard_id` (`dashboard_id`)
) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4;

View File

@@ -17,6 +17,8 @@ CREATE TABLE `users` (
`update_by` varchar(64) not null default ''
);
CREATE UNIQUE INDEX idx_users_username ON `users` (username);
insert into `users`(id, username, nickname, password, roles, create_at, create_by, update_at, update_by) values(1, 'root', '超管', 'root.2020', 'Admin', strftime('%s', 'now'), 'system', strftime('%s', 'now'), 'system');
CREATE TABLE `user_group` (
@@ -182,8 +184,9 @@ CREATE TABLE `board` (
`create_by` varchar(64) not null default '',
`update_at` bigint not null default 0,
`update_by` varchar(64) not null default '',
unique (`group_id`, `name`)
`public_cate` bigint not null default 0
);
CREATE UNIQUE INDEX idx_board_group_id_name ON `board` (group_id, name);
CREATE INDEX `idx_board_ident` ON `board` (`ident` asc);
-- for dashboard new version
@@ -192,6 +195,15 @@ CREATE TABLE `board_payload` (
`payload` mediumtext not null
);
CREATE TABLE `chart` (
`id` integer primary key autoincrement,
`group_id` integer not null,
`configs` text,
`weight` integer not null default 0
);
CREATE INDEX idx_chart_group_id ON `chart` (group_id);
CREATE TABLE `chart_share` (
`id` integer primary key autoincrement,
`cluster` varchar(128) not null,
@@ -238,7 +250,9 @@ CREATE TABLE `alert_rule` (
`create_at` bigint not null default 0,
`create_by` varchar(64) not null default '',
`update_at` bigint not null default 0,
`update_by` varchar(64) not null default ''
`update_by` varchar(64) not null default '',
`cron_pattern` varchar(64),
`datasource_queries` text
);
CREATE INDEX `idx_alert_rule_group_id` ON `alert_rule` (`group_id` asc);
CREATE INDEX `idx_alert_rule_update_at` ON `alert_rule` (`update_at` asc);
@@ -308,11 +322,18 @@ CREATE TABLE `target` (
`tags` varchar(512) not null default '',
`host_ip` varchar(15) default '',
`agent_version` varchar(255) default '',
`host_tags` text,
`engine_name` varchar(255) default '',
`os` varchar(31) default '',
`update_at` bigint not null default 0
);
CREATE INDEX `idx_target_group_id` ON `target` (`group_id` asc);
CREATE INDEX `idx_target_group_id` ON `target` (`group_id` asc);
CREATE UNIQUE INDEX idx_target_ident ON `target` (ident);
CREATE INDEX idx_host_ip ON `target` (host_ip);
CREATE INDEX idx_agent_version ON `target` (agent_version);
CREATE INDEX idx_engine_name ON `target` (engine_name);
CREATE INDEX idx_os ON `target` (os);
CREATE TABLE `metric_view` (
`id` integer primary key autoincrement,
@@ -337,12 +358,14 @@ CREATE TABLE `recording_rule` (
`disabled` tinyint(1) not null default 0,
`prom_ql` varchar(8192) not null,
`prom_eval_interval` int not null,
`cron_pattern` varchar(255) default '',
`append_tags` varchar(255) default '',
`query_configs` text not null,
`create_at` bigint default '0',
`create_by` varchar(64) default '',
`update_at` bigint default '0',
`update_by` varchar(64) default ''
`update_by` varchar(64) default '',
`datasource_queries` text
);
CREATE INDEX `idx_recording_rule_group_id` ON `recording_rule` (`group_id` asc);
CREATE INDEX `idx_recording_rule_update_at` ON `recording_rule` (`update_at` asc);
@@ -389,7 +412,7 @@ CREATE TABLE `alert_cur_event` (
`target_note` varchar(191) not null default '',
`first_trigger_time` bigint,
`trigger_time` bigint not null,
`trigger_value` varchar(255) not null,
`trigger_value` varchar(2048) not null,
`annotations` text not null,
`rule_config` text not null,
`tags` varchar(1024) not null default ''
@@ -427,9 +450,10 @@ CREATE TABLE `alert_his_event` (
`target_note` varchar(191) not null default '',
`first_trigger_time` bigint,
`trigger_time` bigint not null,
`trigger_value` varchar(255) not null,
`trigger_value` varchar(2048) not null,
`recover_time` bigint not null default 0,
`last_eval_time` bigint not null default 0,
`original_tags` varchar(8192),
`tags` varchar(1024) not null default '',
`annotations` text not null,
`rule_config` text not null
@@ -459,6 +483,8 @@ CREATE INDEX `idx_builtin_components_ident` ON `builtin_components` (`ident` asc
CREATE TABLE `builtin_payloads` (
`id` integer primary key autoincrement,
`component_id` integer not null default 0,
`uuid` integer not null,
`type` varchar(191) not null,
`component` varchar(191) not null,
`cate` varchar(191) not null,
@@ -474,6 +500,20 @@ CREATE INDEX `idx_builtin_payloads_component` ON `builtin_payloads` (`component`
CREATE INDEX `idx_builtin_payloads_name` ON `builtin_payloads` (`name` asc);
CREATE INDEX `idx_builtin_payloads_cate` ON `builtin_payloads` (`cate` asc);
CREATE INDEX `idx_builtin_payloads_type` ON `builtin_payloads` (`type` asc);
CREATE INDEX idx_uuid ON `builtin_payloads` (uuid);
CREATE TABLE `notification_record` (
`id` integer primary key autoincrement,
`event_id` integer not null,
`sub_id` integer,
`channel` varchar(255) not null,
`status` integer,
`target` varchar(1024) not null,
`details` varchar(2048) default '',
`created_at` integer not null
);
CREATE INDEX idx_evt ON notification_record (event_id);
CREATE TABLE `task_tpl` (
`id` integer primary key autoincrement,
@@ -553,6 +593,8 @@ CREATE TABLE `datasource`
`updated_by` varchar(64) not null default ''
);
CREATE UNIQUE INDEX idx_datasource_name ON datasource (name);
CREATE TABLE `builtin_cate` (
`id` integer primary key autoincrement,
`name` varchar(191) not null,
@@ -570,6 +612,8 @@ CREATE TABLE `notify_tpl` (
`update_by` varchar(64) not null default ''
);
CREATE UNIQUE INDEX idx_notify_tpl_channel ON notify_tpl (channel);
CREATE TABLE `sso_config` (
`id` integer primary key autoincrement,
`name` varchar(191) not null unique,
@@ -577,6 +621,8 @@ CREATE TABLE `sso_config` (
`update_at` bigint not null default 0
);
CREATE UNIQUE INDEX idx_sso_config_name ON sso_config (name);
CREATE TABLE `es_index_pattern` (
`id` integer primary key autoincrement,
`datasource_id` bigint not null default 0,
@@ -584,6 +630,7 @@ CREATE TABLE `es_index_pattern` (
`time_field` varchar(128) not null default '@timestamp',
`allow_hide_system_indices` tinyint(1) not null default 0,
`fields_format` varchar(4096) not null default '',
`cross_cluster_enabled` int not null default 0,
`create_at` bigint default '0',
`create_by` varchar(64) default '',
`update_at` bigint default '0',
@@ -591,6 +638,8 @@ CREATE TABLE `es_index_pattern` (
unique (`datasource_id`, `name`)
);
CREATE UNIQUE INDEX idx_es_index_pattern_datasource_id_name ON es_index_pattern (datasource_id, name);
CREATE TABLE `builtin_metrics` (
`id` integer primary key autoincrement,
`collector` varchar(191) NOT NULL,
@@ -603,13 +652,15 @@ CREATE TABLE `builtin_metrics` (
`created_at` bigint NOT NULL DEFAULT 0,
`created_by` varchar(191) NOT NULL DEFAULT '',
`updated_at` bigint NOT NULL DEFAULT 0,
`updated_by` varchar(191) NOT NULL DEFAULT ''
`updated_by` varchar(191) NOT NULL DEFAULT '',
`uuid integer` not null default 0
);
-- CREATE UNIQUE INDEX `idx_builtin_metrics_collector_typ_name` ON `builtin_metrics` (`lang`,`collector`, `typ`, `name` asc);
-- CREATE INDEX `idx_builtin_metrics_collector` ON `builtin_metrics` (`collector` asc);
-- CREATE INDEX `idx_builtin_metrics_typ` ON `builtin_metrics` (`typ` asc);
-- CREATE INDEX `idx_builtin_metrics_name` ON `builtin_metrics` (`name` asc);
-- CREATE INDEX `idx_builtin_metrics_lang` ON `builtin_metrics` (`lang` asc);
CREATE UNIQUE INDEX idx_collector_typ_name ON builtin_metrics (lang, collector, typ, name);
CREATE INDEX idx_collector ON builtin_metrics (collector);
CREATE INDEX idx_typ ON builtin_metrics (typ);
CREATE INDEX idx_builtinmetric_name ON builtin_metrics (name);
CREATE INDEX idx_lang ON builtin_metrics (lang);
CREATE TABLE `metric_filter` (
@@ -624,6 +675,30 @@ CREATE TABLE `metric_filter` (
);
CREATE INDEX `idx_metric_filter_name` ON `metric_filter` (`name` asc);
CREATE TABLE `target_busi_group` (
`id` integer primary key autoincrement,
`target_ident` varchar(191) not null,
`group_id` integer not null,
`update_at` integer not null
);
CREATE UNIQUE INDEX idx_target_busi_group ON target_busi_group (target_ident, group_id);
CREATE TABLE `dash_annotation` (
`id` integer primary key autoincrement,
`dashboard_id` bigint not null,
`panel_id` varchar(191) not null,
`tags` text,
`description` text,
`config` text,
`time_start` bigint not null default 0,
`time_end` bigint not null default 0,
`create_at` bigint not null default 0,
`create_by` varchar(64) not null default '',
`update_at` bigint not null default 0,
`update_by` varchar(64) not null default ''
);
CREATE TABLE `task_meta`
(

59
dscache/cache.go Normal file
View File

@@ -0,0 +1,59 @@
package dscache
import (
"sync"
"github.com/ccfos/nightingale/v6/datasource"
"github.com/toolkits/pkg/logger"
)
type Cache struct {
datas map[string]map[int64]datasource.Datasource
mutex *sync.RWMutex
}
var DsCache = Cache{
datas: make(map[string]map[int64]datasource.Datasource),
mutex: new(sync.RWMutex),
}
func (cs *Cache) Put(cate string, dsId int64, ds datasource.Datasource) {
cs.mutex.Lock()
if _, found := cs.datas[cate]; !found {
cs.datas[cate] = make(map[int64]datasource.Datasource)
}
if _, found := cs.datas[cate][dsId]; found {
if cs.datas[cate][dsId].Equal(ds) {
cs.mutex.Unlock()
return
}
}
cs.mutex.Unlock()
// InitClient() 在用户配置错误或远端不可用时, 会非常耗时, mutex被长期持有, 导致Get()会超时
err := ds.InitClient()
if err != nil {
logger.Errorf("init plugin:%s %d %+v client fail: %v", cate, dsId, ds, err)
return
}
logger.Debugf("init plugin:%s %d %+v client success", cate, dsId, ds)
cs.mutex.Lock()
cs.datas[cate][dsId] = ds
cs.mutex.Unlock()
}
func (cs *Cache) Get(cate string, dsId int64) (datasource.Datasource, bool) {
cs.mutex.RLock()
defer cs.mutex.RUnlock()
if _, found := cs.datas[cate]; !found {
return nil, false
}
if _, found := cs.datas[cate][dsId]; !found {
return nil, false
}
return cs.datas[cate][dsId], true
}

190
dscache/sync.go Normal file
View File

@@ -0,0 +1,190 @@
package dscache
import (
"context"
"strings"
"sync/atomic"
"time"
"github.com/ccfos/nightingale/v6/datasource"
_ "github.com/ccfos/nightingale/v6/datasource/ck"
"github.com/ccfos/nightingale/v6/datasource/es"
"github.com/ccfos/nightingale/v6/dskit/tdengine"
"github.com/ccfos/nightingale/v6/models"
"github.com/ccfos/nightingale/v6/pkg/ctx"
"github.com/toolkits/pkg/logger"
)
var FromAPIHook func()
func Init(ctx *ctx.Context, fromAPI bool) {
go getDatasourcesFromDBLoop(ctx, fromAPI)
}
type ListInput struct {
Page int `json:"p"`
Limit int `json:"limit"`
Category string `json:"category"`
PluginType string `json:"plugin_type"` // promethues
Status string `json:"status"`
}
type DSReply struct {
RequestID string `json:"request_id"`
Data struct {
Items []datasource.DatasourceInfo `json:"items"`
} `json:"data"`
}
type DSReplyEncrypt struct {
RequestID string `json:"request_id"`
Data string `json:"data"`
}
var PromDefaultDatasourceId int64
func getDatasourcesFromDBLoop(ctx *ctx.Context, fromAPI bool) {
for {
if !fromAPI {
items, err := models.GetDatasources(ctx)
if err != nil {
logger.Errorf("get datasource from database fail: %v", err)
//stat.CounterExternalErrorTotal.WithLabelValues("db", "get_cluster").Inc()
time.Sleep(time.Second * 2)
continue
}
var dss []datasource.DatasourceInfo
for _, item := range items {
if item.PluginType == "prometheus" && item.IsDefault {
atomic.StoreInt64(&PromDefaultDatasourceId, item.Id)
}
logger.Debugf("get datasource: %+v", item)
ds := datasource.DatasourceInfo{
Id: item.Id,
Name: item.Name,
Description: item.Description,
Category: item.Category,
PluginId: item.PluginId,
Type: item.PluginType,
PluginTypeName: item.PluginTypeName,
Settings: item.SettingsJson,
HTTPJson: item.HTTPJson,
AuthJson: item.AuthJson,
Status: item.Status,
IsDefault: item.IsDefault,
}
if item.PluginType == "elasticsearch" {
esN9eToDatasourceInfo(&ds, item)
} else if item.PluginType == "opensearch" {
osN9eToDatasourceInfo(&ds, item)
} else if item.PluginType == "tdengine" {
tdN9eToDatasourceInfo(&ds, item)
} else {
ds.Settings = make(map[string]interface{})
for k, v := range item.SettingsJson {
ds.Settings[k] = v
}
}
dss = append(dss, ds)
}
PutDatasources(dss)
} else {
FromAPIHook()
}
time.Sleep(time.Second * 2)
}
}
func tdN9eToDatasourceInfo(ds *datasource.DatasourceInfo, item models.Datasource) {
ds.Settings = make(map[string]interface{})
ds.Settings["tdengine.cluster_name"] = item.Name
ds.Settings["tdengine.addr"] = item.HTTPJson.Url
ds.Settings["tdengine.timeout"] = item.HTTPJson.Timeout
ds.Settings["tdengine.dial_timeout"] = item.HTTPJson.DialTimeout
ds.Settings["tdengine.max_idle_conns_per_host"] = item.HTTPJson.MaxIdleConnsPerHost
ds.Settings["tdengine.headers"] = item.HTTPJson.Headers
ds.Settings["tdengine.basic"] = tdengine.TDengineBasicAuth{
User: item.AuthJson.BasicAuthUser,
Password: item.AuthJson.BasicAuthPassword,
}
}
func esN9eToDatasourceInfo(ds *datasource.DatasourceInfo, item models.Datasource) {
ds.Settings = make(map[string]interface{})
ds.Settings["es.nodes"] = []string{item.HTTPJson.Url}
if len(item.HTTPJson.Urls) > 0 {
ds.Settings["es.nodes"] = item.HTTPJson.Urls
}
ds.Settings["es.timeout"] = item.HTTPJson.Timeout
ds.Settings["es.basic"] = es.BasicAuth{
Username: item.AuthJson.BasicAuthUser,
Password: item.AuthJson.BasicAuthPassword,
}
ds.Settings["es.tls"] = es.TLS{
SkipTlsVerify: item.HTTPJson.TLS.SkipTlsVerify,
}
ds.Settings["es.version"] = item.SettingsJson["version"]
ds.Settings["es.headers"] = item.HTTPJson.Headers
ds.Settings["es.min_interval"] = item.SettingsJson["min_interval"]
ds.Settings["es.max_shard"] = item.SettingsJson["max_shard"]
ds.Settings["es.enable_write"] = item.SettingsJson["enable_write"]
}
// for opensearch
func osN9eToDatasourceInfo(ds *datasource.DatasourceInfo, item models.Datasource) {
ds.Settings = make(map[string]interface{})
ds.Settings["os.nodes"] = []string{item.HTTPJson.Url}
ds.Settings["os.timeout"] = item.HTTPJson.Timeout
ds.Settings["os.basic"] = es.BasicAuth{
Username: item.AuthJson.BasicAuthUser,
Password: item.AuthJson.BasicAuthPassword,
}
ds.Settings["os.tls"] = es.TLS{
SkipTlsVerify: item.HTTPJson.TLS.SkipTlsVerify,
}
ds.Settings["os.version"] = item.SettingsJson["version"]
ds.Settings["os.headers"] = item.HTTPJson.Headers
ds.Settings["os.min_interval"] = item.SettingsJson["min_interval"]
ds.Settings["os.max_shard"] = item.SettingsJson["max_shard"]
}
func PutDatasources(items []datasource.DatasourceInfo) {
ids := make([]int64, 0)
for _, item := range items {
if item.Type == "prometheus" {
continue
}
if item.Type == "loki" {
continue
}
if item.Name == "" {
logger.Warningf("cluster name is empty, ignore %+v", item)
continue
}
typ := strings.ReplaceAll(item.Type, ".logging", "")
ds, err := datasource.GetDatasourceByType(typ, item.Settings)
if err != nil {
logger.Debugf("get plugin:%+v fail: %v", item, err)
continue
}
err = ds.Validate(context.Background())
if err != nil {
logger.Warningf("get plugin:%+v fail: %v", item, err)
continue
}
ids = append(ids, item.Id)
// 异步初始化 client 不然数据源同步的会很慢
go DsCache.Put(typ, item.Id, ds)
}
logger.Debugf("get plugin by type success Ids:%v", ids)
}

View File

@@ -0,0 +1,303 @@
package clickhouse
import (
"context"
"database/sql"
"errors"
"fmt"
"io"
"strings"
"time"
"github.com/ccfos/nightingale/v6/dskit/sqlbase"
"github.com/ccfos/nightingale/v6/dskit/types"
"github.com/ClickHouse/clickhouse-go/v2"
"github.com/mitchellh/mapstructure"
"github.com/toolkits/pkg/net/httplib"
ckDriver "gorm.io/driver/clickhouse"
"gorm.io/gorm"
)
const (
ckDataSource = "clickhouse://%s:%s@%s?read_timeout=10s"
DefaultLimit = 500
)
type Clickhouse struct {
Nodes []string `json:"ck.nodes" mapstructure:"ck.nodes"`
User string `json:"ck.user" mapstructure:"ck.user"`
Password string `json:"ck.password" mapstructure:"ck.password"`
Timeout int `json:"ck.timeout" mapstructure:"ck.timeout"`
MaxQueryRows int `json:"ck.max_query_rows" mapstructure:"ck.max_query_rows"`
Client *gorm.DB `json:"-"`
ClientByHTTP *sql.DB `json:"-"`
}
func (c *Clickhouse) InitCli() error {
if c.MaxQueryRows == 0 {
c.MaxQueryRows = DefaultLimit
}
if len(c.Nodes) == 0 {
return fmt.Errorf("not found ck shard, please check datasource config")
}
addr := c.Nodes[0]
url := addr
if !strings.HasPrefix(url, "http://") {
url = "http://" + url
}
resp, err := httplib.Get(url).SetTimeout(time.Second * 1).Response()
// 忽略HTTP Code错误, 因为可能不是HTTP协议
if err != nil {
return err
}
defer resp.Body.Close()
// HTTP 协议
if resp.StatusCode == 200 {
jsonBytes, _ := io.ReadAll(resp.Body)
if len(jsonBytes) > 0 && strings.Contains(strings.ToLower(string(jsonBytes)), "ok.") {
ckconn := clickhouse.OpenDB(&clickhouse.Options{
Addr: []string{addr},
Auth: clickhouse.Auth{
Username: c.User,
Password: c.Password,
},
Settings: clickhouse.Settings{
"max_execution_time": 60,
},
DialTimeout: 10 * time.Second,
Protocol: clickhouse.HTTP,
})
if ckconn == nil {
return errors.New("db conn failed")
}
c.ClientByHTTP = ckconn
return nil
}
}
db, err := gorm.Open(
ckDriver.New(
ckDriver.Config{
DSN: fmt.Sprintf(ckDataSource,
c.User, c.Password, addr),
DisableDatetimePrecision: true,
DontSupportRenameColumn: true,
SkipInitializeWithVersion: false,
}),
)
if err != nil {
return err
}
c.Client = db
return nil
}
const (
ShowDatabases = "SHOW DATABASES"
ShowTables = "SELECT name FROM system.tables WHERE database = '%s'"
DescTable = "SELECT name,type FROM system.columns WHERE database='%s' AND table = '%s';"
)
func (c *Clickhouse) QueryRows(ctx context.Context, query string) (*sql.Rows, error) {
var (
rows *sql.Rows
err error
)
if c.ClientByHTTP != nil {
rows, err = c.ClientByHTTP.Query(query)
if err != nil {
return nil, err
}
} else if c.Client != nil {
rows, err = c.Client.Raw(query).Rows()
if err != nil {
return nil, err
}
} else {
return nil, fmt.Errorf("clickhouse client is nil")
}
return rows, nil
}
// ShowDatabases lists all databases in Clickhouse
func (c *Clickhouse) ShowDatabases(ctx context.Context) ([]string, error) {
var (
res []string
)
rows, err := c.QueryRows(ctx, ShowDatabases)
if err != nil {
return nil, err
}
for rows.Next() {
var r string
if err := rows.Scan(&r); err != nil {
return nil, err
}
res = append(res, r)
}
return res, nil
}
// ShowTables lists all tables in a given database
func (c *Clickhouse) ShowTables(ctx context.Context, database string) ([]string, error) {
var (
res []string
)
showTables := fmt.Sprintf(ShowTables, database)
rows, err := c.QueryRows(ctx, showTables)
if err != nil {
return nil, err
}
for rows.Next() {
var r string
if err := rows.Scan(&r); err != nil {
return nil, err
}
res = append(res, r)
}
return res, nil
}
// DescribeTable describes the schema of a specified table in Clickhouse
func (c *Clickhouse) DescribeTable(ctx context.Context, query interface{}) ([]*types.ColumnProperty, error) {
var (
ret []*types.ColumnProperty
)
ckQueryParam := new(QueryParam)
if err := mapstructure.Decode(query, ckQueryParam); err != nil {
return nil, err
}
descTable := fmt.Sprintf(DescTable, ckQueryParam.Database, ckQueryParam.Table)
rows, err := c.QueryRows(ctx, descTable)
if err != nil {
return nil, err
}
for rows.Next() {
var column types.ColumnProperty
if err := rows.Scan(&column.Field, &column.Type); err != nil {
return nil, err
}
ret = append(ret, &column)
}
return ret, nil
}
func (c *Clickhouse) ExecQueryBySqlDB(ctx context.Context, sql string) ([]map[string]interface{}, error) {
rows, err := c.QueryRows(ctx, sql)
if err != nil {
return nil, err
}
defer rows.Close()
columns, err := rows.Columns()
if err != nil {
return nil, err
}
var results []map[string]interface{}
for rows.Next() {
columnValues := make([]interface{}, len(columns))
columnPointers := make([]interface{}, len(columns))
for i := range columnValues {
columnPointers[i] = &columnValues[i]
}
if err := rows.Scan(columnPointers...); err != nil {
continue
}
rowMap := make(map[string]interface{})
for i, colName := range columns {
val := columnValues[i]
bytes, ok := val.([]byte)
if ok {
rowMap[colName] = string(bytes)
} else {
rowMap[colName] = val
}
}
results = append(results, rowMap)
}
return results, nil
}
func (c *Clickhouse) Query(ctx context.Context, query interface{}) ([]map[string]interface{}, error) {
ckQuery := new(QueryParam)
if err := mapstructure.Decode(query, ckQuery); err != nil {
return nil, err
}
// 校验SQL的合法性, 过滤掉 write请求
sqlItem := strings.Split(strings.ToUpper(ckQuery.Sql), " ")
for _, item := range sqlItem {
if _, ok := ckBannedOp[item]; ok {
return nil, fmt.Errorf("operation %s is forbid, only read db, please check your sql", item)
}
}
// 检查匹配数据长度,防止数据量过大
err := c.CheckMaxQueryRows(ctx, ckQuery.Sql)
if err != nil {
return nil, err
}
dbRows := make([]map[string]interface{}, 0)
if c.ClientByHTTP != nil {
dbRows, err = c.ExecQueryBySqlDB(ctx, ckQuery.Sql)
} else {
err = c.Client.Raw(ckQuery.Sql).Find(&dbRows).Error
}
if err != nil {
return nil, fmt.Errorf("fetch data failed, sql is %s, err is %s", ckQuery.Sql, err.Error())
}
return dbRows, nil
}
func (c *Clickhouse) CheckMaxQueryRows(ctx context.Context, sql string) error {
subSql := strings.ReplaceAll(sql, ";", "")
subSql = fmt.Sprintf("SELECT COUNT(*) as count FROM (%s) AS subquery;", subSql)
dbRows, err := c.ExecQueryBySqlDB(ctx, subSql)
if err != nil {
return fmt.Errorf("fetch data failed, sql is %s, err is %s", subSql, err.Error())
}
if len(dbRows) > 0 {
if count, exists := dbRows[0]["count"]; exists {
v, err := sqlbase.ParseFloat64Value(count)
if err != nil {
return err
}
if v > float64(c.MaxQueryRows) {
return fmt.Errorf("query result rows count %d exceeds the maximum limit %d", int(v), c.MaxQueryRows)
}
}
}
return nil
}

View File

@@ -0,0 +1,43 @@
package clickhouse
import (
"context"
"encoding/json"
"fmt"
"testing"
"time"
"github.com/ccfos/nightingale/v6/dskit/types"
)
func Test_Timeseries(t *testing.T) {
ck := &Clickhouse{
Nodes: []string{"127.0.0.1:8123"},
User: "default",
Password: "123456",
}
err := ck.InitCli()
if err != nil {
t.Fatal(err)
}
data, err := ck.QueryTimeseries(context.TODO(), &QueryParam{
Sql: `select * from default.student limit 20`,
From: time.Now().Unix() - 300,
To: time.Now().Unix(),
TimeField: "created_at",
TimeFormat: "datetime",
Keys: types.Keys{
LabelKey: "age",
},
})
if err != nil {
t.Fatal(err)
}
bs, err := json.Marshal(data)
if err != nil {
t.Fatal(err)
}
fmt.Println(string(bs))
}

View File

@@ -0,0 +1,58 @@
package clickhouse
import (
"context"
"fmt"
"github.com/ccfos/nightingale/v6/dskit/sqlbase"
"github.com/ccfos/nightingale/v6/dskit/types"
)
const (
TimeFieldFormatEpochMilli = "epoch_millis"
TimeFieldFormatEpochSecond = "epoch_second"
)
// 时序数据相关的API
type QueryParam struct {
Limit int `json:"limit" mapstructure:"limit"`
Sql string `json:"sql" mapstructure:"sql"`
Ref string `json:"ref" mapstructure:"ref"`
From int64 `json:"from" mapstructure:"from"`
To int64 `json:"to" mapstructure:"to"`
TimeField string `json:"time_field" mapstructure:"time_field"`
TimeFormat string `json:"time_format" mapstructure:"time_format"`
Keys types.Keys `json:"keys" mapstructure:"keys"`
Database string `json:"database" mapstructure:"database"`
Table string `json:"table" mapstructure:"table"`
}
var (
ckBannedOp = map[string]struct{}{
"CREATE": {},
"INSERT": {},
"ALTER": {},
"REVOKE": {},
"DROP": {},
"RENAME": {},
"ATTACH": {},
"DETACH": {},
"OPTIMIZE": {},
"TRUNCATE": {},
"SET": {},
}
)
func (c *Clickhouse) QueryTimeseries(ctx context.Context, query *QueryParam) ([]types.MetricValues, error) {
if query.Keys.ValueKey == "" {
return nil, fmt.Errorf("valueKey is required")
}
rows, err := c.Query(ctx, query)
if err != nil {
return nil, err
}
// 构造成时续数据
return sqlbase.FormatMetricValues(query.Keys, rows, true), nil
}

236
dskit/sqlbase/base.go Normal file
View File

@@ -0,0 +1,236 @@
// @Author: Ciusyan 5/19/24
package sqlbase
import (
"context"
"database/sql"
"fmt"
"strings"
"time"
"github.com/ccfos/nightingale/v6/dskit/types"
"gorm.io/gorm"
)
// NewDB creates a new Gorm DB instance based on the provided gorm.Dialector and configures the connection pool
func NewDB(ctx context.Context, dialector gorm.Dialector, maxIdleConns, maxOpenConns int, connMaxLifetime time.Duration) (*gorm.DB, error) {
// Create a new Gorm DB instance
db, err := gorm.Open(dialector, &gorm.Config{})
if err != nil {
return nil, err
}
// Configure the connection pool
sqlDB, err := db.DB()
if err != nil {
return nil, err
}
sqlDB.SetMaxIdleConns(maxIdleConns)
sqlDB.SetMaxOpenConns(maxOpenConns)
sqlDB.SetConnMaxLifetime(connMaxLifetime)
return db.WithContext(ctx), sqlDB.Ping()
}
// ShowTables retrieves a list of all tables in the specified database
func ShowTables(ctx context.Context, db *gorm.DB, query string) ([]string, error) {
var tables []string
rows, err := db.WithContext(ctx).Raw(query).Rows()
if err != nil {
return nil, err
}
defer rows.Close()
for rows.Next() {
var table string
if err := rows.Scan(&table); err != nil {
return nil, err
}
tables = append(tables, table)
}
return tables, nil
}
// ShowDatabases retrieves a list of all databases in the connected database server
func ShowDatabases(ctx context.Context, db *gorm.DB, query string) ([]string, error) {
var databases []string
rows, err := db.WithContext(ctx).Raw(query).Rows()
if err != nil {
return nil, err
}
defer rows.Close()
for rows.Next() {
var database string
if err := rows.Scan(&database); err != nil {
return nil, err
}
databases = append(databases, database)
}
return databases, nil
}
// DescTable describes the schema of a specified table in MySQL or PostgreSQL
func DescTable(ctx context.Context, db *gorm.DB, query string) ([]*types.ColumnProperty, error) {
rows, err := db.WithContext(ctx).Raw(query).Rows()
if err != nil {
return nil, err
}
defer rows.Close()
var columns []*types.ColumnProperty
for rows.Next() {
var (
field string
typ string
null string
key sql.NullString
defaultValue sql.NullString
extra sql.NullString
)
switch db.Dialector.Name() {
case "mysql":
if err := rows.Scan(&field, &typ, &null, &key, &defaultValue, &extra); err != nil {
continue
}
case "postgres", "sqlserver":
if err := rows.Scan(&field, &typ, &null, &defaultValue); err != nil {
continue
}
case "oracle":
if err := rows.Scan(&field, &typ, &null); err != nil {
continue
}
}
// Convert the database-specific type to internal type
type2, indexable := convertDBType(db.Dialector.Name(), typ)
columns = append(columns, &types.ColumnProperty{
Field: field,
Type: typ,
Type2: type2,
Indexable: indexable,
})
}
return columns, nil
}
// ExecQuery executes the specified query and returns the result rows
func ExecQuery(ctx context.Context, db *gorm.DB, sql string) ([]map[string]interface{}, error) {
rows, err := db.WithContext(ctx).Raw(sql).Rows()
if err != nil {
return nil, err
}
defer rows.Close()
columns, err := rows.Columns()
if err != nil {
return nil, err
}
var results []map[string]interface{}
for rows.Next() {
columnValues := make([]interface{}, len(columns))
columnPointers := make([]interface{}, len(columns))
for i := range columnValues {
columnPointers[i] = &columnValues[i]
}
if err := rows.Scan(columnPointers...); err != nil {
continue
}
rowMap := make(map[string]interface{})
for i, colName := range columns {
val := columnValues[i]
bytes, ok := val.([]byte)
if ok {
rowMap[colName] = string(bytes)
} else {
rowMap[colName] = val
}
}
results = append(results, rowMap)
}
return results, nil
}
// SelectRows selects rows from a specified table based on a given query
func SelectRows(ctx context.Context, db *gorm.DB, table, query string) ([]map[string]interface{}, error) {
sql := fmt.Sprintf("SELECT * FROM %s", table)
if query != "" {
sql += " WHERE " + query
}
return ExecQuery(ctx, db, sql)
}
// convertDBType converts MySQL or PostgreSQL data types to custom internal types and determines if they are indexable
func convertDBType(dialect, dbType string) (string, bool) {
typ := strings.ToLower(dbType)
// Common type conversions
switch {
case strings.HasPrefix(typ, "int"), strings.HasPrefix(typ, "tinyint"),
strings.HasPrefix(typ, "smallint"), strings.HasPrefix(typ, "mediumint"),
strings.HasPrefix(typ, "bigint"), strings.HasPrefix(typ, "serial"),
strings.HasPrefix(typ, "bigserial"):
return types.LogExtractValueTypeLong, true
case strings.HasPrefix(typ, "varchar"), strings.HasPrefix(typ, "text"),
strings.HasPrefix(typ, "char"), strings.HasPrefix(typ, "tinytext"),
strings.HasPrefix(typ, "mediumtext"), strings.HasPrefix(typ, "longtext"),
strings.HasPrefix(typ, "character varying"), strings.HasPrefix(typ, "nvarchar"),
strings.HasPrefix(typ, "nchar"):
return types.LogExtractValueTypeText, true
case strings.HasPrefix(typ, "float"), strings.HasPrefix(typ, "double"),
strings.HasPrefix(typ, "decimal"), strings.HasPrefix(typ, "numeric"),
strings.HasPrefix(typ, "real"), strings.HasPrefix(typ, "double precision"):
return types.LogExtractValueTypeFloat, true
case strings.HasPrefix(typ, "date"), strings.HasPrefix(typ, "datetime"),
strings.HasPrefix(typ, "timestamp"), strings.HasPrefix(typ, "timestamptz"),
strings.HasPrefix(typ, "time"), strings.HasPrefix(typ, "smalldatetime"):
return types.LogExtractValueTypeDate, false
case strings.HasPrefix(typ, "boolean"), strings.HasPrefix(typ, "bit"):
return types.LogExtractValueTypeBool, false
}
// Specific type conversions for MySQL
if dialect == "mysql" {
switch {
default:
return typ, false
}
}
// Specific type conversions for PostgreSQL
if dialect == "postgres" {
switch {
default:
return typ, false
}
}
if dialect == "oracle" {
switch {
default:
return typ, false
}
}
// Can continue to add specific 'dialect' type ...
return typ, false
}

250
dskit/sqlbase/timeseries.go Normal file
View File

@@ -0,0 +1,250 @@
// @Author: Ciusyan 5/20/24
package sqlbase
import (
"context"
"crypto/md5"
"encoding/json"
"fmt"
"reflect"
"sort"
"strconv"
"strings"
"time"
"github.com/ccfos/nightingale/v6/dskit/types"
"github.com/prometheus/common/model"
"gorm.io/gorm"
)
type QueryParam struct {
Sql string `json:"sql"`
Keys types.Keys `json:"keys" mapstructure:"keys"`
}
var (
BannedOp = map[string]struct{}{
"CREATE": {},
"INSERT": {},
"UPDATE": {},
"DELETE": {},
"ALTER": {},
"REVOKE": {},
"DROP": {},
"RENAME": {},
"TRUNCATE": {},
"SET": {},
}
)
// Query executes a given SQL query and returns the results
func Query(ctx context.Context, db *gorm.DB, query *QueryParam) ([]map[string]interface{}, error) {
// Validate SQL to prevent write operations if needed
sqlItem := strings.Split(strings.ToUpper(query.Sql), " ")
for _, item := range sqlItem {
if _, ok := BannedOp[item]; ok {
return nil, fmt.Errorf("operation %s is forbidden, only read operations are allowed, please check your SQL", item)
}
}
return ExecQuery(ctx, db, query.Sql)
}
// QueryTimeseries executes a time series data query using the given parameters
func QueryTimeseries(ctx context.Context, db *gorm.DB, query *QueryParam, ignoreDefault ...bool) ([]types.MetricValues, error) {
rows, err := Query(ctx, db, query)
if err != nil {
return nil, err
}
return FormatMetricValues(query.Keys, rows, ignoreDefault...), nil
}
func FormatMetricValues(keys types.Keys, rows []map[string]interface{}, ignoreDefault ...bool) []types.MetricValues {
ignore := false
if len(ignoreDefault) > 0 {
ignore = ignoreDefault[0]
}
keyMap := make(map[string]string)
for _, valueMetric := range strings.Split(keys.ValueKey, " ") {
keyMap[valueMetric] = "value"
}
for _, labelMetric := range strings.Split(keys.LabelKey, " ") {
keyMap[labelMetric] = "label"
}
if keys.TimeKey == "" {
keys.TimeKey = "time"
}
if len(keys.TimeKey) > 0 {
keyMap[keys.TimeKey] = "time"
}
var dataResps []types.MetricValues
dataMap := make(map[string]*types.MetricValues)
for _, row := range rows {
labels := make(map[string]string)
metricValue := make(map[string]float64)
metricTs := make(map[string]float64)
// Process each column based on its designated role (value, label, time)
for k, v := range row {
switch keyMap[k] {
case "value":
val, err := ParseFloat64Value(v)
if err != nil {
continue
}
metricValue[k] = val
case "label":
labels[k] = fmt.Sprintf("%v", v)
case "time":
ts, err := ParseTime(v, keys.TimeFormat)
if err != nil {
continue
}
metricTs[k] = float64(ts.Unix())
default:
// Default to labels for any unrecognized columns
if !ignore {
labels[k] = fmt.Sprintf("%v", v)
}
}
}
// Compile and store the metric values
for metricName, value := range metricValue {
metrics := make(model.Metric)
var labelsStr []string
for k1, v1 := range labels {
metrics[model.LabelName(k1)] = model.LabelValue(v1)
labelsStr = append(labelsStr, fmt.Sprintf("%s=%s", k1, v1))
}
metrics["__name__"] = model.LabelValue(metricName)
labelsStr = append(labelsStr, fmt.Sprintf("__name__=%s", metricName))
// Hash the labels to use as a key
sort.Strings(labelsStr)
labelsStrHash := fmt.Sprintf("%x", md5.Sum([]byte(strings.Join(labelsStr, ","))))
// Append new values to the existing metric, if present
ts, exists := metricTs[keys.TimeKey]
if !exists {
ts = float64(time.Now().Unix()) // Default to current time if not specified
}
valuePair := []float64{ts, value}
if existing, ok := dataMap[labelsStrHash]; ok {
existing.Values = append(existing.Values, valuePair)
} else {
dataResp := types.MetricValues{
Metric: metrics,
Values: [][]float64{valuePair},
}
dataMap[labelsStrHash] = &dataResp
}
}
}
// Convert the map to a slice for the response
for _, v := range dataMap {
sort.Slice(v.Values, func(i, j int) bool { return v.Values[i][0] < v.Values[j][0] }) // Sort by timestamp
dataResps = append(dataResps, *v)
}
return dataResps
}
// parseFloat64Value attempts to convert an interface{} to float64 using reflection
func ParseFloat64Value(val interface{}) (float64, error) {
v := reflect.ValueOf(val)
switch v.Kind() {
case reflect.Float64, reflect.Float32:
return v.Float(), nil
case reflect.Int, reflect.Int8, reflect.Int16, reflect.Int32, reflect.Int64:
return float64(v.Int()), nil
case reflect.Uint, reflect.Uint8, reflect.Uint16, reflect.Uint32, reflect.Uint64:
return float64(v.Uint()), nil
case reflect.String:
return strconv.ParseFloat(v.String(), 64)
case reflect.Slice:
if v.Type().Elem().Kind() == reflect.Uint8 {
return strconv.ParseFloat(string(v.Bytes()), 64)
}
case reflect.Interface:
return ParseFloat64Value(v.Interface())
case reflect.Ptr:
if !v.IsNil() {
return ParseFloat64Value(v.Elem().Interface())
}
case reflect.Struct:
if num, ok := val.(json.Number); ok {
return num.Float64()
}
}
return 0, fmt.Errorf("cannot convert type %T to float64", val)
}
// ParseTime attempts to parse a time value from an interface{} using a specified format
func ParseTime(val interface{}, format string) (time.Time, error) {
v := reflect.ValueOf(val)
switch v.Kind() {
case reflect.String:
str := v.String()
return parseTimeFromString(str, format)
case reflect.Slice:
if v.Type().Elem().Kind() == reflect.Uint8 {
str := string(v.Bytes())
return parseTimeFromString(str, format)
}
case reflect.Int, reflect.Int64:
return time.Unix(v.Int(), 0), nil
case reflect.Float64:
return time.Unix(int64(v.Float()), 0), nil
case reflect.Interface:
return ParseTime(v.Interface(), format)
case reflect.Ptr:
if !v.IsNil() {
return ParseTime(v.Elem().Interface(), format)
}
case reflect.Struct:
if t, ok := val.(time.Time); ok {
return t, nil
}
}
return time.Time{}, fmt.Errorf("invalid time value type: %v", val)
}
func parseTimeFromString(str, format string) (time.Time, error) {
// If a custom time format is provided, use it to parse the string
if format != "" {
parsedTime, err := time.Parse(format, str)
if err == nil {
return parsedTime, nil
}
return time.Time{}, fmt.Errorf("failed to parse time '%s' with format '%s': %v", str, format, err)
}
// Try to parse the string as RFC3339, RFC3339Nano, or Unix timestamp
if parsedTime, err := time.Parse(time.RFC3339, str); err == nil {
return parsedTime, nil
}
if parsedTime, err := time.Parse(time.RFC3339Nano, str); err == nil {
return parsedTime, nil
}
if timestamp, err := strconv.ParseInt(str, 10, 64); err == nil {
return time.Unix(timestamp, 0), nil
}
if timestamp, err := strconv.ParseFloat(str, 64); err == nil {
return time.Unix(int64(timestamp), 0), nil
}
return time.Time{}, fmt.Errorf("failed to parse time '%s'", str)
}

View File

@@ -0,0 +1,149 @@
// @Author: Ciusyan 5/17/24
package sqlbase
import (
"encoding/json"
"testing"
"time"
"github.com/ccfos/nightingale/v6/dskit/types"
)
func TestFormatMetricValues(t *testing.T) {
tests := []struct {
name string
keys types.Keys
rows []map[string]interface{}
want []types.MetricValues
}{
{
name: "cases1",
keys: types.Keys{
ValueKey: "grade a_grade",
LabelKey: "id student_name",
TimeKey: "update_time",
TimeFormat: "2006-01-02 15:04:05",
},
rows: []map[string]interface{}{
{
"id": "10007",
"grade": 20003,
"student_name": "邵子韬",
"a_grade": 69,
"update_time": "2024-05-14 10:00:00",
},
{
"id": "10007",
"grade": 20003,
"student_name": "邵子韬",
"a_grade": 69,
"update_time": "2024-05-14 10:05:00",
},
{
"id": "10007",
"grade": 20003,
"student_name": "邵子韬",
"a_grade": 69,
"update_time": "2024-05-14 10:10:00",
},
{
"id": "10008",
"grade": 20004,
"student_name": "Ciusyan",
"a_grade": 100,
"update_time": "2024-05-14 12:00:00",
},
},
},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
got := FormatMetricValues(tt.keys, tt.rows)
for _, g := range got {
t.Log(g)
}
})
}
}
func TestParseFloat64Value(t *testing.T) {
ptr := func(val float64) *float64 {
return &val
}
tests := []struct {
name string
input interface{}
want float64
wantErr bool
}{
{"float64", 1.23, 1.23, false},
{"float32", float32(1.23), float64(float32(1.23)), false},
{"int", 123, 123, false},
{"int64", int64(123), 123, false},
{"uint", uint(123), 123, false},
{"uint64", uint64(123), 123, false},
{"string", "1.23", 1.23, false},
{"[]byte", []byte("1.23"), 1.23, false},
{"json.Number", json.Number("1.23"), 1.23, false},
{"interface", interface{}(1.23), 1.23, false},
{"pointer", ptr(1.23), 1.23, false},
{"invalid string", "abc", 0, true},
{"invalid type", struct{}{}, 0, true},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
got, err := ParseFloat64Value(tt.input)
if (err != nil) != tt.wantErr {
t.Errorf("parseFloat64Value() error = %v, wantErr %v", err, tt.wantErr)
return
}
if got != tt.want {
t.Errorf("parseFloat64Value() = %v, want %v", got, tt.want)
}
})
}
}
func TestParseTime(t *testing.T) {
ptrTime := func(t time.Time) *time.Time {
return &t
}
tests := []struct {
name string
input interface{}
format string
want time.Time
wantErr bool
}{
{"RFC3339", "2024-05-14T12:34:56Z", "", time.Date(2024, 5, 14, 12, 34, 56, 0, time.UTC), false},
{"RFC3339Nano", "2024-05-14T12:34:56.789Z", "", time.Date(2024, 5, 14, 12, 34, 56, 789000000, time.UTC), false},
{"Unix timestamp int", int64(1715642135), "", time.Unix(1715642135, 0), false},
{"Unix timestamp float64", 1715642135.0, "", time.Unix(int64(1715642135), 0), false},
{"custom format", "14/05/2024", "02/01/2006", time.Date(2024, 5, 14, 0, 0, 0, 0, time.UTC), false},
{"slice", []byte("2024-05-14T12:34:56Z"), "", time.Date(2024, 5, 14, 12, 34, 56, 0, time.UTC), false},
{"interface", interface{}("2024-05-14T12:34:56Z"), "", time.Date(2024, 5, 14, 12, 34, 56, 0, time.UTC), false},
{"pointer", ptrTime(time.Date(2024, 5, 14, 12, 34, 56, 0, time.UTC)), "", time.Date(2024, 5, 14, 12, 34, 56, 0, time.UTC), false},
{"invalid format", "14-05-2024", "02/01/2006", time.Time{}, true},
{"invalid type", struct{}{}, "", time.Time{}, true},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
got, err := ParseTime(tt.input, tt.format)
if (err != nil) != tt.wantErr {
t.Errorf("ParseTime() error = %v, wantErr %v", err, tt.wantErr)
return
}
if !got.Equal(tt.want) {
t.Errorf("ParseTime() = %v, want %v", got, tt.want)
}
})
}
}

Some files were not shown because too many files have changed in this diff Show More