Compare commits

..

316 Commits

Author SHA1 Message Date
ning
70cb2afc7c code refactor 2024-06-14 11:52:56 +08:00
ning
a814bb39b8 code refactor 2024-06-14 11:39:21 +08:00
ning
ece1e4305d feat: oidc support default team 2024-06-14 11:28:49 +08:00
Yening Qin
0a0049c6fb feat: callback support send event to im and remove alert subrule callback resend (#1992)
* feat: Callback operation adds IM connection function (#1984)

* refactor: change alert sub callback

---------

Co-authored-by: Yang Zhiyan <101268302+Yziyan@users.noreply.github.com>
2024-06-14 00:38:02 +08:00
Ulric Qin
1b56ebe62e Merge branch 'main' of github.com:ccfos/nightingale 2024-06-13 15:54:39 +08:00
Ulric Qin
a5e92b95b0 add link in github issue template 2024-06-13 15:54:24 +08:00
ulricqin
8e9d06d43e Update README.md 2024-06-13 15:03:04 +08:00
Ulric Qin
ab289de785 update github issue template 2024-06-13 12:32:47 +08:00
Ulric Qin
8667b7743a Merge branch 'main' of github.com:ccfos/nightingale 2024-06-13 12:21:10 +08:00
Ulric Qin
45b9436f69 update github issue template 2024-06-13 12:20:56 +08:00
ning
3d03bcf329 docs: add perm point 2024-06-11 19:10:04 +08:00
ning
1851601889 refactor: get usergroups service api 2024-06-07 20:07:13 +08:00
ning
fa9745decf refactor: update event api 2024-06-06 17:55:08 +08:00
ning
6f007deeaa refactor: change get list api 2024-06-06 16:42:17 +08:00
ning
8fad705065 fix: edge alert use ibex 2024-06-06 16:07:18 +08:00
ning
675076779e refactor: ibex migrate add charset 2024-06-06 12:13:22 +08:00
710leo
b9e78eee22 docs: change action 2024-06-05 22:26:26 +08:00
710leo
2219584abb docs: change action 2024-06-05 22:16:42 +08:00
710leo
ebe31fd6bc docs: change action 2024-06-05 22:12:12 +08:00
nīng
95ca69e170 docs: change action 2024-06-05 22:04:56 +08:00
nīng
ef1b5d8d16 docs: change action 2024-06-05 21:51:23 +08:00
ning
5b375cf037 docs: change action 2024-06-05 19:42:39 +08:00
ning
108b729cae Merge branch 'main' of github.com:ccfos/nightingale 2024-06-05 18:11:44 +08:00
ning
a385972fa9 refactor: add some i18n 2024-06-05 18:11:31 +08:00
yuweizzz
98a0a9d94c feat: support sqlite (#1978)
* demo sqlite
2024-06-05 17:28:56 +08:00
ning
c79eec648d fix: n9e-edge ibex 2024-06-05 17:12:52 +08:00
Yening Qin
603eadd1f2 feat: alert event support recovery value (#1982)
* feature: the alert response event supports query recovery values (#1975)

* refactor: rule note use

---------

Co-authored-by: Yang Zhiyan <101268302+Yziyan@users.noreply.github.com>
2024-06-05 17:01:31 +08:00
Yening Qin
61a2f552be refactor: integration init (#1981) 2024-06-05 15:14:01 +08:00
ning
e3453328a7 refactor: integration init 2024-06-03 11:54:29 +08:00
ning
4424a6b89c refactor: get event list api 2024-06-03 11:12:32 +08:00
ning
9fdb2f0753 refactor: get event list api 2024-06-03 10:53:30 +08:00
ning
3d358e367f refactor: get event list api 2024-06-03 10:47:36 +08:00
Ulric Qin
5264874628 Update automq metrics 2024-06-03 10:43:24 +08:00
Ulric Qin
e0a3ff248c update Linux integration's markdown 2024-06-03 10:24:02 +08:00
Ulric Qin
1fecf78ede update Linux alerting rules 2024-06-03 09:46:33 +08:00
Ulric Qin
839b45904b Merge branch 'main' of github.com:ccfos/nightingale 2024-06-03 09:23:08 +08:00
Ulric Qin
cd0f43f808 add Automq alerts 2024-06-03 09:22:41 +08:00
ning
8047f3deee refactor: get event api 2024-05-31 19:08:28 +08:00
ning
f209ed5bee refactor: get event api 2024-05-31 17:20:27 +08:00
Ulric Qin
8c61d8c14d Update AutoMQ dashboards 2024-05-31 15:47:43 +08:00
Ulric Qin
f7372b1c3b update AutomMQ markdown 2024-05-31 14:13:35 +08:00
Ulric Qin
a39ced86aa add markdown for automq 2024-05-31 12:15:51 +08:00
Ulric Qin
f365b7db2a Merge branch 'main' of github.com:ccfos/nightingale 2024-05-31 11:41:44 +08:00
Ulric Qin
7eaec13b6c add metrics for AutoMQ 2024-05-31 11:41:30 +08:00
ulricqin
2e824a165e Update README.md 2024-05-31 10:28:34 +08:00
ulricqin
f2909b6029 Update README.md 2024-05-31 10:27:56 +08:00
Ulric Qin
a543a5ad09 update automq dashboard: cluster_overview.json 2024-05-30 21:07:22 +08:00
Ulric Qin
2ee34bf1f9 Merge branch 'main' of github.com:ccfos/nightingale 2024-05-30 21:01:34 +08:00
Ulric Qin
4623622dd0 add Detailed metrics dashboard for Automq 2024-05-30 21:01:20 +08:00
ning
4f259137e5 Merge branch 'main' of github.com:ccfos/nightingale 2024-05-30 20:30:10 +08:00
ning
75f1e8a80b refactor: event list api 2024-05-30 20:29:56 +08:00
Ulric Qin
3648d8dc45 add Automq dashboards 2024-05-30 20:28:34 +08:00
Ulric Qin
8c90d7ab33 Merge branch 'main' of github.com:ccfos/nightingale 2024-05-30 19:59:03 +08:00
Ulric Qin
c6ac3fb959 add AutoMQ Dashboards 2024-05-30 19:58:48 +08:00
ning
ce854b3166 docs: change some sql 2024-05-30 17:54:37 +08:00
ning
a2be5230fa docs: change some sql 2024-05-30 17:53:45 +08:00
ning
21276a77b6 docs: change some sql 2024-05-30 17:52:11 +08:00
Yening Qin
cffd012ec6 feat: user add last_avtive_time (#1974) 2024-05-30 17:44:32 +08:00
ning
a9ebdad1cd docs: change sql 2024-05-30 17:36:33 +08:00
ning
785c577728 docs: change sql 2024-05-30 17:36:17 +08:00
ning
0e2a66570e fix: edge host miss alert 2024-05-30 16:57:04 +08:00
Ulric Qin
76583a6227 add automq icon 2024-05-30 16:15:12 +08:00
Yening Qin
48e0e1a9f8 feat: add integration tpl center (#1973) 2024-05-30 15:42:09 +08:00
Yang Zhiyan
17bb7fa468 feat: support event list view only by business group (#1969) 2024-05-30 15:33:43 +08:00
ulricqin
fc2638680a Update oracle_alert.json 2024-05-30 07:24:36 +08:00
ulricqin
e01a899ae1 Update README.md 2024-05-30 07:18:22 +08:00
ning
07c1ef6bd4 docs: add some sql 2024-05-28 15:37:04 +08:00
ning
bfa7059098 docs: add some sql 2024-05-28 15:31:23 +08:00
laiwei
096a2d3675 add nvidia gpu metrics dashboard 2024-05-24 14:02:14 +08:00
ning
2232733922 fix: delete target service api 2024-05-23 17:03:52 +08:00
ning
b15f638688 refactor: code format 2024-05-23 16:43:25 +08:00
ning
4f818e3642 Merge branch 'main' of github.com:ccfos/nightingale 2024-05-23 11:06:50 +08:00
ning
638c62da2f refactor: automatically generate jwt signing key 2024-05-23 11:06:33 +08:00
shardingHe
e1a9c995c2 docs: merge the metric data from metric.toml into oracle.toml (#1962)
Co-authored-by: shardingHe <wangzihe@flashcat.cloud>
2024-05-23 11:01:58 +08:00
Yang Zhiyan
1898675075 optimize: Optimize targets-related interfaces (#1961) 2024-05-23 10:27:09 +08:00
Resurgence72
ce7f0272d8 对 prometheus 2.50.0 版本引入的 NewPossibleNonCounterInfo warnings 做适配 (#1939) 2024-05-21 17:29:33 +08:00
赵尚
93159f07fd refactor: change the task time limit from 1 day to 5 days. (#1959) 2024-05-21 15:32:45 +08:00
Yening Qin
7d410baa2d refactor: recovery event support inhibit (#1958) 2024-05-20 20:35:35 +08:00
Ulric Qin
20b30c3e2c update ping metrics 2024-05-20 10:26:18 +08:00
Ulric Qin
8805bf6598 fix typo of logout router 2024-05-20 10:23:08 +08:00
Ulric Qin
fe6a64dae8 Merge branch 'main' of github.com:ccfos/nightingale 2024-05-20 10:22:39 +08:00
Ulric Qin
2c564a2c58 add cdn metrics doc 2024-05-20 10:22:16 +08:00
ulricqin
ae3c13224d Update host_generic_categraf.json 2024-05-17 10:52:22 +08:00
ulricqin
9a4015f13f Update host_generic_categraf.json 2024-05-16 17:46:48 +08:00
Yening Qin
274ca09551 Update switch branch.json 2024-05-16 13:44:08 +08:00
ulricqin
3d9b4fc14e Update categraf-procstat.json 2024-05-16 11:38:53 +08:00
ning
07436a5e0d refactor: change event order 2024-05-16 11:15:18 +08:00
ning
f7b2f1acb9 refactor: change event order 2024-05-16 11:08:35 +08:00
ning
4f4287030a docs: update snmp board tpl 2024-05-15 22:19:14 +08:00
ning
e25e712c48 refactor: change boards clone api 2024-05-15 22:16:16 +08:00
ning
66951d7e77 refactor: change boards clone api 2024-05-14 14:24:32 +08:00
ulricqin
f5ff27cd18 Create host_generic_categraf.json 2024-05-13 18:03:54 +08:00
ning
9e3f6e6285 refactor: add create user verify 2024-05-13 17:25:08 +08:00
ning
48e3df2cb4 refactor: new ldap conn 2024-05-13 17:07:18 +08:00
Yening Qin
ac5d69dba4 feat: ldap support role mapping (#1948)
* feature: LDAP implements role mapping capabilities (#1932)

* feature: Implement the team mapping function (#1934)

* refactor: ldap login add timeout

---------

Co-authored-by: Ciusyan <101268302+Yziyan@users.noreply.github.com>
Co-authored-by: ciusyan <yangzhiyan_i@didiglobal.com>
2024-05-13 16:56:19 +08:00
Ulric Qin
597351c424 code refactor 2024-05-13 10:39:27 +08:00
Ulric Qin
1f6b2e341a update README 2024-05-13 10:28:19 +08:00
ulricqin
035752ace2 Update README.md 2024-05-13 10:15:55 +08:00
ulricqin
60a1437207 Update README.md 2024-05-13 10:12:58 +08:00
ulricqin
e31414bc8c Update README.md 2024-05-13 10:12:08 +08:00
ning
785a294845 refactor: update event.TriggerValue 2024-05-11 11:17:38 +08:00
ning
98933eee34 docs: update sql 2024-05-10 16:32:05 +08:00
ulricqin
20905810d7 Delete integrations/Netstat/metrics directory 2024-05-10 15:30:08 +08:00
ulricqin
c1bde83639 Delete integrations/Kernel_Vmstat/metrics directory 2024-05-10 15:29:30 +08:00
ulricqin
782a0e9616 Delete integrations/Processes/metrics directory 2024-05-10 15:28:50 +08:00
ning
6a3720bc8b docs: update ops 2024-05-10 14:19:49 +08:00
ning
de252359d6 Merge branch 'main' of github.com:ccfos/nightingale 2024-05-10 14:11:54 +08:00
ning
deb313ca3d refactor: change server and server clusters routes to include permission check 2024-05-10 14:11:32 +08:00
yang xiaokai
d119de56be docs: creating SNMP monitoring for Cisco like switches (#1945)
* Add files via upload

* Add files via upload

* Update and rename DCN.toml to Cisco.toml

---------

Co-authored-by: Yening Qin <710leo@gmail.com>
2024-05-10 13:27:24 +08:00
tuogege
f05417fa23 docs: fix wrong table name about 'WriteRelabels' (#1942) 2024-05-10 11:56:28 +08:00
ning
9ab2eb591f docs: update integration 2024-05-10 10:55:22 +08:00
Yening Qin
3f476d770f feat: add builtin metrics (#1944) 2024-05-10 10:41:51 +08:00
ning
ced6759686 Merge branch 'main' of github.com:ccfos/nightingale 2024-05-09 16:48:43 +08:00
ning
eba3014c59 fix: alert engine rebuild hash 2024-05-09 16:48:29 +08:00
ulricqin
3aeb4e16e9 Update webhook.go. Refactor Host header settings 2024-05-09 15:54:37 +08:00
ning
3b62722251 refactor: change server and server clusters routes to include permission check 2024-05-09 11:42:09 +08:00
shardingHe
fb1cc4868e feat: add user variable for sso(decrypted). (#1936)
* add user variable for sso(decrypted).
---------

Co-authored-by: shardingHe <wangzihe@flashcat.cloud>
2024-05-08 14:10:25 +08:00
ning
4a0dcf0dbf refactor: add forbidden status check for empty group id list 2024-05-08 11:59:13 +08:00
ning
4f913f146e Remove table prefix from all config files for consistency 2024-05-07 21:04:12 +08:00
Ulric Qin
533560f432 Merge branch 'main' of github.com:ccfos/nightingale 2024-05-07 20:03:52 +08:00
Ulric Qin
cf7b479a1b update integration metrics 2024-05-07 20:01:41 +08:00
Yening Qin
2e4c29a0de docs: delete integrations/Ping/collect/ping2.toml 2024-05-07 12:21:43 +08:00
Ulric Qin
6f0ceb94c6 Merge branch 'main' of github.com:ccfos/nightingale 2024-04-29 16:57:08 +08:00
Ulric Qin
800d7ba04b update integrations metrics 2024-04-29 16:56:56 +08:00
Thomas Zhao
fb6a6d2b93 Resolve a problem of pushgw WriteRrelabel not working actually (#1928)
* fix: timeSeries is not updated after relabeled

* fix: GaugeSampleQueueSize defined but not registered

---------

Co-authored-by: zhaotuo <zhaotuo@mail.jj.cn>
2024-04-29 10:22:17 +08:00
ning
cf2b19ae90 Update the size of callbacks and runbook_url columns to varchar(4096) in the alert_rule table 2024-04-28 11:56:52 +08:00
Ulric Qin
fb1cc93613 Merge branch 'main' of github.com:ccfos/nightingale 2024-04-26 19:27:07 +08:00
Ulric Qin
c2bba796c2 add some integrations metrics 2024-04-26 19:26:53 +08:00
ning
a02bf83842 fix: query busigroup error by checking if t is not nil before accessing its GroupId property 2024-04-25 17:18:50 +08:00
ning
cd9f129e2d docs: remove memory metric reference from dashboard descriptions 2024-04-22 17:30:03 +08:00
dependabot[bot]
e85c80bdcf build(deps): bump golang.org/x/net from 0.17.0 to 0.23.0 (#1921)
Bumps [golang.org/x/net](https://github.com/golang/net) from 0.17.0 to 0.23.0.
- [Commits](https://github.com/golang/net/compare/v0.17.0...v0.23.0)

---
updated-dependencies:
- dependency-name: golang.org/x/net
  dependency-type: indirect
...

Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
2024-04-22 10:52:44 +08:00
Ciusyan
7e83e0c482 Add fields to the users interface and fix email sending bugs (#1919)
* fix: SMTP configuration error may not exit the current sending email goroutine

* optimize: Add user information fields

---------

Co-authored-by: ciusyan <yangzhiyan_i@didiglobal.com>
2024-04-22 10:45:23 +08:00
ning
92ac3125f3 refactor: change ibex version 2024-04-19 00:05:20 +08:00
ning
a61feca369 Merge branch 'main' of github.com:ccfos/nightingale 2024-04-17 17:32:06 +08:00
ning
8b0b811919 Refactor redundant code for initializing Redis storage in alert/alert.go and cmd/edge/edge.go 2024-04-17 17:31:53 +08:00
Ulric Qin
8742526c7f check configDir exists 2024-04-17 17:29:49 +08:00
ning
ee757cfd92 Enable PProf for profiling and monitoring in config.toml 2024-04-17 16:54:00 +08:00
ulricqin
b12cfea379 Update edge.toml 2024-04-17 16:34:27 +08:00
kongfei605
45365e3e03 chore: update dashboards for ipmi (#1913) 2024-04-16 19:44:08 +08:00
kongfei605
1b676eefd2 Merge pull request #1912 from shardingHe/sync_dashboard_impi_and_sqlserver
docs: sync ipmi & sqlserver dashboards
2024-04-16 13:36:32 +08:00
shardingHe
0092dc44fd sync ipmi & sqlserver dashboards
update ipmi collect config
2024-04-16 13:05:12 +08:00
shardingHe
4941b376f3 feat: update buildIn dashboard custom (#1911) 2024-04-12 15:06:31 +08:00
ulricqin
e46813cd17 Update docker-compose.yaml 2024-04-12 10:02:48 +08:00
ning
58ebd224c2 refactor: change datasource api 2024-04-09 15:06:56 +08:00
ning
95ece6e16f refactor: update endpoint for deleting a datasource to remove unnecessary trailing slash 2024-04-09 15:04:38 +08:00
ning
b82cbd06fa merge main 2024-04-09 14:23:10 +08:00
ning
16210892da docs: change dockerfile 2024-04-09 14:10:52 +08:00
Ulric Qin
a452d63a56 Merge branch 'main' of github.com:ccfos/nightingale 2024-04-09 13:53:14 +08:00
Ulric Qin
51c7abedd3 Delete Dockerfile 2024-04-09 13:52:59 +08:00
ning
6d0a2420a8 Merge branch 'main' of github.com:ccfos/nightingale 2024-04-09 10:25:26 +08:00
ning
9cf687b73d fix: ldap user login info sync 2024-04-09 10:25:10 +08:00
ulricqin
49c9e41df5 Update host_table_view_demo.json 2024-04-08 16:01:31 +08:00
ning
2ec2e64213 refactor: Remove redundant DB2FE function from models 2024-04-08 15:29:19 +08:00
ning
867a61c8dc docs: change docker compose config 2024-04-07 19:36:36 +08:00
HongKuang
12263d1453 chore: fix function name in comment (#1905)
Signed-off-by: hongkuang <liurenhong@outlook.com>
2024-04-07 18:50:06 +08:00
Yening Qin
c0cacb2e64 refactor: change docker compose (#1906)
* update init sql

* change compose config
2024-04-07 18:49:32 +08:00
ning
0637b343b1 refactor: update ibex version 2024-04-06 23:21:10 +08:00
ning
2473e144ef refactor: update ibex version 2024-04-06 23:19:38 +08:00
Yening Qin
00a37d6de7 feat: Integration ibex (#1904)
* Ibex integrate (#1876)

---------

Co-authored-by: Deke Wang <94156972+wdkcc@users.noreply.github.com>
2024-04-06 22:02:07 +08:00
ning
50c664e6bf code refactor 2024-04-03 15:37:31 +08:00
Yening Qin
22b7d20455 refactor: sync user info to duty (#1903) 2024-04-02 21:49:32 +08:00
ning
141262e5a5 refactor: datasource update 2024-03-31 15:16:40 +08:00
ning
4717abfa77 update built-in rule 2024-03-26 15:43:18 +08:00
ning
1bf1a01c32 Merge branch 'main' of github.com:ccfos/nightingale 2024-03-21 16:38:35 +08:00
ning
05b714de38 fix: cas user login 2024-03-21 16:38:22 +08:00
xtan
11377d4e5f docs: docker remove ops.yaml (#1894) 2024-03-20 10:43:08 +08:00
ning
46ea46fdfe docs: update edge.toml 2024-03-19 17:46:32 +08:00
Yening Qin
d4f0483238 feat: ldap support sycn user and oidc suport logout (#1893)
* ldap user sync (#1858)

---------

Co-authored-by: Deke Wang <94156972+wdkcc@users.noreply.github.com>
2024-03-19 17:19:08 +08:00
ning
a79610f5ea fix: go mod deps 2024-03-19 15:29:27 +08:00
dependabot[bot]
d9fb71b9a0 build(deps): bump google.golang.org/protobuf from 1.31.0 to 1.33.0 (#1885)
Bumps google.golang.org/protobuf from 1.31.0 to 1.33.0.

---
updated-dependencies:
- dependency-name: google.golang.org/protobuf
  dependency-type: indirect
...

Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
2024-03-19 15:25:35 +08:00
dependabot[bot]
37057fa0cf build(deps): bump github.com/jackc/pgproto3/v2 from 2.3.1 to 2.3.3 (#1886)
Bumps [github.com/jackc/pgproto3/v2](https://github.com/jackc/pgproto3) from 2.3.1 to 2.3.3.
- [Commits](https://github.com/jackc/pgproto3/compare/v2.3.1...v2.3.3)

---
updated-dependencies:
- dependency-name: github.com/jackc/pgproto3/v2
  dependency-type: indirect
...

Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
2024-03-19 15:25:26 +08:00
yingjun
b234128a45 fix: typo error (#1891) 2024-03-19 15:25:06 +08:00
Yening Qin
67a2d57966 refactor: optimize alert mute and some config (#1892)
* Refactor alert mute and recording rule retrieval to handle cases where group IDs are empty in the current session

* Update `tags` column default value to '[]' in `alert_mute` table and `AlertMute` struct

* Update gids variable to handle empty query parameter in alertMuteGetsByGids and recordingRuleGetsByGids functions

* Refactor alertMuteGet function to include database to frontend transformation

* change mute datasource

* Refactor AlertSubscribeGetsByBGIds function to handle empty bgids parameter gracefully

* Refactor taskTplTotal and taskTplGets functions to allow filtering by groupIds when provided, enhancing flexibility and optimization

* Refactor taskGetsByGids to handle empty gids and non-admin user cases more efficiently
2024-03-19 15:24:24 +08:00
Ulric Qin
3a1516877e code refactor 2024-03-19 09:55:11 +08:00
Ulric Qin
53f31d175f code refactor 2024-03-19 09:54:24 +08:00
kongfei605
25323e9ce2 update dashboards for springboot (#1867)
* update dashboards for springboot

* update dashboard for springboot
2024-03-13 23:58:23 +08:00
shardingHe
3136596add docs: update aliyun dashboards (#1884)
* remove invalid dashboard for aliyun

* update dashboard for aliyun

* update dashboard for aliyun

---------

Co-authored-by: shardingHe <wangzihe@flashcat.cloud>
2024-03-13 23:57:45 +08:00
ning
e7200b0b23 refactor: compatible mute rule time period 00:00-23:59 2024-03-12 14:35:01 +08:00
shardingHe
dfb19c1dde docs: remove invalid dashboard for aliyun (#1881)
Co-authored-by: shardingHe <wangzihe@flashcat.cloud>
2024-03-08 10:23:36 +08:00
ning
2363b35263 docs: remove rsa config 2024-03-08 10:18:25 +08:00
shardingHe
99367aaf88 fix: sync smtp config (#1879)
Co-authored-by: shardingHe <wangzihe@flashcat.cloud>
2024-03-07 10:26:07 +08:00
xtan
ad17ef328f docs: fix pg docker config (#1874) 2024-03-05 21:16:53 +08:00
ning
5f149f6a38 Merge branch 'main' of github.com:ccfos/nightingale 2024-03-03 21:50:27 +08:00
ning
73ed57301b add iamleader 2024-03-03 21:49:45 +08:00
Ulric Qin
138b929db4 Merge branch 'main' of github.com:ccfos/nightingale 2024-03-01 18:27:10 +08:00
Ulric Qin
4585e94cd1 set DatasourceIdsJson to []int64{} if it is nil 2024-03-01 18:26:56 +08:00
ning
69ad6344f5 refactor: add ldap login log 2024-02-29 18:13:05 +08:00
Ulric Qin
a55665bd14 fix dash 2024-02-28 12:16:24 +08:00
Ulric Qin
b5e2053b0c Merge branch 'main' of github.com:ccfos/nightingale 2024-02-28 10:34:15 +08:00
Ulric Qin
94265eab9f add rsa configurations 2024-02-28 10:33:46 +08:00
ning
eb79d473b0 fix sync sso config 2024-02-27 12:00:53 +08:00
ning
c4e0a9962f fix sync sso config 2024-02-27 11:50:53 +08:00
shardingHe
ee613616ca docs: add a new IPMI dashboard for version v0.3.44-pre and subsequent versions. (#1869)
Co-authored-by: shardingHe <wangzihe@flashcat.cloud>
2024-02-23 22:36:19 +08:00
shardingHe
6bbf00c371 docs: update sqlserver config (#1868)
Co-authored-by: shardingHe <wangzihe@flashcat.cloud>
2024-02-23 22:35:26 +08:00
ulricqin
f9f45d315d Update edge.toml 2024-02-22 10:31:57 +08:00
Yening Qin
84f215b7f1 refactor: handle event (#1866) 2024-02-22 09:46:37 +08:00
ning
016220bb2a refactor: change notify debug log 2024-02-21 19:07:53 +08:00
ning
ba1eb73ace refactor: add more notify debug log 2024-02-21 18:44:54 +08:00
Yening Qin
b304091fb3 fix: handle host recovery event (#1865) 2024-02-21 16:14:57 +08:00
Ulric Qin
840eaea667 Merge branch 'main' of github.com:ccfos/nightingale 2024-02-20 15:18:00 +08:00
Ulric Qin
956cc9fd68 update integrations icon 2024-02-20 15:17:46 +08:00
ning
e78e212f83 replace parser pkg 2024-02-20 11:11:44 +08:00
Ulric Qin
cdc2d4c039 update integrations icon 2024-02-18 21:02:19 +08:00
Ulric Qin
cd4b0c4f94 update integrations icon 2024-02-18 20:43:16 +08:00
Deke Wang
53ada6cc40 feat: board batch clone (#1861)
* feat: sync user in ldap to mysql

* feat:board batch clone

* Revert "feat: sync user in ldap to mysql"

This reverts commit 6063c34f0e.

* fix: use transactions to store board and payload

* fix: the busigroup is incorrectly specified

* chore: adjust import order of pkg

* refactor: batch clone the board codes

* fix: set value='' in reterr if err==nil

* refactor: move AtomicAdd to board.go

* chore: adjust import order of pkg

* chore: adjust import order of pkg
2024-02-18 10:03:08 +08:00
ning
2e6cb0f21d update windows_by_categraf dashboards 2024-02-07 16:41:23 +08:00
laiwei
4287591a6b update n9e readme 2024-02-06 18:57:13 +08:00
laiwei
2fe0c21e36 update n9e readme 2024-02-06 18:55:46 +08:00
ning
bfa043aeba refactor: optimize trigger values 2024-02-01 23:43:03 +08:00
ning
f4336ca5e9 refactor alert process 2024-02-01 22:47:48 +08:00
ning
8125cb7090 refactor alert process 2024-02-01 22:25:53 +08:00
ning
0ae1e7fbc4 refactor: auto remove ops.yaml 2024-01-31 18:57:42 +08:00
Yening Qin
88f8111a56 refactor: alert eval (#1855)
* refactor
2024-01-30 18:08:37 +08:00
ning
dbfaa519ba fix: edge query db panic 2024-01-30 11:13:12 +08:00
ning
402e803146 Merge branch 'main' of github.com:ccfos/nightingale 2024-01-30 11:09:57 +08:00
Ulric Qin
5eae14a3c9 add default settings: ForceUseServerTS = true 2024-01-30 09:54:25 +08:00
Ulric Qin
e0bfc45f5a add default configurations: ForceUseServerTS 2024-01-30 09:53:19 +08:00
Ulric Qin
7d8fb7aab7 use fasttime instead of time.Now.Unix 2024-01-30 09:50:24 +08:00
Deke Wang
846ef00aed fix: Compose host network metric log (#1852)
* fix: The status of the log is all info in compose-host-network-metric-log

* fix: set KAFKA_CFG_MESSAGE_MAX_BYTES to avoid null value error

* chore: keep the same span

---------
Co-authored-by: wdk <wdk_cc@163.com>
2024-01-29 19:01:55 +08:00
ulricqin
f2f730e88c Update linux_by_categraf.json 2024-01-29 12:09:36 +08:00
ulricqin
311a9405e4 Update bug_report.yml 2024-01-29 07:33:01 +08:00
Yening Qin
6c53981883 refactor: optimize sync user to duty (#1850)
* fix: usergroup del

* refactor user to duty

* code refactor
2024-01-26 17:01:41 +08:00
ning
f23f960368 fix: usergroup del 2024-01-26 12:59:43 +08:00
ulricqin
f593c6d310 fix migrate sql: task_records 2024-01-25 09:17:42 +08:00
ning
3fb5ea96bc chage IdentDropThreshold 2024-01-24 19:28:01 +08:00
ning
30c697a3df refactor: flashtudy sycn team 2024-01-24 14:59:26 +08:00
ning
1d50d05329 fix: sync user to flashtudy 2024-01-19 20:35:36 +08:00
Yening Qin
840221d9ec feat: auto drop metrics by ident threshold (#1845)
* auto drop data by ident

* refactor drop ident
2024-01-19 19:01:13 +08:00
Deke Wang
e52a76921f feat: sync user and user_group to flashduty (#1842)
* fix build event

* fix:  append labels

* Add the function of batch subscription alert rules (#1825)

* add: docker-compose files for logs processing

* update: set restart:always

* fix: compose-host-network-metric-log

* update: regularize

* add: batch subscription

* add: sql columns for rule_ids and rule_names

* add: add migrate of AlertSubscribe

* update: Remove redundant codes

* fix: The question of 1821

* fix: Optimized for getting rule_ids and rule_names

* fix: error handle

* fix: add rule_ids for update api

* fix: Clear the rule_id to zero when updating

* refactor: Compatible with old rule_id

* refactor: rename

* fix: set rule_id=0 when updating subscription rules

---------

Co-authored-by: wdk <wdk_cc@163.com>

* feat: sync user and team to flashduty

* fix: sync to flashduty

* fix: failed to update team change to flashduty

* fix: sync default user when create team

* chore: delete the generated binary file

* refactor: user_group refact

* fix: func AddUsers(fdConf *cconf.FlashDuty, appKey string, users []User) error {

* fix: remove sync for user in router

* fix: user_grroup no change in n9e when put user_group

* chore: set default api_url=https://api.flashcat.cloud

* chore: refactor user_group

* chore: refact codes

* chore: set api=https://jira.flashcat.cloud/api for test

* chore: set api=https://api.flashcat.cloud

* chore: adjust the import order

* chore: remove excess code

* chore: refact codes

* chore: remove excess codes

* chore: adjust import order

* chore: adjust import order

* chore: adjust import order

* chore: refact code

* chore: optimized codes

* code refactor

* chore: remove excess code

---------

Co-authored-by: ning <710leo@gmail.com>
Co-authored-by: wdk <wdk_cc@163.com>
2024-01-19 18:20:00 +08:00
ning
80fdb37129 code refactor 2024-01-18 18:52:52 +08:00
ning
bbef4aa8d9 refactor datasource api 2024-01-18 12:01:34 +08:00
ning
35eba3b1e1 refactor: datasource api 2024-01-17 17:20:11 +08:00
Yening Qin
28a1230d26 refactor:host alert in edge (#1838)
* refactor update ident

* target add engine name

* refactor: heartbeat hash ring

* target update_ts to redis

* add log

* refactor GetHostUpdateTime

* update heartbeat

* add targets-of-alert-rule

* fix recovery
2024-01-17 16:04:37 +08:00
Yening Qin
86dd6a9608 feat: support subscribe multi alert rules (#1834)
* add: batch subscription

* add: sql columns for rule_ids and rule_names

* add: add migrate of AlertSubscribe

* update: Remove redundant codes

* fix: The question of 1821

* fix: Optimized for getting rule_ids and rule_names

* fix: error handle

* fix: add rule_ids for update api

* fix: Clear the rule_id to zero when updating

* refactor: Compatible with old rule_id

* refactor: rename

* fix: set rule_id=0 when updating subscription rules

---------

Co-authored-by: wdk <wdk_cc@163.com>

* refactor: remove prod and cate in alert mute (#1828)

* add: remove monitor type in alter mute

* chore: remove prod in alter mute

* chore: remove cate in alter mute

---------

Co-authored-by: wdk <wdk_cc@163.com>

---------

Co-authored-by: Deke Wang <94156972+wdkcc@users.noreply.github.com>
Co-authored-by: wdk <wdk_cc@163.com>
2024-01-12 20:01:57 +08:00
真小明同学
f7a40b7324 fix: remove stats code that could cause panic (#1831)
Co-authored-by: xem <xemxx@qq.com>
2024-01-11 10:49:49 +08:00
laiwei
e2e8eb837d make readme zh as default 2024-01-10 11:37:20 +08:00
Yening Qin
020f7ae07e fix: extractIdentFromTimeSeries append labels (#1824)
* fix:  append labels
2024-01-08 19:56:44 +08:00
Yening Qin
8311667930 refactor: share board (#1822)
* support board-share
2024-01-08 13:07:08 +08:00
Deke Wang
741ab94150 docs: add docker-compose files for logs processing (#1819)
* add: docker-compose files for logs processing

* update: set restart:always

* fix: compose-host-network-metric-log

* update: regularize

* fix: categraf will panic when it starts

* fix: add WAIT_HOSTS of kafka for categraf and set es index=n9e-y.m.d

---------

Co-authored-by: wdk <wdk_cc@163.com>
2024-01-04 19:23:05 +08:00
Yening Qin
5d6ca183be fix build event (#1817) 2024-01-04 16:49:07 +08:00
ning
0f937ad6d0 refactor: event trigger 2023-12-28 17:48:33 +08:00
ning
ab38f220f7 feat:add my alert rule callbacks api 2023-12-27 17:45:29 +08:00
Yening Qin
a8c0b3bfd5 refactor: optimize oidc get user info (#1811)
* update oidc
2023-12-27 16:28:27 +08:00
shardingHe
5d1629bf0b docs: add oracle alert rule template (#1806)
Co-authored-by: shardingHe <wangzihe@flashcat.cloud>
2023-12-27 16:13:21 +08:00
ning
da7fa40c70 refactor: extract ident 2023-12-27 10:59:49 +08:00
ning
f1f0ee193f refactor: update var config api 2023-12-26 19:34:52 +08:00
ning
deccccead0 refactor: add target GetTagsMap() 2023-12-25 11:59:58 +08:00
ning
47b4464ad8 update migrate.sql 2023-12-22 17:31:05 +08:00
liooooo
3cf4a2edc1 fix:error variable incorrect invocation,it needs decryptErr but got err,it will cause error message loss and while err == nil & decryptErr != nil the program will panic (#1802) 2023-12-21 23:17:12 +08:00
Yening Qin
350f3a66dd feat: host support extra meta (#1804) 2023-12-21 23:14:30 +08:00
dependabot[bot]
f8edcabb05 build(deps): bump golang.org/x/crypto from 0.14.0 to 0.17.0 (#1798)
Bumps [golang.org/x/crypto](https://github.com/golang/crypto) from 0.14.0 to 0.17.0.
- [Commits](https://github.com/golang/crypto/compare/v0.14.0...v0.17.0)

---
updated-dependencies:
- dependency-name: golang.org/x/crypto
  dependency-type: indirect
...

Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
2023-12-21 14:18:58 +08:00
ning
00cafc613d add user service api 2023-12-20 21:01:38 +08:00
Yening Qin
8c614dc8a1 add /targets/bind perm & fix panic (#1801)
* fix panic

* add /targets/bind
2023-12-19 20:50:30 +08:00
ning
216c9d8852 feat: add single alert rule service api 2023-12-14 20:15:38 +08:00
ning
741e3eb89b fix: build 2023-12-14 15:18:04 +08:00
dependabot[bot]
bc06684694 build(deps): bump github.com/mojocn/base64Captcha from 1.3.5 to 1.3.6 (#1793)
Bumps [github.com/mojocn/base64Captcha](https://github.com/mojocn/base64Captcha) from 1.3.5 to 1.3.6.
- [Release notes](https://github.com/mojocn/base64Captcha/releases)
- [Commits](https://github.com/mojocn/base64Captcha/compare/v1.3.5...v1.3.6)

---
updated-dependencies:
- dependency-name: github.com/mojocn/base64Captcha
  dependency-type: direct:production
...

Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
2023-12-14 12:53:06 +08:00
shardingHe
2539cb9c1a update canal dashboards (#1794)
Co-authored-by: shardingHe <wangzihe@flashcat.cloud>
2023-12-14 12:52:36 +08:00
ning
32dd3d5098 refactor: add event log 2023-12-08 16:21:58 +08:00
Yening Qin
b6cf382b86 feat: optimize tplx (#1789)
* optimize-tplx
2023-12-08 14:40:48 +08:00
ning
03d19a797c fix refresh event group name 2023-12-08 11:30:40 +08:00
ning
98cbc14039 code refactor 2023-12-08 11:21:48 +08:00
ning
248bb50b3e Merge branch 'main' of github.com:ccfos/nightingale 2023-12-08 11:13:25 +08:00
ning
01f1dcf93e optimize show annotations 2023-12-08 11:13:10 +08:00
Yening Qin
fdac82b8dc refactor: add more self metrics (#1788)
*  add stats metrics
2023-12-08 00:32:18 +08:00
shardingHe
0f926cb218 feat: Dashboards for SQL Server (#1781)
* remove defaultValue for datasource.

* add sqlserver dashboards

---------

Co-authored-by: shardingHe <wangzihe@flashcat.cloud>
2023-12-08 00:11:21 +08:00
ning
de35b61b52 refactor: aop log 2023-12-05 20:32:50 +08:00
ning
341aa3f070 update migrate 2023-12-05 11:08:11 +08:00
ning
f47254e72d fix: notify tpl put 2023-12-05 10:59:29 +08:00
Yening Qin
0b75d4d2ed feat: support drop timeseries by labels (#1784)
* feat: drop sample
2023-12-02 11:44:52 +08:00
Yening Qin
d204aa0cd4 refactor build noitfy tpl (#1783)
* refactor build tpl

* change notify api
2023-12-02 11:19:34 +08:00
Yening Qin
4f6584a41d refactor: extract Ident from timeseries (#1782) 2023-12-01 16:37:03 +08:00
ning
8f8f24ccfe refactor: change sql-template api 2023-11-30 16:57:42 +08:00
ning
0f2257b8bb Merge branch 'main' of github.com:ccfos/nightingale 2023-11-30 12:06:59 +08:00
ning
8bd99f13c1 docs: change i18n 2023-11-30 12:06:41 +08:00
shardingHe
f8deb89592 feat: add gorm logger (#1768)
* add gorm logger

* add gorm logger implement, slow log save to warning-level file

* optimize gorm logger

* optimize code

---------

Co-authored-by: shardingHe <wangzihe@flashcat.cloud>
2023-11-30 12:02:23 +08:00
Yening Qin
701407581b feat: support multi resources get (#1780)
* get alert rules by gids

* add perm check

* code refactor

* add tasks api

* code refactor

* add site-info api

* update targets api

* add /site-settings
2023-11-28 15:37:40 +08:00
ning
ba2ee05bc0 docs: rename clone board name 2023-11-23 20:38:12 +08:00
ning
c6e649129e refactor: role ops api 2023-11-22 14:49:10 +08:00
Yening Qin
329249ea99 refactor: ops i18n (#1778)
* ops i18n

* update gomod

* code refactor
2023-11-22 13:57:30 +08:00
Yening Qin
65d8a30396 refactor: datasource api (#1777)
* ds refactor
2023-11-20 23:28:15 +08:00
ning
e29a45c4a3 refactor: update role_operation 2023-11-17 17:13:44 +08:00
Yening Qin
438078cdc5 target add agent version (#1776)
* add agent-version
2023-11-17 16:57:56 +08:00
ning
ae07ba7523 docs: update default cas config 2023-11-15 12:14:06 +08:00
ning
f201b12dd8 Merge branch 'main' of github.com:ccfos/nightingale 2023-11-15 11:58:36 +08:00
ning
ee5322f406 change alert_rule callbacks length 2023-11-15 11:58:21 +08:00
Yening Qin
60a2e0c963 feat: writer support mult addrs (#1775)
* support mult write
2023-11-15 10:56:13 +08:00
Ulric Qin
2b55ed9b46 code refactor 2023-11-15 10:33:38 +08:00
ning
68eb7cb57e docs: update migrate sql 2023-11-13 17:54:33 +08:00
ning
6387b601b1 docs: add auto migrate sql 2023-11-13 17:52:10 +08:00
ning
af58fa8802 code refactor 2023-11-13 17:40:31 +08:00
ning
80daea5744 Merge branch 'main' of github.com:ccfos/nightingale 2023-11-13 17:33:38 +08:00
ning
bf9a471484 docs: add auto migrate sql 2023-11-13 17:31:56 +08:00
shardingHe
195ed9761c docs: built-in dashboards remove defaultValue for datasource (#1773)
Co-authored-by: shardingHe <wangzihe@flashcat.cloud>
2023-11-13 14:31:13 +08:00
Ulric Qin
fdc0123681 update logo 2023-11-11 22:21:15 +08:00
ning
6fd75ae552 Merge branch 'main' of github.com:ccfos/nightingale 2023-11-10 14:42:31 +08:00
ning
10c462a477 fix: insert ibex-settings perm point 2023-11-10 14:42:18 +08:00
Ulric Qin
694c43292a add api: /api/n9e/user/busi-groups 2023-11-09 20:07:30 +08:00
ning
cfa78dc9e2 feat: alert_subscribe support note and disabled 2023-11-09 00:15:36 +08:00
ning
cc80f5b685 fix: add ibex-settings perm point 2023-11-08 19:08:58 +08:00
ning
58f4a11669 refactor: datasource update 2023-11-08 18:17:22 +08:00
Yening Qin
4f57624a67 refactor: timeteries handle hook (#1772)
* refactor:  timeteries handle hook
2023-11-08 15:11:39 +08:00
Yening Qin
9558520dcd feat: host filter support * (#1769) 2023-11-07 11:19:18 +08:00
dependabot[bot]
8ded3623a4 build(deps): bump golang.org/x/image from 0.5.0 to 0.10.0 (#1767)
Bumps [golang.org/x/image](https://github.com/golang/image) from 0.5.0 to 0.10.0.
- [Commits](https://github.com/golang/image/compare/v0.5.0...v0.10.0)

---
updated-dependencies:
- dependency-name: golang.org/x/image
  dependency-type: indirect
...

Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
2023-11-07 11:12:54 +08:00
ning
12fcca2faf docs: change integrations 2023-11-03 15:32:27 +08:00
xtan
9dc20fc674 fix: fix windows dashboard (#1762) 2023-10-27 19:32:23 +08:00
Lars Lehtonen
16430550d1 models: fix clobbered error (#1764) 2023-10-27 19:32:07 +08:00
ning
b34b66785d change notify api perm 2023-10-27 19:22:13 +08:00
ning
2cf38b6027 docs: update built-in linux dashboard 2023-10-27 19:08:46 +08:00
ning
e1b4edaa68 refactor: user-variable-configs api 2023-10-27 18:58:22 +08:00
ning
97f3f70d57 refactor perm api 2023-10-27 18:13:04 +08:00
ning
cee0ce6620 code refactor 2023-10-27 17:40:11 +08:00
ning
89b659695f add ops 2023-10-27 17:35:00 +08:00
Yening Qin
d52848ab1b feat: http support print body log (#1757)
* add log
2023-10-24 11:37:34 +08:00
shardingHe
314a8d71ef Fix:use text template to replace data (#1756)
* add text/template func, avoid escape issues caused by special characters

* rename the function of template

* change use of template. ReplaceTemplateUseHtml replaced to ReplaceTemplateUseText

* change use of template. ReplaceTemplateUseHtml replaced to ReplaceTemplateUseText

---------

Co-authored-by: shardingHe <wangzihe@flashcat.cloud>
2023-10-23 17:22:34 +08:00
shardingHe
bfa85cd8f1 fix: add func for tplx, avoid escape issues caused by special characters (#1755)
* add text/template func, avoid escape issues caused by special characters

* rename the function of template

---------

Co-authored-by: shardingHe <wangzihe@flashcat.cloud>
2023-10-23 16:35:08 +08:00
shardingHe
2254cb1f87 fix: move dashboards to the right place (#1753)
* move dashboards to the right place

* move dashboards to the right place

---------

Co-authored-by: shardingHe <wangzihe@flashcat.cloud>
2023-10-21 11:49:48 +08:00
551 changed files with 124847 additions and 89478 deletions

View File

@@ -1,67 +0,0 @@
name: Bug Report
description: Report a bug encountered while running Nightingale
labels: ["kind/bug"]
body:
- type: markdown
attributes:
value: |
Thanks for taking time to fill out this bug report!
The more detailed the form is filled in, the easier the problem will be solved.
- type: textarea
id: config
attributes:
label: Relevant server.conf | webapi.conf
description: Place config in the toml code section. This will be automatically formatted into toml, so no need for backticks.
render: toml
validations:
required: true
- type: textarea
id: logs
attributes:
label: Relevant logs
description: categraf | telegraf | server | webapi | prometheus | chrome request/response ...
render: text
validations:
required: true
- type: input
id: system-info
attributes:
label: System info
description: Include nightingale version, operating system, and other relevant details
placeholder: ex. n9e 5.9.2, n9e-fe 5.5.0, categraf 0.1.0, Ubuntu 20.04, Docker 20.10.8
validations:
required: true
- type: textarea
id: reproduce
attributes:
label: Steps to reproduce
description: Describe the steps to reproduce the bug.
value: |
1.
2.
3.
...
validations:
required: true
- type: textarea
id: expected-behavior
attributes:
label: Expected behavior
description: Describe what you expected to happen when you performed the above steps.
validations:
required: true
- type: textarea
id: actual-behavior
attributes:
label: Actual behavior
description: Describe what actually happened when you performed the above steps.
validations:
required: true
- type: textarea
id: additional-info
attributes:
label: Additional info
description: Include gist of relevant config, logs, etc.
validations:
required: false

33
.github/ISSUE_TEMPLATE/question.yml vendored Normal file
View File

@@ -0,0 +1,33 @@
name: Bug Report & Usage Question
description: Reporting a bug or asking a question about how to use Nightingale
labels: []
body:
- type: markdown
attributes:
value: |
The more detailed the form is filled in, the easier the problem will be solved.
提供的信息越详细,问题解决的可能性就越大。另外, 提问之前请先搜索历史 issue (包括 close 的), 以免重复提问。
- type: textarea
id: question
attributes:
label: Question and Steps to reproduce
description: Describe your question and steps to reproduce the bug. 描述问题以及复现步骤
validations:
required: true
- type: textarea
id: logs
attributes:
label: Relevant logs and configurations
description: Relevant logs and configurations. 报错日志([查看方法](https://flashcat.cloud/docs/content/flashcat-monitor/nightingale-v6/faq/how-to-check-logs/))以及各个相关组件的配置信息
render: text
validations:
required: true
- type: textarea
id: system-info
attributes:
label: Version
description: Include nightingale version, operating system, and other relevant details. 请告知夜莺的版本、操作系统的版本、CPU架构等信息
validations:
required: true

View File

@@ -26,7 +26,8 @@ jobs:
- name: Run GoReleaser
uses: goreleaser/goreleaser-action@v3
with:
version: latest
distribution: goreleaser
version: '~> v1'
args: release --rm-dist
env:
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}

2
.gitignore vendored
View File

@@ -46,6 +46,8 @@ _test
/docker/n9e
/docker/compose-bridge/mysqldata
/docker/compose-host-network/mysqldata
/docker/compose-host-network-metric-log/mysqldata
/docker/compose-host-network-metric-log/n9e-logs
/docker/compose-postgres/pgdata
/etc.local*
/front/statik/statik.go

134
README.md
View File

@@ -1,104 +1,110 @@
<p align="center">
<a href="https://github.com/ccfos/nightingale">
<img src="doc/img/nightingale_logo_h.png" alt="nightingale - cloud native monitoring" width="240" /></a>
<img src="doc/img/Nightingale_L_V.png" alt="nightingale - cloud native monitoring" width="100" /></a>
</p>
<p align="center">
<b>开源告警管理专家 一体化的可观测平台</b>
</p>
<p align="center">
<img alt="GitHub latest release" src="https://img.shields.io/github/v/release/ccfos/nightingale"/>
<a href="https://n9e.github.io">
<a href="https://flashcat.cloud/docs/">
<img alt="Docs" src="https://img.shields.io/badge/docs-get%20started-brightgreen"/></a>
<a href="https://hub.docker.com/u/flashcatcloud">
<img alt="Docker pulls" src="https://img.shields.io/docker/pulls/flashcatcloud/nightingale"/></a>
<img alt="GitHub Repo stars" src="https://img.shields.io/github/stars/ccfos/nightingale">
<img alt="GitHub Repo issues" src="https://img.shields.io/github/issues/ccfos/nightingale">
<img alt="GitHub Repo issues closed" src="https://img.shields.io/github/issues-closed/ccfos/nightingale">
<img alt="GitHub forks" src="https://img.shields.io/github/forks/ccfos/nightingale">
<a href="https://github.com/ccfos/nightingale/graphs/contributors">
<img alt="GitHub contributors" src="https://img.shields.io/github/contributors-anon/ccfos/nightingale"/></a>
<img alt="GitHub Repo stars" src="https://img.shields.io/github/stars/ccfos/nightingale">
<img alt="GitHub forks" src="https://img.shields.io/github/forks/ccfos/nightingale">
<br/><img alt="GitHub Repo issues" src="https://img.shields.io/github/issues/ccfos/nightingale">
<img alt="GitHub Repo issues closed" src="https://img.shields.io/github/issues-closed/ccfos/nightingale">
<img alt="GitHub latest release" src="https://img.shields.io/github/v/release/ccfos/nightingale"/>
<img alt="License" src="https://img.shields.io/badge/license-Apache--2.0-blue"/>
<a href="https://n9e-talk.slack.com/">
<img alt="GitHub contributors" src="https://img.shields.io/badge/join%20slack-%23n9e-brightgreen.svg"/></a>
<img alt="License" src="https://img.shields.io/badge/license-Apache--2.0-blue"/>
</p>
<p align="center">
An open-source cloud-native monitoring system that is <b>all-in-one</b> <br/>
<b>Out-of-the-box</b>, it integrates data collection, visualization, and monitoring alert <br/>
We recommend upgrading your <b>Prometheus + AlertManager + Grafana</b> combination to Nightingale!
</p>
[English](./README.md) | [中文](./README_zh.md)
## Highlighted Features
[English](./README_en.md) | [中文](./README.md)
- **Out-of-the-box**
- Supports multiple deployment methods such as **Docker, Helm Chart, and cloud services**, integrates data collection, monitoring, and alerting into one system, and comes with various monitoring dashboards, quick views, and alert rule templates. **It greatly reduces the construction cost, learning cost, and usage cost of cloud-native monitoring systems**.
- **Professional Alerting**
- Provides visual alert configuration and management, supports various alert rules, offers the ability to configure silence and subscription rules, supports multiple alert delivery channels, and has features such as alert self-healing and event management.
- **Cloud-Native**
- Quickly builds an enterprise-level cloud-native monitoring system through a turnkey approach, supports multiple collectors such as [Categraf](https://github.com/flashcatcloud/categraf), Telegraf, and Grafana-agent, supports multiple data sources such as Prometheus, VictoriaMetrics, M3DB, ElasticSearch, and Jaeger, and is compatible with importing Grafana dashboards. **It seamlessly integrates with the cloud-native ecosystem**.
- **High Performance and High Availability**
- Due to the multi-data-source management engine of Nightingale and its excellent architecture design, and utilizing a high-performance time-series database, it can handle data collection, storage, and alert analysis scenarios with billions of time-series data, saving a lot of costs.
- Nightingale components can be horizontally scaled with no single point of failure. It has been deployed in thousands of enterprises and tested in harsh production practices. Many leading Internet companies have used Nightingale for cluster machines with hundreds of nodes, processing billions of time-series data.
- **Flexible Extension and Centralized Management**
- Nightingale can be deployed on a 1-core 1G cloud host, deployed in a cluster of hundreds of machines, or run in Kubernetes. Time-series databases, alert engines, and other components can also be decentralized to various data centers and regions, balancing edge deployment with centralized management. **It solves the problem of data fragmentation and lack of unified views**.
## 夜莺 Nightingale 是什么
夜莺监控是一款开源云原生观测分析工具,采用 All-in-One 的设计理念,集数据采集、可视化、监控告警、数据分析于一体,与云原生生态紧密集成,提供开箱即用的企业级监控分析和告警能力。夜莺于 2020 年 3 月 20 日,在 github 上发布 v1 版本,已累计迭代 100 多个版本。
夜莺最初由滴滴开发和开源,并于 2022 年 5 月 11 日捐赠予中国计算机学会开源发展委员会CCF ODC为 CCF ODC 成立后接受捐赠的第一个开源项目。夜莺的核心研发团队,也是 Open-Falcon 项目原核心研发人员,从 2014 年Open-Falcon 是 2014 年开源)算起来,也有 10 年了,只为把监控这个事情做好。
#### If you are using Prometheus and have one or more of the following requirement scenarios, it is recommended that you upgrade to Nightingale:
## 快速开始
- 👉[文档中心](https://flashcat.cloud/docs/) | [下载中心](https://flashcat.cloud/download/nightingale/)
- ❤️[报告 Bug](https://github.com/ccfos/nightingale/issues/new?assignees=&labels=&projects=&template=question.yml)
- ℹ️为了提供更快速的访问体验,上述文档和下载站点托管于 [FlashcatCloud](https://flashcat.cloud)
- Multiple systems such as Prometheus, Alertmanager, Grafana, etc. are fragmented and lack a unified view and cannot be used out of the box;
- The way to manage Prometheus and Alertmanager by modifying configuration files has a big learning curve and is difficult to collaborate;
- Too much data to scale-up your Prometheus cluster;
- Multiple Prometheus clusters running in production environments, which faced high management and usage costs;
## 功能特点
#### If you are using Zabbix and have the following scenarios, it is recommended that you upgrade to Nightingale:
- Monitoring too much data and wanting a better scalable solution;
- A high learning curve and a desire for better efficiency of collaborative use in a multi-person, multi-team model;
- Microservice and cloud-native architectures with variable monitoring data lifecycles and high monitoring data dimension bases, which are not easily adaptable to the Zabbix data model;
- 对接多种时序库:支持对接 Prometheus、VictoriaMetrics、Thanos、Mimir、M3DB、TDengine 等多种时序库,实现统一告警管理。
- 专业告警能力:内置支持多种告警规则,可以扩展支持常见通知媒介,支持告警屏蔽/抑制/订阅/自愈、告警事件管理。
- 高性能可视化引擎:支持多种图表样式,内置众多 Dashboard 模版,也可导入 Grafana 模版,开箱即用,开源协议商业友好。
- 支持常见采集器:支持 [Categraf](https://flashcat.cloud/product/categraf)、Telegraf、Grafana-agent、Datadog-agent、各种 Exporter 作为采集器,没有什么数据是不能监控的。
- 👀无缝搭配 [Flashduty](https://flashcat.cloud/product/flashcat-duty/)实现告警聚合收敛、认领、升级、排班、IM集成确保告警处理不遗漏减少打扰高效协同。
#### If you are using [open-falcon](https://github.com/open-falcon/falcon-plus), we recommend you to upgrade to Nightingale
- For more information about open-falcon and Nightingale, please refer to read [Ten features and trends of cloud-native monitoring](https://mp.weixin.qq.com/s?__biz=MzkzNjI5OTM5Nw==&mid=2247483738&idx=1&sn=e8bdbb974a2cd003c1abcc2b5405dd18&chksm=c2a19fb0f5d616a63185cd79277a79a6b80118ef2185890d0683d2bb20451bd9303c78d083c5#rd)。
## 截图演示
## Getting Started
即时查询,类似 Prometheus 内置的查询分析页面,做 ad-hoc 查询,夜莺做了一些 UI 优化,同时提供了一些内置 promql 指标,让不太了解 promql 的用户也可以快速查询。
[https://n9e.github.io/](https://n9e.github.io/)
![即时查询](https://download.flashcat.cloud/ulric/20240513103305.png)
## Screenshots
当然,也可以直接通过指标视图查看,有了指标视图,即时查询基本可以不用了,或者只有高端玩家使用即时查询,普通用户直接通过指标视图查询即可。
https://user-images.githubusercontent.com/792850/216888712-2565fcea-9df5-47bd-a49e-d60af9bd76e8.mp4
![指标视图](https://download.flashcat.cloud/ulric/20240513103530.png)
## Architecture
夜莺内置了常用仪表盘,可以直接导入使用。也可以导入 Grafana 仪表盘,不过只能兼容 Grafana 基本图表,如果已经习惯了 Grafana 建议继续使用 Grafana 看图,把夜莺作为一个告警引擎使用。
<img src="doc/img/arch-product.png" width="600">
![内置仪表盘](https://download.flashcat.cloud/ulric/20240513103628.png)
Nightingale monitoring can receive monitoring data reported by various collectors (such as [Categraf](https://github.com/flashcatcloud/categraf) , telegraf, grafana-agent, Prometheus, etc.) and write them to various popular time-series databases (such as Prometheus, M3DB, VictoriaMetrics, Thanos, TDEngine, etc.). It provides configuration capabilities for alert rules, silence rules, and subscription rules, as well as the ability to view monitoring data. It also provides automatic alarm self-healing mechanisms (such as automatically calling back to a webhook address or executing a script after an alarm is triggered), and the ability to store and manage historical alarm events and view them in groups.
除了内置的仪表盘,也内置了很多告警规则,开箱即用。
If the performance of a standalone time-series database (such as Prometheus) has bottlenecks or poor disaster recovery, we recommend using [VictoriaMetrics](https://github.com/VictoriaMetrics/VictoriaMetrics). The VictoriaMetrics architecture is relatively simple, has excellent performance, and is easy to deploy and maintain. The architecture diagram is as shown above. For more detailed documentation on VictoriaMetrics, please refer to its [official website](https://victoriametrics.com/).
**We welcome you to participate in the Nightingale open-source project and community in various ways, including but not limited to**
- Adding and improving documentation => [n9e.github.io](https://n9e.github.io/)
- Sharing your best practices and experience in using Nightingale monitoring => [Article sharing]((https://n9e.github.io/docs/prologue/share/))
- Submitting product suggestions => [github issue](https://github.com/ccfos/nightingale/issues/new?assignees=&labels=kind%2Ffeature&template=enhancement.md)
- Submitting code to make Nightingale monitoring faster, more stable, and easier to use => [github pull request](https://github.com/didi/nightingale/pulls)
![内置告警规则](https://download.flashcat.cloud/ulric/20240513103825.png)
**Respecting, recognizing, and recording the work of every contributor** is the first guiding principle of the Nightingale open-source community. We advocate effective questioning, which not only respects the developer's time but also contributes to the accumulation of knowledge in the entire community
- Before asking a question, please first refer to the [FAQ](https://www.gitlink.org.cn/ccfos/nightingale/wiki/faq)
- We use [GitHub Discussions](https://github.com/ccfos/nightingale/discussions) as the communication forum. You can search and ask questions here.
- We also recommend that you join ours [Slack channel](https://n9e-talk.slack.com/) to exchange experiences with other Nightingale users.
## 产品架构
社区使用夜莺最多的场景就是使用夜莺做告警引擎,对接多套时序库,统一告警规则管理。绘图仍然使用 Grafana 居多。作为一个告警引擎,夜莺的产品架构如下:
![产品架构](https://download.flashcat.cloud/ulric/20240221152601.png)
对于个别边缘机房,如果和中心夜莺服务端网络链路不好,希望提升告警可用性,我们也提供边缘机房告警引擎下沉部署模式,这个模式下,即便网络割裂,告警功能也不受影响。
![边缘部署模式](https://download.flashcat.cloud/ulric/20240222102119.png)
## 近期计划
- [ ] 仪表盘:支持内嵌 Grafana
- [ ] 告警规则:通知时支持配置过滤标签,避免告警事件中一堆不重要的标签
- [x] 告警规则:支持配置恢复时的 Promql告警恢复通知也可以带上恢复时的值了
- [ ] 机器管理自定义标签拆分管理agent 自动上报的标签和用户在页面自定义的标签分开管理,对于 agent 自动上报的标签,以 agent 为准,直接覆盖服务端 DB 中的数据
- [ ] 机器管理:机器支持角色字段,即无头标签,用于描述混部场景
- [ ] 机器管理:把业务组的 busigroup 标签迁移到机器的属性里,让机器支持挂到多个业务组
- [ ] 告警规则:增加 Host Metrics 类别,支持按照业务组、角色、标签等筛选机器,规则 promql 支持变量,支持在机器颗粒度配置变量值
- [ ] 告警通知:重构整个通知逻辑,引入事件处理的 pipeline支持对告警事件做自定义处理和灵活分派
## 交流渠道
- 报告Bug优先推荐提交[夜莺GitHub Issue](https://github.com/ccfos/nightingale/issues/new?assignees=&labels=kind%2Fbug&projects=&template=bug_report.yml)
- 推荐完整浏览[夜莺文档站点](https://flashcat.cloud/docs/content/flashcat-monitor/nightingale-v7/introduction/),了解更多信息
- 推荐搜索关注夜莺公众号,第一时间获取社区动态:`夜莺监控Nightingale`
- 日常答疑、技术分享、用户之间的交流,统一使用知识星球,大伙可以免费加入交流,[入口在这里](https://download.flashcat.cloud/ulric/20240319095409.png)
## 广受关注
[![Stargazers over time](https://api.star-history.com/svg?repos=ccfos/nightingale&type=Date)](https://star-history.com/#ccfos/nightingale&Date)
## Who is using Nightingale
You can register your usage and share your experience by posting on **[Who is Using Nightingale](https://github.com/ccfos/nightingale/issues/897)**.
## Stargazers over time
[![Stargazers over time](https://starchart.cc/ccfos/nightingale.svg)](https://starchart.cc/ccfos/nightingale)
## Contributors
## 社区共建
- ❇️请阅读浏览[夜莺开源项目和社区治理架构草案](./doc/community-governance.md),真诚欢迎每一位用户、开发者、公司以及组织,使用夜莺监控、积极反馈 Bug、提交功能需求、分享最佳实践共建专业、活跃的夜莺开源社区。
- 夜莺贡献者❤️
<a href="https://github.com/ccfos/nightingale/graphs/contributors">
<img src="https://contrib.rocks/image?repo=ccfos/nightingale" />
</a>
## License
[Apache License V2.0](https://github.com/didi/nightingale/blob/main/LICENSE)
- [Apache License V2.0](https://github.com/didi/nightingale/blob/main/LICENSE)

104
README_en.md Normal file
View File

@@ -0,0 +1,104 @@
<p align="center">
<a href="https://github.com/ccfos/nightingale">
<img src="doc/img/Nightingale_L_V.png" alt="nightingale - cloud native monitoring" width="240" /></a>
</p>
<p align="center">
<img alt="GitHub latest release" src="https://img.shields.io/github/v/release/ccfos/nightingale"/>
<a href="https://n9e.github.io">
<img alt="Docs" src="https://img.shields.io/badge/docs-get%20started-brightgreen"/></a>
<a href="https://hub.docker.com/u/flashcatcloud">
<img alt="Docker pulls" src="https://img.shields.io/docker/pulls/flashcatcloud/nightingale"/></a>
<img alt="GitHub Repo stars" src="https://img.shields.io/github/stars/ccfos/nightingale">
<img alt="GitHub Repo issues" src="https://img.shields.io/github/issues/ccfos/nightingale">
<img alt="GitHub Repo issues closed" src="https://img.shields.io/github/issues-closed/ccfos/nightingale">
<img alt="GitHub forks" src="https://img.shields.io/github/forks/ccfos/nightingale">
<a href="https://github.com/ccfos/nightingale/graphs/contributors">
<img alt="GitHub contributors" src="https://img.shields.io/github/contributors-anon/ccfos/nightingale"/></a>
<a href="https://n9e-talk.slack.com/">
<img alt="GitHub contributors" src="https://img.shields.io/badge/join%20slack-%23n9e-brightgreen.svg"/></a>
<img alt="License" src="https://img.shields.io/badge/license-Apache--2.0-blue"/>
</p>
<p align="center">
An open-source cloud-native monitoring system that is <b>all-in-one</b> <br/>
<b>Out-of-the-box</b>, it integrates data collection, visualization, and monitoring alert <br/>
We recommend upgrading your <b>Prometheus + AlertManager + Grafana</b> combination to Nightingale!
</p>
[English](./README_en.md) | [中文](./README.md)
## Highlighted Features
- **Out-of-the-box**
- Supports multiple deployment methods such as **Docker, Helm Chart, and cloud services**, integrates data collection, monitoring, and alerting into one system, and comes with various monitoring dashboards, quick views, and alert rule templates. **It greatly reduces the construction cost, learning cost, and usage cost of cloud-native monitoring systems**.
- **Professional Alerting**
- Provides visual alert configuration and management, supports various alert rules, offers the ability to configure silence and subscription rules, supports multiple alert delivery channels, and has features such as alert self-healing and event management.
- **Cloud-Native**
- Quickly builds an enterprise-level cloud-native monitoring system through a turnkey approach, supports multiple collectors such as [Categraf](https://github.com/flashcatcloud/categraf), Telegraf, and Grafana-agent, supports multiple data sources such as Prometheus, VictoriaMetrics, M3DB, ElasticSearch, and Jaeger, and is compatible with importing Grafana dashboards. **It seamlessly integrates with the cloud-native ecosystem**.
- **High Performance and High Availability**
- Due to the multi-data-source management engine of Nightingale and its excellent architecture design, and utilizing a high-performance time-series database, it can handle data collection, storage, and alert analysis scenarios with billions of time-series data, saving a lot of costs.
- Nightingale components can be horizontally scaled with no single point of failure. It has been deployed in thousands of enterprises and tested in harsh production practices. Many leading Internet companies have used Nightingale for cluster machines with hundreds of nodes, processing billions of time-series data.
- **Flexible Extension and Centralized Management**
- Nightingale can be deployed on a 1-core 1G cloud host, deployed in a cluster of hundreds of machines, or run in Kubernetes. Time-series databases, alert engines, and other components can also be decentralized to various data centers and regions, balancing edge deployment with centralized management. **It solves the problem of data fragmentation and lack of unified views**.
#### If you are using Prometheus and have one or more of the following requirement scenarios, it is recommended that you upgrade to Nightingale:
- Multiple systems such as Prometheus, Alertmanager, Grafana, etc. are fragmented and lack a unified view and cannot be used out of the box;
- The way to manage Prometheus and Alertmanager by modifying configuration files has a big learning curve and is difficult to collaborate;
- Too much data to scale-up your Prometheus cluster;
- Multiple Prometheus clusters running in production environments, which faced high management and usage costs;
#### If you are using Zabbix and have the following scenarios, it is recommended that you upgrade to Nightingale:
- Monitoring too much data and wanting a better scalable solution;
- A high learning curve and a desire for better efficiency of collaborative use in a multi-person, multi-team model;
- Microservice and cloud-native architectures with variable monitoring data lifecycles and high monitoring data dimension bases, which are not easily adaptable to the Zabbix data model;
#### If you are using [open-falcon](https://github.com/open-falcon/falcon-plus), we recommend you to upgrade to Nightingale
- For more information about open-falcon and Nightingale, please refer to read [Ten features and trends of cloud-native monitoring](https://mp.weixin.qq.com/s?__biz=MzkzNjI5OTM5Nw==&mid=2247483738&idx=1&sn=e8bdbb974a2cd003c1abcc2b5405dd18&chksm=c2a19fb0f5d616a63185cd79277a79a6b80118ef2185890d0683d2bb20451bd9303c78d083c5#rd)。
## Getting Started
[https://n9e.github.io/](https://n9e.github.io/)
## Screenshots
https://user-images.githubusercontent.com/792850/216888712-2565fcea-9df5-47bd-a49e-d60af9bd76e8.mp4
## Architecture
<img src="doc/img/arch-product.png" width="600">
Nightingale monitoring can receive monitoring data reported by various collectors (such as [Categraf](https://github.com/flashcatcloud/categraf) , telegraf, grafana-agent, Prometheus, etc.) and write them to various popular time-series databases (such as Prometheus, M3DB, VictoriaMetrics, Thanos, TDEngine, etc.). It provides configuration capabilities for alert rules, silence rules, and subscription rules, as well as the ability to view monitoring data. It also provides automatic alarm self-healing mechanisms (such as automatically calling back to a webhook address or executing a script after an alarm is triggered), and the ability to store and manage historical alarm events and view them in groups.
If the performance of a standalone time-series database (such as Prometheus) has bottlenecks or poor disaster recovery, we recommend using [VictoriaMetrics](https://github.com/VictoriaMetrics/VictoriaMetrics). The VictoriaMetrics architecture is relatively simple, has excellent performance, and is easy to deploy and maintain. The architecture diagram is as shown above. For more detailed documentation on VictoriaMetrics, please refer to its [official website](https://victoriametrics.com/).
**We welcome you to participate in the Nightingale open-source project and community in various ways, including but not limited to**
- Adding and improving documentation => [n9e.github.io](https://n9e.github.io/)
- Sharing your best practices and experience in using Nightingale monitoring => [Article sharing]((https://n9e.github.io/docs/prologue/share/))
- Submitting product suggestions => [github issue](https://github.com/ccfos/nightingale/issues/new?assignees=&labels=kind%2Ffeature&template=enhancement.md)
- Submitting code to make Nightingale monitoring faster, more stable, and easier to use => [github pull request](https://github.com/didi/nightingale/pulls)
**Respecting, recognizing, and recording the work of every contributor** is the first guiding principle of the Nightingale open-source community. We advocate effective questioning, which not only respects the developer's time but also contributes to the accumulation of knowledge in the entire community
- Before asking a question, please first refer to the [FAQ](https://www.gitlink.org.cn/ccfos/nightingale/wiki/faq)
- We use [GitHub Discussions](https://github.com/ccfos/nightingale/discussions) as the communication forum. You can search and ask questions here.
- We also recommend that you join ours [Slack channel](https://n9e-talk.slack.com/) to exchange experiences with other Nightingale users.
## Who is using Nightingale
You can register your usage and share your experience by posting on **[Who is Using Nightingale](https://github.com/ccfos/nightingale/issues/897)**.
## Stargazers over time
[![Stargazers over time](https://starchart.cc/ccfos/nightingale.svg)](https://starchart.cc/ccfos/nightingale)
## Contributors
<a href="https://github.com/ccfos/nightingale/graphs/contributors">
<img src="https://contrib.rocks/image?repo=ccfos/nightingale" />
</a>
## License
[Apache License V2.0](https://github.com/didi/nightingale/blob/main/LICENSE)

View File

@@ -1,74 +0,0 @@
<p align="center">
<a href="https://github.com/ccfos/nightingale">
<img src="doc/img/nightingale_logo_h.png" alt="nightingale - cloud native monitoring" width="240" /></a>
</p>
<p align="center">
<a href="https://flashcat.cloud/docs/">
<img alt="Docs" src="https://img.shields.io/badge/docs-get%20started-brightgreen"/></a>
<a href="https://hub.docker.com/u/flashcatcloud">
<img alt="Docker pulls" src="https://img.shields.io/docker/pulls/flashcatcloud/nightingale"/></a>
<a href="https://github.com/ccfos/nightingale/graphs/contributors">
<img alt="GitHub contributors" src="https://img.shields.io/github/contributors-anon/ccfos/nightingale"/></a>
<img alt="GitHub Repo stars" src="https://img.shields.io/github/stars/ccfos/nightingale">
<br/><img alt="GitHub Repo issues" src="https://img.shields.io/github/issues/ccfos/nightingale">
<img alt="GitHub Repo issues closed" src="https://img.shields.io/github/issues-closed/ccfos/nightingale">
<img alt="GitHub forks" src="https://img.shields.io/github/forks/ccfos/nightingale">
<img alt="GitHub latest release" src="https://img.shields.io/github/v/release/ccfos/nightingale"/>
<img alt="License" src="https://img.shields.io/badge/license-Apache--2.0-blue"/>
<a href="https://n9e-talk.slack.com/">
<img alt="GitHub contributors" src="https://img.shields.io/badge/join%20slack-%23n9e-brightgreen.svg"/></a>
</p>
<p align="center">
告警管理专家,一体化的开源可观测平台
</p>
[English](./README.md) | [中文](./README_zh.md)
夜莺Nightingale是中国计算机学会托管的开源云原生可观测工具最早由滴滴于 2020 年孵化并开源,并于 2022 年正式捐赠予中国计算机学会。夜莺采用 All-in-One 的设计理念,集数据采集、可视化、监控告警、数据分析于一体,与云原生生态紧密集成,融入了顶级互联网公司可观测性最佳实践,沉淀了众多社区专家经验,开箱即用。
## 资料
- 文档:[flashcat.cloud/docs](https://flashcat.cloud/docs/)
- 提问:[answer.flashcat.cloud](https://answer.flashcat.cloud/)
- 报Bug[github.com/ccfos/nightingale/issues](https://github.com/ccfos/nightingale/issues/new?assignees=&labels=kind%2Fbug&projects=&template=bug_report.yml)
## 功能和特点
- 统一接入各种时序库:支持对接 Prometheus、VictoriaMetrics、Thanos、Mimir、M3DB 等多种时序库,实现统一告警管理
- 专业告警能力:内置支持多种告警规则,可以扩展支持所有通知媒介,支持告警屏蔽、告警抑制、告警自愈、告警事件管理
- 高性能可视化引擎支持多种图表样式内置众多Dashboard模版也可导入Grafana模版开箱即用开源协议商业友好
- 无缝搭配 [Flashduty](https://flashcat.cloud/product/flashcat-duty/)实现告警聚合收敛、认领、升级、排班、IM集成确保告警处理不遗漏减少打扰更好协同
- 支持所有常见采集器:支持 [Categraf](https://flashcat.cloud/product/categraf)、telegraf、grafana-agent、datadog-agent、各种 exporter 作为采集器,没有什么数据是不能监控的
- 一体化观测平台:从 v6 版本开始,支持接入 ElasticSearch、Jaeger 数据源,实现日志、链路、指标多维度的统一可观测
## 产品演示
![演示](doc/img/n9e-screenshot-gif-v6.gif)
## 部署架构
![架构](doc/img/n9e-arch-latest.png)
## 加入交流群
欢迎加入 QQ 交流群群号479290895QQ 群适合群友互助,夜莺研发人员通常不在群里。如果要报 bug 请到[这里](https://github.com/ccfos/nightingale/issues/new?assignees=&labels=kind%2Fbug&projects=&template=bug_report.yml),提问到[这里](https://answer.flashcat.cloud/)。
## Stargazers over time
[![Stargazers over time](https://api.star-history.com/svg?repos=ccfos/nightingale&type=Date)](https://star-history.com/#ccfos/nightingale&Date)
## Contributors
<a href="https://github.com/ccfos/nightingale/graphs/contributors">
<img src="https://contrib.rocks/image?repo=ccfos/nightingale" />
</a>
## 社区治理
[夜莺开源项目和社区治理架构(草案)](./doc/community-governance.md)
## License
[Apache License V2.0](https://github.com/didi/nightingale/blob/main/LICENSE)

View File

@@ -46,13 +46,6 @@ type RedisPub struct {
ChannelKey string
}
type Ibex struct {
Address string
BasicAuthUser string
BasicAuthPass string
Timeout int64
}
func (a *Alert) PreCheck(configDir string) {
if a.Alerting.TemplatesDir == "" {
a.Alerting.TemplatesDir = path.Join(configDir, "template")

View File

@@ -24,7 +24,10 @@ import (
"github.com/ccfos/nightingale/v6/prom"
"github.com/ccfos/nightingale/v6/pushgw/pconf"
"github.com/ccfos/nightingale/v6/pushgw/writer"
"github.com/ccfos/nightingale/v6/storage"
"github.com/ccfos/nightingale/v6/tdengine"
"github.com/flashcatcloud/ibex/src/cmd/ibex"
)
func Initialize(configDir string, cryptoKey string) (func(), error) {
@@ -40,11 +43,17 @@ func Initialize(configDir string, cryptoKey string) (func(), error) {
ctx := ctx.NewContext(context.Background(), nil, false, config.CenterApi)
var redis storage.Redis
redis, err = storage.NewRedis(config.Redis)
if err != nil {
return nil, err
}
syncStats := memsto.NewSyncStats()
alertStats := astats.NewSyncStats()
configCache := memsto.NewConfigCache(ctx, syncStats, nil, "")
targetCache := memsto.NewTargetCache(ctx, syncStats, nil)
targetCache := memsto.NewTargetCache(ctx, syncStats, redis)
busiGroupCache := memsto.NewBusiGroupCache(ctx, syncStats)
alertMuteCache := memsto.NewAlertMuteCache(ctx, syncStats)
alertRuleCache := memsto.NewAlertRuleCache(ctx, syncStats)
@@ -52,16 +61,22 @@ func Initialize(configDir string, cryptoKey string) (func(), error) {
dsCache := memsto.NewDatasourceCache(ctx, syncStats)
userCache := memsto.NewUserCache(ctx, syncStats)
userGroupCache := memsto.NewUserGroupCache(ctx, syncStats)
taskTplsCache := memsto.NewTaskTplCache(ctx)
promClients := prom.NewPromClient(ctx, config.Alert.Heartbeat)
promClients := prom.NewPromClient(ctx)
tdengineClients := tdengine.NewTdengineClient(ctx, config.Alert.Heartbeat)
externalProcessors := process.NewExternalProcessors()
Start(config.Alert, config.Pushgw, syncStats, alertStats, externalProcessors, targetCache, busiGroupCache, alertMuteCache, alertRuleCache, notifyConfigCache, dsCache, ctx, promClients, tdengineClients, userCache, userGroupCache)
Start(config.Alert, config.Pushgw, syncStats, alertStats, externalProcessors, targetCache, busiGroupCache, alertMuteCache, alertRuleCache, notifyConfigCache, taskTplsCache, dsCache, ctx, promClients, tdengineClients, userCache, userGroupCache)
r := httpx.GinEngine(config.Global.RunMode, config.HTTP)
rt := router.New(config.HTTP, config.Alert, alertMuteCache, targetCache, busiGroupCache, alertStats, ctx, externalProcessors)
if config.Ibex.Enable {
ibex.ServerStart(false, nil, redis, config.HTTP.APIForService.BasicAuth, config.Alert.Heartbeat, &config.CenterApi, r, nil, config.Ibex, config.HTTP.Port)
}
rt.Config(r)
dumper.ConfigRouter(r)
@@ -74,26 +89,28 @@ func Initialize(configDir string, cryptoKey string) (func(), error) {
}
func Start(alertc aconf.Alert, pushgwc pconf.Pushgw, syncStats *memsto.Stats, alertStats *astats.Stats, externalProcessors *process.ExternalProcessorsType, targetCache *memsto.TargetCacheType, busiGroupCache *memsto.BusiGroupCacheType,
alertMuteCache *memsto.AlertMuteCacheType, alertRuleCache *memsto.AlertRuleCacheType, notifyConfigCache *memsto.NotifyConfigCacheType, datasourceCache *memsto.DatasourceCacheType, ctx *ctx.Context,
alertMuteCache *memsto.AlertMuteCacheType, alertRuleCache *memsto.AlertRuleCacheType, notifyConfigCache *memsto.NotifyConfigCacheType, taskTplsCache *memsto.TaskTplCache, datasourceCache *memsto.DatasourceCacheType, ctx *ctx.Context,
promClients *prom.PromClientMap, tdendgineClients *tdengine.TdengineClientMap, userCache *memsto.UserCacheType, userGroupCache *memsto.UserGroupCacheType) {
alertSubscribeCache := memsto.NewAlertSubscribeCache(ctx, syncStats)
recordingRuleCache := memsto.NewRecordingRuleCache(ctx, syncStats)
targetsOfAlertRulesCache := memsto.NewTargetOfAlertRuleCache(ctx, alertc.Heartbeat.EngineName, syncStats)
go models.InitNotifyConfig(ctx, alertc.Alerting.TemplatesDir)
naming := naming.NewNaming(ctx, alertc.Heartbeat)
naming := naming.NewNaming(ctx, alertc.Heartbeat, alertStats)
writers := writer.NewWriters(pushgwc)
record.NewScheduler(alertc, recordingRuleCache, promClients, writers, alertStats)
eval.NewScheduler(alertc, externalProcessors, alertRuleCache, targetCache, busiGroupCache, alertMuteCache, datasourceCache, promClients, tdendgineClients, naming, ctx, alertStats)
eval.NewScheduler(alertc, externalProcessors, alertRuleCache, targetCache, targetsOfAlertRulesCache,
busiGroupCache, alertMuteCache, datasourceCache, promClients, tdendgineClients, naming, ctx, alertStats)
dp := dispatch.NewDispatch(alertRuleCache, userCache, userGroupCache, alertSubscribeCache, targetCache, notifyConfigCache, alertc.Alerting, ctx, alertStats)
consumer := dispatch.NewConsumer(alertc.Alerting, ctx, dp)
dp := dispatch.NewDispatch(alertRuleCache, userCache, userGroupCache, alertSubscribeCache, targetCache, notifyConfigCache, taskTplsCache, alertc.Alerting, ctx, alertStats)
consumer := dispatch.NewConsumer(alertc.Alerting, ctx, dp, promClients)
go dp.ReloadTpls()
go consumer.LoopConsume()
go queue.ReportQueueSize(alertStats)
go sender.InitEmailSender(notifyConfigCache.GetSMTP())
go sender.InitEmailSender(notifyConfigCache)
}

View File

@@ -16,9 +16,13 @@ type Stats struct {
GaugeAlertQueueSize prometheus.Gauge
CounterRuleEval *prometheus.CounterVec
CounterQueryDataErrorTotal *prometheus.CounterVec
CounterQueryDataTotal *prometheus.CounterVec
CounterRecordEval *prometheus.CounterVec
CounterRecordEvalErrorTotal *prometheus.CounterVec
CounterMuteTotal *prometheus.CounterVec
CounterRuleEvalErrorTotal *prometheus.CounterVec
CounterHeartbeatErrorTotal *prometheus.CounterVec
CounterSubEventTotal *prometheus.CounterVec
}
func NewSyncStats() *Stats {
@@ -29,19 +33,40 @@ func NewSyncStats() *Stats {
Help: "Number of rule eval.",
}, []string{})
CounterRuleEvalErrorTotal := prometheus.NewCounterVec(prometheus.CounterOpts{
Namespace: namespace,
Subsystem: subsystem,
Name: "rule_eval_error_total",
Help: "Number of rule eval error.",
}, []string{"datasource", "stage"})
CounterQueryDataErrorTotal := prometheus.NewCounterVec(prometheus.CounterOpts{
Namespace: namespace,
Subsystem: subsystem,
Name: "query_data_error_total",
Help: "Number of rule eval query data error.",
}, []string{"datasource"})
CounterQueryDataTotal := prometheus.NewCounterVec(prometheus.CounterOpts{
Namespace: namespace,
Subsystem: subsystem,
Name: "query_data_total",
Help: "Number of rule eval query data.",
}, []string{"datasource"})
CounterRecordEval := prometheus.NewCounterVec(prometheus.CounterOpts{
Namespace: namespace,
Subsystem: subsystem,
Name: "record_eval_total",
Help: "Number of record eval.",
}, []string{})
}, []string{"datasource"})
CounterRecordEvalErrorTotal := prometheus.NewCounterVec(prometheus.CounterOpts{
Namespace: namespace,
Subsystem: subsystem,
Name: "record_eval_error_total",
Help: "Number of record eval error.",
}, []string{})
}, []string{"datasource"})
AlertNotifyTotal := prometheus.NewCounterVec(prometheus.CounterOpts{
Namespace: namespace,
@@ -73,13 +98,6 @@ func NewSyncStats() *Stats {
Help: "The size of alert queue.",
})
CounterQueryDataErrorTotal := prometheus.NewCounterVec(prometheus.CounterOpts{
Namespace: namespace,
Subsystem: subsystem,
Name: "query_data_error_total",
Help: "Number of query data error.",
}, []string{"datasource"})
CounterMuteTotal := prometheus.NewCounterVec(prometheus.CounterOpts{
Namespace: namespace,
Subsystem: subsystem,
@@ -87,16 +105,34 @@ func NewSyncStats() *Stats {
Help: "Number of mute.",
}, []string{"group"})
CounterSubEventTotal := prometheus.NewCounterVec(prometheus.CounterOpts{
Namespace: namespace,
Subsystem: subsystem,
Name: "sub_event_total",
Help: "Number of sub event.",
}, []string{"group"})
CounterHeartbeatErrorTotal := prometheus.NewCounterVec(prometheus.CounterOpts{
Namespace: namespace,
Subsystem: subsystem,
Name: "heartbeat_error_count",
Help: "Number of heartbeat error.",
}, []string{})
prometheus.MustRegister(
CounterAlertsTotal,
GaugeAlertQueueSize,
AlertNotifyTotal,
AlertNotifyErrorTotal,
CounterRuleEval,
CounterQueryDataTotal,
CounterQueryDataErrorTotal,
CounterRecordEval,
CounterRecordEvalErrorTotal,
CounterMuteTotal,
CounterRuleEvalErrorTotal,
CounterHeartbeatErrorTotal,
CounterSubEventTotal,
)
return &Stats{
@@ -105,9 +141,13 @@ func NewSyncStats() *Stats {
AlertNotifyTotal: AlertNotifyTotal,
AlertNotifyErrorTotal: AlertNotifyErrorTotal,
CounterRuleEval: CounterRuleEval,
CounterQueryDataTotal: CounterQueryDataTotal,
CounterQueryDataErrorTotal: CounterQueryDataErrorTotal,
CounterRecordEval: CounterRecordEval,
CounterRecordEvalErrorTotal: CounterRecordEvalErrorTotal,
CounterMuteTotal: CounterMuteTotal,
CounterRuleEvalErrorTotal: CounterRuleEvalErrorTotal,
CounterHeartbeatErrorTotal: CounterHeartbeatErrorTotal,
CounterSubEventTotal: CounterSubEventTotal,
}
}

View File

@@ -16,6 +16,7 @@ type AnomalyPoint struct {
Severity int `json:"severity"`
Triggered bool `json:"triggered"`
Query string `json:"query"`
Values string `json:"values"`
}
func NewAnomalyPoint(key string, labels map[string]string, ts int64, value float64, severity int) AnomalyPoint {

View File

@@ -1,14 +1,19 @@
package dispatch
import (
"encoding/json"
"fmt"
"strings"
"time"
"github.com/ccfos/nightingale/v6/alert/aconf"
"github.com/ccfos/nightingale/v6/alert/common"
"github.com/ccfos/nightingale/v6/alert/queue"
"github.com/ccfos/nightingale/v6/models"
"github.com/ccfos/nightingale/v6/pkg/ctx"
"github.com/ccfos/nightingale/v6/pkg/poster"
promsdk "github.com/ccfos/nightingale/v6/pkg/prom"
"github.com/ccfos/nightingale/v6/prom"
"github.com/toolkits/pkg/concurrent/semaphore"
"github.com/toolkits/pkg/logger"
@@ -18,15 +23,17 @@ type Consumer struct {
alerting aconf.Alerting
ctx *ctx.Context
dispatch *Dispatch
dispatch *Dispatch
promClients *prom.PromClientMap
}
// 创建一个 Consumer 实例
func NewConsumer(alerting aconf.Alerting, ctx *ctx.Context, dispatch *Dispatch) *Consumer {
func NewConsumer(alerting aconf.Alerting, ctx *ctx.Context, dispatch *Dispatch, promClients *prom.PromClientMap) *Consumer {
return &Consumer{
alerting: alerting,
ctx: ctx,
dispatch: dispatch,
alerting: alerting,
ctx: ctx,
dispatch: dispatch,
promClients: promClients,
}
}
@@ -66,18 +73,24 @@ func (e *Consumer) consumeOne(event *models.AlertCurEvent) {
eventType = "recovery"
}
e.dispatch.astats.CounterAlertsTotal.WithLabelValues(event.Cluster, eventType, event.GroupName).Inc()
e.dispatch.Astats.CounterAlertsTotal.WithLabelValues(event.Cluster, eventType, event.GroupName).Inc()
if err := event.ParseRule("rule_name"); err != nil {
logger.Warningf("ruleid:%d failed to parse rule name: %v", event.RuleId, err)
event.RuleName = fmt.Sprintf("failed to parse rule name: %v", err)
}
if err := event.ParseRule("rule_note"); err != nil {
event.RuleNote = fmt.Sprintf("failed to parse rule note: %v", err)
if err := event.ParseRule("annotations"); err != nil {
logger.Warningf("ruleid:%d failed to parse annotations: %v", event.RuleId, err)
event.Annotations = fmt.Sprintf("failed to parse annotations: %v", err)
event.AnnotationsJSON["error"] = event.Annotations
}
if err := event.ParseRule("annotations"); err != nil {
event.Annotations = fmt.Sprintf("failed to parse rule note: %v", err)
e.queryRecoveryVal(event)
if err := event.ParseRule("rule_note"); err != nil {
logger.Warningf("ruleid:%d failed to parse rule note: %v", event.RuleId, err)
event.RuleNote = fmt.Sprintf("failed to parse rule note: %v", err)
}
e.persist(event)
@@ -100,6 +113,7 @@ func (e *Consumer) persist(event *models.AlertCurEvent) {
event.Id, err = poster.PostByUrlsWithResp[int64](e.ctx, "/v1/n9e/event-persist", event)
if err != nil {
logger.Errorf("event:%+v persist err:%v", event, err)
e.dispatch.Astats.CounterRuleEvalErrorTotal.WithLabelValues(fmt.Sprintf("%v", event.DatasourceId), "persist_event").Inc()
}
return
}
@@ -107,5 +121,71 @@ func (e *Consumer) persist(event *models.AlertCurEvent) {
err := models.EventPersist(e.ctx, event)
if err != nil {
logger.Errorf("event%+v persist err:%v", event, err)
e.dispatch.Astats.CounterRuleEvalErrorTotal.WithLabelValues(fmt.Sprintf("%v", event.DatasourceId), "persist_event").Inc()
}
}
func (e *Consumer) queryRecoveryVal(event *models.AlertCurEvent) {
if !event.IsRecovered {
return
}
// If the event is a recovery event, execute the recovery_promql query
promql, ok := event.AnnotationsJSON["recovery_promql"]
if !ok {
return
}
promql = strings.TrimSpace(promql)
if promql == "" {
logger.Warningf("rule_eval:%s promql is blank", getKey(event))
return
}
if e.promClients.IsNil(event.DatasourceId) {
logger.Warningf("rule_eval:%s error reader client is nil", getKey(event))
return
}
readerClient := e.promClients.GetCli(event.DatasourceId)
var warnings promsdk.Warnings
value, warnings, err := readerClient.Query(e.ctx.Ctx, promql, time.Now())
if err != nil {
logger.Errorf("rule_eval:%s promql:%s, error:%v", getKey(event), promql, err)
event.AnnotationsJSON["recovery_promql_error"] = fmt.Sprintf("promql:%s error:%v", promql, err)
b, err := json.Marshal(event.AnnotationsJSON)
if err != nil {
event.AnnotationsJSON = make(map[string]string)
event.AnnotationsJSON["error"] = fmt.Sprintf("failed to parse annotations: %v", err)
} else {
event.Annotations = string(b)
}
return
}
if len(warnings) > 0 {
logger.Errorf("rule_eval:%s promql:%s, warnings:%v", getKey(event), promql, warnings)
}
anomalyPoints := common.ConvertAnomalyPoints(value)
if len(anomalyPoints) == 0 {
logger.Warningf("rule_eval:%s promql:%s, result is empty", getKey(event), promql)
event.AnnotationsJSON["recovery_promql_error"] = fmt.Sprintf("promql:%s error:%s", promql, "result is empty")
} else {
event.AnnotationsJSON["recovery_value"] = fmt.Sprintf("%v", anomalyPoints[0].Value)
}
b, err := json.Marshal(event.AnnotationsJSON)
if err != nil {
event.AnnotationsJSON = make(map[string]string)
event.AnnotationsJSON["error"] = fmt.Sprintf("failed to parse annotations: %v", err)
} else {
event.Annotations = string(b)
}
}
func getKey(event *models.AlertCurEvent) string {
return common.RuleKey(event.DatasourceId, event.RuleId)
}

View File

@@ -4,7 +4,9 @@ import (
"bytes"
"encoding/json"
"html/template"
"net/url"
"strconv"
"strings"
"sync"
"time"
@@ -26,16 +28,18 @@ type Dispatch struct {
alertSubscribeCache *memsto.AlertSubscribeCacheType
targetCache *memsto.TargetCacheType
notifyConfigCache *memsto.NotifyConfigCacheType
taskTplsCache *memsto.TaskTplCache
alerting aconf.Alerting
Senders map[string]sender.Sender
CallBacks map[string]sender.CallBacker
tpls map[string]*template.Template
ExtraSenders map[string]sender.Sender
BeforeSenderHook func(*models.AlertCurEvent) bool
ctx *ctx.Context
astats *astats.Stats
Astats *astats.Stats
RwLock sync.RWMutex
}
@@ -43,7 +47,7 @@ type Dispatch struct {
// 创建一个 Notify 实例
func NewDispatch(alertRuleCache *memsto.AlertRuleCacheType, userCache *memsto.UserCacheType, userGroupCache *memsto.UserGroupCacheType,
alertSubscribeCache *memsto.AlertSubscribeCacheType, targetCache *memsto.TargetCacheType, notifyConfigCache *memsto.NotifyConfigCacheType,
alerting aconf.Alerting, ctx *ctx.Context, astats *astats.Stats) *Dispatch {
taskTplsCache *memsto.TaskTplCache, alerting aconf.Alerting, ctx *ctx.Context, astats *astats.Stats) *Dispatch {
notify := &Dispatch{
alertRuleCache: alertRuleCache,
userCache: userCache,
@@ -51,6 +55,7 @@ func NewDispatch(alertRuleCache *memsto.AlertRuleCacheType, userCache *memsto.Us
alertSubscribeCache: alertSubscribeCache,
targetCache: targetCache,
notifyConfigCache: notifyConfigCache,
taskTplsCache: taskTplsCache,
alerting: alerting,
@@ -60,7 +65,7 @@ func NewDispatch(alertRuleCache *memsto.AlertRuleCacheType, userCache *memsto.Us
BeforeSenderHook: func(*models.AlertCurEvent) bool { return true },
ctx: ctx,
astats: astats,
Astats: astats,
}
return notify
}
@@ -97,6 +102,17 @@ func (e *Dispatch) relaodTpls() error {
models.FeishuCard: sender.NewSender(models.FeishuCard, tmpTpls),
}
// domain -> Callback()
callbacks := map[string]sender.CallBacker{
models.DingtalkDomain: sender.NewCallBacker(models.DingtalkDomain, e.targetCache, e.userCache, e.taskTplsCache, tmpTpls),
models.WecomDomain: sender.NewCallBacker(models.WecomDomain, e.targetCache, e.userCache, e.taskTplsCache, tmpTpls),
models.FeishuDomain: sender.NewCallBacker(models.FeishuDomain, e.targetCache, e.userCache, e.taskTplsCache, tmpTpls),
models.TelegramDomain: sender.NewCallBacker(models.TelegramDomain, e.targetCache, e.userCache, e.taskTplsCache, tmpTpls),
models.FeishuCardDomain: sender.NewCallBacker(models.FeishuCardDomain, e.targetCache, e.userCache, e.taskTplsCache, tmpTpls),
models.IbexDomain: sender.NewCallBacker(models.IbexDomain, e.targetCache, e.userCache, e.taskTplsCache, tmpTpls),
models.DefaultDomain: sender.NewCallBacker(models.DefaultDomain, e.targetCache, e.userCache, e.taskTplsCache, tmpTpls),
}
e.RwLock.RLock()
for channelName, extraSender := range e.ExtraSenders {
senders[channelName] = extraSender
@@ -106,6 +122,7 @@ func (e *Dispatch) relaodTpls() error {
e.RwLock.Lock()
e.tpls = tmpTpls
e.Senders = senders
e.CallBacks = callbacks
e.RwLock.Unlock()
return nil
}
@@ -209,10 +226,11 @@ func (e *Dispatch) handleSub(sub *models.AlertSubscribe, event models.AlertCurEv
}
}
e.Astats.CounterSubEventTotal.WithLabelValues(event.GroupName).Inc()
sub.ModifyEvent(&event)
LogEvent(&event, "subscribe")
event.SubRuleId = sub.Id
LogEvent(&event, "subscribe")
e.HandleEventNotify(&event, true)
}
@@ -220,7 +238,7 @@ func (e *Dispatch) Send(rule *models.AlertRule, event *models.AlertCurEvent, not
needSend := e.BeforeSenderHook(event)
if needSend {
for channel, uids := range notifyTarget.ToChannelUserMap() {
msgCtx := sender.BuildMessageContext(rule, []*models.AlertCurEvent{event}, uids, e.userCache, e.astats)
msgCtx := sender.BuildMessageContext(rule, []*models.AlertCurEvent{event}, uids, e.userCache, e.Astats)
e.RwLock.RLock()
s := e.Senders[channel]
e.RwLock.RUnlock()
@@ -228,18 +246,66 @@ func (e *Dispatch) Send(rule *models.AlertRule, event *models.AlertCurEvent, not
logger.Debugf("no sender for channel: %s", channel)
continue
}
var event *models.AlertCurEvent
if len(msgCtx.Events) > 0 {
event = msgCtx.Events[0]
}
logger.Debugf("send to channel:%s event:%+v users:%+v", channel, event, msgCtx.Users)
s.Send(msgCtx)
}
}
// handle event callbacks
sender.SendCallbacks(e.ctx, notifyTarget.ToCallbackList(), event, e.targetCache, e.userCache, e.notifyConfigCache.GetIbex(), e.astats)
e.SendCallbacks(rule, notifyTarget, event)
// handle global webhooks
sender.SendWebhooks(notifyTarget.ToWebhookList(), event, e.astats)
sender.SendWebhooks(notifyTarget.ToWebhookList(), event, e.Astats)
// handle plugin call
go sender.MayPluginNotify(e.genNoticeBytes(event), e.notifyConfigCache.GetNotifyScript(), e.astats)
go sender.MayPluginNotify(e.genNoticeBytes(event), e.notifyConfigCache.GetNotifyScript(), e.Astats)
}
func (e *Dispatch) SendCallbacks(rule *models.AlertRule, notifyTarget *NotifyTarget, event *models.AlertCurEvent) {
uids := notifyTarget.ToUidList()
urls := notifyTarget.ToCallbackList()
for _, urlStr := range urls {
if len(urlStr) == 0 {
continue
}
cbCtx := sender.BuildCallBackContext(e.ctx, urlStr, rule, []*models.AlertCurEvent{event}, uids, e.userCache, e.Astats)
if strings.HasPrefix(urlStr, "${ibex}") {
e.CallBacks[models.IbexDomain].CallBack(cbCtx)
continue
}
if !(strings.HasPrefix(urlStr, "http://") || strings.HasPrefix(urlStr, "https://")) {
cbCtx.CallBackURL = "http://" + urlStr
}
parsedURL, err := url.Parse(urlStr)
if err != nil {
logger.Errorf("SendCallbacks: failed to url.Parse(urlStr=%s): %v", urlStr, err)
continue
}
// process feishu card
if parsedURL.Host == models.FeishuDomain && parsedURL.Query().Get("card") == "1" {
e.CallBacks[models.FeishuCardDomain].CallBack(cbCtx)
continue
}
callBacker, ok := e.CallBacks[parsedURL.Host]
if ok {
callBacker.CallBack(cbCtx)
} else {
e.CallBacks[models.DefaultDomain].CallBack(cbCtx)
}
}
}
type Notice struct {

View File

@@ -18,11 +18,12 @@ func LogEvent(event *models.AlertCurEvent, location string, err ...error) {
}
logger.Infof(
"event(%s %s) %s: rule_id=%d cluster:%s %v%s@%d %s",
"event(%s %s) %s: rule_id=%d sub_id:%d cluster:%s %v%s@%d %s",
event.Hash,
status,
location,
event.RuleId,
event.SubRuleId,
event.Cluster,
event.TagsJSON,
event.TriggerValue,

View File

@@ -84,6 +84,14 @@ func (s *NotifyTarget) ToWebhookList() []*models.Webhook {
return webhooks
}
func (s *NotifyTarget) ToUidList() []int64 {
uids := make([]int64, len(s.userMap))
for uid, _ := range s.userMap {
uids = append(uids, uid)
}
return uids
}
// Dispatch 抽象由告警事件到信息接收者的路由策略
// rule: 告警规则
// event: 告警事件

View File

@@ -3,6 +3,7 @@ package eval
import (
"context"
"fmt"
"strconv"
"time"
"github.com/ccfos/nightingale/v6/alert/aconf"
@@ -25,11 +26,12 @@ type Scheduler struct {
aconf aconf.Alert
alertRuleCache *memsto.AlertRuleCacheType
targetCache *memsto.TargetCacheType
busiGroupCache *memsto.BusiGroupCacheType
alertMuteCache *memsto.AlertMuteCacheType
datasourceCache *memsto.DatasourceCacheType
alertRuleCache *memsto.AlertRuleCacheType
targetCache *memsto.TargetCacheType
targetsOfAlertRuleCache *memsto.TargetsOfAlertRuleCacheType
busiGroupCache *memsto.BusiGroupCacheType
alertMuteCache *memsto.AlertMuteCacheType
datasourceCache *memsto.DatasourceCacheType
promClients *prom.PromClientMap
tdengineClients *tdengine.TdengineClientMap
@@ -40,7 +42,8 @@ type Scheduler struct {
stats *astats.Stats
}
func NewScheduler(aconf aconf.Alert, externalProcessors *process.ExternalProcessorsType, arc *memsto.AlertRuleCacheType, targetCache *memsto.TargetCacheType,
func NewScheduler(aconf aconf.Alert, externalProcessors *process.ExternalProcessorsType, arc *memsto.AlertRuleCacheType,
targetCache *memsto.TargetCacheType, toarc *memsto.TargetsOfAlertRuleCacheType,
busiGroupCache *memsto.BusiGroupCacheType, alertMuteCache *memsto.AlertMuteCacheType, datasourceCache *memsto.DatasourceCacheType,
promClients *prom.PromClientMap, tdengineClients *tdengine.TdengineClientMap, naming *naming.Naming, ctx *ctx.Context, stats *astats.Stats) *Scheduler {
scheduler := &Scheduler{
@@ -49,11 +52,12 @@ func NewScheduler(aconf aconf.Alert, externalProcessors *process.ExternalProcess
ExternalProcessors: externalProcessors,
alertRuleCache: arc,
targetCache: targetCache,
busiGroupCache: busiGroupCache,
alertMuteCache: alertMuteCache,
datasourceCache: datasourceCache,
alertRuleCache: arc,
targetCache: targetCache,
targetsOfAlertRuleCache: toarc,
busiGroupCache: busiGroupCache,
alertMuteCache: alertMuteCache,
datasourceCache: datasourceCache,
promClients: promClients,
tdengineClients: tdengineClients,
@@ -95,7 +99,7 @@ func (s *Scheduler) syncAlertRules() {
datasourceIds := s.promClients.Hit(rule.DatasourceIdsJson)
datasourceIds = append(datasourceIds, s.tdengineClients.Hit(rule.DatasourceIdsJson)...)
for _, dsId := range datasourceIds {
if !naming.DatasourceHashRing.IsHit(dsId, fmt.Sprintf("%d", rule.Id), s.aconf.Heartbeat.Endpoint) {
if !naming.DatasourceHashRing.IsHit(strconv.FormatInt(dsId, 10), fmt.Sprintf("%d", rule.Id), s.aconf.Heartbeat.Endpoint) {
continue
}
ds := s.datasourceCache.GetById(dsId)
@@ -113,17 +117,17 @@ func (s *Scheduler) syncAlertRules() {
logger.Debugf("datasource %d status is %s", dsId, ds.Status)
continue
}
processor := process.NewProcessor(rule, dsId, s.alertRuleCache, s.targetCache, s.busiGroupCache, s.alertMuteCache, s.datasourceCache, s.ctx, s.stats)
processor := process.NewProcessor(s.aconf.Heartbeat.EngineName, rule, dsId, s.alertRuleCache, s.targetCache, s.targetsOfAlertRuleCache, s.busiGroupCache, s.alertMuteCache, s.datasourceCache, s.ctx, s.stats)
alertRule := NewAlertRuleWorker(rule, dsId, processor, s.promClients, s.tdengineClients, s.ctx)
alertRuleWorkers[alertRule.Hash()] = alertRule
}
} else if rule.IsHostRule() && s.ctx.IsCenter {
} else if rule.IsHostRule() {
// all host rule will be processed by center instance
if !naming.DatasourceHashRing.IsHit(naming.HostDatasource, fmt.Sprintf("%d", rule.Id), s.aconf.Heartbeat.Endpoint) {
if !naming.DatasourceHashRing.IsHit(s.aconf.Heartbeat.EngineName, strconv.FormatInt(rule.Id, 10), s.aconf.Heartbeat.Endpoint) {
continue
}
processor := process.NewProcessor(rule, 0, s.alertRuleCache, s.targetCache, s.busiGroupCache, s.alertMuteCache, s.datasourceCache, s.ctx, s.stats)
processor := process.NewProcessor(s.aconf.Heartbeat.EngineName, rule, 0, s.alertRuleCache, s.targetCache, s.targetsOfAlertRuleCache, s.busiGroupCache, s.alertMuteCache, s.datasourceCache, s.ctx, s.stats)
alertRule := NewAlertRuleWorker(rule, 0, processor, s.promClients, s.tdengineClients, s.ctx)
alertRuleWorkers[alertRule.Hash()] = alertRule
} else {
@@ -140,7 +144,7 @@ func (s *Scheduler) syncAlertRules() {
logger.Debugf("datasource %d status is %s", dsId, ds.Status)
continue
}
processor := process.NewProcessor(rule, dsId, s.alertRuleCache, s.targetCache, s.busiGroupCache, s.alertMuteCache, s.datasourceCache, s.ctx, s.stats)
processor := process.NewProcessor(s.aconf.Heartbeat.EngineName, rule, dsId, s.alertRuleCache, s.targetCache, s.targetsOfAlertRuleCache, s.busiGroupCache, s.alertMuteCache, s.datasourceCache, s.ctx, s.stats)
externalRuleWorkers[processor.Key()] = processor
}
}

View File

@@ -4,6 +4,8 @@ import (
"context"
"encoding/json"
"fmt"
"math"
"sort"
"strings"
"time"
@@ -36,6 +38,14 @@ type AlertRuleWorker struct {
ctx *ctx.Context
}
const (
GET_RULE_CONFIG = "get_rule_config"
GET_PROCESSOR = "get_processor"
CHECK_QUERY = "check_query_config"
GET_CLIENT = "get_client"
QUERY_DATA = "query_data"
)
func NewAlertRuleWorker(rule *models.AlertRule, datasourceId int64, processor *process.Processor, promClients *prom.PromClientMap, tdengineClients *tdengine.TdengineClientMap, ctx *ctx.Context) *AlertRuleWorker {
arw := &AlertRuleWorker{
datasourceId: datasourceId,
@@ -118,11 +128,38 @@ func (arw *AlertRuleWorker) Eval() {
return
}
arw.processor.Handle(anomalyPoints, "inner", arw.inhibit)
for _, point := range recoverPoints {
str := fmt.Sprintf("%v", point.Value)
arw.processor.RecoverSingle(process.Hash(cachedRule.Id, arw.processor.DatasourceId(), point), point.Timestamp, &str)
if arw.inhibit {
pointsMap := make(map[string]common.AnomalyPoint)
for _, point := range recoverPoints {
// 对于恢复的事件,合并处理
tagHash := process.TagHash(point)
p, exists := pointsMap[tagHash]
if !exists {
pointsMap[tagHash] = point
continue
}
if p.Severity > point.Severity {
hash := process.Hash(cachedRule.Id, arw.processor.DatasourceId(), p)
arw.processor.DeleteProcessEvent(hash)
pointsMap[tagHash] = point
}
}
for _, point := range pointsMap {
str := fmt.Sprintf("%v", point.Value)
arw.processor.RecoverSingle(process.Hash(cachedRule.Id, arw.processor.DatasourceId(), point), point.Timestamp, &str)
}
} else {
for _, point := range recoverPoints {
str := fmt.Sprintf("%v", point.Value)
arw.processor.RecoverSingle(process.Hash(cachedRule.Id, arw.processor.DatasourceId(), point), point.Timestamp, &str)
}
}
arw.processor.Handle(anomalyPoints, "inner", arw.inhibit)
}
func (arw *AlertRuleWorker) Stop() {
@@ -137,11 +174,13 @@ func (arw *AlertRuleWorker) GetPromAnomalyPoint(ruleConfig string) []common.Anom
var rule *models.PromRuleConfig
if err := json.Unmarshal([]byte(ruleConfig), &rule); err != nil {
logger.Errorf("rule_eval:%s rule_config:%s, error:%v", arw.Key(), ruleConfig, err)
arw.processor.Stats.CounterRuleEvalErrorTotal.WithLabelValues(fmt.Sprintf("%v", arw.processor.DatasourceId()), GET_RULE_CONFIG).Inc()
return lst
}
if rule == nil {
logger.Errorf("rule_eval:%s rule_config:%s, error:rule is nil", arw.Key(), ruleConfig)
arw.processor.Stats.CounterRuleEvalErrorTotal.WithLabelValues(fmt.Sprintf("%v", arw.processor.DatasourceId()), GET_RULE_CONFIG).Inc()
return lst
}
@@ -153,29 +192,33 @@ func (arw *AlertRuleWorker) GetPromAnomalyPoint(ruleConfig string) []common.Anom
promql := strings.TrimSpace(query.PromQl)
if promql == "" {
logger.Errorf("rule_eval:%s promql is blank", arw.Key())
logger.Warningf("rule_eval:%s promql is blank", arw.Key())
arw.processor.Stats.CounterRuleEvalErrorTotal.WithLabelValues(fmt.Sprintf("%v", arw.processor.DatasourceId()), CHECK_QUERY).Inc()
continue
}
if arw.promClients.IsNil(arw.datasourceId) {
logger.Errorf("rule_eval:%s error reader client is nil", arw.Key())
logger.Warningf("rule_eval:%s error reader client is nil", arw.Key())
arw.processor.Stats.CounterRuleEvalErrorTotal.WithLabelValues(fmt.Sprintf("%v", arw.processor.DatasourceId()), GET_CLIENT).Inc()
continue
}
readerClient := arw.promClients.GetCli(arw.datasourceId)
var warnings promsdk.Warnings
arw.processor.Stats.CounterQueryDataTotal.WithLabelValues(fmt.Sprintf("%d", arw.datasourceId)).Inc()
value, warnings, err := readerClient.Query(context.Background(), promql, time.Now())
if err != nil {
logger.Errorf("rule_eval:%s promql:%s, error:%v", arw.Key(), promql, err)
arw.processor.Stats.CounterQueryDataErrorTotal.WithLabelValues(fmt.Sprintf("%d", arw.datasourceId)).Inc()
arw.processor.Stats.CounterRuleEvalErrorTotal.WithLabelValues(fmt.Sprintf("%v", arw.processor.DatasourceId()), QUERY_DATA).Inc()
continue
}
if len(warnings) > 0 {
logger.Errorf("rule_eval:%s promql:%s, warnings:%v", arw.Key(), promql, warnings)
arw.processor.Stats.CounterQueryDataErrorTotal.WithLabelValues(fmt.Sprintf("%d", arw.datasourceId)).Inc()
continue
arw.processor.Stats.CounterRuleEvalErrorTotal.WithLabelValues(fmt.Sprintf("%v", arw.processor.DatasourceId()), QUERY_DATA).Inc()
}
logger.Debugf("rule_eval:%s query:%+v, value:%v", arw.Key(), query, value)
@@ -196,6 +239,7 @@ func (arw *AlertRuleWorker) GetTdengineAnomalyPoint(rule *models.AlertRule, dsId
ruleConfig := strings.TrimSpace(rule.RuleConfig)
if ruleConfig == "" {
logger.Warningf("rule_eval:%d promql is blank", rule.Id)
arw.processor.Stats.CounterRuleEvalErrorTotal.WithLabelValues(fmt.Sprintf("%v", arw.processor.DatasourceId()), GET_RULE_CONFIG).Inc()
return points, recoverPoints
}
@@ -203,92 +247,41 @@ func (arw *AlertRuleWorker) GetTdengineAnomalyPoint(rule *models.AlertRule, dsId
err := json.Unmarshal([]byte(ruleConfig), &ruleQuery)
if err != nil {
logger.Warningf("rule_eval:%d promql parse error:%s", rule.Id, err.Error())
arw.processor.Stats.CounterRuleEvalErrorTotal.WithLabelValues(fmt.Sprintf("%v", arw.processor.DatasourceId())).Inc()
arw.processor.Stats.CounterRuleEvalErrorTotal.WithLabelValues(fmt.Sprintf("%v", arw.processor.DatasourceId()), GET_RULE_CONFIG).Inc()
return points, recoverPoints
}
arw.inhibit = ruleQuery.Inhibit
if len(ruleQuery.Queries) > 0 {
seriesStore := make(map[uint64]*models.DataResp)
seriesStore := make(map[uint64]models.DataResp)
seriesTagIndex := make(map[uint64][]uint64)
for _, query := range ruleQuery.Queries {
arw.processor.Stats.CounterQueryDataTotal.WithLabelValues(fmt.Sprintf("%d", arw.datasourceId)).Inc()
cli := arw.tdengineClients.GetCli(dsId)
if cli == nil {
logger.Warningf("rule_eval:%d tdengine client is nil", rule.Id)
arw.processor.Stats.CounterQueryDataErrorTotal.WithLabelValues(fmt.Sprintf("%d", arw.datasourceId)).Inc()
arw.processor.Stats.CounterRuleEvalErrorTotal.WithLabelValues(fmt.Sprintf("%v", arw.processor.DatasourceId()), GET_CLIENT).Inc()
continue
}
series, err := cli.Query(query)
arw.processor.Stats.CounterQueryDataTotal.WithLabelValues(fmt.Sprintf("%d", arw.datasourceId)).Inc()
if err != nil {
logger.Warningf("rule_eval rid:%d query data error: %v", rule.Id, err)
arw.processor.Stats.CounterQueryDataErrorTotal.WithLabelValues(fmt.Sprintf("%d", arw.datasourceId)).Inc()
arw.processor.Stats.CounterRuleEvalErrorTotal.WithLabelValues(fmt.Sprintf("%v", arw.processor.DatasourceId()), QUERY_DATA).Inc()
continue
}
// 此条日志很重要,是告警判断的现场值
logger.Debugf("rule_eval rid:%d req:%+v resp:%+v", rule.Id, query, series)
for i := 0; i < len(series); i++ {
serieHash := hash.GetHash(series[i].Metric, series[i].Ref)
tagHash := hash.GetTagHash(series[i].Metric)
seriesStore[serieHash] = series[i]
// 将曲线按照相同的 tag 分组
if _, exists := seriesTagIndex[tagHash]; !exists {
seriesTagIndex[tagHash] = make([]uint64, 0)
}
seriesTagIndex[tagHash] = append(seriesTagIndex[tagHash], serieHash)
}
MakeSeriesMap(series, seriesTagIndex, seriesStore)
}
// 判断
for _, trigger := range ruleQuery.Triggers {
for _, seriesHash := range seriesTagIndex {
m := make(map[string]float64)
var ts int64
var sample *models.DataResp
var value float64
for _, serieHash := range seriesHash {
series, exists := seriesStore[serieHash]
if !exists {
logger.Warningf("rule_eval rid:%d series:%+v not found", rule.Id, series)
continue
}
t, v, exists := series.Last()
if !exists {
logger.Warningf("rule_eval rid:%d series:%+v value not found", rule.Id, series)
continue
}
if !strings.Contains(trigger.Exp, "$"+series.Ref) {
// 表达式中不包含该变量
continue
}
m["$"+series.Ref] = v
m["$"+series.Ref+"."+series.MetricName()] = v
ts = int64(t)
sample = series
value = v
}
isTriggered := parser.Calc(trigger.Exp, m)
// 此条日志很重要,是告警判断的现场值
logger.Debugf("rule_eval rid:%d trigger:%+v exp:%s res:%v m:%v", rule.Id, trigger, trigger.Exp, isTriggered, m)
point := common.AnomalyPoint{
Key: sample.MetricName(),
Labels: sample.Metric,
Timestamp: int64(ts),
Value: value,
Severity: trigger.Severity,
Triggered: isTriggered,
}
if isTriggered {
points = append(points, point)
} else {
recoverPoints = append(recoverPoints, point)
}
}
}
points, recoverPoints = GetAnomalyPoint(rule.Id, ruleQuery, seriesTagIndex, seriesStore)
}
return points, recoverPoints
@@ -301,11 +294,13 @@ func (arw *AlertRuleWorker) GetHostAnomalyPoint(ruleConfig string) []common.Anom
var rule *models.HostRuleConfig
if err := json.Unmarshal([]byte(ruleConfig), &rule); err != nil {
logger.Errorf("rule_eval:%s rule_config:%s, error:%v", arw.Key(), ruleConfig, err)
arw.processor.Stats.CounterRuleEvalErrorTotal.WithLabelValues(fmt.Sprintf("%v", arw.processor.DatasourceId()), GET_RULE_CONFIG).Inc()
return lst
}
if rule == nil {
logger.Errorf("rule_eval:%s rule_config:%s, error:rule is nil", arw.Key(), ruleConfig)
arw.processor.Stats.CounterRuleEvalErrorTotal.WithLabelValues(fmt.Sprintf("%v", arw.processor.DatasourceId()), GET_RULE_CONFIG).Inc()
return lst
}
@@ -316,16 +311,42 @@ func (arw *AlertRuleWorker) GetHostAnomalyPoint(ruleConfig string) []common.Anom
arw.severity = trigger.Severity
}
query := models.GetHostsQuery(rule.Queries)
switch trigger.Type {
case "target_miss":
t := now - int64(trigger.Duration)
targets, err := models.MissTargetGetsByFilter(arw.ctx, query, t)
if err != nil {
logger.Errorf("rule_eval:%s query:%v, error:%v", arw.Key(), query, err)
arw.processor.Stats.CounterQueryDataErrorTotal.WithLabelValues(fmt.Sprintf("%d", arw.datasourceId)).Inc()
var idents, engineIdents, missEngineIdents []string
var exists bool
if arw.ctx.IsCenter {
// 如果是中心节点, 将不再上报数据的主机 engineName 为空的机器,也加入到 targets 中
missEngineIdents, exists = arw.processor.TargetsOfAlertRuleCache.Get("", arw.rule.Id)
if !exists {
logger.Debugf("rule_eval:%s targets not found engineName:%s", arw.Key(), arw.processor.EngineName)
arw.processor.Stats.CounterRuleEvalErrorTotal.WithLabelValues(fmt.Sprintf("%v", arw.processor.DatasourceId()), QUERY_DATA).Inc()
}
}
idents = append(idents, missEngineIdents...)
engineIdents, exists = arw.processor.TargetsOfAlertRuleCache.Get(arw.processor.EngineName, arw.rule.Id)
if !exists {
logger.Warningf("rule_eval:%s targets not found engineName:%s", arw.Key(), arw.processor.EngineName)
arw.processor.Stats.CounterRuleEvalErrorTotal.WithLabelValues(fmt.Sprintf("%v", arw.processor.DatasourceId()), QUERY_DATA).Inc()
}
idents = append(idents, engineIdents...)
if len(idents) == 0 {
continue
}
var missTargets []string
targetUpdateTimeMap := arw.processor.TargetCache.GetHostUpdateTime(idents)
for ident, updateTime := range targetUpdateTimeMap {
if updateTime < t {
missTargets = append(missTargets, ident)
}
}
logger.Debugf("rule_eval:%s missTargets:%v", arw.Key(), missTargets)
targets := arw.processor.TargetCache.Gets(missTargets)
for _, target := range targets {
m := make(map[string]string)
target.FillTagsMap()
@@ -342,21 +363,43 @@ func (arw *AlertRuleWorker) GetHostAnomalyPoint(ruleConfig string) []common.Anom
lst = append(lst, common.NewAnomalyPoint(trigger.Type, m, now, float64(now-target.UpdateAt), trigger.Severity))
}
case "offset":
targets, err := models.TargetGetsByFilter(arw.ctx, query, 0, 0)
if err != nil {
logger.Errorf("rule_eval:%s query:%v, error:%v", arw.Key(), query, err)
arw.processor.Stats.CounterQueryDataErrorTotal.WithLabelValues(fmt.Sprintf("%d", arw.datasourceId)).Inc()
idents, exists := arw.processor.TargetsOfAlertRuleCache.Get(arw.processor.EngineName, arw.rule.Id)
if !exists {
logger.Warningf("rule_eval:%s targets not found", arw.Key())
arw.processor.Stats.CounterRuleEvalErrorTotal.WithLabelValues(fmt.Sprintf("%v", arw.processor.DatasourceId()), QUERY_DATA).Inc()
continue
}
var targetMap = make(map[string]*models.Target)
targets := arw.processor.TargetCache.Gets(idents)
targetMap := make(map[string]*models.Target)
for _, target := range targets {
targetMap[target.Ident] = target
}
hostOffsetMap := arw.processor.TargetCache.GetOffsetHost(targets, now, int64(trigger.Duration))
for host, offset := range hostOffsetMap {
offsetIdents := make(map[string]int64)
targetsMeta := arw.processor.TargetCache.GetHostMetas(targets)
for ident, meta := range targetsMeta {
if meta.CpuNum <= 0 {
// means this target is not collect by categraf, do not check offset
continue
}
if target, exists := targetMap[ident]; exists {
if now-target.UpdateAt > 120 {
// means this target is not a active host, do not check offset
continue
}
}
offset := meta.Offset
if math.Abs(float64(offset)) > float64(trigger.Duration) {
offsetIdents[ident] = offset
}
}
logger.Debugf("rule_eval:%s offsetIdents:%v", arw.Key(), offsetIdents)
for host, offset := range offsetIdents {
m := make(map[string]string)
target, exists := targetMap[host]
target, exists := arw.processor.TargetCache.Get(host)
if exists {
target.FillTagsMap()
for k, v := range target.TagsMap {
@@ -374,20 +417,22 @@ func (arw *AlertRuleWorker) GetHostAnomalyPoint(ruleConfig string) []common.Anom
}
case "pct_target_miss":
t := now - int64(trigger.Duration)
count, err := models.MissTargetCountByFilter(arw.ctx, query, t)
if err != nil {
logger.Errorf("rule_eval:%s query:%v, error:%v", arw.Key(), query, err)
arw.processor.Stats.CounterQueryDataErrorTotal.WithLabelValues(fmt.Sprintf("%d", arw.datasourceId)).Inc()
idents, exists := arw.processor.TargetsOfAlertRuleCache.Get(arw.processor.EngineName, arw.rule.Id)
if !exists {
logger.Warningf("rule_eval:%s targets not found", arw.Key())
arw.processor.Stats.CounterRuleEvalErrorTotal.WithLabelValues(fmt.Sprintf("%v", arw.processor.DatasourceId()), QUERY_DATA).Inc()
continue
}
total, err := models.TargetCountByFilter(arw.ctx, query)
if err != nil {
logger.Errorf("rule_eval:%s query:%v, error:%v", arw.Key(), query, err)
arw.processor.Stats.CounterQueryDataErrorTotal.WithLabelValues(fmt.Sprintf("%d", arw.datasourceId)).Inc()
continue
var missTargets []string
targetUpdateTimeMap := arw.processor.TargetCache.GetHostUpdateTime(idents)
for ident, updateTime := range targetUpdateTimeMap {
if updateTime < t {
missTargets = append(missTargets, ident)
}
}
pct := float64(count) / float64(total) * 100
logger.Debugf("rule_eval:%s missTargets:%v", arw.Key(), missTargets)
pct := float64(len(missTargets)) / float64(len(idents)) * 100
if pct >= float64(trigger.Percent) {
lst = append(lst, common.NewAnomalyPoint(trigger.Type, nil, now, pct, trigger.Severity))
}
@@ -395,3 +440,92 @@ func (arw *AlertRuleWorker) GetHostAnomalyPoint(ruleConfig string) []common.Anom
}
return lst
}
func GetAnomalyPoint(ruleId int64, ruleQuery models.RuleQuery, seriesTagIndex map[uint64][]uint64, seriesStore map[uint64]models.DataResp) ([]common.AnomalyPoint, []common.AnomalyPoint) {
points := []common.AnomalyPoint{}
recoverPoints := []common.AnomalyPoint{}
for _, trigger := range ruleQuery.Triggers {
for _, seriesHash := range seriesTagIndex {
sort.Slice(seriesHash, func(i, j int) bool {
return seriesHash[i] < seriesHash[j]
})
m := make(map[string]float64)
var ts int64
var sample models.DataResp
var value float64
for _, serieHash := range seriesHash {
series, exists := seriesStore[serieHash]
if !exists {
logger.Warningf("rule_eval rid:%d series:%+v not found", ruleId, series)
continue
}
t, v, exists := series.Last()
if !exists {
logger.Warningf("rule_eval rid:%d series:%+v value not found", ruleId, series)
continue
}
if !strings.Contains(trigger.Exp, "$"+series.Ref) {
// 表达式中不包含该变量
continue
}
m["$"+series.Ref] = v
m["$"+series.Ref+"."+series.MetricName()] = v
ts = int64(t)
sample = series
value = v
}
isTriggered := parser.Calc(trigger.Exp, m)
// 此条日志很重要,是告警判断的现场值
logger.Infof("rule_eval rid:%d trigger:%+v exp:%s res:%v m:%v", ruleId, trigger, trigger.Exp, isTriggered, m)
var values string
for k, v := range m {
if !strings.Contains(k, ".") {
continue
}
values += fmt.Sprintf("%s:%v ", k, v)
}
point := common.AnomalyPoint{
Key: sample.MetricName(),
Labels: sample.Metric,
Timestamp: int64(ts),
Value: value,
Values: values,
Severity: trigger.Severity,
Triggered: isTriggered,
Query: fmt.Sprintf("query:%+v trigger:%+v", ruleQuery.Queries, trigger),
}
if sample.Query != "" {
point.Query = sample.Query
}
if isTriggered {
points = append(points, point)
} else {
recoverPoints = append(recoverPoints, point)
}
}
}
return points, recoverPoints
}
func MakeSeriesMap(series []models.DataResp, seriesTagIndex map[uint64][]uint64, seriesStore map[uint64]models.DataResp) {
for i := 0; i < len(series); i++ {
serieHash := hash.GetHash(series[i].Metric, series[i].Ref)
tagHash := hash.GetTagHash(series[i].Metric)
seriesStore[serieHash] = series[i]
// 将曲线按照相同的 tag 分组
if _, exists := seriesTagIndex[tagHash]; !exists {
seriesTagIndex[tagHash] = make([]uint64, 0)
}
seriesTagIndex[tagHash] = append(seriesTagIndex[tagHash], serieHash)
}
}

View File

@@ -147,7 +147,7 @@ func matchMute(event *models.AlertCurEvent, mute *models.AlertMute, clock ...int
}
// 如果不是全局的,判断 匹配的 datasource id
if !(len(mute.DatasourceIdsJson) != 0 && mute.DatasourceIdsJson[0] == 0) && event.DatasourceId != 0 {
if len(mute.DatasourceIdsJson) != 0 && mute.DatasourceIdsJson[0] != 0 && event.DatasourceId != 0 {
idm := make(map[int64]struct{}, len(mute.DatasourceIdsJson))
for i := 0; i < len(mute.DatasourceIdsJson); i++ {
idm[mute.DatasourceIdsJson[i]] = struct{}{}
@@ -172,7 +172,7 @@ func matchMute(event *models.AlertCurEvent, mute *models.AlertMute, clock ...int
for i := 0; i < len(mute.PeriodicMutesJson); i++ {
if strings.Contains(mute.PeriodicMutesJson[i].EnableDaysOfWeek, triggerWeek) {
if mute.PeriodicMutesJson[i].EnableStime == mute.PeriodicMutesJson[i].EnableEtime {
if mute.PeriodicMutesJson[i].EnableStime == mute.PeriodicMutesJson[i].EnableEtime || (mute.PeriodicMutesJson[i].EnableStime == "00:00" && mute.PeriodicMutesJson[i].EnableEtime == "23:59") {
matchTime = true
break
} else if mute.PeriodicMutesJson[i].EnableStime < mute.PeriodicMutesJson[i].EnableEtime {
@@ -209,5 +209,9 @@ func matchMute(event *models.AlertCurEvent, mute *models.AlertMute, clock ...int
return false
}
if mute.ITags == nil || len(mute.ITags) == 0 {
return true
}
return common.MatchTags(event.TagsMap, mute.ITags)
}

View File

@@ -12,12 +12,12 @@ const NodeReplicas = 500
type DatasourceHashRingType struct {
sync.RWMutex
Rings map[int64]*consistent.Consistent
Rings map[string]*consistent.Consistent
}
// for alert_rule sharding
var HostDatasource int64 = 99999999
var DatasourceHashRing = DatasourceHashRingType{Rings: make(map[int64]*consistent.Consistent)}
var DatasourceHashRing = DatasourceHashRingType{Rings: make(map[string]*consistent.Consistent)}
func NewConsistentHashRing(replicas int32, nodes []string) *consistent.Consistent {
ret := consistent.New()
@@ -28,7 +28,7 @@ func NewConsistentHashRing(replicas int32, nodes []string) *consistent.Consisten
return ret
}
func RebuildConsistentHashRing(datasourceId int64, nodes []string) {
func RebuildConsistentHashRing(datasourceId string, nodes []string) {
r := consistent.New()
r.NumberOfReplicas = NodeReplicas
for i := 0; i < len(nodes); i++ {
@@ -36,10 +36,10 @@ func RebuildConsistentHashRing(datasourceId int64, nodes []string) {
}
DatasourceHashRing.Set(datasourceId, r)
logger.Infof("hash ring %d rebuild %+v", datasourceId, r.Members())
logger.Infof("hash ring %s rebuild %+v", datasourceId, r.Members())
}
func (chr *DatasourceHashRingType) GetNode(datasourceId int64, pk string) (string, error) {
func (chr *DatasourceHashRingType) GetNode(datasourceId string, pk string) (string, error) {
chr.Lock()
defer chr.Unlock()
_, exists := chr.Rings[datasourceId]
@@ -50,28 +50,34 @@ func (chr *DatasourceHashRingType) GetNode(datasourceId int64, pk string) (strin
return chr.Rings[datasourceId].Get(pk)
}
func (chr *DatasourceHashRingType) IsHit(datasourceId int64, pk string, currentNode string) bool {
func (chr *DatasourceHashRingType) IsHit(datasourceId string, pk string, currentNode string) bool {
node, err := chr.GetNode(datasourceId, pk)
if err != nil {
if !errors.Is(err, consistent.ErrEmptyCircle) {
logger.Errorf("rule id:%s is not work, datasource id:%d failed to get node from hashring:%v", pk, datasourceId, err)
logger.Errorf("rule id:%s is not work, datasource id:%s failed to get node from hashring:%v", pk, datasourceId, err)
}
return false
}
return node == currentNode
}
func (chr *DatasourceHashRingType) Set(datasourceId int64, r *consistent.Consistent) {
func (chr *DatasourceHashRingType) Set(datasourceId string, r *consistent.Consistent) {
chr.Lock()
defer chr.Unlock()
chr.Rings[datasourceId] = r
}
func (chr *DatasourceHashRingType) Clear() {
func (chr *DatasourceHashRingType) Del(datasourceId string) {
chr.Lock()
defer chr.Unlock()
delete(chr.Rings, datasourceId)
}
func (chr *DatasourceHashRingType) Clear(engineName string) {
chr.Lock()
defer chr.Unlock()
for id := range chr.Rings {
if id == HostDatasource {
if id == engineName {
continue
}
delete(chr.Rings, id)

View File

@@ -7,6 +7,7 @@ import (
"time"
"github.com/ccfos/nightingale/v6/alert/aconf"
"github.com/ccfos/nightingale/v6/alert/astats"
"github.com/ccfos/nightingale/v6/models"
"github.com/ccfos/nightingale/v6/pkg/ctx"
"github.com/ccfos/nightingale/v6/pkg/poster"
@@ -17,12 +18,14 @@ import (
type Naming struct {
ctx *ctx.Context
heartbeatConfig aconf.HeartbeatConfig
astats *astats.Stats
}
func NewNaming(ctx *ctx.Context, heartbeat aconf.HeartbeatConfig) *Naming {
func NewNaming(ctx *ctx.Context, heartbeat aconf.HeartbeatConfig, alertStats *astats.Stats) *Naming {
naming := &Naming{
ctx: ctx,
heartbeatConfig: heartbeat,
astats: alertStats,
}
naming.Heartbeats()
return naming
@@ -30,9 +33,11 @@ func NewNaming(ctx *ctx.Context, heartbeat aconf.HeartbeatConfig) *Naming {
// local servers
var localss map[int64]string
var localHostServers map[string]string
func (n *Naming) Heartbeats() error {
localss = make(map[int64]string)
localHostServers = make(map[string]string)
if err := n.heartbeat(); err != nil {
fmt.Println("failed to heartbeat:", err)
return err
@@ -86,30 +91,32 @@ func (n *Naming) heartbeat() error {
err := models.AlertingEngineHeartbeatWithCluster(n.ctx, n.heartbeatConfig.Endpoint, n.heartbeatConfig.EngineName, 0)
if err != nil {
logger.Warningf("heartbeat with cluster %s err:%v", "", err)
n.astats.CounterHeartbeatErrorTotal.WithLabelValues().Inc()
}
} else {
for i := 0; i < len(datasourceIds); i++ {
err := models.AlertingEngineHeartbeatWithCluster(n.ctx, n.heartbeatConfig.Endpoint, n.heartbeatConfig.EngineName, datasourceIds[i])
if err != nil {
logger.Warningf("heartbeat with cluster %d err:%v", datasourceIds[i], err)
n.astats.CounterHeartbeatErrorTotal.WithLabelValues().Inc()
}
}
}
if len(datasourceIds) == 0 {
DatasourceHashRing.Clear()
DatasourceHashRing.Clear(n.heartbeatConfig.EngineName)
for dsId := range localss {
if dsId == HostDatasource {
continue
}
delete(localss, dsId)
}
}
newDatasource := make(map[int64]struct{})
for i := 0; i < len(datasourceIds); i++ {
newDatasource[datasourceIds[i]] = struct{}{}
servers, err := n.ActiveServers(datasourceIds[i])
if err != nil {
logger.Warningf("hearbeat %d get active server err:%v", datasourceIds[i], err)
n.astats.CounterHeartbeatErrorTotal.WithLabelValues().Inc()
continue
}
@@ -121,36 +128,42 @@ func (n *Naming) heartbeat() error {
continue
}
RebuildConsistentHashRing(datasourceIds[i], servers)
RebuildConsistentHashRing(fmt.Sprintf("%d", datasourceIds[i]), servers)
localss[datasourceIds[i]] = newss
}
if n.ctx.IsCenter {
// 如果是中心节点,还需要处理 host 类型的告警规则host 类型告警规则,和数据源无关,想复用下数据源的 hash ring想用一个虚假的数据源 id 来处理
// if is center node, we need to handle host type alerting rules, host type alerting rules are not related to datasource, we want to reuse the hash ring of datasource, we want to use a fake datasource id to handle it
err := models.AlertingEngineHeartbeatWithCluster(n.ctx, n.heartbeatConfig.Endpoint, n.heartbeatConfig.EngineName, HostDatasource)
if err != nil {
logger.Warningf("heartbeat with cluster %s err:%v", "", err)
for dsId := range localss {
if _, exists := newDatasource[dsId]; !exists {
delete(localss, dsId)
DatasourceHashRing.Del(fmt.Sprintf("%d", dsId))
}
servers, err := n.ActiveServers(HostDatasource)
if err != nil {
logger.Warningf("hearbeat %d get active server err:%v", HostDatasource, err)
return nil
}
sort.Strings(servers)
newss := strings.Join(servers, " ")
oldss, exists := localss[HostDatasource]
if exists && oldss == newss {
return nil
}
RebuildConsistentHashRing(HostDatasource, servers)
localss[HostDatasource] = newss
}
// host 告警使用的是 hash ring
err = models.AlertingEngineHeartbeatWithCluster(n.ctx, n.heartbeatConfig.Endpoint, n.heartbeatConfig.EngineName, HostDatasource)
if err != nil {
logger.Warningf("heartbeat with cluster %s err:%v", "", err)
n.astats.CounterHeartbeatErrorTotal.WithLabelValues().Inc()
}
servers, err := n.ActiveServersByEngineName()
if err != nil {
logger.Warningf("hearbeat %d get active server err:%v", HostDatasource, err)
n.astats.CounterHeartbeatErrorTotal.WithLabelValues().Inc()
return nil
}
sort.Strings(servers)
newss := strings.Join(servers, " ")
oldss, exists := localHostServers[n.heartbeatConfig.EngineName]
if exists && oldss == newss {
return nil
}
RebuildConsistentHashRing(n.heartbeatConfig.EngineName, servers)
localHostServers[n.heartbeatConfig.EngineName] = newss
return nil
}

28
alert/naming/leader.go Normal file
View File

@@ -0,0 +1,28 @@
package naming
import (
"sort"
"github.com/toolkits/pkg/logger"
)
func (n *Naming) IamLeader() bool {
if !n.ctx.IsCenter {
return false
}
servers, err := n.ActiveServersByEngineName()
if err != nil {
logger.Errorf("failed to get active servers: %v", err)
return false
}
if len(servers) == 0 {
logger.Errorf("active servers empty")
return false
}
sort.Strings(servers)
return n.heartbeatConfig.Endpoint == servers[0]
}

View File

@@ -18,7 +18,6 @@ import (
"github.com/ccfos/nightingale/v6/models"
"github.com/ccfos/nightingale/v6/pkg/ctx"
"github.com/ccfos/nightingale/v6/pkg/tplx"
"github.com/ccfos/nightingale/v6/prom"
"github.com/toolkits/pkg/logger"
"github.com/toolkits/pkg/str"
)
@@ -49,6 +48,7 @@ type HandleEventFunc func(event *models.AlertCurEvent)
type Processor struct {
datasourceId int64
EngineName string
rule *models.AlertRule
fires *AlertCurEventMap
@@ -61,15 +61,15 @@ type Processor struct {
targetNote string
groupName string
atertRuleCache *memsto.AlertRuleCacheType
TargetCache *memsto.TargetCacheType
BusiGroupCache *memsto.BusiGroupCacheType
alertMuteCache *memsto.AlertMuteCacheType
datasourceCache *memsto.DatasourceCacheType
alertRuleCache *memsto.AlertRuleCacheType
TargetCache *memsto.TargetCacheType
TargetsOfAlertRuleCache *memsto.TargetsOfAlertRuleCacheType
BusiGroupCache *memsto.BusiGroupCacheType
alertMuteCache *memsto.AlertMuteCacheType
datasourceCache *memsto.DatasourceCacheType
promClients *prom.PromClientMap
ctx *ctx.Context
Stats *astats.Stats
ctx *ctx.Context
Stats *astats.Stats
HandleFireEventHook HandleEventFunc
HandleRecoverEventHook HandleEventFunc
@@ -93,19 +93,22 @@ func (p *Processor) Hash() string {
))
}
func NewProcessor(rule *models.AlertRule, datasourceId int64, atertRuleCache *memsto.AlertRuleCacheType, targetCache *memsto.TargetCacheType,
func NewProcessor(engineName string, rule *models.AlertRule, datasourceId int64, alertRuleCache *memsto.AlertRuleCacheType,
targetCache *memsto.TargetCacheType, targetsOfAlertRuleCache *memsto.TargetsOfAlertRuleCacheType,
busiGroupCache *memsto.BusiGroupCacheType, alertMuteCache *memsto.AlertMuteCacheType, datasourceCache *memsto.DatasourceCacheType, ctx *ctx.Context,
stats *astats.Stats) *Processor {
p := &Processor{
EngineName: engineName,
datasourceId: datasourceId,
rule: rule,
TargetCache: targetCache,
BusiGroupCache: busiGroupCache,
alertMuteCache: alertMuteCache,
atertRuleCache: atertRuleCache,
datasourceCache: datasourceCache,
TargetCache: targetCache,
TargetsOfAlertRuleCache: targetsOfAlertRuleCache,
BusiGroupCache: busiGroupCache,
alertMuteCache: alertMuteCache,
alertRuleCache: alertRuleCache,
datasourceCache: datasourceCache,
ctx: ctx,
Stats: stats,
@@ -124,11 +127,13 @@ func (p *Processor) Handle(anomalyPoints []common.AnomalyPoint, from string, inh
// 这些信息的修改是不会引起worker restart的但是确实会影响告警处理逻辑
// 所以这里直接从memsto.AlertRuleCache中获取并覆盖
p.inhibit = inhibit
cachedRule := p.atertRuleCache.Get(p.rule.Id)
cachedRule := p.alertRuleCache.Get(p.rule.Id)
if cachedRule == nil {
logger.Errorf("rule not found %+v", anomalyPoints)
p.Stats.CounterRuleEvalErrorTotal.WithLabelValues(fmt.Sprintf("%v", p.DatasourceId()), "handle_event").Inc()
return
}
p.rule = cachedRule
now := time.Now().Unix()
alertingKeys := map[string]struct{}{}
@@ -147,6 +152,8 @@ func (p *Processor) Handle(anomalyPoints []common.AnomalyPoint, from string, inh
}
if p.EventMuteHook(event) {
p.Stats.CounterMuteTotal.WithLabelValues(event.GroupName).Inc()
logger.Debugf("rule_eval:%s event:%v is muted by hook", p.Key(), event)
continue
}
@@ -158,7 +165,7 @@ func (p *Processor) Handle(anomalyPoints []common.AnomalyPoint, from string, inh
p.handleEvent(events)
}
p.HandleRecover(alertingKeys, now)
p.HandleRecover(alertingKeys, now, inhibit)
}
func (p *Processor) BuildEvent(anomalyPoint common.AnomalyPoint, from string, now int64) *models.AlertCurEvent {
@@ -172,6 +179,12 @@ func (p *Processor) BuildEvent(anomalyPoint common.AnomalyPoint, from string, no
}
event := p.rule.GenerateNewEvent(p.ctx)
bg := p.BusiGroupCache.GetByBusiGroupId(p.rule.GroupId)
if bg != nil {
event.GroupName = bg.Name
}
event.TriggerTime = anomalyPoint.Timestamp
event.TagsMap = p.tagsMap
event.DatasourceId = p.datasourceId
@@ -180,8 +193,8 @@ func (p *Processor) BuildEvent(anomalyPoint common.AnomalyPoint, from string, no
event.TargetIdent = p.target
event.TargetNote = p.targetNote
event.TriggerValue = anomalyPoint.ReadableValue()
event.TriggerValues = anomalyPoint.Values
event.TagsJSON = p.tagsArr
event.GroupName = p.groupName
event.Tags = strings.Join(p.tagsArr, ",,")
event.IsRecovered = false
event.Callbacks = p.rule.Callbacks
@@ -194,6 +207,11 @@ func (p *Processor) BuildEvent(anomalyPoint common.AnomalyPoint, from string, no
event.ExtraConfig = p.rule.ExtraConfigJSON
event.PromQl = anomalyPoint.Query
if event.TriggerValues != "" && strings.Count(event.TriggerValues, "$") > 1 {
// TriggerValues 有多个变量,将多个变量都放到 TriggerValue 中
event.TriggerValue = event.TriggerValues
}
if from == "inner" {
event.LastEvalTime = now
} else {
@@ -202,7 +220,7 @@ func (p *Processor) BuildEvent(anomalyPoint common.AnomalyPoint, from string, no
return event
}
func (p *Processor) HandleRecover(alertingKeys map[string]struct{}, now int64) {
func (p *Processor) HandleRecover(alertingKeys map[string]struct{}, now int64, inhibit bool) {
for _, hash := range p.pendings.Keys() {
if _, has := alertingKeys[hash]; has {
continue
@@ -210,19 +228,63 @@ func (p *Processor) HandleRecover(alertingKeys map[string]struct{}, now int64) {
p.pendings.Delete(hash)
}
hashArr := make([]string, 0, len(alertingKeys))
for hash := range p.fires.GetAll() {
if _, has := alertingKeys[hash]; has {
continue
}
p.RecoverSingle(hash, now, nil)
hashArr = append(hashArr, hash)
}
p.HandleRecoverEvent(hashArr, now, inhibit)
}
func (p *Processor) RecoverSingle(hash string, now int64, value *string) {
func (p *Processor) HandleRecoverEvent(hashArr []string, now int64, inhibit bool) {
cachedRule := p.rule
if cachedRule == nil {
return
}
if !inhibit {
for _, hash := range hashArr {
p.RecoverSingle(hash, now, nil)
}
return
}
eventMap := make(map[string]models.AlertCurEvent)
for _, hash := range hashArr {
event, has := p.fires.Get(hash)
if !has {
continue
}
e, exists := eventMap[event.Tags]
if !exists {
eventMap[event.Tags] = *event
continue
}
if e.Severity > event.Severity {
// hash 对应的恢复事件的被抑制了,把之前的事件删除
p.fires.Delete(e.Hash)
p.pendings.Delete(e.Hash)
eventMap[event.Tags] = *event
}
}
for _, event := range eventMap {
p.RecoverSingle(event.Hash, now, nil)
}
}
func (p *Processor) RecoverSingle(hash string, now int64, value *string, values ...string) {
cachedRule := p.rule
if cachedRule == nil {
return
}
event, has := p.fires.Get(hash)
if !has {
return
@@ -234,6 +296,9 @@ func (p *Processor) RecoverSingle(hash string, now int64, value *string) {
}
if value != nil {
event.TriggerValue = *value
if len(values) > 0 {
event.TriggerValues = values[0]
}
}
// 没查到触发阈值的vector姑且就认为这个vector的值恢复了
@@ -353,6 +418,7 @@ func (p *Processor) pushEventToQueue(e *models.AlertCurEvent) {
dispatch.LogEvent(e, "push_queue")
if !queue.EventQueue.PushFront(e) {
logger.Warningf("event_push_queue: queue is full, event:%+v", e)
p.Stats.CounterRuleEvalErrorTotal.WithLabelValues(fmt.Sprintf("%v", p.DatasourceId()), "push_event_queue").Inc()
}
}
@@ -362,12 +428,21 @@ func (p *Processor) RecoverAlertCurEventFromDb() {
curEvents, err := models.AlertCurEventGetByRuleIdAndDsId(p.ctx, p.rule.Id, p.datasourceId)
if err != nil {
logger.Errorf("recover event from db for rule:%s failed, err:%s", p.Key(), err)
p.Stats.CounterRuleEvalErrorTotal.WithLabelValues(fmt.Sprintf("%v", p.DatasourceId()), "get_recover_event").Inc()
p.fires = NewAlertCurEventMap(nil)
return
}
fireMap := make(map[string]*models.AlertCurEvent)
for _, event := range curEvents {
if event.Cate == models.HOST {
target, exists := p.TargetCache.Get(event.TargetIdent)
if exists && target.EngineName != p.EngineName && !(p.ctx.IsCenter && target.EngineName == "") {
// 如果是 host rule且 target 的 engineName 不是当前的 engineName 或者是中心机房 target EngineName 为空,就跳过
continue
}
}
event.DB2Mem()
fireMap[event.Hash] = event
}
@@ -445,6 +520,11 @@ func (p *Processor) mayHandleGroup() {
}
}
func (p *Processor) DeleteProcessEvent(hash string) {
p.fires.Delete(hash)
p.pendings.Delete(hash)
}
func labelMapToArr(m map[string]string) []string {
numLabels := len(m)

View File

@@ -71,7 +71,7 @@ func (rrc *RecordRuleContext) Start() {
}
func (rrc *RecordRuleContext) Eval() {
rrc.stats.CounterRecordEval.WithLabelValues().Inc()
rrc.stats.CounterRecordEval.WithLabelValues(fmt.Sprintf("%d", rrc.datasourceId)).Inc()
promql := strings.TrimSpace(rrc.rule.PromQl)
if promql == "" {
logger.Errorf("eval:%s promql is blank", rrc.Key())
@@ -80,26 +80,30 @@ func (rrc *RecordRuleContext) Eval() {
if rrc.promClients.IsNil(rrc.datasourceId) {
logger.Errorf("eval:%s reader client is nil", rrc.Key())
rrc.stats.CounterRecordEvalErrorTotal.WithLabelValues().Inc()
rrc.stats.CounterRecordEvalErrorTotal.WithLabelValues(fmt.Sprintf("%d", rrc.datasourceId)).Inc()
return
}
value, warnings, err := rrc.promClients.GetCli(rrc.datasourceId).Query(context.Background(), promql, time.Now())
if err != nil {
logger.Errorf("eval:%s promql:%s, error:%v", rrc.Key(), promql, err)
rrc.stats.CounterRecordEvalErrorTotal.WithLabelValues().Inc()
rrc.stats.CounterRecordEvalErrorTotal.WithLabelValues(fmt.Sprintf("%d", rrc.datasourceId)).Inc()
return
}
if len(warnings) > 0 {
logger.Errorf("eval:%s promql:%s, warnings:%v", rrc.Key(), promql, warnings)
rrc.stats.CounterRecordEvalErrorTotal.WithLabelValues().Inc()
rrc.stats.CounterRecordEvalErrorTotal.WithLabelValues(fmt.Sprintf("%d", rrc.datasourceId)).Inc()
return
}
ts := ConvertToTimeSeries(value, rrc.rule)
if len(ts) != 0 {
rrc.promClients.GetWriterCli(rrc.datasourceId).Write(ts)
err := rrc.promClients.GetWriterCli(rrc.datasourceId).Write(ts)
if err != nil {
logger.Errorf("eval:%s promql:%s, error:%v", rrc.Key(), promql, err)
rrc.stats.CounterRecordEvalErrorTotal.WithLabelValues(fmt.Sprintf("%d", rrc.datasourceId)).Inc()
}
}
}

View File

@@ -3,6 +3,7 @@ package record
import (
"context"
"fmt"
"strconv"
"time"
"github.com/ccfos/nightingale/v6/alert/aconf"
@@ -68,7 +69,7 @@ func (s *Scheduler) syncRecordRules() {
datasourceIds := s.promClients.Hit(rule.DatasourceIdsJson)
for _, dsId := range datasourceIds {
if !naming.DatasourceHashRing.IsHit(dsId, fmt.Sprintf("%d", rule.Id), s.aconf.Heartbeat.Endpoint) {
if !naming.DatasourceHashRing.IsHit(strconv.FormatInt(dsId, 10), fmt.Sprintf("%d", rule.Id), s.aconf.Heartbeat.Endpoint) {
continue
}

View File

@@ -2,6 +2,7 @@ package router
import (
"fmt"
"strconv"
"strings"
"time"
@@ -102,7 +103,7 @@ func (rt *Router) makeEvent(c *gin.Context) {
ginx.BindJSON(c, &events)
//now := time.Now().Unix()
for i := 0; i < len(events); i++ {
node, err := naming.DatasourceHashRing.GetNode(events[i].DatasourceId, fmt.Sprintf("%d", events[i].RuleId))
node, err := naming.DatasourceHashRing.GetNode(strconv.FormatInt(events[i].DatasourceId, 10), fmt.Sprintf("%d", events[i].RuleId))
if err != nil {
logger.Warningf("event:%+v get node err:%v", events[i], err)
ginx.Bomb(200, "event node not exists")

View File

@@ -1,222 +1,137 @@
package sender
import (
"encoding/json"
"strconv"
"html/template"
"net/url"
"strings"
"time"
"github.com/ccfos/nightingale/v6/alert/aconf"
"github.com/ccfos/nightingale/v6/alert/astats"
"github.com/ccfos/nightingale/v6/memsto"
"github.com/ccfos/nightingale/v6/models"
"github.com/ccfos/nightingale/v6/pkg/ctx"
"github.com/ccfos/nightingale/v6/pkg/ibex"
"github.com/ccfos/nightingale/v6/pkg/poster"
"github.com/toolkits/pkg/logger"
)
func SendCallbacks(ctx *ctx.Context, urls []string, event *models.AlertCurEvent, targetCache *memsto.TargetCacheType, userCache *memsto.UserCacheType,
ibexConf aconf.Ibex, stats *astats.Stats) {
for _, url := range urls {
if url == "" {
continue
}
type (
// CallBacker 进行回调的接口
CallBacker interface {
CallBack(ctx CallBackContext)
}
if strings.HasPrefix(url, "${ibex}") {
if !event.IsRecovered {
handleIbex(ctx, url, event, targetCache, userCache, ibexConf)
}
continue
}
// CallBackContext 回调时所需的上下文
CallBackContext struct {
Ctx *ctx.Context
CallBackURL string
Users []*models.User
Rule *models.AlertRule
Events []*models.AlertCurEvent
Stats *astats.Stats
}
if !(strings.HasPrefix(url, "http://") || strings.HasPrefix(url, "https://")) {
url = "http://" + url
}
DefaultCallBacker struct{}
)
stats.AlertNotifyTotal.WithLabelValues("rule_callback").Inc()
resp, code, err := poster.PostJSON(url, 5*time.Second, event, 3)
if err != nil {
logger.Errorf("event_callback_fail(rule_id=%d url=%s), resp: %s, err: %v, code: %d", event.RuleId, url, string(resp), err, code)
stats.AlertNotifyErrorTotal.WithLabelValues("rule_callback").Inc()
} else {
logger.Infof("event_callback_succ(rule_id=%d url=%s), resp: %s, code: %d", event.RuleId, url, string(resp), code)
}
func BuildCallBackContext(ctx *ctx.Context, callBackURL string, rule *models.AlertRule, events []*models.AlertCurEvent,
uids []int64, userCache *memsto.UserCacheType, stats *astats.Stats) CallBackContext {
users := userCache.GetByUserIds(uids)
return CallBackContext{
Ctx: ctx,
CallBackURL: callBackURL,
Rule: rule,
Events: events,
Users: users,
Stats: stats,
}
}
type TaskForm struct {
Title string `json:"title"`
Account string `json:"account"`
Batch int `json:"batch"`
Tolerance int `json:"tolerance"`
Timeout int `json:"timeout"`
Pause string `json:"pause"`
Script string `json:"script"`
Args string `json:"args"`
Stdin string `json:"stdin"`
Action string `json:"action"`
Creator string `json:"creator"`
Hosts []string `json:"hosts"`
func ExtractAtsParams(rawURL string) []string {
ans := make([]string, 0, 1)
parsedURL, err := url.Parse(rawURL)
if err != nil {
logger.Errorf("ExtractAtsParams(url=%s), err: %v", rawURL, err)
return ans
}
queryParams := parsedURL.Query()
atParam := queryParams.Get("ats")
if atParam == "" {
return ans
}
// Split the atParam by comma and return the result as a slice
return strings.Split(atParam, ",")
}
func NewCallBacker(
key string,
targetCache *memsto.TargetCacheType,
userCache *memsto.UserCacheType,
taskTplCache *memsto.TaskTplCache,
tpls map[string]*template.Template,
) CallBacker {
switch key {
case models.IbexDomain: // Distribute to Ibex
return &IbexCallBacker{
targetCache: targetCache,
userCache: userCache,
taskTplCache: taskTplCache,
}
case models.DefaultDomain: // default callback
return &DefaultCallBacker{}
case models.DingtalkDomain:
return &DingtalkSender{tpl: tpls[models.Dingtalk]}
case models.WecomDomain:
return &WecomSender{tpl: tpls[models.Wecom]}
case models.FeishuDomain:
return &FeishuSender{tpl: tpls[models.Feishu]}
case models.FeishuCardDomain:
return &FeishuCardSender{tpl: tpls[models.FeishuCard]}
//case models.Mm:
// return &MmSender{tpl: tpls[models.Mm]}
case models.TelegramDomain:
return &TelegramSender{tpl: tpls[models.Telegram]}
}
return nil
}
func (c *DefaultCallBacker) CallBack(ctx CallBackContext) {
if len(ctx.CallBackURL) == 0 || len(ctx.Events) == 0 {
return
}
event := ctx.Events[0]
ctx.Stats.AlertNotifyTotal.WithLabelValues("rule_callback").Inc()
resp, code, err := poster.PostJSON(ctx.CallBackURL, 5*time.Second, event, 3)
if err != nil {
logger.Errorf("event_callback_fail(rule_id=%d url=%s), resp: %s, err: %v, code: %d",
event.RuleId, ctx.CallBackURL, string(resp), err, code)
ctx.Stats.AlertNotifyErrorTotal.WithLabelValues("rule_callback").Inc()
} else {
logger.Infof("event_callback_succ(rule_id=%d url=%s), resp: %s, code: %d",
event.RuleId, ctx.CallBackURL, string(resp), code)
}
}
func doSend(url string, body interface{}, channel string, stats *astats.Stats) {
stats.AlertNotifyTotal.WithLabelValues(channel).Inc()
res, code, err := poster.PostJSON(url, time.Second*5, body, 3)
if err != nil {
logger.Errorf("%s_sender: result=fail url=%s code=%d error=%v req:%v response=%s", channel, url, code, err, body, string(res))
stats.AlertNotifyErrorTotal.WithLabelValues(channel).Inc()
} else {
logger.Infof("%s_sender: result=succ url=%s code=%d req:%v response=%s", channel, url, code, body, string(res))
}
}
type TaskCreateReply struct {
Err string `json:"err"`
Dat int64 `json:"dat"` // task.id
}
func handleIbex(ctx *ctx.Context, url string, event *models.AlertCurEvent, targetCache *memsto.TargetCacheType, userCache *memsto.UserCacheType, ibexConf aconf.Ibex) {
arr := strings.Split(url, "/")
var idstr string
var host string
if len(arr) > 1 {
idstr = arr[1]
}
if len(arr) > 2 {
host = arr[2]
}
id, err := strconv.ParseInt(idstr, 10, 64)
if err != nil {
logger.Errorf("event_callback_ibex: failed to parse url: %s", url)
return
}
if host == "" {
// 用户在callback url中没有传入host就从event中解析
host = event.TargetIdent
}
if host == "" {
logger.Error("event_callback_ibex: failed to get host")
return
}
tpl, err := models.TaskTplGetById(ctx, id)
if err != nil {
logger.Errorf("event_callback_ibex: failed to get tpl: %v", err)
return
}
if tpl == nil {
logger.Errorf("event_callback_ibex: no such tpl(%d)", id)
return
}
// check perm
// tpl.GroupId - host - account 三元组校验权限
can, err := canDoIbex(ctx, tpl.UpdateBy, tpl, host, targetCache, userCache)
if err != nil {
logger.Errorf("event_callback_ibex: check perm fail: %v", err)
return
}
if !can {
logger.Errorf("event_callback_ibex: user(%s) no permission", tpl.UpdateBy)
return
}
tagsMap := make(map[string]string)
for i := 0; i < len(event.TagsJSON); i++ {
pair := strings.TrimSpace(event.TagsJSON[i])
if pair == "" {
continue
}
arr := strings.Split(pair, "=")
if len(arr) != 2 {
continue
}
tagsMap[arr[0]] = arr[1]
}
// 附加告警级别 告警触发值标签
tagsMap["alert_severity"] = strconv.Itoa(event.Severity)
tagsMap["alert_trigger_value"] = event.TriggerValue
tags, err := json.Marshal(tagsMap)
if err != nil {
logger.Errorf("event_callback_ibex: failed to marshal tags to json: %v", tagsMap)
return
}
// call ibex
in := TaskForm{
Title: tpl.Title + " FH: " + host,
Account: tpl.Account,
Batch: tpl.Batch,
Tolerance: tpl.Tolerance,
Timeout: tpl.Timeout,
Pause: tpl.Pause,
Script: tpl.Script,
Args: tpl.Args,
Stdin: string(tags),
Action: "start",
Creator: tpl.UpdateBy,
Hosts: []string{host},
}
var res TaskCreateReply
err = ibex.New(
ibexConf.Address,
ibexConf.BasicAuthUser,
ibexConf.BasicAuthPass,
ibexConf.Timeout,
).
Path("/ibex/v1/tasks").
In(in).
Out(&res).
POST()
if err != nil {
logger.Errorf("event_callback_ibex: call ibex fail: %v", err)
return
}
if res.Err != "" {
logger.Errorf("event_callback_ibex: call ibex response error: %v", res.Err)
return
}
// write db
record := models.TaskRecord{
Id: res.Dat,
EventId: event.Id,
GroupId: tpl.GroupId,
IbexAddress: ibexConf.Address,
IbexAuthUser: ibexConf.BasicAuthUser,
IbexAuthPass: ibexConf.BasicAuthPass,
Title: in.Title,
Account: in.Account,
Batch: in.Batch,
Tolerance: in.Tolerance,
Timeout: in.Timeout,
Pause: in.Pause,
Script: in.Script,
Args: in.Args,
CreateAt: time.Now().Unix(),
CreateBy: in.Creator,
}
if err = record.Add(ctx); err != nil {
logger.Errorf("event_callback_ibex: persist task_record fail: %v", err)
}
}
func canDoIbex(ctx *ctx.Context, username string, tpl *models.TaskTpl, host string, targetCache *memsto.TargetCacheType, userCache *memsto.UserCacheType) (bool, error) {
user := userCache.GetByUsername(username)
if user != nil && user.IsAdmin() {
return true, nil
}
target, has := targetCache.Get(host)
if !has {
return false, nil
}
return target.GroupId == tpl.GroupId, nil
}

View File

@@ -1,15 +1,9 @@
package sender
import (
"github.com/ccfos/nightingale/v6/models"
"html/template"
"strings"
"time"
"github.com/ccfos/nightingale/v6/alert/astats"
"github.com/ccfos/nightingale/v6/models"
"github.com/ccfos/nightingale/v6/pkg/poster"
"github.com/toolkits/pkg/logger"
)
type dingtalkMarkdown struct {
@@ -28,6 +22,10 @@ type dingtalk struct {
At dingtalkAt `json:"at"`
}
var (
_ CallBacker = (*DingtalkSender)(nil)
)
type DingtalkSender struct {
tpl *template.Template
}
@@ -41,7 +39,7 @@ func (ds *DingtalkSender) Send(ctx MessageContext) {
if len(urls) == 0 {
return
}
message := BuildTplMessage(ds.tpl, ctx.Events)
message := BuildTplMessage(models.Dingtalk, ds.tpl, ctx.Events)
for _, url := range urls {
var body dingtalk
@@ -72,6 +70,37 @@ func (ds *DingtalkSender) Send(ctx MessageContext) {
}
}
func (ds *DingtalkSender) CallBack(ctx CallBackContext) {
if len(ctx.Events) == 0 || len(ctx.CallBackURL) == 0 {
return
}
body := dingtalk{
Msgtype: "markdown",
Markdown: dingtalkMarkdown{
Title: ctx.Events[0].RuleName,
},
}
ats := ExtractAtsParams(ctx.CallBackURL)
message := BuildTplMessage(models.Dingtalk, ds.tpl, ctx.Events)
if len(ats) > 0 {
body.Markdown.Text = message + "\n@" + strings.Join(ats, "@")
body.At = dingtalkAt{
AtMobiles: ats,
IsAtAll: false,
}
} else {
// NoAt in url
body.Markdown.Text = message
}
doSend(ctx.CallBackURL, body, models.Dingtalk, ctx.Stats)
ctx.Stats.AlertNotifyTotal.WithLabelValues("rule_callback").Inc()
}
// extract urls and ats from Users
func (ds *DingtalkSender) extract(users []*models.User) ([]string, []string) {
urls := make([]string, 0, len(users))
@@ -91,15 +120,3 @@ func (ds *DingtalkSender) extract(users []*models.User) ([]string, []string) {
}
return urls, ats
}
func doSend(url string, body interface{}, channel string, stats *astats.Stats) {
stats.AlertNotifyTotal.WithLabelValues(channel).Inc()
res, code, err := poster.PostJSON(url, time.Second*5, body, 3)
if err != nil {
logger.Errorf("%s_sender: result=fail url=%s code=%d error=%v response=%s", channel, url, code, err, string(res))
stats.AlertNotifyErrorTotal.WithLabelValues(channel).Inc()
} else {
logger.Infof("%s_sender: result=succ url=%s code=%d response=%s", channel, url, code, string(res))
}
}

View File

@@ -7,6 +7,7 @@ import (
"time"
"github.com/ccfos/nightingale/v6/alert/aconf"
"github.com/ccfos/nightingale/v6/memsto"
"github.com/ccfos/nightingale/v6/models"
"github.com/toolkits/pkg/logger"
@@ -30,11 +31,11 @@ func (es *EmailSender) Send(ctx MessageContext) {
var subject string
if es.subjectTpl != nil {
subject = BuildTplMessage(es.subjectTpl, []*models.AlertCurEvent{ctx.Events[0]})
subject = BuildTplMessage(models.Email, es.subjectTpl, []*models.AlertCurEvent{ctx.Events[0]})
} else {
subject = ctx.Events[0].RuleName
}
content := BuildTplMessage(es.contentTpl, ctx.Events)
content := BuildTplMessage(models.Email, es.contentTpl, ctx.Events)
es.WriteEmail(subject, content, tos)
ctx.Stats.AlertNotifyTotal.WithLabelValues(models.Email).Add(float64(len(tos)))
@@ -85,12 +86,18 @@ func (es *EmailSender) WriteEmail(subject, content string, tos []string) {
func dialSmtp(d *gomail.Dialer) gomail.SendCloser {
for {
if s, err := d.Dial(); err != nil {
logger.Errorf("email_sender: failed to dial smtp: %s", err)
select {
case <-mailQuit:
// Note that Sendcloser is not obtained below,
// and the outgoing signal (with configuration changes) exits the current dial
return nil
default:
if s, err := d.Dial(); err != nil {
logger.Errorf("email_sender: failed to dial smtp: %s", err)
} else {
return s
}
time.Sleep(time.Second)
continue
} else {
return s
}
}
}
@@ -98,14 +105,31 @@ func dialSmtp(d *gomail.Dialer) gomail.SendCloser {
var mailQuit = make(chan struct{})
func RestartEmailSender(smtp aconf.SMTPConfig) {
close(mailQuit)
mailQuit = make(chan struct{})
// Notify internal start exit
mailQuit <- struct{}{}
startEmailSender(smtp)
}
func InitEmailSender(smtp aconf.SMTPConfig) {
var smtpConfig aconf.SMTPConfig
func InitEmailSender(ncc *memsto.NotifyConfigCacheType) {
mailch = make(chan *gomail.Message, 100000)
startEmailSender(smtp)
go updateSmtp(ncc)
smtpConfig = ncc.GetSMTP()
startEmailSender(smtpConfig)
}
func updateSmtp(ncc *memsto.NotifyConfigCacheType) {
for {
time.Sleep(1 * time.Minute)
smtp := ncc.GetSMTP()
if smtpConfig.Host != smtp.Host || smtpConfig.Batch != smtp.Batch || smtpConfig.From != smtp.From ||
smtpConfig.Pass != smtp.Pass || smtpConfig.User != smtp.User || smtpConfig.Port != smtp.Port ||
smtpConfig.InsecureSkipVerify != smtp.InsecureSkipVerify { //diff
smtpConfig = smtp
RestartEmailSender(smtp)
}
}
}
func startEmailSender(smtp aconf.SMTPConfig) {
@@ -135,6 +159,12 @@ func startEmailSender(smtp aconf.SMTPConfig) {
if !open {
s = dialSmtp(d)
if s == nil {
// Indicates that the dialing failed and exited the current goroutine directly,
// but put the Message back in the mailch
mailch <- m
return
}
open = true
}
if err := gomail.Send(s, m); err != nil {
@@ -146,6 +176,12 @@ func startEmailSender(smtp aconf.SMTPConfig) {
}
s = dialSmtp(d)
if s == nil {
// Indicates that the dialing failed and exited the current goroutine directly,
// but put the Message back in the mailch
mailch <- m
return
}
open = true
if err := gomail.Send(s, m); err != nil {

View File

@@ -1,6 +1,7 @@
package sender
import (
"fmt"
"html/template"
"strings"
@@ -22,16 +23,47 @@ type feishu struct {
At feishuAt `json:"at"`
}
var (
_ CallBacker = (*FeishuSender)(nil)
)
type FeishuSender struct {
tpl *template.Template
}
func (fs *FeishuSender) CallBack(ctx CallBackContext) {
if len(ctx.Events) == 0 || len(ctx.CallBackURL) == 0 {
return
}
ats := ExtractAtsParams(ctx.CallBackURL)
message := BuildTplMessage(models.Feishu, fs.tpl, ctx.Events)
if len(ats) > 0 {
atTags := ""
for _, at := range ats {
atTags += fmt.Sprintf("<at user_id=\"%s\"></at> ", at)
}
message = atTags + message
}
body := feishu{
Msgtype: "text",
Content: feishuContent{
Text: message,
},
}
doSend(ctx.CallBackURL, body, models.Feishu, ctx.Stats)
ctx.Stats.AlertNotifyTotal.WithLabelValues("rule_callback").Inc()
}
func (fs *FeishuSender) Send(ctx MessageContext) {
if len(ctx.Users) == 0 || len(ctx.Events) == 0 {
return
}
urls, ats := fs.extract(ctx.Users)
message := BuildTplMessage(fs.tpl, ctx.Events)
message := BuildTplMessage(models.Feishu, fs.tpl, ctx.Events)
for _, url := range urls {
body := feishu{
Msgtype: "text",

View File

@@ -3,6 +3,7 @@ package sender
import (
"fmt"
"html/template"
"net/url"
"strings"
"github.com/ccfos/nightingale/v6/models"
@@ -91,12 +92,43 @@ var (
}
)
func (fs *FeishuCardSender) CallBack(ctx CallBackContext) {
if len(ctx.Events) == 0 || len(ctx.CallBackURL) == 0 {
return
}
message := BuildTplMessage(models.FeishuCard, fs.tpl, ctx.Events)
color := "red"
lowerUnicode := strings.ToLower(message)
if strings.Count(lowerUnicode, Recovered) > 0 && strings.Count(lowerUnicode, Triggered) > 0 {
color = "orange"
} else if strings.Count(lowerUnicode, Recovered) > 0 {
color = "green"
}
SendTitle := fmt.Sprintf("🔔 %s", ctx.Events[0].RuleName)
body.Card.Header.Title.Content = SendTitle
body.Card.Header.Template = color
body.Card.Elements[0].Text.Content = message
body.Card.Elements[2].Elements[0].Content = SendTitle
// This is to be compatible with the feishucard interface, if with query string parameters, the request will fail
// Remove query parameters from the URL,
parsedURL, err := url.Parse(ctx.CallBackURL)
if err != nil {
return
}
parsedURL.RawQuery = ""
doSend(parsedURL.String(), body, models.FeishuCard, ctx.Stats)
}
func (fs *FeishuCardSender) Send(ctx MessageContext) {
if len(ctx.Users) == 0 || len(ctx.Events) == 0 {
return
}
urls, _ := fs.extract(ctx.Users)
message := BuildTplMessage(fs.tpl, ctx.Events)
message := BuildTplMessage(models.FeishuCard, fs.tpl, ctx.Events)
color := "red"
lowerUnicode := strings.ToLower(message)
if strings.Count(lowerUnicode, Recovered) > 0 && strings.Count(lowerUnicode, Triggered) > 0 {

265
alert/sender/ibex.go Normal file
View File

@@ -0,0 +1,265 @@
// @Author: Ciusyan 6/5/24
package sender
import (
"encoding/json"
"fmt"
"strconv"
"strings"
"time"
"github.com/ccfos/nightingale/v6/memsto"
"github.com/ccfos/nightingale/v6/models"
"github.com/ccfos/nightingale/v6/pkg/ctx"
imodels "github.com/flashcatcloud/ibex/src/models"
"github.com/flashcatcloud/ibex/src/storage"
"github.com/toolkits/pkg/logger"
)
var (
_ CallBacker = (*IbexCallBacker)(nil)
)
type IbexCallBacker struct {
targetCache *memsto.TargetCacheType
userCache *memsto.UserCacheType
taskTplCache *memsto.TaskTplCache
}
func (c *IbexCallBacker) CallBack(ctx CallBackContext) {
if len(ctx.CallBackURL) == 0 || len(ctx.Events) == 0 {
return
}
event := ctx.Events[0]
if event.IsRecovered {
return
}
c.handleIbex(ctx.Ctx, ctx.CallBackURL, event)
}
func (c *IbexCallBacker) handleIbex(ctx *ctx.Context, url string, event *models.AlertCurEvent) {
if imodels.DB() == nil && ctx.IsCenter {
logger.Warning("event_callback_ibex: db is nil")
return
}
arr := strings.Split(url, "/")
var idstr string
var host string
if len(arr) > 1 {
idstr = arr[1]
}
if len(arr) > 2 {
host = arr[2]
}
id, err := strconv.ParseInt(idstr, 10, 64)
if err != nil {
logger.Errorf("event_callback_ibex: failed to parse url: %s", url)
return
}
if host == "" {
// 用户在callback url中没有传入host就从event中解析
host = event.TargetIdent
}
if host == "" {
logger.Error("event_callback_ibex: failed to get host")
return
}
tpl := c.taskTplCache.Get(id)
if tpl == nil {
logger.Errorf("event_callback_ibex: no such tpl(%d)", id)
return
}
// check perm
// tpl.GroupId - host - account 三元组校验权限
can, err := canDoIbex(tpl.UpdateBy, tpl, host, c.targetCache, c.userCache)
if err != nil {
logger.Errorf("event_callback_ibex: check perm fail: %v", err)
return
}
if !can {
logger.Errorf("event_callback_ibex: user(%s) no permission", tpl.UpdateBy)
return
}
tagsMap := make(map[string]string)
for i := 0; i < len(event.TagsJSON); i++ {
pair := strings.TrimSpace(event.TagsJSON[i])
if pair == "" {
continue
}
arr := strings.Split(pair, "=")
if len(arr) != 2 {
continue
}
tagsMap[arr[0]] = arr[1]
}
// 附加告警级别 告警触发值标签
tagsMap["alert_severity"] = strconv.Itoa(event.Severity)
tagsMap["alert_trigger_value"] = event.TriggerValue
tags, err := json.Marshal(tagsMap)
if err != nil {
logger.Errorf("event_callback_ibex: failed to marshal tags to json: %v", tagsMap)
return
}
// call ibex
in := models.TaskForm{
Title: tpl.Title + " FH: " + host,
Account: tpl.Account,
Batch: tpl.Batch,
Tolerance: tpl.Tolerance,
Timeout: tpl.Timeout,
Pause: tpl.Pause,
Script: tpl.Script,
Args: tpl.Args,
Stdin: string(tags),
Action: "start",
Creator: tpl.UpdateBy,
Hosts: []string{host},
AlertTriggered: true,
}
id, err = TaskAdd(in, tpl.UpdateBy, ctx.IsCenter)
if err != nil {
logger.Errorf("event_callback_ibex: call ibex fail: %v", err)
return
}
// write db
record := models.TaskRecord{
Id: id,
EventId: event.Id,
GroupId: tpl.GroupId,
Title: in.Title,
Account: in.Account,
Batch: in.Batch,
Tolerance: in.Tolerance,
Timeout: in.Timeout,
Pause: in.Pause,
Script: in.Script,
Args: in.Args,
CreateAt: time.Now().Unix(),
CreateBy: in.Creator,
}
if err = record.Add(ctx); err != nil {
logger.Errorf("event_callback_ibex: persist task_record fail: %v", err)
}
}
func canDoIbex(username string, tpl *models.TaskTpl, host string, targetCache *memsto.TargetCacheType, userCache *memsto.UserCacheType) (bool, error) {
user := userCache.GetByUsername(username)
if user != nil && user.IsAdmin() {
return true, nil
}
target, has := targetCache.Get(host)
if !has {
return false, nil
}
return target.GroupId == tpl.GroupId, nil
}
func TaskAdd(f models.TaskForm, authUser string, isCenter bool) (int64, error) {
hosts := cleanHosts(f.Hosts)
if len(hosts) == 0 {
return 0, fmt.Errorf("arg(hosts) empty")
}
taskMeta := &imodels.TaskMeta{
Title: f.Title,
Account: f.Account,
Batch: f.Batch,
Tolerance: f.Tolerance,
Timeout: f.Timeout,
Pause: f.Pause,
Script: f.Script,
Args: f.Args,
Stdin: f.Stdin,
Creator: f.Creator,
}
err := taskMeta.CleanFields()
if err != nil {
return 0, err
}
taskMeta.HandleFH(hosts[0])
// 任务类型分为"告警规则触发"和"n9e center用户下发"两种;
// 边缘机房"告警规则触发"的任务不需要规划并且它可能是失联的无法使用db资源所以放入redis缓存中直接下发给agentd执行
if !isCenter && f.AlertTriggered {
if err := taskMeta.Create(); err != nil {
// 当网络不连通时生成唯一的id防止边缘机房中不同任务的id相同
// 方法是redis自增id去防止同一个机房的不同n9e edge生成的id相同
// 但没法防止不同边缘机房生成同样的id所以生成id的数据不会上报存入数据库只用于闭环执行。
taskMeta.Id, err = storage.IdGet()
if err != nil {
return 0, err
}
}
taskHost := imodels.TaskHost{
Id: taskMeta.Id,
Host: hosts[0],
Status: "running",
}
if err = taskHost.Create(); err != nil {
logger.Warningf("task_add_fail: authUser=%s title=%s err=%s", authUser, taskMeta.Title, err.Error())
}
// 缓存任务元信息和待下发的任务
err = taskMeta.Cache(hosts[0])
if err != nil {
return 0, err
}
} else {
// 如果是中心机房,还是保持之前的逻辑
err = taskMeta.Save(hosts, f.Action)
if err != nil {
return 0, err
}
}
logger.Infof("task_add_succ: authUser=%s title=%s", authUser, taskMeta.Title)
return taskMeta.Id, nil
}
func cleanHosts(formHosts []string) []string {
cnt := len(formHosts)
arr := make([]string, 0, cnt)
for i := 0; i < cnt; i++ {
item := strings.TrimSpace(formHosts[i])
if item == "" {
continue
}
if strings.HasPrefix(item, "#") {
continue
}
arr = append(arr, item)
}
return arr
}

View File

@@ -36,7 +36,7 @@ func (ms *MmSender) Send(ctx MessageContext) {
if len(urls) == 0 {
return
}
message := BuildTplMessage(ms.tpl, ctx.Events)
message := BuildTplMessage(models.Mm, ms.tpl, ctx.Events)
SendMM(MatterMostMessage{
Text: message,
@@ -45,6 +45,21 @@ func (ms *MmSender) Send(ctx MessageContext) {
})
}
func (ms *MmSender) CallBack(ctx CallBackContext) {
if len(ctx.Events) == 0 || len(ctx.CallBackURL) == 0 {
return
}
message := BuildTplMessage(models.Mm, ms.tpl, ctx.Events)
SendMM(MatterMostMessage{
Text: message,
Tokens: []string{ctx.CallBackURL},
Stats: ctx.Stats,
})
ctx.Stats.AlertNotifyTotal.WithLabelValues("rule_callback").Inc()
}
func (ms *MmSender) extract(users []*models.User) []string {
tokens := make([]string, 0, len(users))
for _, user := range users {

View File

@@ -55,11 +55,11 @@ func BuildMessageContext(rule *models.AlertRule, events []*models.AlertCurEvent,
}
}
type BuildTplMessageFunc func(tpl *template.Template, events []*models.AlertCurEvent) string
type BuildTplMessageFunc func(channel string, tpl *template.Template, events []*models.AlertCurEvent) string
var BuildTplMessage BuildTplMessageFunc = buildTplMessage
func buildTplMessage(tpl *template.Template, events []*models.AlertCurEvent) string {
func buildTplMessage(channel string, tpl *template.Template, events []*models.AlertCurEvent) string {
if tpl == nil {
return "tpl for current sender not found, please check configuration"
}

View File

@@ -21,16 +21,35 @@ type telegram struct {
Text string `json:"text"`
}
var (
_ CallBacker = (*TelegramSender)(nil)
)
type TelegramSender struct {
tpl *template.Template
}
func (ts *TelegramSender) CallBack(ctx CallBackContext) {
if len(ctx.Events) == 0 || len(ctx.CallBackURL) == 0 {
return
}
message := BuildTplMessage(models.Telegram, ts.tpl, ctx.Events)
SendTelegram(TelegramMessage{
Text: message,
Tokens: []string{ctx.CallBackURL},
Stats: ctx.Stats,
})
ctx.Stats.AlertNotifyTotal.WithLabelValues("rule_callback").Inc()
}
func (ts *TelegramSender) Send(ctx MessageContext) {
if len(ctx.Users) == 0 || len(ctx.Events) == 0 {
return
}
tokens := ts.extract(ctx.Users)
message := BuildTplMessage(ts.tpl, ctx.Events)
message := BuildTplMessage(models.Telegram, ts.tpl, ctx.Events)
SendTelegram(TelegramMessage{
Text: message,

View File

@@ -38,7 +38,7 @@ func SendWebhooks(webhooks []*models.Webhook, event *models.AlertCurEvent, stats
if len(conf.Headers) > 0 && len(conf.Headers)%2 == 0 {
for i := 0; i < len(conf.Headers); i += 2 {
if conf.Headers[i] == "host" {
if conf.Headers[i] == "host" || conf.Headers[i] == "Host" {
req.Host = conf.Headers[i+1]
continue
}
@@ -66,6 +66,6 @@ func SendWebhooks(webhooks []*models.Webhook, event *models.AlertCurEvent, stats
body, _ = io.ReadAll(resp.Body)
}
logger.Debugf("event_webhook_succ, url: %s, response code: %d, body: %s", conf.Url, resp.StatusCode, string(body))
logger.Debugf("event_webhook_succ, url: %s, response code: %d, body: %s event:%+v", conf.Url, resp.StatusCode, string(body), event)
}
}

View File

@@ -16,16 +16,37 @@ type wecom struct {
Markdown wecomMarkdown `json:"markdown"`
}
var (
_ CallBacker = (*WecomSender)(nil)
)
type WecomSender struct {
tpl *template.Template
}
func (ws *WecomSender) CallBack(ctx CallBackContext) {
if len(ctx.Events) == 0 || len(ctx.CallBackURL) == 0 {
return
}
message := BuildTplMessage(models.Wecom, ws.tpl, ctx.Events)
body := wecom{
Msgtype: "markdown",
Markdown: wecomMarkdown{
Content: message,
},
}
doSend(ctx.CallBackURL, body, models.Wecom, ctx.Stats)
ctx.Stats.AlertNotifyTotal.WithLabelValues("rule_callback").Inc()
}
func (ws *WecomSender) Send(ctx MessageContext) {
if len(ctx.Users) == 0 || len(ctx.Events) == 0 {
return
}
urls := ws.extract(ctx.Users)
message := BuildTplMessage(ws.tpl, ctx.Events)
message := BuildTplMessage(models.Wecom, ws.tpl, ctx.Events)
for _, url := range urls {
body := wecom{
Msgtype: "markdown",

View File

@@ -1,5 +1,7 @@
package cconf
import "time"
type Center struct {
Plugins []Plugin
MetricsYamlFile string
@@ -9,6 +11,8 @@ type Center struct {
MetricDesc MetricDescType
AnonymousAccess AnonymousAccess
UseFileAssets bool
FlashDuty FlashDuty
EventHistoryGroupView bool
}
type Plugin struct {
@@ -18,6 +22,12 @@ type Plugin struct {
TypeName string `json:"plugin_type_name"`
}
type FlashDuty struct {
Api string
Headers map[string]string
Timeout time.Duration
}
type AnonymousAccess struct {
PromQuerier bool
AlertDetail bool

View File

@@ -28,6 +28,14 @@ func LoadOpsYaml(configDir string, opsYamlFile string) error {
if !file.IsExist(fp) {
return nil
}
hash, _ := file.MD5(fp)
if hash == "2f91a9ed265cf2024e266dc1d538ee77" {
// ops.yaml 是老的默认文件,删除
file.Remove(fp)
return nil
}
return file.ReadYaml(fp, &Operations)
}
@@ -68,7 +76,8 @@ ops:
- "/dashboards/add"
- "/dashboards/put"
- "/dashboards/del"
- "/dashboards-built-in"
- "/embedded-dashboards/put"
- "/embedded-dashboards"
- name: alert
cname: 告警规则
@@ -77,7 +86,7 @@ ops:
- "/alert-rules/add"
- "/alert-rules/put"
- "/alert-rules/del"
- "/alert-rules-built-in"
- name: alert-mutes
cname: 告警静默管理
ops:
@@ -128,6 +137,7 @@ ops:
- "/targets/add"
- "/targets/put"
- "/targets/del"
- "/targets/bind"
- name: job
cname: 任务管理
@@ -139,6 +149,7 @@ ops:
- "/job-tasks"
- "/job-tasks/add"
- "/job-tasks/put"
- "/ibex-settings"
- name: user
cname: 用户管理
@@ -149,6 +160,11 @@ ops:
- "/user-groups/put"
- "/user-groups/del"
- name: permissions
cname: 权限管理
ops:
- "/permissions"
- name: busi-groups
cname: 业务分组管理
ops:
@@ -157,9 +173,26 @@ ops:
- "/busi-groups/put"
- "/busi-groups/del"
- name: builtin-metrics
cname: 指标视图
ops:
- "/metrics-built-in"
- "/builtin-metrics/add"
- "/builtin-metrics/put"
- "/builtin-metrics/del"
- name: built-in-components
cname: 模版中心
ops:
- "/built-in-components"
- "/built-in-components/add"
- "/built-in-components/put"
- "/built-in-components/del"
- name: system
cname: 系统信息
ops:
- "/help/variable-configs"
- "/help/version"
- "/help/servers"
- "/help/source"
@@ -167,5 +200,6 @@ ops:
- "/help/notification-tpls"
- "/help/notification-settings"
- "/help/migrate"
- "/site-settings"
`
)

View File

@@ -7,10 +7,13 @@ import (
"github.com/ccfos/nightingale/v6/alert"
"github.com/ccfos/nightingale/v6/alert/astats"
"github.com/ccfos/nightingale/v6/alert/process"
alertrt "github.com/ccfos/nightingale/v6/alert/router"
"github.com/ccfos/nightingale/v6/center/cconf"
"github.com/ccfos/nightingale/v6/center/cconf/rsa"
"github.com/ccfos/nightingale/v6/center/cstats"
"github.com/ccfos/nightingale/v6/center/integration"
"github.com/ccfos/nightingale/v6/center/metas"
centerrt "github.com/ccfos/nightingale/v6/center/router"
"github.com/ccfos/nightingale/v6/center/sso"
"github.com/ccfos/nightingale/v6/conf"
"github.com/ccfos/nightingale/v6/dumper"
@@ -18,19 +21,19 @@ import (
"github.com/ccfos/nightingale/v6/models"
"github.com/ccfos/nightingale/v6/models/migrate"
"github.com/ccfos/nightingale/v6/pkg/ctx"
"github.com/ccfos/nightingale/v6/pkg/flashduty"
"github.com/ccfos/nightingale/v6/pkg/httpx"
"github.com/ccfos/nightingale/v6/pkg/i18nx"
"github.com/ccfos/nightingale/v6/pkg/logx"
"github.com/ccfos/nightingale/v6/pkg/version"
"github.com/ccfos/nightingale/v6/prom"
"github.com/ccfos/nightingale/v6/pushgw/idents"
pushgwrt "github.com/ccfos/nightingale/v6/pushgw/router"
"github.com/ccfos/nightingale/v6/pushgw/writer"
"github.com/ccfos/nightingale/v6/storage"
"github.com/ccfos/nightingale/v6/tdengine"
alertrt "github.com/ccfos/nightingale/v6/alert/router"
centerrt "github.com/ccfos/nightingale/v6/center/router"
pushgwrt "github.com/ccfos/nightingale/v6/pushgw/router"
"github.com/flashcatcloud/ibex/src/cmd/ibex"
)
func Initialize(configDir string, cryptoKey string) (func(), error) {
@@ -51,6 +54,7 @@ func Initialize(configDir string, cryptoKey string) (func(), error) {
i18nx.Init(configDir)
cstats.Init()
flashduty.Init(config.Center.FlashDuty)
db, err := storage.New(config.DB)
if err != nil {
@@ -60,11 +64,14 @@ func Initialize(configDir string, cryptoKey string) (func(), error) {
migrate.Migrate(db)
models.InitRoot(ctx)
config.HTTP.JWTAuth.SigningKey = models.InitJWTSigningKey(ctx)
err = rsa.InitRSAConfig(ctx, &config.HTTP.RSA)
if err != nil {
return nil, err
}
integration.Init(ctx, config.Center.BuiltinIntegrationsDir)
var redis storage.Redis
redis, err = storage.NewRedis(config.Redis)
if err != nil {
@@ -72,13 +79,11 @@ func Initialize(configDir string, cryptoKey string) (func(), error) {
}
metas := metas.New(redis)
idents := idents.New(ctx)
idents := idents.New(ctx, redis)
syncStats := memsto.NewSyncStats()
alertStats := astats.NewSyncStats()
sso := sso.Init(config.Center, ctx)
configCache := memsto.NewConfigCache(ctx, syncStats, config.HTTP.RSA.RSAPrivateKey, config.HTTP.RSA.RSAPassWord)
busiGroupCache := memsto.NewBusiGroupCache(ctx, syncStats)
targetCache := memsto.NewTargetCache(ctx, syncStats, redis)
@@ -88,21 +93,23 @@ func Initialize(configDir string, cryptoKey string) (func(), error) {
notifyConfigCache := memsto.NewNotifyConfigCache(ctx, configCache)
userCache := memsto.NewUserCache(ctx, syncStats)
userGroupCache := memsto.NewUserGroupCache(ctx, syncStats)
taskTplCache := memsto.NewTaskTplCache(ctx)
promClients := prom.NewPromClient(ctx, config.Alert.Heartbeat)
sso := sso.Init(config.Center, ctx, configCache)
promClients := prom.NewPromClient(ctx)
tdengineClients := tdengine.NewTdengineClient(ctx, config.Alert.Heartbeat)
externalProcessors := process.NewExternalProcessors()
alert.Start(config.Alert, config.Pushgw, syncStats, alertStats, externalProcessors, targetCache, busiGroupCache, alertMuteCache, alertRuleCache, notifyConfigCache, dsCache, ctx, promClients, tdengineClients, userCache, userGroupCache)
alert.Start(config.Alert, config.Pushgw, syncStats, alertStats, externalProcessors, targetCache, busiGroupCache, alertMuteCache, alertRuleCache, notifyConfigCache, taskTplCache, dsCache, ctx, promClients, tdengineClients, userCache, userGroupCache)
writers := writer.NewWriters(config.Pushgw)
go version.GetGithubVersion()
alertrtRouter := alertrt.New(config.HTTP, config.Alert, alertMuteCache, targetCache, busiGroupCache, alertStats, ctx, externalProcessors)
centerRouter := centerrt.New(config.HTTP, config.Center, cconf.Operations, dsCache, notifyConfigCache, promClients, tdengineClients,
centerRouter := centerrt.New(config.HTTP, config.Center, config.Alert, cconf.Operations, dsCache, notifyConfigCache, promClients, tdengineClients,
redis, sso, ctx, metas, idents, targetCache, userCache, userGroupCache)
pushgwRouter := pushgwrt.New(config.HTTP, config.Pushgw, targetCache, busiGroupCache, idents, writers, ctx)
pushgwRouter := pushgwrt.New(config.HTTP, config.Pushgw, config.Alert, targetCache, busiGroupCache, idents, metas, writers, ctx)
r := httpx.GinEngine(config.Global.RunMode, config.HTTP)
@@ -111,6 +118,11 @@ func Initialize(configDir string, cryptoKey string) (func(), error) {
pushgwRouter.Config(r)
dumper.ConfigRouter(r)
if config.Ibex.Enable {
migrate.MigrateIbexTables(db)
ibex.ServerStart(true, db, redis, config.HTTP.APIForService.BasicAuth, config.Alert.Heartbeat, &config.CenterApi, r, centerRouter, config.Ibex, config.HTTP.Port)
}
httpClean := httpx.Init(config.HTTP, r)
return func() {

365
center/integration/init.go Normal file
View File

@@ -0,0 +1,365 @@
package integration
import (
"encoding/json"
"path"
"strings"
"time"
"github.com/ccfos/nightingale/v6/models"
"github.com/ccfos/nightingale/v6/pkg/ctx"
"github.com/toolkits/pkg/file"
"github.com/toolkits/pkg/logger"
"github.com/toolkits/pkg/runner"
)
const SYSTEM = "system"
func Init(ctx *ctx.Context, builtinIntegrationsDir string) {
fp := builtinIntegrationsDir
if fp == "" {
fp = path.Join(runner.Cwd, "integrations")
}
// var fileList []string
dirList, err := file.DirsUnder(fp)
if err != nil {
logger.Warning("read builtin component dir fail ", err)
return
}
for _, dir := range dirList {
// components icon
componentDir := fp + "/" + dir
component := models.BuiltinComponent{
Ident: dir,
}
// get logo name
// /api/n9e/integrations/icon/AliYun/aliyun.png
files, err := file.FilesUnder(componentDir + "/icon")
if err == nil && len(files) > 0 {
component.Logo = "/api/n9e/integrations/icon/" + component.Ident + "/" + files[0]
} else if err != nil {
logger.Warningf("read builtin component icon dir fail %s %v", component.Ident, err)
}
// get description
files, err = file.FilesUnder(componentDir + "/markdown")
if err == nil && len(files) > 0 {
var readmeFile string
for _, file := range files {
if strings.HasSuffix(strings.ToLower(file), "md") {
readmeFile = componentDir + "/markdown/" + file
break
}
}
if readmeFile != "" {
component.Readme, _ = file.ReadString(readmeFile)
}
} else if err != nil {
logger.Warningf("read builtin component markdown dir fail %s %v", component.Ident, err)
}
exists, _ := models.BuiltinComponentExists(ctx, &component)
if !exists {
err = component.Add(ctx, SYSTEM)
if err != nil {
logger.Warning("add builtin component fail ", component, err)
continue
}
} else {
old, err := models.BuiltinComponentGet(ctx, "ident = ?", component.Ident)
if err != nil {
logger.Warning("get builtin component fail ", component, err)
continue
}
if old == nil {
logger.Warning("get builtin component nil ", component)
continue
}
if old.UpdatedBy == SYSTEM {
now := time.Now().Unix()
old.CreatedAt = now
old.UpdatedAt = now
old.Readme = component.Readme
old.UpdatedBy = SYSTEM
err = models.DB(ctx).Model(old).Select("*").Updates(old).Error
if err != nil {
logger.Warning("update builtin component fail ", old, err)
}
}
}
// delete uuid is emtpy
err = models.DB(ctx).Exec("delete from builtin_payloads where uuid = 0 and type != 'collect' and (updated_by = 'system' or updated_by = '')").Error
if err != nil {
logger.Warning("delete builtin payloads fail ", err)
}
// delete builtin metrics uuid is emtpy
err = models.DB(ctx).Exec("delete from builtin_metrics where uuid = 0 and (updated_by = 'system' or updated_by = '')").Error
if err != nil {
logger.Warning("delete builtin metrics fail ", err)
}
// alerts
files, err = file.FilesUnder(componentDir + "/alerts")
if err == nil && len(files) > 0 {
for _, f := range files {
fp := componentDir + "/alerts/" + f
bs, err := file.ReadBytes(fp)
if err != nil {
logger.Warning("read builtin component alerts file fail ", f, err)
continue
}
alerts := []models.AlertRule{}
err = json.Unmarshal(bs, &alerts)
if err != nil {
logger.Warning("parse builtin component alerts file fail ", f, err)
continue
}
newAlerts := []models.AlertRule{}
writeAlertFileFlag := false
for _, alert := range alerts {
if alert.UUID == 0 {
writeAlertFileFlag = true
alert.UUID = time.Now().UnixNano()
}
newAlerts = append(newAlerts, alert)
content, err := json.Marshal(alert)
if err != nil {
logger.Warning("marshal builtin alert fail ", alert, err)
continue
}
cate := strings.Replace(f, ".json", "", -1)
builtinAlert := models.BuiltinPayload{
Component: component.Ident,
Type: "alert",
Cate: cate,
Name: alert.Name,
Tags: alert.AppendTags,
Content: string(content),
UUID: alert.UUID,
}
old, err := models.BuiltinPayloadGet(ctx, "uuid = ?", alert.UUID)
if err != nil {
logger.Warning("get builtin alert fail ", builtinAlert, err)
continue
}
if old == nil {
err := builtinAlert.Add(ctx, SYSTEM)
if err != nil {
logger.Warning("add builtin alert fail ", builtinAlert, err)
}
continue
}
if old.UpdatedBy == SYSTEM {
old.Content = string(content)
old.Name = alert.Name
old.Tags = alert.AppendTags
err = models.DB(ctx).Model(old).Select("*").Updates(old).Error
if err != nil {
logger.Warningf("update builtin alert:%+v fail %v", builtinAlert, err)
}
}
}
if writeAlertFileFlag {
bs, err = json.MarshalIndent(newAlerts, "", " ")
if err != nil {
logger.Warning("marshal builtin alerts fail ", newAlerts, err)
continue
}
_, err = file.WriteBytes(fp, bs)
if err != nil {
logger.Warning("write builtin alerts file fail ", f, err)
}
}
}
}
// dashboards
files, err = file.FilesUnder(componentDir + "/dashboards")
if err == nil && len(files) > 0 {
for _, f := range files {
fp := componentDir + "/dashboards/" + f
bs, err := file.ReadBytes(fp)
if err != nil {
logger.Warning("read builtin component dashboards file fail ", f, err)
continue
}
dashboard := BuiltinBoard{}
err = json.Unmarshal(bs, &dashboard)
if err != nil {
logger.Warning("parse builtin component dashboards file fail ", f, err)
continue
}
if dashboard.UUID == 0 {
dashboard.UUID = time.Now().UnixNano()
// 补全文件中的 uuid
bs, err = json.MarshalIndent(dashboard, "", " ")
if err != nil {
logger.Warning("marshal builtin dashboard fail ", dashboard, err)
continue
}
_, err = file.WriteBytes(fp, bs)
if err != nil {
logger.Warning("write builtin dashboard file fail ", f, err)
}
}
content, err := json.Marshal(dashboard)
if err != nil {
logger.Warning("marshal builtin dashboard fail ", dashboard, err)
continue
}
builtinDashboard := models.BuiltinPayload{
Component: component.Ident,
Type: "dashboard",
Cate: "",
Name: dashboard.Name,
Tags: dashboard.Tags,
Content: string(content),
UUID: dashboard.UUID,
}
old, err := models.BuiltinPayloadGet(ctx, "uuid = ?", dashboard.UUID)
if err != nil {
logger.Warning("get builtin alert fail ", builtinDashboard, err)
continue
}
if old == nil {
err := builtinDashboard.Add(ctx, SYSTEM)
if err != nil {
logger.Warning("add builtin alert fail ", builtinDashboard, err)
}
continue
}
if old.UpdatedBy == SYSTEM {
old.Content = string(content)
old.Name = dashboard.Name
old.Tags = dashboard.Tags
err = models.DB(ctx).Model(old).Select("*").Updates(old).Error
if err != nil {
logger.Warningf("update builtin alert:%+v fail %v", builtinDashboard, err)
}
}
}
} else if err != nil {
logger.Warningf("read builtin component dash dir fail %s %v", component.Ident, err)
}
// metrics
files, err = file.FilesUnder(componentDir + "/metrics")
if err == nil && len(files) > 0 {
for _, f := range files {
fp := componentDir + "/metrics/" + f
bs, err := file.ReadBytes(fp)
if err != nil {
logger.Warning("read builtin component metrics file fail", f, err)
continue
}
metrics := []models.BuiltinMetric{}
newMetrics := []models.BuiltinMetric{}
err = json.Unmarshal(bs, &metrics)
if err != nil {
logger.Warning("parse builtin component metrics file fail", f, err)
continue
}
writeMetricFileFlag := false
for _, metric := range metrics {
if metric.UUID == 0 {
writeMetricFileFlag = true
metric.UUID = time.Now().UnixNano()
}
newMetrics = append(newMetrics, metric)
old, err := models.BuiltinMetricGet(ctx, "uuid = ?", metric.UUID)
if err != nil {
logger.Warning("get builtin metrics fail ", metric, err)
continue
}
if old == nil {
err := metric.Add(ctx, SYSTEM)
if err != nil {
logger.Warning("add builtin metrics fail ", metric, err)
}
continue
}
if old.UpdatedBy == SYSTEM {
old.Collector = metric.Collector
old.Typ = metric.Typ
old.Name = metric.Name
old.Unit = metric.Unit
old.Note = metric.Note
old.Lang = metric.Lang
old.Expression = metric.Expression
err = models.DB(ctx).Model(old).Select("*").Updates(old).Error
if err != nil {
logger.Warningf("update builtin metric:%+v fail %v", metric, err)
}
}
}
if writeMetricFileFlag {
bs, err = json.MarshalIndent(newMetrics, "", " ")
if err != nil {
logger.Warning("marshal builtin metrics fail ", newMetrics, err)
continue
}
_, err = file.WriteBytes(fp, bs)
if err != nil {
logger.Warning("write builtin metrics file fail ", f, err)
}
}
}
} else if err != nil {
logger.Warningf("read builtin component metrics dir fail %s %v", component.Ident, err)
}
}
}
type BuiltinBoard struct {
Id int64 `json:"id" gorm:"primaryKey"`
GroupId int64 `json:"group_id"`
Name string `json:"name"`
Ident string `json:"ident"`
Tags string `json:"tags"`
CreateAt int64 `json:"create_at"`
CreateBy string `json:"create_by"`
UpdateAt int64 `json:"update_at"`
UpdateBy string `json:"update_by"`
Configs interface{} `json:"configs" gorm:"-"`
Public int `json:"public"` // 0: false, 1: true
PublicCate int `json:"public_cate"` // 0: anonymous, 1: login, 2: busi
Bgids []int64 `json:"bgids" gorm:"-"`
BuiltIn int `json:"built_in"` // 0: false, 1: true
Hide int `json:"hide"` // 0: false, 1: true
UUID int64 `json:"uuid"`
}

View File

@@ -2,6 +2,7 @@ package metas
import (
"context"
"encoding/json"
"sync"
"time"
@@ -90,15 +91,41 @@ func (s *Set) updateMeta(items map[string]models.HostMeta) {
}
func (s *Set) updateTargets(m map[string]models.HostMeta) error {
if s.redis == nil {
logger.Warningf("redis is nil")
return nil
}
count := int64(len(m))
if count == 0 {
return nil
}
newMap := make(map[string]interface{}, count)
extendMap := make(map[string]interface{})
for ident, meta := range m {
if meta.ExtendInfo != nil {
extendMeta := meta.ExtendInfo
meta.ExtendInfo = make(map[string]interface{})
extendMetaStr, err := json.Marshal(extendMeta)
if err != nil {
return err
}
extendMap[models.WrapExtendIdent(ident)] = extendMetaStr
}
newMap[models.WrapIdent(ident)] = meta
}
err := storage.MSet(context.Background(), s.redis, newMap)
if err != nil {
return err
}
if len(extendMap) > 0 {
err = storage.MSet(context.Background(), s.redis, extendMap)
if err != nil {
return err
}
}
return err
}

View File

@@ -8,6 +8,7 @@ import (
"strings"
"time"
"github.com/ccfos/nightingale/v6/alert/aconf"
"github.com/ccfos/nightingale/v6/center/cconf"
"github.com/ccfos/nightingale/v6/center/cstats"
"github.com/ccfos/nightingale/v6/center/metas"
@@ -33,6 +34,7 @@ import (
type Router struct {
HTTP httpx.Config
Center cconf.Center
Alert aconf.Alert
Operations cconf.Operation
DatasourceCache *memsto.DatasourceCacheType
NotifyConfigCache *memsto.NotifyConfigCacheType
@@ -46,14 +48,13 @@ type Router struct {
UserCache *memsto.UserCacheType
UserGroupCache *memsto.UserGroupCacheType
Ctx *ctx.Context
DatasourceCheckHook func(*gin.Context) bool
}
func New(httpConfig httpx.Config, center cconf.Center, operations cconf.Operation, ds *memsto.DatasourceCacheType, ncc *memsto.NotifyConfigCacheType, pc *prom.PromClientMap, tdendgineClients *tdengine.TdengineClientMap, redis storage.Redis, sso *sso.SsoClient, ctx *ctx.Context, metaSet *metas.Set, idents *idents.Set, tc *memsto.TargetCacheType, uc *memsto.UserCacheType, ugc *memsto.UserGroupCacheType) *Router {
func New(httpConfig httpx.Config, center cconf.Center, alert aconf.Alert, operations cconf.Operation, ds *memsto.DatasourceCacheType, ncc *memsto.NotifyConfigCacheType, pc *prom.PromClientMap, tdendgineClients *tdengine.TdengineClientMap, redis storage.Redis, sso *sso.SsoClient, ctx *ctx.Context, metaSet *metas.Set, idents *idents.Set, tc *memsto.TargetCacheType, uc *memsto.UserCacheType, ugc *memsto.UserGroupCacheType) *Router {
return &Router{
HTTP: httpConfig,
Center: center,
Alert: alert,
Operations: operations,
DatasourceCache: ds,
NotifyConfigCache: ncc,
@@ -67,8 +68,6 @@ func New(httpConfig httpx.Config, center cconf.Center, operations cconf.Operatio
UserCache: uc,
UserGroupCache: ugc,
Ctx: ctx,
DatasourceCheckHook: func(ctx *gin.Context) bool { return false },
}
}
@@ -93,14 +92,14 @@ func languageDetector(i18NHeaderKey string) gin.HandlerFunc {
lang := c.GetHeader(headerKey)
if lang != "" {
if strings.HasPrefix(lang, "zh") {
c.Request.Header.Set("X-Language", "zh")
c.Request.Header.Set("X-Language", "zh_CN")
} else if strings.HasPrefix(lang, "en") {
c.Request.Header.Set("X-Language", "en")
} else {
c.Request.Header.Set("X-Language", lang)
}
} else {
c.Request.Header.Set("X-Language", "en")
c.Request.Header.Set("X-Language", "zh_CN")
}
}
c.Next()
@@ -175,12 +174,11 @@ func (rt *Router) Config(r *gin.Engine) {
pages.POST("/tdengine-tables", rt.tdengineTables)
pages.POST("/tdengine-columns", rt.tdengineColumns)
pages.GET("/sql-template", rt.QuerySqlTemplate)
} else {
pages.Any("/proxy/:id/*url", rt.auth(), rt.dsProxy)
pages.POST("/query-range-batch", rt.auth(), rt.promBatchQueryRange)
pages.POST("/query-instant-batch", rt.auth(), rt.promBatchQueryInstant)
pages.GET("/datasource/brief", rt.auth(), rt.datasourceBriefs)
pages.GET("/datasource/brief", rt.auth(), rt.user(), rt.datasourceBriefs)
pages.POST("/ds-query", rt.auth(), rt.QueryData)
pages.POST("/logs-query", rt.auth(), rt.QueryLog)
@@ -190,8 +188,9 @@ func (rt *Router) Config(r *gin.Engine) {
pages.POST("/tdengine-columns", rt.auth(), rt.tdengineColumns)
}
pages.GET("/sql-template", rt.QuerySqlTemplate)
pages.POST("/auth/login", rt.jwtMock(), rt.loginPost)
pages.POST("/auth/logout", rt.jwtMock(), rt.auth(), rt.logoutPost)
pages.POST("/auth/logout", rt.jwtMock(), rt.auth(), rt.user(), rt.logoutPost)
pages.POST("/auth/refresh", rt.jwtMock(), rt.refreshPost)
pages.POST("/auth/captcha", rt.jwtMock(), rt.generateCaptcha)
pages.POST("/auth/captcha-verify", rt.jwtMock(), rt.captchaVerify)
@@ -230,6 +229,20 @@ func (rt *Router) Config(r *gin.Engine) {
pages.POST("/metric-views", rt.auth(), rt.user(), rt.metricViewAdd)
pages.PUT("/metric-views", rt.auth(), rt.user(), rt.metricViewPut)
pages.GET("/builtin-metric-filters", rt.auth(), rt.user(), rt.metricFilterGets)
pages.DELETE("/builtin-metric-filters", rt.auth(), rt.user(), rt.metricFilterDel)
pages.POST("/builtin-metric-filters", rt.auth(), rt.user(), rt.metricFilterAdd)
pages.PUT("/builtin-metric-filters", rt.auth(), rt.user(), rt.metricFilterPut)
pages.POST("/builtin-metric-promql", rt.auth(), rt.user(), rt.getMetricPromql)
pages.POST("/builtin-metrics", rt.auth(), rt.user(), rt.perm("/builtin-metrics/add"), rt.builtinMetricsAdd)
pages.PUT("/builtin-metrics", rt.auth(), rt.user(), rt.perm("/builtin-metrics/put"), rt.builtinMetricsPut)
pages.DELETE("/builtin-metrics", rt.auth(), rt.user(), rt.perm("/builtin-metrics/del"), rt.builtinMetricsDel)
pages.GET("/builtin-metrics", rt.auth(), rt.user(), rt.builtinMetricsGets)
pages.GET("/builtin-metrics/types", rt.auth(), rt.user(), rt.builtinMetricsTypes)
pages.GET("/builtin-metrics/types/default", rt.auth(), rt.user(), rt.builtinMetricsDefaultTypes)
pages.GET("/builtin-metrics/collectors", rt.auth(), rt.user(), rt.builtinMetricsCollectors)
pages.GET("/user-groups", rt.auth(), rt.user(), rt.userGroupGets)
pages.POST("/user-groups", rt.auth(), rt.user(), rt.perm("/user-groups/add"), rt.userGroupAdd)
pages.GET("/user-group/:id", rt.auth(), rt.user(), rt.userGroupGet)
@@ -249,6 +262,7 @@ func (rt *Router) Config(r *gin.Engine) {
pages.GET("/busi-group/:id/perm/:perm", rt.auth(), rt.user(), rt.checkBusiGroupPerm)
pages.GET("/targets", rt.auth(), rt.user(), rt.targetGets)
pages.GET("/target/extra-meta", rt.auth(), rt.user(), rt.targetExtendInfoByIdent)
pages.POST("/target/list", rt.auth(), rt.user(), rt.targetGetsByHostFilter)
pages.DELETE("/targets", rt.auth(), rt.user(), rt.perm("/targets/del"), rt.targetDel)
pages.GET("/targets/tags", rt.auth(), rt.user(), rt.targetGetTags)
@@ -260,17 +274,21 @@ func (rt *Router) Config(r *gin.Engine) {
pages.POST("/builtin-cate-favorite", rt.auth(), rt.user(), rt.builtinCateFavoriteAdd)
pages.DELETE("/builtin-cate-favorite/:name", rt.auth(), rt.user(), rt.builtinCateFavoriteDel)
pages.GET("/builtin-boards", rt.builtinBoardGets)
pages.GET("/builtin-board/:name", rt.builtinBoardGet)
pages.GET("/dashboards/builtin/list", rt.builtinBoardGets)
pages.GET("/builtin-boards-cates", rt.auth(), rt.user(), rt.builtinBoardCateGets)
pages.POST("/builtin-boards-detail", rt.auth(), rt.user(), rt.builtinBoardDetailGets)
pages.GET("/integrations/icon/:cate/:name", rt.builtinIcon)
pages.GET("/integrations/makedown/:cate", rt.builtinMarkdown)
// pages.GET("/builtin-boards", rt.builtinBoardGets)
// pages.GET("/builtin-board/:name", rt.builtinBoardGet)
// pages.GET("/dashboards/builtin/list", rt.builtinBoardGets)
// pages.GET("/builtin-boards-cates", rt.auth(), rt.user(), rt.builtinBoardCateGets)
// pages.POST("/builtin-boards-detail", rt.auth(), rt.user(), rt.builtinBoardDetailGets)
// pages.GET("/integrations/makedown/:cate", rt.builtinMarkdown)
pages.GET("/busi-groups/public-boards", rt.auth(), rt.user(), rt.perm("/dashboards"), rt.publicBoardGets)
pages.GET("/busi-groups/boards", rt.auth(), rt.user(), rt.perm("/dashboards"), rt.boardGetsByGids)
pages.GET("/busi-group/:id/boards", rt.auth(), rt.user(), rt.perm("/dashboards"), rt.bgro(), rt.boardGets)
pages.POST("/busi-group/:id/boards", rt.auth(), rt.user(), rt.perm("/dashboards/add"), rt.bgrw(), rt.boardAdd)
pages.POST("/busi-group/:id/board/:bid/clone", rt.auth(), rt.user(), rt.perm("/dashboards/add"), rt.bgrw(), rt.boardClone)
pages.POST("/busi-groups/boards/clones", rt.auth(), rt.user(), rt.perm("/dashboards/add"), rt.boardBatchClone)
pages.GET("/board/:bid", rt.boardGet)
pages.GET("/board/:bid/pure", rt.boardPureGet)
@@ -282,9 +300,11 @@ func (rt *Router) Config(r *gin.Engine) {
pages.GET("/share-charts", rt.chartShareGets)
pages.POST("/share-charts", rt.auth(), rt.chartShareAdd)
pages.GET("/alert-rules/builtin/alerts-cates", rt.auth(), rt.user(), rt.builtinAlertCateGets)
pages.GET("/alert-rules/builtin/list", rt.auth(), rt.user(), rt.builtinAlertRules)
// pages.GET("/alert-rules/builtin/alerts-cates", rt.auth(), rt.user(), rt.builtinAlertCateGets)
// pages.GET("/alert-rules/builtin/list", rt.auth(), rt.user(), rt.builtinAlertRules)
pages.GET("/alert-rules/callbacks", rt.auth(), rt.user(), rt.alertRuleCallbacks)
pages.GET("/busi-groups/alert-rules", rt.auth(), rt.user(), rt.perm("/alert-rules"), rt.alertRuleGetsByGids)
pages.GET("/busi-group/:id/alert-rules", rt.auth(), rt.user(), rt.perm("/alert-rules"), rt.alertRuleGets)
pages.POST("/busi-group/:id/alert-rules", rt.auth(), rt.user(), rt.perm("/alert-rules/add"), rt.bgrw(), rt.alertRuleAddByFE)
pages.POST("/busi-group/:id/alert-rules/import", rt.auth(), rt.user(), rt.perm("/alert-rules/add"), rt.bgrw(), rt.alertRuleAddByImport)
@@ -294,6 +314,7 @@ func (rt *Router) Config(r *gin.Engine) {
pages.GET("/alert-rule/:arid", rt.auth(), rt.user(), rt.perm("/alert-rules"), rt.alertRuleGet)
pages.PUT("/busi-group/alert-rule/validate", rt.auth(), rt.user(), rt.perm("/alert-rules/put"), rt.alertRuleValidation)
pages.GET("/busi-groups/recording-rules", rt.auth(), rt.user(), rt.perm("/recording-rules"), rt.recordingRuleGetsByGids)
pages.GET("/busi-group/:id/recording-rules", rt.auth(), rt.user(), rt.perm("/recording-rules"), rt.recordingRuleGets)
pages.POST("/busi-group/:id/recording-rules", rt.auth(), rt.user(), rt.perm("/recording-rules/add"), rt.bgrw(), rt.recordingRuleAddByFE)
pages.DELETE("/busi-group/:id/recording-rules", rt.auth(), rt.user(), rt.perm("/recording-rules/del"), rt.bgrw(), rt.recordingRuleDel)
@@ -301,13 +322,16 @@ func (rt *Router) Config(r *gin.Engine) {
pages.GET("/recording-rule/:rrid", rt.auth(), rt.user(), rt.perm("/recording-rules"), rt.recordingRuleGet)
pages.PUT("/busi-group/:id/recording-rules/fields", rt.auth(), rt.user(), rt.perm("/recording-rules/put"), rt.recordingRulePutFields)
pages.GET("/busi-groups/alert-mutes", rt.auth(), rt.user(), rt.perm("/alert-mutes"), rt.alertMuteGetsByGids)
pages.GET("/busi-group/:id/alert-mutes", rt.auth(), rt.user(), rt.perm("/alert-mutes"), rt.bgro(), rt.alertMuteGetsByBG)
pages.POST("/busi-group/:id/alert-mutes/preview", rt.auth(), rt.user(), rt.perm("/alert-mutes/add"), rt.bgrw(), rt.alertMutePreview)
pages.POST("/busi-group/:id/alert-mutes", rt.auth(), rt.user(), rt.perm("/alert-mutes/add"), rt.bgrw(), rt.alertMuteAdd)
pages.DELETE("/busi-group/:id/alert-mutes", rt.auth(), rt.user(), rt.perm("/alert-mutes/del"), rt.bgrw(), rt.alertMuteDel)
pages.PUT("/busi-group/:id/alert-mute/:amid", rt.auth(), rt.user(), rt.perm("/alert-mutes/put"), rt.alertMutePutByFE)
pages.GET("/busi-group/:id/alert-mute/:amid", rt.auth(), rt.user(), rt.perm("/alert-mutes"), rt.alertMuteGet)
pages.PUT("/busi-group/:id/alert-mutes/fields", rt.auth(), rt.user(), rt.perm("/alert-mutes/put"), rt.bgrw(), rt.alertMutePutFields)
pages.GET("/busi-groups/alert-subscribes", rt.auth(), rt.user(), rt.perm("/alert-subscribes"), rt.alertSubscribeGetsByGids)
pages.GET("/busi-group/:id/alert-subscribes", rt.auth(), rt.user(), rt.perm("/alert-subscribes"), rt.bgro(), rt.alertSubscribeGets)
pages.GET("/alert-subscribe/:sid", rt.auth(), rt.user(), rt.perm("/alert-subscribes"), rt.alertSubscribeGet)
pages.POST("/busi-group/:id/alert-subscribes", rt.auth(), rt.user(), rt.perm("/alert-subscribes/add"), rt.bgrw(), rt.alertSubscribeAdd)
@@ -318,15 +342,15 @@ func (rt *Router) Config(r *gin.Engine) {
pages.GET("/alert-cur-event/:eid", rt.alertCurEventGet)
pages.GET("/alert-his-event/:eid", rt.alertHisEventGet)
} else {
pages.GET("/alert-cur-event/:eid", rt.auth(), rt.alertCurEventGet)
pages.GET("/alert-his-event/:eid", rt.auth(), rt.alertHisEventGet)
pages.GET("/alert-cur-event/:eid", rt.auth(), rt.user(), rt.alertCurEventGet)
pages.GET("/alert-his-event/:eid", rt.auth(), rt.user(), rt.alertHisEventGet)
}
// card logic
pages.GET("/alert-cur-events/list", rt.auth(), rt.alertCurEventsList)
pages.GET("/alert-cur-events/card", rt.auth(), rt.alertCurEventsCard)
pages.GET("/alert-cur-events/list", rt.auth(), rt.user(), rt.alertCurEventsList)
pages.GET("/alert-cur-events/card", rt.auth(), rt.user(), rt.alertCurEventsCard)
pages.POST("/alert-cur-events/card/details", rt.auth(), rt.alertCurEventsCardDetails)
pages.GET("/alert-his-events/list", rt.auth(), rt.alertHisEventsList)
pages.GET("/alert-his-events/list", rt.auth(), rt.user(), rt.alertHisEventsList)
pages.DELETE("/alert-cur-events", rt.auth(), rt.user(), rt.perm("/alert-cur-events/del"), rt.alertCurEventDel)
pages.GET("/alert-cur-events/stats", rt.auth(), rt.alertCurEventsStatistics)
@@ -335,6 +359,7 @@ func (rt *Router) Config(r *gin.Engine) {
pages.POST("/alert-aggr-views", rt.auth(), rt.user(), rt.alertAggrViewAdd)
pages.PUT("/alert-aggr-views", rt.auth(), rt.user(), rt.alertAggrViewPut)
pages.GET("/busi-groups/task-tpls", rt.auth(), rt.user(), rt.perm("/job-tpls"), rt.taskTplGetsByGids)
pages.GET("/busi-group/:id/task-tpls", rt.auth(), rt.user(), rt.perm("/job-tpls"), rt.bgro(), rt.taskTplGets)
pages.POST("/busi-group/:id/task-tpls", rt.auth(), rt.user(), rt.perm("/job-tpls/add"), rt.bgrw(), rt.taskTplAdd)
pages.DELETE("/busi-group/:id/task-tpl/:tid", rt.auth(), rt.user(), rt.perm("/job-tpls/del"), rt.bgrw(), rt.taskTplDel)
@@ -343,15 +368,14 @@ func (rt *Router) Config(r *gin.Engine) {
pages.GET("/busi-group/:id/task-tpl/:tid", rt.auth(), rt.user(), rt.perm("/job-tpls"), rt.bgro(), rt.taskTplGet)
pages.PUT("/busi-group/:id/task-tpl/:tid", rt.auth(), rt.user(), rt.perm("/job-tpls/put"), rt.bgrw(), rt.taskTplPut)
pages.GET("/busi-groups/tasks", rt.auth(), rt.user(), rt.perm("/job-tasks"), rt.taskGetsByGids)
pages.GET("/busi-group/:id/tasks", rt.auth(), rt.user(), rt.perm("/job-tasks"), rt.bgro(), rt.taskGets)
pages.POST("/busi-group/:id/tasks", rt.auth(), rt.user(), rt.perm("/job-tasks/add"), rt.bgrw(), rt.taskAdd)
pages.GET("/busi-group/:id/task/*url", rt.auth(), rt.user(), rt.perm("/job-tasks"), rt.taskProxy)
pages.PUT("/busi-group/:id/task/*url", rt.auth(), rt.user(), rt.perm("/job-tasks/put"), rt.bgrw(), rt.taskProxy)
pages.GET("/servers", rt.auth(), rt.admin(), rt.serversGet)
pages.GET("/server-clusters", rt.auth(), rt.admin(), rt.serverClustersGet)
pages.GET("/servers", rt.auth(), rt.user(), rt.perm("/help/servers"), rt.serversGet)
pages.GET("/server-clusters", rt.auth(), rt.user(), rt.perm("/help/servers"), rt.serverClustersGet)
pages.POST("/datasource/list", rt.auth(), rt.datasourceList)
pages.POST("/datasource/list", rt.auth(), rt.user(), rt.datasourceList)
pages.POST("/datasource/plugin/list", rt.auth(), rt.pluginList)
pages.POST("/datasource/upsert", rt.auth(), rt.admin(), rt.datasourceUpsert)
pages.POST("/datasource/desc", rt.auth(), rt.admin(), rt.datasourceGet)
@@ -367,29 +391,29 @@ func (rt *Router) Config(r *gin.Engine) {
pages.PUT("/role/:id/ops", rt.auth(), rt.admin(), rt.roleBindOperation)
pages.GET("/operation", rt.operations)
pages.GET("/notify-tpls", rt.auth(), rt.admin(), rt.notifyTplGets)
pages.PUT("/notify-tpl/content", rt.auth(), rt.admin(), rt.notifyTplUpdateContent)
pages.PUT("/notify-tpl", rt.auth(), rt.admin(), rt.notifyTplUpdate)
pages.POST("/notify-tpl", rt.auth(), rt.admin(), rt.notifyTplAdd)
pages.DELETE("/notify-tpl/:id", rt.auth(), rt.admin(), rt.notifyTplDel)
pages.POST("/notify-tpl/preview", rt.auth(), rt.admin(), rt.notifyTplPreview)
pages.GET("/notify-tpls", rt.auth(), rt.user(), rt.notifyTplGets)
pages.PUT("/notify-tpl/content", rt.auth(), rt.user(), rt.notifyTplUpdateContent)
pages.PUT("/notify-tpl", rt.auth(), rt.user(), rt.notifyTplUpdate)
pages.POST("/notify-tpl", rt.auth(), rt.user(), rt.notifyTplAdd)
pages.DELETE("/notify-tpl/:id", rt.auth(), rt.user(), rt.notifyTplDel)
pages.POST("/notify-tpl/preview", rt.auth(), rt.user(), rt.notifyTplPreview)
pages.GET("/sso-configs", rt.auth(), rt.admin(), rt.ssoConfigGets)
pages.PUT("/sso-config", rt.auth(), rt.admin(), rt.ssoConfigUpdate)
pages.GET("/webhooks", rt.auth(), rt.admin(), rt.webhookGets)
pages.GET("/webhooks", rt.auth(), rt.user(), rt.webhookGets)
pages.PUT("/webhooks", rt.auth(), rt.admin(), rt.webhookPuts)
pages.GET("/notify-script", rt.auth(), rt.admin(), rt.notifyScriptGet)
pages.GET("/notify-script", rt.auth(), rt.user(), rt.perm("/help/notification-settings"), rt.notifyScriptGet)
pages.PUT("/notify-script", rt.auth(), rt.admin(), rt.notifyScriptPut)
pages.GET("/notify-channel", rt.auth(), rt.admin(), rt.notifyChannelGets)
pages.GET("/notify-channel", rt.auth(), rt.user(), rt.perm("/help/notification-settings"), rt.notifyChannelGets)
pages.PUT("/notify-channel", rt.auth(), rt.admin(), rt.notifyChannelPuts)
pages.GET("/notify-contact", rt.auth(), rt.admin(), rt.notifyContactGets)
pages.GET("/notify-contact", rt.auth(), rt.user(), rt.perm("/help/notification-settings"), rt.notifyContactGets)
pages.PUT("/notify-contact", rt.auth(), rt.admin(), rt.notifyContactPuts)
pages.GET("/notify-config", rt.auth(), rt.admin(), rt.notifyConfigGet)
pages.GET("/notify-config", rt.auth(), rt.user(), rt.perm("/help/notification-settings"), rt.notifyConfigGet)
pages.PUT("/notify-config", rt.auth(), rt.admin(), rt.notifyConfigPut)
pages.PUT("/smtp-config-test", rt.auth(), rt.admin(), rt.attemptSendEmail)
@@ -399,14 +423,29 @@ func (rt *Router) Config(r *gin.Engine) {
pages.PUT("/es-index-pattern", rt.auth(), rt.admin(), rt.esIndexPatternPut)
pages.DELETE("/es-index-pattern", rt.auth(), rt.admin(), rt.esIndexPatternDel)
pages.GET("/user-variable-configs", rt.auth(), rt.admin(), rt.userVariableConfigGets)
pages.POST("/user-variable-config", rt.auth(), rt.admin(), rt.userVariableConfigAdd)
pages.PUT("/user-variable-config/:id", rt.auth(), rt.admin(), rt.userVariableConfigPut)
pages.DELETE("/user-variable-config/:id", rt.auth(), rt.admin(), rt.userVariableConfigDel)
pages.GET("/user-variable-configs", rt.auth(), rt.user(), rt.perm("/help/variable-configs"), rt.userVariableConfigGets)
pages.POST("/user-variable-config", rt.auth(), rt.user(), rt.perm("/help/variable-configs"), rt.userVariableConfigAdd)
pages.PUT("/user-variable-config/:id", rt.auth(), rt.user(), rt.perm("/help/variable-configs"), rt.userVariableConfigPut)
pages.DELETE("/user-variable-config/:id", rt.auth(), rt.user(), rt.perm("/help/variable-configs"), rt.userVariableConfigDel)
pages.GET("/config", rt.auth(), rt.admin(), rt.configGetByKey)
pages.PUT("/config", rt.auth(), rt.admin(), rt.configPutByKey)
pages.GET("/site-info", rt.siteInfo)
// for admin api
pages.GET("/user/busi-groups", rt.auth(), rt.admin(), rt.userBusiGroupsGets)
pages.GET("/builtin-components", rt.auth(), rt.user(), rt.builtinComponentsGets)
pages.POST("/builtin-components", rt.auth(), rt.user(), rt.perm("/built-in-components/add"), rt.builtinComponentsAdd)
pages.PUT("/builtin-components", rt.auth(), rt.user(), rt.perm("/built-in-components/put"), rt.builtinComponentsPut)
pages.DELETE("/builtin-components", rt.auth(), rt.user(), rt.perm("/built-in-components/del"), rt.builtinComponentsDel)
pages.GET("/builtin-payloads", rt.auth(), rt.user(), rt.builtinPayloadsGets)
pages.GET("/builtin-payloads/cates", rt.auth(), rt.user(), rt.builtinPayloadcatesGet)
pages.POST("/builtin-payloads", rt.auth(), rt.user(), rt.perm("/built-in-components/add"), rt.builtinPayloadsAdd)
pages.GET("/builtin-payload/:id", rt.auth(), rt.user(), rt.perm("/built-in-components"), rt.builtinPayloadGet)
pages.PUT("/builtin-payloads", rt.auth(), rt.user(), rt.perm("/built-in-components/put"), rt.builtinPayloadsPut)
pages.DELETE("/builtin-payloads", rt.auth(), rt.user(), rt.perm("/built-in-components/del"), rt.builtinPayloadsDel)
}
r.GET("/api/n9e/versions", func(c *gin.Context) {
@@ -432,18 +471,25 @@ func (rt *Router) Config(r *gin.Engine) {
{
service.Any("/prometheus/*url", rt.dsProxy)
service.POST("/users", rt.userAddPost)
service.PUT("/user/:id", rt.userProfilePutByService)
service.DELETE("/user/:id", rt.userDel)
service.GET("/users", rt.userFindAll)
service.GET("/user-groups", rt.userGroupGetsByService)
service.GET("/user-group-members", rt.userGroupMemberGetsByService)
service.GET("/targets", rt.targetGetsByService)
service.GET("/target/extra-meta", rt.targetExtendInfoByIdent)
service.POST("/target/list", rt.targetGetsByHostFilter)
service.DELETE("/targets", rt.targetDelByService)
service.GET("/targets/tags", rt.targetGetTags)
service.POST("/targets/tags", rt.targetBindTagsByService)
service.DELETE("/targets/tags", rt.targetUnbindTagsByService)
service.PUT("/targets/note", rt.targetUpdateNoteByService)
service.PUT("/targets/bgid", rt.targetUpdateBgidByService)
service.POST("/alert-rules", rt.alertRuleAddByService)
service.POST("/alert-rule-add", rt.alertRuleAddOneByService)
service.DELETE("/alert-rules", rt.alertRuleDelByService)
service.PUT("/alert-rule/:arid", rt.alertRulePutByService)
service.GET("/alert-rule/:arid", rt.alertRuleGet)
@@ -470,6 +516,8 @@ func (rt *Router) Config(r *gin.Engine) {
service.GET("/alert-his-event/:eid", rt.alertHisEventGet)
service.GET("/task-tpl/:tid", rt.taskTplGetByService)
service.GET("/task-tpls", rt.taskTplGetsByService)
service.GET("/task-tpl/statistics", rt.taskTplStatistics)
service.GET("/config/:id", rt.configGet)
service.GET("/configs", rt.configsGet)
@@ -488,6 +536,9 @@ func (rt *Router) Config(r *gin.Engine) {
service.POST("/task-record-add", rt.taskRecordAdd)
service.GET("/user-variable/decrypt", rt.userVariableGetDecryptByService)
service.GET("/targets-of-alert-rule", rt.targetsOfAlertRule)
}
}

View File

@@ -43,7 +43,6 @@ func (rt *Router) alertCurEventsCard(c *gin.Context) {
stime, etime := getTimeRange(c)
severity := ginx.QueryInt(c, "severity", -1)
query := ginx.QueryStr(c, "query", "")
busiGroupId := ginx.QueryInt64(c, "bgid", 0)
dsIds := queryDatasourceIds(c)
rules := parseAggrRules(c)
@@ -62,8 +61,11 @@ func (rt *Router) alertCurEventsCard(c *gin.Context) {
cates = strings.Split(cate, ",")
}
bgids, err := GetBusinessGroupIds(c, rt.Ctx, rt.Center.EventHistoryGroupView)
ginx.Dangerous(err)
// 最多获取50000个获取太多也没啥意义
list, err := models.AlertCurEventGets(rt.Ctx, prods, busiGroupId, stime, etime, severity, dsIds, cates, query, 50000, 0)
list, err := models.AlertCurEventGets(rt.Ctx, prods, bgids, stime, etime, severity, dsIds, cates, query, 50000, 0)
ginx.Dangerous(err)
cardmap := make(map[string]*AlertCard)
@@ -142,7 +144,6 @@ func (rt *Router) alertCurEventsList(c *gin.Context) {
severity := ginx.QueryInt(c, "severity", -1)
query := ginx.QueryStr(c, "query", "")
limit := ginx.QueryInt(c, "limit", 20)
busiGroupId := ginx.QueryInt64(c, "bgid", 0)
dsIds := queryDatasourceIds(c)
prod := ginx.QueryStr(c, "prods", "")
@@ -161,10 +162,13 @@ func (rt *Router) alertCurEventsList(c *gin.Context) {
cates = strings.Split(cate, ",")
}
total, err := models.AlertCurEventTotal(rt.Ctx, prods, busiGroupId, stime, etime, severity, dsIds, cates, query)
bgids, err := GetBusinessGroupIds(c, rt.Ctx, rt.Center.EventHistoryGroupView)
ginx.Dangerous(err)
list, err := models.AlertCurEventGets(rt.Ctx, prods, busiGroupId, stime, etime, severity, dsIds, cates, query, limit, ginx.Offset(c, limit))
total, err := models.AlertCurEventTotal(rt.Ctx, prods, bgids, stime, etime, severity, dsIds, cates, query)
ginx.Dangerous(err)
list, err := models.AlertCurEventGets(rt.Ctx, prods, bgids, stime, etime, severity, dsIds, cates, query, limit, ginx.Offset(c, limit))
ginx.Dangerous(err)
cache := make(map[int64]*models.UserGroup)
@@ -214,6 +218,10 @@ func (rt *Router) alertCurEventGet(c *gin.Context) {
ginx.Bomb(404, "No such active event")
}
if !rt.Center.AnonymousAccess.AlertDetail && rt.Center.EventHistoryGroupView {
rt.bgroCheck(c, event.GroupId)
}
ginx.NewRender(c).Data(event, nil)
}

View File

@@ -1,13 +1,16 @@
package router
import (
"fmt"
"strings"
"time"
"github.com/ccfos/nightingale/v6/models"
"github.com/ccfos/nightingale/v6/pkg/ctx"
"github.com/gin-gonic/gin"
"github.com/toolkits/pkg/ginx"
"golang.org/x/exp/slices"
)
func getTimeRange(c *gin.Context) (stime, etime int64) {
@@ -33,7 +36,6 @@ func (rt *Router) alertHisEventsList(c *gin.Context) {
recovered := ginx.QueryInt(c, "is_recovered", -1)
query := ginx.QueryStr(c, "query", "")
limit := ginx.QueryInt(c, "limit", 20)
busiGroupId := ginx.QueryInt64(c, "bgid", 0)
dsIds := queryDatasourceIds(c)
prod := ginx.QueryStr(c, "prods", "")
@@ -52,10 +54,13 @@ func (rt *Router) alertHisEventsList(c *gin.Context) {
cates = strings.Split(cate, ",")
}
total, err := models.AlertHisEventTotal(rt.Ctx, prods, busiGroupId, stime, etime, severity, recovered, dsIds, cates, query)
bgids, err := GetBusinessGroupIds(c, rt.Ctx, rt.Center.EventHistoryGroupView)
ginx.Dangerous(err)
list, err := models.AlertHisEventGets(rt.Ctx, prods, busiGroupId, stime, etime, severity, recovered, dsIds, cates, query, limit, ginx.Offset(c, limit))
total, err := models.AlertHisEventTotal(rt.Ctx, prods, bgids, stime, etime, severity, recovered, dsIds, cates, query)
ginx.Dangerous(err)
list, err := models.AlertHisEventGets(rt.Ctx, prods, bgids, stime, etime, severity, recovered, dsIds, cates, query, limit, ginx.Offset(c, limit))
ginx.Dangerous(err)
cache := make(map[int64]*models.UserGroup)
@@ -78,5 +83,50 @@ func (rt *Router) alertHisEventGet(c *gin.Context) {
ginx.Bomb(404, "No such alert event")
}
if !rt.Center.AnonymousAccess.AlertDetail && rt.Center.EventHistoryGroupView {
rt.bgroCheck(c, event.GroupId)
}
ginx.NewRender(c).Data(event, err)
}
func GetBusinessGroupIds(c *gin.Context, ctx *ctx.Context, eventHistoryGroupView bool) ([]int64, error) {
bgid := ginx.QueryInt64(c, "bgid", 0)
var bgids []int64
if !eventHistoryGroupView || strings.HasPrefix(c.Request.URL.Path, "/v1") {
if bgid > 0 {
return []int64{bgid}, nil
}
return bgids, nil
}
user := c.MustGet("user").(*models.User)
if user.IsAdmin() {
if bgid > 0 {
return []int64{bgid}, nil
}
return bgids, nil
}
bussGroupIds, err := models.MyBusiGroupIds(ctx, user.Id)
if err != nil {
return nil, err
}
if len(bussGroupIds) == 0 {
// 如果没查到用户属于任何业务组需要返回一个0否则会导致查询到全部告警历史
return []int64{0}, nil
}
if bgid > 0 && !slices.Contains(bussGroupIds, bgid) {
return nil, fmt.Errorf("business group ID not allowed")
}
if bgid > 0 {
// Pass filter parameters, priority to use
return []int64{bgid}, nil
}
return bussGroupIds, nil
}

View File

@@ -11,6 +11,7 @@ import (
"github.com/gin-gonic/gin"
"github.com/toolkits/pkg/ginx"
"github.com/toolkits/pkg/i18n"
"github.com/toolkits/pkg/str"
)
// Return all, front-end search and paging
@@ -27,6 +28,37 @@ func (rt *Router) alertRuleGets(c *gin.Context) {
ginx.NewRender(c).Data(ars, err)
}
func (rt *Router) alertRuleGetsByGids(c *gin.Context) {
gids := str.IdsInt64(ginx.QueryStr(c, "gids", ""), ",")
if len(gids) > 0 {
for _, gid := range gids {
rt.bgroCheck(c, gid)
}
} else {
me := c.MustGet("user").(*models.User)
if !me.IsAdmin() {
var err error
gids, err = models.MyBusiGroupIds(rt.Ctx, me.Id)
ginx.Dangerous(err)
if len(gids) == 0 {
ginx.NewRender(c).Data([]int{}, nil)
return
}
}
}
ars, err := models.AlertRuleGetsByBGIds(rt.Ctx, gids)
if err == nil {
cache := make(map[int64]*models.UserGroup)
for i := 0; i < len(ars); i++ {
ars[i].FillNotifyGroups(rt.Ctx, cache)
ars[i].FillSeverities()
}
}
ginx.NewRender(c).Data(ars, err)
}
func (rt *Router) alertRulesGetByService(c *gin.Context) {
prods := []string{}
prodStr := ginx.QueryStr(c, "prods", "")
@@ -101,6 +133,17 @@ func (rt *Router) alertRuleAddByService(c *gin.Context) {
ginx.NewRender(c).Data(reterr, nil)
}
func (rt *Router) alertRuleAddOneByService(c *gin.Context) {
var f models.AlertRule
ginx.BindJSON(c, &f)
err := f.FE2DB()
ginx.Dangerous(err)
err = f.Add(rt.Ctx)
ginx.NewRender(c).Data(f.Id, err)
}
func (rt *Router) alertRuleAddForService(lst []models.AlertRule, username string) map[string]string {
count := len(lst)
// alert rule name -> error string
@@ -323,3 +366,25 @@ func (rt *Router) alertRuleValidation(c *gin.Context) {
ginx.NewRender(c).Message("")
}
func (rt *Router) alertRuleCallbacks(c *gin.Context) {
user := c.MustGet("user").(*models.User)
bussGroupIds, err := models.MyBusiGroupIds(rt.Ctx, user.Id)
ginx.Dangerous(err)
ars, err := models.AlertRuleGetsByBGIds(rt.Ctx, bussGroupIds)
ginx.Dangerous(err)
var callbacks []string
callbackFilter := make(map[string]struct{})
for i := range ars {
for _, callback := range ars[i].CallbacksJSON {
if _, ok := callbackFilter[callback]; !ok {
callbackFilter[callback] = struct{}{}
callbacks = append(callbacks, callback)
}
}
}
ginx.NewRender(c).Data(callbacks, nil)
}

View File

@@ -8,6 +8,7 @@ import (
"github.com/gin-gonic/gin"
"github.com/toolkits/pkg/ginx"
"github.com/toolkits/pkg/str"
)
// Return all, front-end search and paging
@@ -21,7 +22,43 @@ func (rt *Router) alertSubscribeGets(c *gin.Context) {
for i := 0; i < len(lst); i++ {
ginx.Dangerous(lst[i].FillUserGroups(rt.Ctx, ugcache))
ginx.Dangerous(lst[i].FillRuleName(rt.Ctx, rulecache))
ginx.Dangerous(lst[i].FillRuleNames(rt.Ctx, rulecache))
ginx.Dangerous(lst[i].FillDatasourceIds(rt.Ctx))
ginx.Dangerous(lst[i].DB2FE())
}
ginx.NewRender(c).Data(lst, err)
}
func (rt *Router) alertSubscribeGetsByGids(c *gin.Context) {
gids := str.IdsInt64(ginx.QueryStr(c, "gids", ""), ",")
if len(gids) > 0 {
for _, gid := range gids {
rt.bgroCheck(c, gid)
}
} else {
me := c.MustGet("user").(*models.User)
if !me.IsAdmin() {
var err error
gids, err = models.MyBusiGroupIds(rt.Ctx, me.Id)
ginx.Dangerous(err)
if len(gids) == 0 {
ginx.NewRender(c).Data([]int{}, nil)
return
}
}
}
lst, err := models.AlertSubscribeGetsByBGIds(rt.Ctx, gids)
ginx.Dangerous(err)
ugcache := make(map[int64]*models.UserGroup)
rulecache := make(map[int64]string)
for i := 0; i < len(lst); i++ {
ginx.Dangerous(lst[i].FillUserGroups(rt.Ctx, ugcache))
ginx.Dangerous(lst[i].FillRuleNames(rt.Ctx, rulecache))
ginx.Dangerous(lst[i].FillDatasourceIds(rt.Ctx))
ginx.Dangerous(lst[i].DB2FE())
}
@@ -44,7 +81,7 @@ func (rt *Router) alertSubscribeGet(c *gin.Context) {
ginx.Dangerous(sub.FillUserGroups(rt.Ctx, ugcache))
rulecache := make(map[int64]string)
ginx.Dangerous(sub.FillRuleName(rt.Ctx, rulecache))
ginx.Dangerous(sub.FillRuleNames(rt.Ctx, rulecache))
ginx.Dangerous(sub.FillDatasourceIds(rt.Ctx))
ginx.Dangerous(sub.DB2FE())
@@ -76,6 +113,9 @@ func (rt *Router) alertSubscribePut(c *gin.Context) {
for i := 0; i < len(fs); i++ {
fs[i].UpdateBy = username
fs[i].UpdateAt = timestamp
//After adding the function of batch subscription alert rules, rule_ids is used instead of rule_id.
//When the subscription rules are updated, set rule_id=0 to prevent the wrong subscription caused by the old rule_id.
fs[i].RuleId = 0
ginx.Dangerous(fs[i].Update(
rt.Ctx,
"name",
@@ -85,6 +125,7 @@ func (rt *Router) alertSubscribePut(c *gin.Context) {
"datasource_ids",
"cluster",
"rule_id",
"rule_ids",
"tags",
"redefine_severity",
"new_severity",
@@ -99,6 +140,7 @@ func (rt *Router) alertSubscribePut(c *gin.Context) {
"severities",
"extra_config",
"busi_groups",
"note",
))
}

View File

@@ -1,22 +1,26 @@
package router
import (
"fmt"
"net/http"
"time"
"github.com/ccfos/nightingale/v6/models"
"github.com/gin-gonic/gin"
"github.com/google/uuid"
"github.com/toolkits/pkg/ginx"
"github.com/toolkits/pkg/i18n"
"github.com/toolkits/pkg/str"
)
type boardForm struct {
Name string `json:"name"`
Ident string `json:"ident"`
Tags string `json:"tags"`
Configs string `json:"configs"`
Public int `json:"public"`
Name string `json:"name"`
Ident string `json:"ident"`
Tags string `json:"tags"`
Configs string `json:"configs"`
Public int `json:"public"`
PublicCate int `json:"public_cate"`
Bgids []int64 `json:"bgids"`
}
func (rt *Router) boardAdd(c *gin.Context) {
@@ -65,6 +69,28 @@ func (rt *Router) boardGet(c *gin.Context) {
}
}
if board.PublicCate == models.PublicLogin {
rt.auth()(c)
} else if board.PublicCate == models.PublicBusi {
rt.auth()(c)
rt.user()(c)
me := c.MustGet("user").(*models.User)
if !me.IsAdmin() {
bgids, err := models.MyBusiGroupIds(rt.Ctx, me.Id)
ginx.Dangerous(err)
if len(bgids) == 0 {
ginx.Bomb(http.StatusForbidden, "forbidden")
}
ok, err := models.BoardBusigroupCheck(rt.Ctx, board.Id, bgids)
ginx.Dangerous(err)
if !ok {
ginx.Bomb(http.StatusForbidden, "forbidden")
}
}
}
ginx.NewRender(c).Data(board, nil)
}
@@ -192,10 +218,20 @@ func (rt *Router) boardPutPublic(c *gin.Context) {
}
bo.Public = f.Public
bo.PublicCate = f.PublicCate
if bo.PublicCate == models.PublicBusi {
err := models.BoardBusigroupUpdate(rt.Ctx, bo.Id, f.Bgids)
ginx.Dangerous(err)
} else {
err := models.BoardBusigroupDelByBoardId(rt.Ctx, bo.Id)
ginx.Dangerous(err)
}
bo.UpdateBy = me.Username
bo.UpdateAt = time.Now().Unix()
err := bo.Update(rt.Ctx, "public", "update_by", "update_at")
err := bo.Update(rt.Ctx, "public", "public_cate", "update_by", "update_at")
ginx.NewRender(c).Data(bo, err)
}
@@ -207,21 +243,64 @@ func (rt *Router) boardGets(c *gin.Context) {
ginx.NewRender(c).Data(boards, err)
}
func (rt *Router) publicBoardGets(c *gin.Context) {
me := c.MustGet("user").(*models.User)
bgids, err := models.MyBusiGroupIds(rt.Ctx, me.Id)
ginx.Dangerous(err)
boardIds, err := models.BoardIdsByBusiGroupIds(rt.Ctx, bgids)
ginx.Dangerous(err)
boards, err := models.BoardGets(rt.Ctx, "", "public=1 and (public_cate in (?) or id in (?))", []int64{0, 1}, boardIds)
ginx.NewRender(c).Data(boards, err)
}
func (rt *Router) boardGetsByGids(c *gin.Context) {
gids := str.IdsInt64(ginx.QueryStr(c, "gids", ""), ",")
query := ginx.QueryStr(c, "query", "")
if len(gids) > 0 {
for _, gid := range gids {
rt.bgroCheck(c, gid)
}
} else {
me := c.MustGet("user").(*models.User)
if !me.IsAdmin() {
var err error
gids, err = models.MyBusiGroupIds(rt.Ctx, me.Id)
ginx.Dangerous(err)
if len(gids) == 0 {
ginx.NewRender(c).Data([]int{}, nil)
return
}
}
}
boardBusigroups, err := models.BoardBusigroupGets(rt.Ctx)
ginx.Dangerous(err)
m := make(map[int64][]int64)
for _, boardBusigroup := range boardBusigroups {
m[boardBusigroup.BoardId] = append(m[boardBusigroup.BoardId], boardBusigroup.BusiGroupId)
}
boards, err := models.BoardGetsByBGIds(rt.Ctx, gids, query)
ginx.Dangerous(err)
for i := 0; i < len(boards); i++ {
if ids, ok := m[boards[i].Id]; ok {
boards[i].Bgids = ids
}
}
ginx.NewRender(c).Data(boards, err)
}
func (rt *Router) boardClone(c *gin.Context) {
me := c.MustGet("user").(*models.User)
bo := rt.Board(ginx.UrlParamInt64(c, "bid"))
newBoard := &models.Board{
Name: bo.Name + " Copy",
Tags: bo.Tags,
GroupId: bo.GroupId,
CreateBy: me.Username,
UpdateBy: me.Username,
}
if bo.Ident != "" {
newBoard.Ident = uuid.NewString()
}
newBoard := bo.Clone(me.Username, bo.GroupId, " Cloned")
ginx.Dangerous(newBoard.Add(rt.Ctx))
@@ -235,3 +314,39 @@ func (rt *Router) boardClone(c *gin.Context) {
ginx.NewRender(c).Message(nil)
}
type boardsForm struct {
BoardIds []int64 `json:"board_ids"`
Bgids []int64 `json:"bgids"`
}
func (rt *Router) boardBatchClone(c *gin.Context) {
me := c.MustGet("user").(*models.User)
var f boardsForm
ginx.BindJSON(c, &f)
for _, bgid := range f.Bgids {
rt.bgrwCheck(c, bgid)
}
reterr := make(map[string]string, len(f.BoardIds))
lang := c.GetHeader("X-Language")
for _, bgid := range f.Bgids {
for _, bid := range f.BoardIds {
bo := rt.Board(bid)
newBoard := bo.Clone(me.Username, bgid, "")
payload, err := models.BoardPayloadGet(rt.Ctx, bo.Id)
if err != nil {
reterr[fmt.Sprintf("%s-%d", newBoard.Name, bgid)] = i18n.Sprintf(lang, err.Error())
continue
}
if err = newBoard.AtomicAdd(rt.Ctx, payload); err != nil {
reterr[fmt.Sprintf("%s-%d", newBoard.Name, bgid)] = i18n.Sprintf(lang, err.Error())
}
}
}
ginx.NewRender(c).Data(reterr, nil)
}

View File

@@ -0,0 +1,66 @@
package router
import (
"net/http"
"github.com/ccfos/nightingale/v6/models"
"github.com/gin-gonic/gin"
"github.com/toolkits/pkg/ginx"
)
func (rt *Router) builtinComponentsAdd(c *gin.Context) {
var lst []models.BuiltinComponent
ginx.BindJSON(c, &lst)
username := Username(c)
count := len(lst)
if count == 0 {
ginx.Bomb(http.StatusBadRequest, "input json is empty")
}
reterr := make(map[string]string)
for i := 0; i < count; i++ {
if err := lst[i].Add(rt.Ctx, username); err != nil {
reterr[lst[i].Ident] = err.Error()
}
}
ginx.NewRender(c).Data(reterr, nil)
}
func (rt *Router) builtinComponentsGets(c *gin.Context) {
query := ginx.QueryStr(c, "query", "")
bc, err := models.BuiltinComponentGets(rt.Ctx, query)
ginx.Dangerous(err)
ginx.NewRender(c).Data(bc, nil)
}
func (rt *Router) builtinComponentsPut(c *gin.Context) {
var req models.BuiltinComponent
ginx.BindJSON(c, &req)
bc, err := models.BuiltinComponentGet(rt.Ctx, "id = ?", req.ID)
ginx.Dangerous(err)
if bc == nil {
ginx.NewRender(c, http.StatusNotFound).Message("No such builtin component")
return
}
username := Username(c)
req.UpdatedBy = username
ginx.NewRender(c).Message(bc.Update(rt.Ctx, req))
}
func (rt *Router) builtinComponentsDel(c *gin.Context) {
var req idsForm
ginx.BindJSON(c, &req)
req.Verify()
ginx.NewRender(c).Message(models.BuiltinComponentDels(rt.Ctx, req.Ids))
}

View File

@@ -0,0 +1,120 @@
package router
import (
"github.com/ccfos/nightingale/v6/models"
"github.com/ccfos/nightingale/v6/pkg/prom"
"github.com/gin-gonic/gin"
"github.com/toolkits/pkg/ginx"
)
func (rt *Router) metricFilterGets(c *gin.Context) {
lst, err := models.MetricFilterGets(rt.Ctx, "")
ginx.Dangerous(err)
me := c.MustGet("user").(*models.User)
gids, err := models.MyGroupIds(rt.Ctx, me.Id)
ginx.Dangerous(err)
arr := make([]models.MetricFilter, 0)
for _, f := range lst {
if me.Username == f.CreateBy {
arr = append(arr, f)
continue
}
if HasPerm(gids, f.GroupsPerm, false) {
arr = append(arr, f)
}
}
ginx.NewRender(c).Data(arr, err)
}
func (rt *Router) metricFilterAdd(c *gin.Context) {
var f models.MetricFilter
ginx.BindJSON(c, &f)
me := c.MustGet("user").(*models.User)
f.CreateBy = me.Username
f.UpdateBy = me.Username
ginx.Dangerous(f.Add(rt.Ctx))
ginx.NewRender(c).Data(f, nil)
}
func (rt *Router) metricFilterDel(c *gin.Context) {
var f idsForm
ginx.BindJSON(c, &f)
f.Verify()
me := c.MustGet("user").(*models.User)
for _, id := range f.Ids {
old, err := models.MetricFilterGet(rt.Ctx, id)
ginx.Dangerous(err)
if me.Username != old.CreateBy {
gids, err := models.MyGroupIds(rt.Ctx, me.Id)
ginx.Dangerous(err)
if !HasPerm(gids, old.GroupsPerm, true) {
ginx.NewRender(c).Message("no permission")
return
}
}
}
ginx.NewRender(c).Message(models.MetricFilterDel(rt.Ctx, f.Ids))
}
func (rt *Router) metricFilterPut(c *gin.Context) {
var f models.MetricFilter
ginx.BindJSON(c, &f)
me := c.MustGet("user").(*models.User)
old, err := models.MetricFilterGet(rt.Ctx, f.ID)
ginx.Dangerous(err)
if me.Username != old.CreateBy {
gids, err := models.MyGroupIds(rt.Ctx, me.Id)
ginx.Dangerous(err)
if !HasPerm(gids, old.GroupsPerm, true) {
ginx.NewRender(c).Message("no permission")
return
}
}
f.UpdateBy = me.Username
ginx.NewRender(c).Message(f.Update(rt.Ctx))
}
type metricPromqlReq struct {
LabelFilter string `json:"label_filter"`
Promql string `json:"promql"`
}
func (rt *Router) getMetricPromql(c *gin.Context) {
var req metricPromqlReq
ginx.BindJSON(c, &req)
promql := prom.AddLabelToPromQL(req.LabelFilter, req.Promql)
ginx.NewRender(c).Data(promql, nil)
}
func HasPerm(gids []int64, gps []models.GroupPerm, checkWrite bool) bool {
gmap := make(map[int64]struct{})
for _, gp := range gps {
if checkWrite && !gp.Write {
continue
}
gmap[gp.Gid] = struct{}{}
}
for _, gid := range gids {
if _, ok := gmap[gid]; ok {
return true
}
}
return false
}

View File

@@ -0,0 +1,116 @@
package router
import (
"net/http"
"time"
"github.com/ccfos/nightingale/v6/models"
"github.com/gin-gonic/gin"
"github.com/toolkits/pkg/ginx"
"github.com/toolkits/pkg/i18n"
)
// single or import
func (rt *Router) builtinMetricsAdd(c *gin.Context) {
var lst []models.BuiltinMetric
ginx.BindJSON(c, &lst)
username := Username(c)
count := len(lst)
if count == 0 {
ginx.Bomb(http.StatusBadRequest, "input json is empty")
}
lang := c.GetHeader("X-Language")
if lang == "" {
lang = "zh_CN"
}
reterr := make(map[string]string)
for i := 0; i < count; i++ {
lst[i].Lang = lang
lst[i].UUID = time.Now().UnixNano()
if err := lst[i].Add(rt.Ctx, username); err != nil {
reterr[lst[i].Name] = i18n.Sprintf(c.GetHeader("X-Language"), err.Error())
}
}
ginx.NewRender(c).Data(reterr, nil)
}
func (rt *Router) builtinMetricsGets(c *gin.Context) {
collector := ginx.QueryStr(c, "collector", "")
typ := ginx.QueryStr(c, "typ", "")
query := ginx.QueryStr(c, "query", "")
limit := ginx.QueryInt(c, "limit", 20)
lang := c.GetHeader("X-Language")
unit := ginx.QueryStr(c, "unit", "")
if lang == "" {
lang = "zh_CN"
}
bm, err := models.BuiltinMetricGets(rt.Ctx, lang, collector, typ, query, unit, limit, ginx.Offset(c, limit))
ginx.Dangerous(err)
total, err := models.BuiltinMetricCount(rt.Ctx, lang, collector, typ, query, unit)
ginx.Dangerous(err)
ginx.NewRender(c).Data(gin.H{
"list": bm,
"total": total,
}, nil)
}
func (rt *Router) builtinMetricsPut(c *gin.Context) {
var req models.BuiltinMetric
ginx.BindJSON(c, &req)
bm, err := models.BuiltinMetricGet(rt.Ctx, "id = ?", req.ID)
ginx.Dangerous(err)
if bm == nil {
ginx.NewRender(c, http.StatusNotFound).Message("No such builtin metric")
return
}
username := Username(c)
req.UpdatedBy = username
ginx.NewRender(c).Message(bm.Update(rt.Ctx, req))
}
func (rt *Router) builtinMetricsDel(c *gin.Context) {
var req idsForm
ginx.BindJSON(c, &req)
req.Verify()
ginx.NewRender(c).Message(models.BuiltinMetricDels(rt.Ctx, req.Ids))
}
func (rt *Router) builtinMetricsDefaultTypes(c *gin.Context) {
lst := []string{
"Linux",
"cAdvisor",
"Ping",
"MySQL",
"Redis",
"Kafka",
"Elasticsearch",
"PostgreSQL",
"MongoDB",
"Memcached",
}
ginx.NewRender(c).Data(lst, nil)
}
func (rt *Router) builtinMetricsTypes(c *gin.Context) {
collector := ginx.QueryStr(c, "collector", "")
query := ginx.QueryStr(c, "query", "")
lang := c.GetHeader("X-Language")
ginx.NewRender(c).Data(models.BuiltinMetricTypes(rt.Ctx, lang, collector, query))
}
func (rt *Router) builtinMetricsCollectors(c *gin.Context) {
typ := ginx.QueryStr(c, "typ", "")
query := ginx.QueryStr(c, "query", "")
lang := c.GetHeader("X-Language")
ginx.NewRender(c).Data(models.BuiltinMetricCollectors(rt.Ctx, lang, typ, query))
}

View File

@@ -0,0 +1,247 @@
package router
import (
"encoding/json"
"net/http"
"strings"
"time"
"github.com/ccfos/nightingale/v6/models"
"github.com/gin-gonic/gin"
"github.com/toolkits/pkg/ginx"
"github.com/toolkits/pkg/i18n"
)
type Board struct {
Name string `json:"name"`
Tags string `json:"tags"`
Configs interface{} `json:"configs"`
UUID int64 `json:"uuid"`
}
func (rt *Router) builtinPayloadsAdd(c *gin.Context) {
var lst []models.BuiltinPayload
ginx.BindJSON(c, &lst)
username := Username(c)
count := len(lst)
if count == 0 {
ginx.Bomb(http.StatusBadRequest, "input json is empty")
}
reterr := make(map[string]string)
for i := 0; i < count; i++ {
if lst[i].Type == "alert" {
if strings.HasPrefix(strings.TrimSpace(lst[i].Content), "[") {
// 处理多个告警规则模板的情况
alertRules := []models.AlertRule{}
if err := json.Unmarshal([]byte(lst[i].Content), &alertRules); err != nil {
reterr[lst[i].Name] = err.Error()
}
for _, rule := range alertRules {
if rule.UUID == 0 {
rule.UUID = time.Now().UnixNano()
}
contentBytes, err := json.Marshal(rule)
if err != nil {
reterr[rule.Name] = err.Error()
continue
}
bp := models.BuiltinPayload{
Type: lst[i].Type,
Component: lst[i].Component,
Cate: lst[i].Cate,
Name: rule.Name,
Tags: rule.AppendTags,
UUID: rule.UUID,
Content: string(contentBytes),
CreatedBy: username,
UpdatedBy: username,
}
if err := bp.Add(rt.Ctx, username); err != nil {
reterr[bp.Name] = i18n.Sprintf(c.GetHeader("X-Language"), err.Error())
}
}
continue
}
alertRule := models.AlertRule{}
if err := json.Unmarshal([]byte(lst[i].Content), &alertRule); err != nil {
reterr[lst[i].Name] = err.Error()
continue
}
if alertRule.UUID == 0 {
alertRule.UUID = time.Now().UnixNano()
}
bp := models.BuiltinPayload{
Type: lst[i].Type,
Component: lst[i].Component,
Cate: lst[i].Cate,
Name: alertRule.Name,
Tags: alertRule.AppendTags,
UUID: alertRule.UUID,
Content: lst[i].Content,
CreatedBy: username,
UpdatedBy: username,
}
if err := bp.Add(rt.Ctx, username); err != nil {
reterr[bp.Name] = i18n.Sprintf(c.GetHeader("X-Language"), err.Error())
}
} else if lst[i].Type == "dashboard" {
if strings.HasPrefix(strings.TrimSpace(lst[i].Content), "[") {
// 处理多个告警规则模板的情况
dashboards := []Board{}
if err := json.Unmarshal([]byte(lst[i].Content), &dashboards); err != nil {
reterr[lst[i].Name] = err.Error()
}
for _, dashboard := range dashboards {
if dashboard.UUID == 0 {
dashboard.UUID = time.Now().UnixNano()
}
contentBytes, err := json.Marshal(dashboard)
if err != nil {
reterr[dashboard.Name] = err.Error()
continue
}
bp := models.BuiltinPayload{
Type: lst[i].Type,
Component: lst[i].Component,
Cate: lst[i].Cate,
Name: dashboard.Name,
Tags: dashboard.Tags,
UUID: dashboard.UUID,
Content: string(contentBytes),
CreatedBy: username,
UpdatedBy: username,
}
if err := bp.Add(rt.Ctx, username); err != nil {
reterr[bp.Name] = i18n.Sprintf(c.GetHeader("X-Language"), err.Error())
}
}
continue
}
dashboard := Board{}
if err := json.Unmarshal([]byte(lst[i].Content), &dashboard); err != nil {
reterr[lst[i].Name] = i18n.Sprintf(c.GetHeader("X-Language"), err.Error())
continue
}
if dashboard.UUID == 0 {
dashboard.UUID = time.Now().UnixNano()
}
bp := models.BuiltinPayload{
Type: lst[i].Type,
Component: lst[i].Component,
Cate: lst[i].Cate,
Name: dashboard.Name,
Tags: dashboard.Tags,
UUID: dashboard.UUID,
Content: lst[i].Content,
CreatedBy: username,
UpdatedBy: username,
}
if err := bp.Add(rt.Ctx, username); err != nil {
reterr[bp.Name] = i18n.Sprintf(c.GetHeader("X-Language"), err.Error())
}
} else {
if err := lst[i].Add(rt.Ctx, username); err != nil {
reterr[lst[i].Name] = i18n.Sprintf(c.GetHeader("X-Language"), err.Error())
}
}
}
ginx.NewRender(c).Data(reterr, nil)
}
func (rt *Router) builtinPayloadsGets(c *gin.Context) {
typ := ginx.QueryStr(c, "type", "")
component := ginx.QueryStr(c, "component", "")
cate := ginx.QueryStr(c, "cate", "")
query := ginx.QueryStr(c, "query", "")
lst, err := models.BuiltinPayloadGets(rt.Ctx, typ, component, cate, query)
ginx.NewRender(c).Data(lst, err)
}
func (rt *Router) builtinPayloadcatesGet(c *gin.Context) {
typ := ginx.QueryStr(c, "type", "")
component := ginx.QueryStr(c, "component", "")
cates, err := models.BuiltinPayloadCates(rt.Ctx, typ, component)
ginx.NewRender(c).Data(cates, err)
}
func (rt *Router) builtinPayloadGet(c *gin.Context) {
id := ginx.UrlParamInt64(c, "id")
bp, err := models.BuiltinPayloadGet(rt.Ctx, "id = ?", id)
if err != nil {
ginx.Bomb(http.StatusInternalServerError, err.Error())
}
if bp == nil {
ginx.Bomb(http.StatusNotFound, "builtin payload not found")
}
ginx.NewRender(c).Data(bp, nil)
}
func (rt *Router) builtinPayloadsPut(c *gin.Context) {
var req models.BuiltinPayload
ginx.BindJSON(c, &req)
bp, err := models.BuiltinPayloadGet(rt.Ctx, "id = ?", req.ID)
ginx.Dangerous(err)
if bp == nil {
ginx.NewRender(c, http.StatusNotFound).Message("No such builtin payload")
return
}
if req.Type == "alert" {
alertRule := models.AlertRule{}
if err := json.Unmarshal([]byte(req.Content), &alertRule); err != nil {
ginx.Bomb(http.StatusBadRequest, err.Error())
}
req.Name = alertRule.Name
req.Tags = alertRule.AppendTags
} else if req.Type == "dashboard" {
dashboard := Board{}
if err := json.Unmarshal([]byte(req.Content), &dashboard); err != nil {
ginx.Bomb(http.StatusBadRequest, err.Error())
}
req.Name = dashboard.Name
req.Tags = dashboard.Tags
}
username := Username(c)
req.UpdatedBy = username
ginx.NewRender(c).Message(bp.Update(rt.Ctx, req))
}
func (rt *Router) builtinPayloadsDel(c *gin.Context) {
var req idsForm
ginx.BindJSON(c, &req)
req.Verify()
ginx.NewRender(c).Message(models.BuiltinPayloadDels(rt.Ctx, req.Ids))
}

View File

@@ -65,7 +65,7 @@ func (rt *Router) generateCaptcha(c *gin.Context) {
var driver = captcha.NewDriverMath(60, 200, 0, captcha.OptionShowHollowLine, nil, nil, []string{"wqy-microhei.ttc"})
cc := captcha.NewCaptcha(driver, rt.newCaptchaRedisStore())
//data:image/png;base64
id, b64s, err := cc.Generate()
id, b64s, _, err := cc.Generate()
if err != nil {
ginx.NewRender(c).Message(err)

View File

@@ -62,3 +62,8 @@ func (rt *Router) contactKeysGets(c *gin.Context) {
ginx.NewRender(c).Data(labelAndKeys, nil)
}
func (rt *Router) siteInfo(c *gin.Context) {
config, err := models.ConfigsGet(rt.Ctx, "site_info")
ginx.NewRender(c).Data(config, err)
}

View File

@@ -26,7 +26,7 @@ type listReq struct {
}
func (rt *Router) datasourceList(c *gin.Context) {
if rt.DatasourceCheckHook(c) {
if rt.DatasourceCache.DatasourceCheckHook(c) {
Render(c, []int{}, nil)
return
}
@@ -38,8 +38,10 @@ func (rt *Router) datasourceList(c *gin.Context) {
category := req.Category
name := req.Name
user := c.MustGet("user").(*models.User)
list, err := models.GetDatasourcesGetsBy(rt.Ctx, typ, category, name, "")
Render(c, list, err)
Render(c, rt.DatasourceCache.DatasourceFilter(list, user), err)
}
func (rt *Router) datasourceGetsByService(c *gin.Context) {
@@ -48,30 +50,36 @@ func (rt *Router) datasourceGetsByService(c *gin.Context) {
ginx.NewRender(c).Data(lst, err)
}
type datasourceBrief struct {
Id int64 `json:"id"`
Name string `json:"name"`
PluginType string `json:"plugin_type"`
}
func (rt *Router) datasourceBriefs(c *gin.Context) {
var dss []datasourceBrief
var dss []*models.Datasource
list, err := models.GetDatasourcesGetsBy(rt.Ctx, "", "", "", "")
ginx.Dangerous(err)
for i := range list {
dss = append(dss, datasourceBrief{
Id: list[i].Id,
Name: list[i].Name,
PluginType: list[i].PluginType,
})
for _, item := range list {
item.AuthJson.BasicAuthPassword = ""
if item.PluginType != models.PROMETHEUS {
item.SettingsJson = nil
} else {
for k, v := range item.SettingsJson {
if strings.HasPrefix(k, "prometheus.") {
item.SettingsJson[strings.TrimPrefix(k, "prometheus.")] = v
delete(item.SettingsJson, k)
}
}
}
dss = append(dss, item)
}
if !rt.Center.AnonymousAccess.PromQuerier {
user := c.MustGet("user").(*models.User)
dss = rt.DatasourceCache.DatasourceFilter(dss, user)
}
ginx.NewRender(c).Data(dss, err)
}
func (rt *Router) datasourceUpsert(c *gin.Context) {
if rt.DatasourceCheckHook(c) {
if rt.DatasourceCache.DatasourceCheckHook(c) {
Render(c, []int{}, nil)
return
}
@@ -105,7 +113,7 @@ func (rt *Router) datasourceUpsert(c *gin.Context) {
}
err = req.Add(rt.Ctx)
} else {
err = req.Update(rt.Ctx, "name", "description", "cluster_name", "settings", "http", "auth", "updated_by", "updated_at")
err = req.Update(rt.Ctx, "name", "description", "cluster_name", "settings", "http", "auth", "updated_by", "updated_at", "is_default")
}
Render(c, nil, err)
@@ -196,7 +204,7 @@ func DatasourceCheck(ds models.Datasource) error {
}
func (rt *Router) datasourceGet(c *gin.Context) {
if rt.DatasourceCheckHook(c) {
if rt.DatasourceCache.DatasourceCheckHook(c) {
Render(c, []int{}, nil)
return
}
@@ -208,7 +216,7 @@ func (rt *Router) datasourceGet(c *gin.Context) {
}
func (rt *Router) datasourceUpdataStatus(c *gin.Context) {
if rt.DatasourceCheckHook(c) {
if rt.DatasourceCache.DatasourceCheckHook(c) {
Render(c, []int{}, nil)
return
}
@@ -222,7 +230,7 @@ func (rt *Router) datasourceUpdataStatus(c *gin.Context) {
}
func (rt *Router) datasourceDel(c *gin.Context) {
if rt.DatasourceCheckHook(c) {
if rt.DatasourceCache.DatasourceCheckHook(c) {
Render(c, []int{}, nil)
return
}
@@ -239,8 +247,3 @@ func (rt *Router) getDatasourceIds(c *gin.Context) {
ginx.NewRender(c).Data(datasourceIds, err)
}
func Username(c *gin.Context) string {
return c.MustGet("username").(string)
}

View File

@@ -1,17 +1,14 @@
package router
import (
"fmt"
"net/http"
"strconv"
"strings"
"github.com/ccfos/nightingale/v6/alert/aconf"
"github.com/ccfos/nightingale/v6/models"
"github.com/ccfos/nightingale/v6/pkg/ctx"
"github.com/ccfos/nightingale/v6/pkg/ibex"
"github.com/gin-gonic/gin"
"github.com/gin-gonic/gin"
"github.com/toolkits/pkg/ginx"
)
@@ -69,7 +66,8 @@ func queryDatasourceIds(c *gin.Context) []int64 {
}
type idsForm struct {
Ids []int64 `json:"ids"`
Ids []int64 `json:"ids"`
IsSyncToFlashDuty bool `json:"is_sync_to_flashduty"`
}
func (f idsForm) Verify() {
@@ -134,27 +132,11 @@ type TaskCreateReply struct {
Dat int64 `json:"dat"` // task.id
}
// return task.id, error
func TaskCreate(v interface{}, ibexc aconf.Ibex) (int64, error) {
var res TaskCreateReply
err := ibex.New(
ibexc.Address,
ibexc.BasicAuthUser,
ibexc.BasicAuthPass,
ibexc.Timeout,
).
Path("/ibex/v1/tasks").
In(v).
Out(&res).
POST()
if err != nil {
return 0, err
func Username(c *gin.Context) string {
username := c.GetString(gin.AuthUserKey)
if username == "" {
user := c.MustGet("user").(*models.User)
username = user.Username
}
if res.Err != "" {
return 0, fmt.Errorf("response.err: %v", res.Err)
}
return res.Dat, nil
return username
}

View File

@@ -47,6 +47,10 @@ func (rt *Router) heartbeat(c *gin.Context) {
req.RemoteAddr = c.ClientIP()
}
if req.EngineName == "" {
req.EngineName = rt.Alert.Heartbeat.EngineName
}
rt.MetaSet.Set(req.Hostname, req)
var items = make(map[string]struct{})
items[req.Hostname] = struct{}{}
@@ -77,6 +81,10 @@ func (rt *Router) heartbeat(c *gin.Context) {
}
}
if req.EngineName != "" && req.EngineName != target.EngineName {
filed["engine_name"] = req.EngineName
}
if len(filed) > 0 {
err := target.UpdateFieldsMap(rt.Ctx, filed)
if err != nil {

View File

@@ -6,7 +6,6 @@ import (
"net/http"
"strconv"
"strings"
"time"
"github.com/ccfos/nightingale/v6/models"
"github.com/ccfos/nightingale/v6/pkg/cas"
@@ -14,10 +13,10 @@ import (
"github.com/ccfos/nightingale/v6/pkg/oauth2x"
"github.com/ccfos/nightingale/v6/pkg/oidcx"
"github.com/ccfos/nightingale/v6/pkg/secu"
"github.com/pelletier/go-toml/v2"
"github.com/dgrijalva/jwt-go"
"github.com/gin-gonic/gin"
"github.com/pelletier/go-toml/v2"
"github.com/toolkits/pkg/ginx"
"github.com/toolkits/pkg/logger"
)
@@ -51,22 +50,26 @@ func (rt *Router) loginPost(c *gin.Context) {
}
authPassWord = decPassWord
}
user, err := models.PassLogin(rt.Ctx, f.Username, authPassWord)
if err != nil {
// pass validate fail, try ldap
if rt.Sso.LDAP.Enable {
roles := strings.Join(rt.Sso.LDAP.DefaultRoles, " ")
user, err = models.LdapLogin(rt.Ctx, f.Username, authPassWord, roles, rt.Sso.LDAP)
if err != nil {
logger.Debugf("ldap login failed: %v username: %s", err, f.Username)
ginx.NewRender(c).Message(err)
var user *models.User
var err error
lc := rt.Sso.LDAP.Copy()
if lc.Enable {
user, err = ldapx.LdapLogin(rt.Ctx, f.Username, authPassWord, lc.DefaultRoles, lc)
if err != nil {
logger.Debugf("ldap login failed: %v username: %s", err, f.Username)
var errLoginInN9e error
// to use n9e as the minimum guarantee for login
if user, errLoginInN9e = models.PassLogin(rt.Ctx, f.Username, authPassWord); errLoginInN9e != nil {
ginx.NewRender(c).Message("ldap login failed: %v; n9e login failed: %v", err, errLoginInN9e)
return
}
user.RolesLst = strings.Fields(user.Roles)
} else {
ginx.NewRender(c).Message(err)
return
user.RolesLst = strings.Fields(user.Roles)
}
} else {
user, err = models.PassLogin(rt.Ctx, f.Username, authPassWord)
ginx.Dangerous(err)
}
if user == nil {
@@ -89,7 +92,7 @@ func (rt *Router) loginPost(c *gin.Context) {
}
func (rt *Router) logoutPost(c *gin.Context) {
logger.Infof("username:%s login from:%s", c.GetString("username"), c.ClientIP())
logger.Infof("username:%s logout from:%s", c.GetString("username"), c.ClientIP())
metadata, err := rt.extractTokenMetadata(c.Request)
if err != nil {
ginx.NewRender(c, http.StatusBadRequest).Message("failed to parse jwt token")
@@ -102,7 +105,18 @@ func (rt *Router) logoutPost(c *gin.Context) {
return
}
ginx.NewRender(c).Message("")
var logoutAddr string
user := c.MustGet("user").(*models.User)
switch user.Belong {
case "oidc":
logoutAddr = rt.Sso.OIDC.GetSsoLogoutAddr()
case "cas":
logoutAddr = rt.Sso.CAS.GetSsoLogoutAddr()
case "oauth2":
logoutAddr = rt.Sso.OAuth2.GetSsoLogoutAddr()
}
ginx.NewRender(c).Data(logoutAddr, nil)
}
type refreshForm struct {
@@ -240,41 +254,23 @@ func (rt *Router) loginCallback(c *gin.Context) {
if user != nil {
if rt.Sso.OIDC.CoverAttributes {
if ret.Nickname != "" {
user.Nickname = ret.Nickname
}
if ret.Email != "" {
user.Email = ret.Email
}
if ret.Phone != "" {
user.Phone = ret.Phone
}
user.UpdateAt = time.Now().Unix()
user.Update(rt.Ctx, "email", "nickname", "phone", "update_at")
updatedFields := user.UpdateSsoFields("oidc", ret.Nickname, ret.Phone, ret.Email)
ginx.Dangerous(user.Update(rt.Ctx, "update_at", updatedFields...))
}
} else {
now := time.Now().Unix()
user = &models.User{
Username: ret.Username,
Password: "******",
Nickname: ret.Nickname,
Phone: ret.Phone,
Email: ret.Email,
Portrait: "",
Roles: strings.Join(rt.Sso.OIDC.DefaultRoles, " "),
RolesLst: rt.Sso.OIDC.DefaultRoles,
Contacts: []byte("{}"),
CreateAt: now,
UpdateAt: now,
CreateBy: "oidc",
UpdateBy: "oidc",
}
user = new(models.User)
user.FullSsoFields("oidc", ret.Username, ret.Nickname, ret.Phone, ret.Email, rt.Sso.OIDC.DefaultRoles)
// create user from oidc
ginx.Dangerous(user.Add(rt.Ctx))
if len(rt.Sso.OIDC.DefaultTeams) > 0 {
for _, gid := range rt.Sso.OIDC.DefaultTeams {
err = models.UserGroupMemberAdd(rt.Ctx, gid, user.Id)
if err != nil {
logger.Errorf("user:%v UserGroupMemberAdd: %s", user, err)
}
}
}
}
// set user login state
@@ -350,38 +346,12 @@ func (rt *Router) loginCallbackCas(c *gin.Context) {
ginx.Dangerous(err)
if user != nil {
if rt.Sso.CAS.CoverAttributes {
if ret.Nickname != "" {
user.Nickname = ret.Nickname
}
if ret.Email != "" {
user.Email = ret.Email
}
if ret.Phone != "" {
user.Phone = ret.Phone
}
user.UpdateAt = time.Now().Unix()
ginx.Dangerous(user.Update(rt.Ctx, "email", "nickname", "phone", "update_at"))
updatedFields := user.UpdateSsoFields("cas", ret.Nickname, ret.Phone, ret.Email)
ginx.Dangerous(user.Update(rt.Ctx, "update_at", updatedFields...))
}
} else {
now := time.Now().Unix()
user = &models.User{
Username: ret.Username,
Password: "******",
Nickname: ret.Nickname,
Portrait: "",
Roles: strings.Join(rt.Sso.CAS.DefaultRoles, " "),
RolesLst: rt.Sso.CAS.DefaultRoles,
Contacts: []byte("{}"),
Phone: ret.Phone,
Email: ret.Email,
CreateAt: now,
UpdateAt: now,
CreateBy: "CAS",
UpdateBy: "CAS",
}
user = new(models.User)
user.FullSsoFields("cas", ret.Username, ret.Nickname, ret.Phone, ret.Email, rt.Sso.CAS.DefaultRoles)
// create user from cas
ginx.Dangerous(user.Add(rt.Ctx))
}
@@ -452,39 +422,12 @@ func (rt *Router) loginCallbackOAuth(c *gin.Context) {
if user != nil {
if rt.Sso.OAuth2.CoverAttributes {
if ret.Nickname != "" {
user.Nickname = ret.Nickname
}
if ret.Email != "" {
user.Email = ret.Email
}
if ret.Phone != "" {
user.Phone = ret.Phone
}
user.UpdateAt = time.Now().Unix()
user.Update(rt.Ctx, "email", "nickname", "phone", "update_at")
updatedFields := user.UpdateSsoFields("oauth2", ret.Nickname, ret.Phone, ret.Email)
ginx.Dangerous(user.Update(rt.Ctx, "update_at", updatedFields...))
}
} else {
now := time.Now().Unix()
user = &models.User{
Username: ret.Username,
Password: "******",
Nickname: ret.Nickname,
Phone: ret.Phone,
Email: ret.Email,
Portrait: "",
Roles: strings.Join(rt.Sso.OAuth2.DefaultRoles, " "),
RolesLst: rt.Sso.OAuth2.DefaultRoles,
Contacts: []byte("{}"),
CreateAt: now,
UpdateAt: now,
CreateBy: "oauth2",
UpdateBy: "oauth2",
}
user = new(models.User)
user.FullSsoFields("oauth2", ret.Username, ret.Nickname, ret.Phone, ret.Email, rt.Sso.OAuth2.DefaultRoles)
// create user from oidc
ginx.Dangerous(user.Add(rt.Ctx))
}

View File

@@ -10,6 +10,7 @@ import (
"github.com/gin-gonic/gin"
"github.com/toolkits/pkg/ginx"
"github.com/toolkits/pkg/str"
)
// Return all, front-end search and paging
@@ -20,6 +21,31 @@ func (rt *Router) alertMuteGetsByBG(c *gin.Context) {
ginx.NewRender(c).Data(lst, err)
}
func (rt *Router) alertMuteGetsByGids(c *gin.Context) {
gids := str.IdsInt64(ginx.QueryStr(c, "gids", ""), ",")
if len(gids) > 0 {
for _, gid := range gids {
rt.bgroCheck(c, gid)
}
} else {
me := c.MustGet("user").(*models.User)
if !me.IsAdmin() {
var err error
gids, err = models.MyBusiGroupIds(rt.Ctx, me.Id)
ginx.Dangerous(err)
if len(gids) == 0 {
ginx.NewRender(c).Data([]int{}, nil)
return
}
}
}
lst, err := models.AlertMuteGetsByBGIds(rt.Ctx, gids)
ginx.NewRender(c).Data(lst, err)
}
func (rt *Router) alertMuteGets(c *gin.Context) {
prods := strings.Fields(ginx.QueryStr(c, "prods", ""))
bgid := ginx.QueryInt64(c, "bgid", -1)
@@ -80,6 +106,14 @@ func (rt *Router) alertMuteDel(c *gin.Context) {
ginx.NewRender(c).Message(models.AlertMuteDel(rt.Ctx, f.Ids))
}
// alertMuteGet returns the alert mute by ID
func (rt *Router) alertMuteGet(c *gin.Context) {
amid := ginx.UrlParamInt64(c, "amid")
am, err := models.AlertMuteGetById(rt.Ctx, amid)
am.DB2FE()
ginx.NewRender(c).Data(am, err)
}
func (rt *Router) alertMutePutByFE(c *gin.Context) {
var f models.AlertMute
ginx.BindJSON(c, &f)

View File

@@ -92,6 +92,10 @@ func (rt *Router) jwtAuth() gin.HandlerFunc {
}
}
func (rt *Router) Auth() gin.HandlerFunc {
return rt.auth()
}
func (rt *Router) auth() gin.HandlerFunc {
if rt.HTTP.ProxyAuth.Enable {
return rt.proxyAuth()
@@ -120,6 +124,10 @@ func (rt *Router) jwtMock() gin.HandlerFunc {
}
}
func (rt *Router) User() gin.HandlerFunc {
return rt.user()
}
func (rt *Router) user() gin.HandlerFunc {
return func(c *gin.Context) {
userid := c.MustGet("userid").(int64)
@@ -135,6 +143,8 @@ func (rt *Router) user() gin.HandlerFunc {
c.Set("user", user)
c.Set("isadmin", user.IsAdmin())
// Update user.LastActiveTime
rt.UserCache.SetLastActiveTime(user.Id, time.Now().Unix())
c.Next()
}
}
@@ -174,6 +184,10 @@ func (rt *Router) bgro() gin.HandlerFunc {
}
// bgrw 逐步要被干掉,不安全
func (rt *Router) Bgrw() gin.HandlerFunc {
return rt.bgrw()
}
func (rt *Router) bgrw() gin.HandlerFunc {
return func(c *gin.Context) {
me := c.MustGet("user").(*models.User)
@@ -233,6 +247,10 @@ func (rt *Router) bgroCheck(c *gin.Context, bgid int64) {
c.Set("busi_group", bg)
}
func (rt *Router) Perm(operation string) gin.HandlerFunc {
return rt.perm(operation)
}
func (rt *Router) perm(operation string) gin.HandlerFunc {
return func(c *gin.Context) {
me := c.MustGet("user").(*models.User)

View File

@@ -163,16 +163,12 @@ func (rt *Router) notifyConfigPut(c *gin.Context) {
var f models.Configs
ginx.BindJSON(c, &f)
userVariableMap := rt.NotifyConfigCache.ConfigCache.Get()
text := tplx.ReplaceMacroVariables(f.Ckey, f.Cval, userVariableMap)
text := tplx.ReplaceTemplateUseText(f.Ckey, f.Cval, userVariableMap)
switch f.Ckey {
case models.SMTP:
var smtp aconf.SMTPConfig
err := toml.Unmarshal([]byte(text), &smtp)
ginx.Dangerous(err)
case models.IBEX:
var ibex aconf.Ibex
err := toml.Unmarshal([]byte(f.Cval), &ibex)
ginx.Dangerous(err)
default:
ginx.Bomb(200, "key %s can not modify", f.Ckey)
}
@@ -221,7 +217,7 @@ func (rt *Router) attemptSendEmail(c *gin.Context) {
ginx.Bomb(200, "config(%v) invalid", f)
}
userVariableMap := rt.NotifyConfigCache.ConfigCache.Get()
text := tplx.ReplaceMacroVariables(f.Ckey, f.Cval, userVariableMap)
text := tplx.ReplaceTemplateUseText(f.Ckey, f.Cval, userVariableMap)
smtp, err := SmtpValidate(text)
ginx.Dangerous(err)

View File

@@ -6,6 +6,7 @@ import (
"fmt"
"html/template"
"strings"
"time"
"github.com/ccfos/nightingale/v6/center/cconf"
"github.com/ccfos/nightingale/v6/models"
@@ -34,10 +35,22 @@ func (rt *Router) notifyTplGets(c *gin.Context) {
}
func (rt *Router) notifyTplUpdateContent(c *gin.Context) {
user := c.MustGet("user").(*models.User)
var f models.NotifyTpl
ginx.BindJSON(c, &f)
ginx.Dangerous(templateValidate(f))
notifyTpl, err := models.NotifyTplGet(rt.Ctx, f.Id)
ginx.Dangerous(err)
if notifyTpl.CreateBy != user.Username && !user.IsAdmin() {
ginx.Bomb(403, "no permission")
}
f.UpdateAt = time.Now().Unix()
f.UpdateBy = user.Username
ginx.NewRender(c).Message(f.UpdateContent(rt.Ctx))
}
@@ -45,8 +58,27 @@ func (rt *Router) notifyTplUpdate(c *gin.Context) {
var f models.NotifyTpl
ginx.BindJSON(c, &f)
ginx.Dangerous(templateValidate(f))
user := c.MustGet("user").(*models.User)
ginx.NewRender(c).Message(f.Update(rt.Ctx))
notifyTpl, err := models.NotifyTplGet(rt.Ctx, f.Id)
ginx.Dangerous(err)
if notifyTpl.CreateBy != user.Username && !user.IsAdmin() {
ginx.Bomb(403, "no permission")
}
// get the count of the same channel and name but different id
count, err := models.Count(models.DB(rt.Ctx).Model(&models.NotifyTpl{}).Where("channel = ? or name = ? and id <> ?", f.Channel, f.Name, f.Id))
ginx.Dangerous(err)
if count != 0 {
ginx.Bomb(200, "Refuse to create duplicate channel or name")
}
notifyTpl.UpdateAt = time.Now().Unix()
notifyTpl.UpdateBy = user.Username
notifyTpl.Name = f.Name
ginx.NewRender(c).Message(notifyTpl.Update(rt.Ctx))
}
func templateValidate(f models.NotifyTpl) error {
@@ -132,7 +164,7 @@ func (rt *Router) notifyTplAdd(c *gin.Context) {
f.Channel = strings.TrimSpace(f.Channel)
ginx.Dangerous(templateValidate(f))
count, err := models.NotifyTplCountByChannel(rt.Ctx, f.Channel)
count, err := models.Count(models.DB(rt.Ctx).Model(&models.NotifyTpl{}).Where("channel = ? or name = ?", f.Channel, f.Name))
ginx.Dangerous(err)
if count != 0 {
ginx.Bomb(200, "Refuse to create duplicate channel(unique)")
@@ -144,5 +176,14 @@ func (rt *Router) notifyTplAdd(c *gin.Context) {
func (rt *Router) notifyTplDel(c *gin.Context) {
f := new(models.NotifyTpl)
id := ginx.UrlParamInt64(c, "id")
user := c.MustGet("user").(*models.User)
notifyTpl, err := models.NotifyTplGet(rt.Ctx, id)
ginx.Dangerous(err)
if notifyTpl.CreateBy != user.Username && !user.IsAdmin() {
ginx.Bomb(403, "no permission")
}
ginx.NewRender(c).Message(f.NotifyTplDelete(rt.Ctx, id))
}

View File

@@ -13,34 +13,39 @@ import (
"time"
pkgprom "github.com/ccfos/nightingale/v6/pkg/prom"
"github.com/ccfos/nightingale/v6/prom"
"github.com/gin-gonic/gin"
"github.com/prometheus/common/model"
"github.com/toolkits/pkg/ginx"
"github.com/toolkits/pkg/logger"
)
type queryFormItem struct {
type QueryFormItem struct {
Start int64 `json:"start" binding:"required"`
End int64 `json:"end" binding:"required"`
Step int64 `json:"step" binding:"required"`
Query string `json:"query" binding:"required"`
}
type batchQueryForm struct {
type BatchQueryForm struct {
DatasourceId int64 `json:"datasource_id" binding:"required"`
Queries []queryFormItem `json:"queries" binding:"required"`
Queries []QueryFormItem `json:"queries" binding:"required"`
}
func (rt *Router) promBatchQueryRange(c *gin.Context) {
var f batchQueryForm
var f BatchQueryForm
ginx.Dangerous(c.BindJSON(&f))
lst, err := PromBatchQueryRange(rt.PromClients, f)
ginx.NewRender(c).Data(lst, err)
}
func PromBatchQueryRange(pc *prom.PromClientMap, f BatchQueryForm) ([]model.Value, error) {
var lst []model.Value
cli := rt.PromClients.GetCli(f.DatasourceId)
cli := pc.GetCli(f.DatasourceId)
if cli == nil {
logger.Warningf("no such datasource id: %d", f.DatasourceId)
ginx.NewRender(c).Data(lst, nil)
return
return lst, fmt.Errorf("no such datasource id: %d", f.DatasourceId)
}
for _, item := range f.Queries {
@@ -51,15 +56,16 @@ func (rt *Router) promBatchQueryRange(c *gin.Context) {
}
resp, _, err := cli.QueryRange(context.Background(), item.Query, r)
ginx.Dangerous(err)
if err != nil {
return lst, err
}
lst = append(lst, resp)
}
ginx.NewRender(c).Data(lst, nil)
return lst, nil
}
type batchInstantForm struct {
type BatchInstantForm struct {
DatasourceId int64 `json:"datasource_id" binding:"required"`
Queries []InstantFormItem `json:"queries" binding:"required"`
}
@@ -70,26 +76,31 @@ type InstantFormItem struct {
}
func (rt *Router) promBatchQueryInstant(c *gin.Context) {
var f batchInstantForm
var f BatchInstantForm
ginx.Dangerous(c.BindJSON(&f))
lst, err := PromBatchQueryInstant(rt.PromClients, f)
ginx.NewRender(c).Data(lst, err)
}
func PromBatchQueryInstant(pc *prom.PromClientMap, f BatchInstantForm) ([]model.Value, error) {
var lst []model.Value
cli := rt.PromClients.GetCli(f.DatasourceId)
cli := pc.GetCli(f.DatasourceId)
if cli == nil {
logger.Warningf("no such datasource id: %d", f.DatasourceId)
ginx.NewRender(c).Data(lst, nil)
return
return lst, fmt.Errorf("no such datasource id: %d", f.DatasourceId)
}
for _, item := range f.Queries {
resp, _, err := cli.Query(context.Background(), item.Query, time.Unix(item.Time, 0))
ginx.Dangerous(err)
if err != nil {
return lst, err
}
lst = append(lst, resp)
}
ginx.NewRender(c).Data(lst, nil)
return lst, nil
}
func (rt *Router) dsProxy(c *gin.Context) {
@@ -167,8 +178,10 @@ func (rt *Router) dsProxy(c *gin.Context) {
modifyResponse := func(r *http.Response) error {
if r.StatusCode == http.StatusUnauthorized {
logger.Warningf("proxy path:%s unauthorized access ", c.Request.URL.Path)
return fmt.Errorf("unauthorized access")
}
return nil
}
@@ -180,6 +193,7 @@ func (rt *Router) dsProxy(c *gin.Context) {
}
proxy.ServeHTTP(c.Writer, c.Request)
}
var (

View File

@@ -11,6 +11,7 @@ import (
"github.com/gin-gonic/gin"
"github.com/toolkits/pkg/ginx"
"github.com/toolkits/pkg/str"
)
func (rt *Router) recordingRuleGets(c *gin.Context) {
@@ -19,6 +20,30 @@ func (rt *Router) recordingRuleGets(c *gin.Context) {
ginx.NewRender(c).Data(ars, err)
}
func (rt *Router) recordingRuleGetsByGids(c *gin.Context) {
gids := str.IdsInt64(ginx.QueryStr(c, "gids", ""), ",")
if len(gids) > 0 {
for _, gid := range gids {
rt.bgroCheck(c, gid)
}
} else {
me := c.MustGet("user").(*models.User)
if !me.IsAdmin() {
var err error
gids, err = models.MyBusiGroupIds(rt.Ctx, me.Id)
ginx.Dangerous(err)
if len(gids) == 0 {
ginx.NewRender(c).Data([]int{}, nil)
return
}
}
}
ars, err := models.RecordingRuleGetsByBGIds(rt.Ctx, gids)
ginx.NewRender(c).Data(ars, err)
}
func (rt *Router) recordingRuleGetsByService(c *gin.Context) {
ars, err := models.RecordingRuleEnabledGets(rt.Ctx)
ginx.NewRender(c).Data(ars, err)

View File

@@ -4,6 +4,7 @@ import (
"net/http"
"strings"
"github.com/ccfos/nightingale/v6/center/cconf"
"github.com/ccfos/nightingale/v6/models"
"github.com/gin-gonic/gin"
@@ -17,6 +18,15 @@ func (rt *Router) rolesGets(c *gin.Context) {
func (rt *Router) permsGets(c *gin.Context) {
user := c.MustGet("user").(*models.User)
if user.IsAdmin() {
var lst []string
for _, ops := range cconf.Operations.Ops {
lst = append(lst, ops.Ops...)
}
ginx.NewRender(c).Data(lst, nil)
return
}
lst, err := models.OperationsOfRole(rt.Ctx, strings.Fields(user.Roles))
ginx.NewRender(c).Data(lst, err)
}

View File

@@ -3,9 +3,11 @@ package router
import (
"net/http"
"github.com/ccfos/nightingale/v6/center/cconf"
"github.com/ccfos/nightingale/v6/models"
"github.com/gin-gonic/gin"
"github.com/toolkits/pkg/ginx"
"github.com/toolkits/pkg/i18n"
)
func (rt *Router) operationOfRole(c *gin.Context) {
@@ -16,6 +18,15 @@ func (rt *Router) operationOfRole(c *gin.Context) {
ginx.Bomb(http.StatusOK, "role not found")
}
if role.Name == "Admin" {
var lst []string
for _, ops := range cconf.Operations.Ops {
lst = append(lst, ops.Ops...)
}
ginx.NewRender(c).Data(lst, nil)
return
}
ops, err := models.OperationsOfRole(rt.Ctx, []string{role.Name})
ginx.NewRender(c).Data(ops, err)
}
@@ -39,5 +50,11 @@ func (rt *Router) roleBindOperation(c *gin.Context) {
}
func (rt *Router) operations(c *gin.Context) {
ginx.NewRender(c).Data(rt.Operations.Ops, nil)
var ops []cconf.Ops
for _, v := range rt.Operations.Ops {
v.Cname = i18n.Sprintf(c.GetHeader("X-Language"), v.Cname)
ops = append(ops, v)
}
ginx.NewRender(c).Data(ops, nil)
}

View File

@@ -2,6 +2,7 @@ package router
import (
"github.com/ccfos/nightingale/v6/models"
"github.com/ccfos/nightingale/v6/pkg/flashduty"
"github.com/ccfos/nightingale/v6/pkg/ormx"
"github.com/gin-gonic/gin"
@@ -29,6 +30,11 @@ func (rt *Router) selfProfilePut(c *gin.Context) {
ginx.BindJSON(c, &f)
user := c.MustGet("user").(*models.User)
oldInfo := models.User{
Username: user.Username,
Phone: user.Phone,
Email: user.Email,
}
user.Nickname = f.Nickname
user.Phone = f.Phone
user.Email = f.Email
@@ -36,6 +42,10 @@ func (rt *Router) selfProfilePut(c *gin.Context) {
user.Contacts = f.Contacts
user.UpdateBy = user.Username
if flashduty.NeedSyncUser(rt.Ctx) {
flashduty.UpdateUser(rt.Ctx, oldInfo, f.Email, f.Phone)
}
ginx.NewRender(c).Message(user.UpdateAllFields(rt.Ctx))
}

View File

@@ -27,7 +27,7 @@ func (rt *Router) serverHeartbeat(c *gin.Context) {
}
func (rt *Router) serversActive(c *gin.Context) {
datasourceId := ginx.QueryInt64(c, "dsid")
datasourceId := ginx.QueryInt64(c, "dsid", 0)
engineName := ginx.QueryStr(c, "engine_name", "")
if engineName != "" {
servers, err := models.AlertingEngineGetsInstances(rt.Ctx, "engine_cluster = ? and clock > ?", engineName, time.Now().Unix()-30)
@@ -35,6 +35,10 @@ func (rt *Router) serversActive(c *gin.Context) {
return
}
if datasourceId == 0 {
ginx.NewRender(c).Message("dsid is required")
return
}
servers, err := models.AlertingEngineGetsInstances(rt.Ctx, "datasource_id = ? and clock > ?", datasourceId, time.Now().Unix()-30)
ginx.NewRender(c).Data(servers, err)
}

View File

@@ -15,6 +15,7 @@ import (
"github.com/prometheus/common/model"
"github.com/toolkits/pkg/ginx"
"github.com/toolkits/pkg/logger"
"github.com/toolkits/pkg/str"
)
type TargetQuery struct {
@@ -42,29 +43,24 @@ func (rt *Router) targetGetsByHostFilter(c *gin.Context) {
}
func (rt *Router) targetGets(c *gin.Context) {
bgid := ginx.QueryInt64(c, "bgid", -1)
bgids := str.IdsInt64(ginx.QueryStr(c, "gids", ""), ",")
query := ginx.QueryStr(c, "query", "")
limit := ginx.QueryInt(c, "limit", 30)
downtime := ginx.QueryInt64(c, "downtime", 0)
dsIds := queryDatasourceIds(c)
var bgids []int64
var err error
if bgid == -1 {
if len(bgids) == 0 {
user := c.MustGet("user").(*models.User)
if !user.IsAdmin() {
// 如果是非 admin 用户,全部对象的情况,找到用户有权限的业务组
userGroupIds, err := models.MyGroupIds(rt.Ctx, user.Id)
ginx.Dangerous(err)
bgids, err = models.BusiGroupIds(rt.Ctx, userGroupIds)
var err error
bgids, err = models.MyBusiGroupIds(rt.Ctx, user.Id)
ginx.Dangerous(err)
// 将未分配业务组的对象也加入到列表中
bgids = append(bgids, 0)
}
} else {
bgids = append(bgids, bgid)
}
total, err := models.TargetTotal(rt.Ctx, bgids, dsIds, query, downtime)
@@ -123,6 +119,27 @@ func (rt *Router) targetGets(c *gin.Context) {
}, nil)
}
func (rt *Router) targetExtendInfoByIdent(c *gin.Context) {
ident := ginx.QueryStr(c, "ident", "")
key := models.WrapExtendIdent(ident)
vals := storage.MGet(context.Background(), rt.Redis, []string{key})
if len(vals) > 0 {
extInfo := string(vals[0])
if extInfo == "null" {
extInfo = ""
}
ginx.NewRender(c).Data(gin.H{
"extend_info": extInfo,
"ident": ident,
}, nil)
return
}
ginx.NewRender(c).Data(gin.H{
"extend_info": "",
"ident": ident,
}, nil)
}
func (rt *Router) targetGetsByService(c *gin.Context) {
lst, err := models.TargetGetsAll(rt.Ctx)
ginx.NewRender(c).Data(lst, err)
@@ -136,173 +153,242 @@ func (rt *Router) targetGetTags(c *gin.Context) {
}
type targetTagsForm struct {
Idents []string `json:"idents" binding:"required"`
Tags []string `json:"tags" binding:"required"`
Idents []string `json:"idents" binding:"required_without=HostIps"`
HostIps []string `json:"host_ips" binding:"required_without=Idents"`
Tags []string `json:"tags" binding:"required"`
}
func (rt *Router) targetBindTagsByFE(c *gin.Context) {
var f targetTagsForm
var err error
var failedResults = make(map[string]string)
ginx.BindJSON(c, &f)
if len(f.Idents) == 0 {
ginx.Bomb(http.StatusBadRequest, "idents empty")
if len(f.Idents) == 0 && len(f.HostIps) == 0 {
ginx.Bomb(http.StatusBadRequest, "idents or host_ips must be provided")
}
// Acquire idents by idents and hostIps
failedResults, f.Idents, err = models.TargetsGetIdentsByIdentsAndHostIps(rt.Ctx, f.Idents, f.HostIps)
if err != nil {
ginx.Bomb(http.StatusBadRequest, err.Error())
}
rt.checkTargetPerm(c, f.Idents)
ginx.NewRender(c).Message(rt.targetBindTags(f))
ginx.NewRender(c).Data(rt.targetBindTags(f, failedResults))
}
func (rt *Router) targetBindTagsByService(c *gin.Context) {
var f targetTagsForm
var err error
var failedResults = make(map[string]string)
ginx.BindJSON(c, &f)
if len(f.Idents) == 0 {
ginx.Bomb(http.StatusBadRequest, "idents empty")
if len(f.Idents) == 0 && len(f.HostIps) == 0 {
ginx.Bomb(http.StatusBadRequest, "idents or host_ips must be provided")
}
// Acquire idents by idents and hostIps
failedResults, f.Idents, err = models.TargetsGetIdentsByIdentsAndHostIps(rt.Ctx, f.Idents, f.HostIps)
if err != nil {
ginx.Bomb(http.StatusBadRequest, err.Error())
}
ginx.NewRender(c).Message(rt.targetBindTags(f))
ginx.NewRender(c).Data(rt.targetBindTags(f, failedResults))
}
func (rt *Router) targetBindTags(f targetTagsForm) error {
for i := 0; i < len(f.Tags); i++ {
arr := strings.Split(f.Tags[i], "=")
func (rt *Router) targetBindTags(f targetTagsForm, failedIdents map[string]string) (map[string]string, error) {
// 1. Check tags
if err := rt.validateTags(f.Tags); err != nil {
return nil, err
}
// 2. Acquire targets by idents
targets, err := models.TargetsGetByIdents(rt.Ctx, f.Idents)
if err != nil {
return nil, err
}
// 3. Add tags to targets
for _, target := range targets {
if err = rt.addTagsToTarget(target, f.Tags); err != nil {
failedIdents[target.Ident] = err.Error()
}
}
return failedIdents, nil
}
func (rt *Router) validateTags(tags []string) error {
for _, tag := range tags {
arr := strings.Split(tag, "=")
if len(arr) != 2 {
return fmt.Errorf("invalid tag(%s)", f.Tags[i])
return fmt.Errorf("invalid tag format: %s (expected format: key=value)", tag)
}
if strings.TrimSpace(arr[0]) == "" || strings.TrimSpace(arr[1]) == "" {
return fmt.Errorf("invalid tag(%s)", f.Tags[i])
key, value := strings.TrimSpace(arr[0]), strings.TrimSpace(arr[1])
if key == "" {
return fmt.Errorf("invalid tag: key is empty in tag %s", tag)
}
if value == "" {
return fmt.Errorf("invalid tag: value is empty in tag %s", tag)
}
if strings.IndexByte(arr[0], '.') != -1 {
return fmt.Errorf("invalid tagkey(%s): cannot contains . ", arr[0])
if strings.Contains(key, ".") {
return fmt.Errorf("invalid tag key: %s (key cannot contain '.')", key)
}
if strings.IndexByte(arr[0], '-') != -1 {
return fmt.Errorf("invalid tagkey(%s): cannot contains -", arr[0])
if strings.Contains(key, "-") {
return fmt.Errorf("invalid tag key: %s (key cannot contain '-')", key)
}
if !model.LabelNameRE.MatchString(arr[0]) {
return fmt.Errorf("invalid tagkey(%s)", arr[0])
if !model.LabelNameRE.MatchString(key) {
return fmt.Errorf("invalid tag key: %s "+
"(key must start with a letter or underscore, followed by letters, digits, or underscores)", key)
}
}
for i := 0; i < len(f.Idents); i++ {
target, err := models.TargetGetByIdent(rt.Ctx, f.Idents[i])
if err != nil {
return err
}
if target == nil {
continue
}
// 不能有同key的标签否则附到时序数据上会产生覆盖让人困惑
for j := 0; j < len(f.Tags); j++ {
tagkey := strings.Split(f.Tags[j], "=")[0]
tagkeyPrefix := tagkey + "="
if strings.HasPrefix(target.Tags, tagkeyPrefix) {
return fmt.Errorf("duplicate tagkey(%s)", tagkey)
}
}
err = target.AddTags(rt.Ctx, f.Tags)
if err != nil {
return err
}
}
return nil
}
func (rt *Router) addTagsToTarget(target *models.Target, tags []string) error {
for _, tag := range tags {
tagKey := strings.Split(tag, "=")[0]
if strings.Contains(target.Tags, tagKey+"=") {
return fmt.Errorf("duplicate tagkey(%s)", tagKey)
}
}
return target.AddTags(rt.Ctx, tags)
}
func (rt *Router) targetUnbindTagsByFE(c *gin.Context) {
var f targetTagsForm
var err error
var failedResults = make(map[string]string)
ginx.BindJSON(c, &f)
if len(f.Idents) == 0 {
ginx.Bomb(http.StatusBadRequest, "idents empty")
if len(f.Idents) == 0 && len(f.HostIps) == 0 {
ginx.Bomb(http.StatusBadRequest, "idents or host_ips must be provided")
}
// Acquire idents by idents and hostIps
failedResults, f.Idents, err = models.TargetsGetIdentsByIdentsAndHostIps(rt.Ctx, f.Idents, f.HostIps)
if err != nil {
ginx.Bomb(http.StatusBadRequest, err.Error())
}
rt.checkTargetPerm(c, f.Idents)
ginx.NewRender(c).Message(rt.targetUnbindTags(f))
ginx.NewRender(c).Data(rt.targetUnbindTags(f, failedResults))
}
func (rt *Router) targetUnbindTagsByService(c *gin.Context) {
var f targetTagsForm
var err error
var failedResults = make(map[string]string)
ginx.BindJSON(c, &f)
if len(f.Idents) == 0 {
ginx.Bomb(http.StatusBadRequest, "idents empty")
if len(f.Idents) == 0 && len(f.HostIps) == 0 {
ginx.Bomb(http.StatusBadRequest, "idents or host_ips must be provided")
}
// Acquire idents by idents and hostIps
failedResults, f.Idents, err = models.TargetsGetIdentsByIdentsAndHostIps(rt.Ctx, f.Idents, f.HostIps)
if err != nil {
ginx.Bomb(http.StatusBadRequest, err.Error())
}
ginx.NewRender(c).Message(rt.targetUnbindTags(f))
ginx.NewRender(c).Data(rt.targetUnbindTags(f, failedResults))
}
func (rt *Router) targetUnbindTags(f targetTagsForm) error {
for i := 0; i < len(f.Idents); i++ {
target, err := models.TargetGetByIdent(rt.Ctx, f.Idents[i])
if err != nil {
return err
}
if target == nil {
continue
}
func (rt *Router) targetUnbindTags(f targetTagsForm, failedIdents map[string]string) (map[string]string, error) {
// 1. Acquire targets by idents
targets, err := models.TargetsGetByIdents(rt.Ctx, f.Idents)
if err != nil {
return nil, err
}
// 2. Remove tags from targets
for _, target := range targets {
err = target.DelTags(rt.Ctx, f.Tags)
if err != nil {
return err
failedIdents[target.Ident] = err.Error()
continue
}
}
return nil
return failedIdents, nil
}
type targetNoteForm struct {
Idents []string `json:"idents" binding:"required"`
Note string `json:"note"`
Idents []string `json:"idents" binding:"required_without=HostIps"`
HostIps []string `json:"host_ips" binding:"required_without=Idents"`
Note string `json:"note"`
}
func (rt *Router) targetUpdateNote(c *gin.Context) {
var f targetNoteForm
var err error
var failedResults = make(map[string]string)
ginx.BindJSON(c, &f)
if len(f.Idents) == 0 {
ginx.Bomb(http.StatusBadRequest, "idents empty")
if len(f.Idents) == 0 && len(f.HostIps) == 0 {
ginx.Bomb(http.StatusBadRequest, "idents or host_ips must be provided")
}
// Acquire idents by idents and hostIps
failedResults, f.Idents, err = models.TargetsGetIdentsByIdentsAndHostIps(rt.Ctx, f.Idents, f.HostIps)
if err != nil {
ginx.Bomb(http.StatusBadRequest, err.Error())
}
rt.checkTargetPerm(c, f.Idents)
ginx.NewRender(c).Message(models.TargetUpdateNote(rt.Ctx, f.Idents, f.Note))
ginx.NewRender(c).Data(failedResults, models.TargetUpdateNote(rt.Ctx, f.Idents, f.Note))
}
func (rt *Router) targetUpdateNoteByService(c *gin.Context) {
var f targetNoteForm
var err error
var failedResults = make(map[string]string)
ginx.BindJSON(c, &f)
if len(f.Idents) == 0 {
ginx.Bomb(http.StatusBadRequest, "idents empty")
if len(f.Idents) == 0 && len(f.HostIps) == 0 {
ginx.Bomb(http.StatusBadRequest, "idents or host_ips must be provided")
}
ginx.NewRender(c).Message(models.TargetUpdateNote(rt.Ctx, f.Idents, f.Note))
// Acquire idents by idents and hostIps
failedResults, f.Idents, err = models.TargetsGetIdentsByIdentsAndHostIps(rt.Ctx, f.Idents, f.HostIps)
if err != nil {
ginx.Bomb(http.StatusBadRequest, err.Error())
}
ginx.NewRender(c).Data(failedResults, models.TargetUpdateNote(rt.Ctx, f.Idents, f.Note))
}
type targetBgidForm struct {
Idents []string `json:"idents" binding:"required"`
Bgid int64 `json:"bgid"`
Idents []string `json:"idents" binding:"required_without=HostIps"`
HostIps []string `json:"host_ips" binding:"required_without=Idents"`
Bgid int64 `json:"bgid"`
}
func (rt *Router) targetUpdateBgid(c *gin.Context) {
var f targetBgidForm
var err error
var failedResults = make(map[string]string)
ginx.BindJSON(c, &f)
if len(f.Idents) == 0 {
ginx.Bomb(http.StatusBadRequest, "idents empty")
if len(f.Idents) == 0 && len(f.HostIps) == 0 {
ginx.Bomb(http.StatusBadRequest, "idents or host_ips must be provided")
}
// Acquire idents by idents and hostIps
failedResults, f.Idents, err = models.TargetsGetIdentsByIdentsAndHostIps(rt.Ctx, f.Idents, f.HostIps)
if err != nil {
ginx.Bomb(http.StatusBadRequest, err.Error())
}
user := c.MustGet("user").(*models.User)
if user.IsAdmin() {
ginx.NewRender(c).Message(models.TargetUpdateBgid(rt.Ctx, f.Idents, f.Bgid, false))
ginx.NewRender(c).Data(failedResults, models.TargetUpdateBgid(rt.Ctx, f.Idents, f.Bgid, false))
return
}
@@ -315,7 +401,11 @@ func (rt *Router) targetUpdateBgid(c *gin.Context) {
// 机器里边存在未归组的登录用户就需要是admin
if len(orphans) > 0 && !user.IsAdmin() {
ginx.Bomb(http.StatusForbidden, "No permission. Only admin can assign BG")
can, err := user.CheckPerm(rt.Ctx, "/targets/bind")
ginx.Dangerous(err)
if !can {
ginx.Bomb(http.StatusForbidden, "No permission. Only admin can assign BG")
}
}
reBelongs, err := models.IdentsFilter(rt.Ctx, f.Idents, "group_id > ?", 0)
@@ -340,24 +430,69 @@ func (rt *Router) targetUpdateBgid(c *gin.Context) {
ginx.Bomb(http.StatusBadRequest, "invalid bgid")
}
ginx.NewRender(c).Message(models.TargetUpdateBgid(rt.Ctx, f.Idents, f.Bgid, false))
ginx.NewRender(c).Data(failedResults, models.TargetUpdateBgid(rt.Ctx, f.Idents, f.Bgid, false))
}
func (rt *Router) targetUpdateBgidByService(c *gin.Context) {
var f targetBgidForm
var err error
var failedResults = make(map[string]string)
ginx.BindJSON(c, &f)
if len(f.Idents) == 0 && len(f.HostIps) == 0 {
ginx.Bomb(http.StatusBadRequest, "idents or host_ips must be provided")
}
// Acquire idents by idents and hostIps
failedResults, f.Idents, err = models.TargetsGetIdentsByIdentsAndHostIps(rt.Ctx, f.Idents, f.HostIps)
if err != nil {
ginx.Bomb(http.StatusBadRequest, err.Error())
}
ginx.NewRender(c).Data(failedResults, models.TargetUpdateBgid(rt.Ctx, f.Idents, f.Bgid, false))
}
type identsForm struct {
Idents []string `json:"idents" binding:"required"`
Idents []string `json:"idents" binding:"required_without=HostIps"`
HostIps []string `json:"host_ips" binding:"required_without=Idents"`
}
func (rt *Router) targetDel(c *gin.Context) {
var f identsForm
var err error
var failedResults = make(map[string]string)
ginx.BindJSON(c, &f)
if len(f.Idents) == 0 {
ginx.Bomb(http.StatusBadRequest, "idents empty")
if len(f.Idents) == 0 && len(f.HostIps) == 0 {
ginx.Bomb(http.StatusBadRequest, "idents or host_ips must be provided")
}
rt.checkTargetPerm(c, f.Idents)
// Acquire idents by idents and hostIps
failedResults, f.Idents, err = models.TargetsGetIdentsByIdentsAndHostIps(rt.Ctx, f.Idents, f.HostIps)
if err != nil {
ginx.Bomb(http.StatusBadRequest, err.Error())
}
ginx.NewRender(c).Message(models.TargetDel(rt.Ctx, f.Idents))
ginx.NewRender(c).Data(failedResults, models.TargetDel(rt.Ctx, f.Idents))
}
func (rt *Router) targetDelByService(c *gin.Context) {
var f identsForm
var err error
var failedResults = make(map[string]string)
ginx.BindJSON(c, &f)
if len(f.Idents) == 0 && len(f.HostIps) == 0 {
ginx.Bomb(http.StatusBadRequest, "idents or host_ips must be provided")
}
// Acquire idents by idents and hostIps
failedResults, f.Idents, err = models.TargetsGetIdentsByIdentsAndHostIps(rt.Ctx, f.Idents, f.HostIps)
if err != nil {
ginx.Bomb(http.StatusBadRequest, err.Error())
}
ginx.NewRender(c).Data(failedResults, models.TargetDel(rt.Ctx, f.Idents))
}
func (rt *Router) checkTargetPerm(c *gin.Context, idents []string) {
@@ -369,3 +504,21 @@ func (rt *Router) checkTargetPerm(c *gin.Context, idents []string) {
ginx.Bomb(http.StatusForbidden, "No permission to operate the targets: %s", strings.Join(nopri, ", "))
}
}
func (rt *Router) targetsOfAlertRule(c *gin.Context) {
engineName := ginx.QueryStr(c, "engine_name", "")
m, err := models.GetTargetsOfHostAlertRule(rt.Ctx, engineName)
ret := make(map[string]map[int64][]string)
for en, v := range m {
if en != engineName {
continue
}
ret[en] = make(map[int64][]string)
for rid, idents := range v {
ret[en][rid] = idents
}
}
ginx.NewRender(c).Data(ret, err)
}

View File

@@ -1,13 +1,9 @@
package router
import (
"fmt"
"net/http"
"net/http/httputil"
"net/url"
"strings"
"time"
"github.com/ccfos/nightingale/v6/alert/sender"
"github.com/ccfos/nightingale/v6/models"
"github.com/gin-gonic/gin"
@@ -30,10 +26,55 @@ func (rt *Router) taskGets(c *gin.Context) {
beginTime := time.Now().Unix() - days*24*3600
total, err := models.TaskRecordTotal(rt.Ctx, bgid, beginTime, creator, query)
total, err := models.TaskRecordTotal(rt.Ctx, []int64{bgid}, beginTime, creator, query)
ginx.Dangerous(err)
list, err := models.TaskRecordGets(rt.Ctx, bgid, beginTime, creator, query, limit, ginx.Offset(c, limit))
list, err := models.TaskRecordGets(rt.Ctx, []int64{bgid}, beginTime, creator, query, limit, ginx.Offset(c, limit))
ginx.Dangerous(err)
ginx.NewRender(c).Data(gin.H{
"total": total,
"list": list,
}, nil)
}
func (rt *Router) taskGetsByGids(c *gin.Context) {
gids := str.IdsInt64(ginx.QueryStr(c, "gids", ""), ",")
if len(gids) > 0 {
for _, gid := range gids {
rt.bgroCheck(c, gid)
}
} else {
me := c.MustGet("user").(*models.User)
if !me.IsAdmin() {
var err error
gids, err = models.MyBusiGroupIds(rt.Ctx, me.Id)
ginx.Dangerous(err)
if len(gids) == 0 {
ginx.NewRender(c).Data([]int{}, nil)
return
}
}
}
mine := ginx.QueryBool(c, "mine", false)
days := ginx.QueryInt64(c, "days", 7)
limit := ginx.QueryInt(c, "limit", 20)
query := ginx.QueryStr(c, "query", "")
user := c.MustGet("user").(*models.User)
creator := ""
if mine {
creator = user.Username
}
beginTime := time.Now().Unix() - days*24*3600
total, err := models.TaskRecordTotal(rt.Ctx, gids, beginTime, creator, query)
ginx.Dangerous(err)
list, err := models.TaskRecordGets(rt.Ctx, gids, beginTime, creator, query, limit, ginx.Offset(c, limit))
ginx.Dangerous(err)
ginx.NewRender(c).Data(gin.H{
@@ -56,71 +97,6 @@ type taskForm struct {
Hosts []string `json:"hosts" binding:"required"`
}
func (f *taskForm) Verify() error {
if f.Batch < 0 {
return fmt.Errorf("arg(batch) should be nonnegative")
}
if f.Tolerance < 0 {
return fmt.Errorf("arg(tolerance) should be nonnegative")
}
if f.Timeout < 0 {
return fmt.Errorf("arg(timeout) should be nonnegative")
}
if f.Timeout > 3600*24 {
return fmt.Errorf("arg(timeout) longer than one day")
}
if f.Timeout == 0 {
f.Timeout = 30
}
f.Pause = strings.Replace(f.Pause, "", ",", -1)
f.Pause = strings.Replace(f.Pause, " ", "", -1)
f.Args = strings.Replace(f.Args, "", ",", -1)
if f.Title == "" {
return fmt.Errorf("arg(title) is required")
}
if str.Dangerous(f.Title) {
return fmt.Errorf("arg(title) is dangerous")
}
if f.Script == "" {
return fmt.Errorf("arg(script) is required")
}
f.Script = strings.Replace(f.Script, "\r\n", "\n", -1)
if str.Dangerous(f.Args) {
return fmt.Errorf("arg(args) is dangerous")
}
if str.Dangerous(f.Pause) {
return fmt.Errorf("arg(pause) is dangerous")
}
if len(f.Hosts) == 0 {
return fmt.Errorf("arg(hosts) empty")
}
if f.Action != "start" && f.Action != "pause" {
return fmt.Errorf("arg(action) invalid")
}
return nil
}
func (f *taskForm) HandleFH(fh string) {
i := strings.Index(f.Title, " FH: ")
if i > 0 {
f.Title = f.Title[:i]
}
f.Title = f.Title + " FH: " + fh
}
func (rt *Router) taskRecordAdd(c *gin.Context) {
var f *models.TaskRecord
ginx.BindJSON(c, &f)
@@ -128,7 +104,7 @@ func (rt *Router) taskRecordAdd(c *gin.Context) {
}
func (rt *Router) taskAdd(c *gin.Context) {
var f taskForm
var f models.TaskForm
ginx.BindJSON(c, &f)
bgid := ginx.UrlParamInt64(c, "id")
@@ -144,7 +120,7 @@ func (rt *Router) taskAdd(c *gin.Context) {
rt.checkTargetPerm(c, f.Hosts)
// call ibex
taskId, err := TaskCreate(f, rt.NotifyConfigCache.GetIbex())
taskId, err := sender.TaskAdd(f, user.Username, rt.Ctx.IsCenter)
ginx.Dangerous(err)
if taskId <= 0 {
@@ -153,65 +129,20 @@ func (rt *Router) taskAdd(c *gin.Context) {
// write db
record := models.TaskRecord{
Id: taskId,
GroupId: bgid,
IbexAddress: rt.NotifyConfigCache.GetIbex().Address,
IbexAuthUser: rt.NotifyConfigCache.GetIbex().BasicAuthUser,
IbexAuthPass: rt.NotifyConfigCache.GetIbex().BasicAuthPass,
Title: f.Title,
Account: f.Account,
Batch: f.Batch,
Tolerance: f.Tolerance,
Timeout: f.Timeout,
Pause: f.Pause,
Script: f.Script,
Args: f.Args,
CreateAt: time.Now().Unix(),
CreateBy: f.Creator,
Id: taskId,
GroupId: bgid,
Title: f.Title,
Account: f.Account,
Batch: f.Batch,
Tolerance: f.Tolerance,
Timeout: f.Timeout,
Pause: f.Pause,
Script: f.Script,
Args: f.Args,
CreateAt: time.Now().Unix(),
CreateBy: f.Creator,
}
err = record.Add(rt.Ctx)
ginx.NewRender(c).Data(taskId, err)
}
func (rt *Router) taskProxy(c *gin.Context) {
target, err := url.Parse(rt.NotifyConfigCache.GetIbex().Address)
if err != nil {
ginx.NewRender(c).Message("invalid ibex address: %s", rt.NotifyConfigCache.GetIbex().Address)
return
}
director := func(req *http.Request) {
req.URL.Scheme = target.Scheme
req.URL.Host = target.Host
// fe request e.g. /api/n9e/busi-group/:id/task/*url
index := strings.Index(req.URL.Path, "/task/")
if index == -1 {
panic("url path invalid")
}
req.URL.Path = "/ibex/v1" + req.URL.Path[index:]
if target.RawQuery == "" || req.URL.RawQuery == "" {
req.URL.RawQuery = target.RawQuery + req.URL.RawQuery
} else {
req.URL.RawQuery = target.RawQuery + "&" + req.URL.RawQuery
}
if rt.NotifyConfigCache.GetIbex().BasicAuthUser != "" {
req.SetBasicAuth(rt.NotifyConfigCache.GetIbex().BasicAuthUser, rt.NotifyConfigCache.GetIbex().BasicAuthPass)
}
}
errFunc := func(w http.ResponseWriter, r *http.Request, err error) {
ginx.NewRender(c, http.StatusBadGateway).Message(err)
}
proxy := &httputil.ReverseProxy{
Director: director,
ErrorHandler: errFunc,
}
proxy.ServeHTTP(c.Writer, c.Request)
}

View File

@@ -18,10 +18,45 @@ func (rt *Router) taskTplGets(c *gin.Context) {
limit := ginx.QueryInt(c, "limit", 20)
groupId := ginx.UrlParamInt64(c, "id")
total, err := models.TaskTplTotal(rt.Ctx, groupId, query)
total, err := models.TaskTplTotal(rt.Ctx, []int64{groupId}, query)
ginx.Dangerous(err)
list, err := models.TaskTplGets(rt.Ctx, groupId, query, limit, ginx.Offset(c, limit))
list, err := models.TaskTplGets(rt.Ctx, []int64{groupId}, query, limit, ginx.Offset(c, limit))
ginx.Dangerous(err)
ginx.NewRender(c).Data(gin.H{
"total": total,
"list": list,
}, nil)
}
func (rt *Router) taskTplGetsByGids(c *gin.Context) {
query := ginx.QueryStr(c, "query", "")
limit := ginx.QueryInt(c, "limit", 20)
gids := str.IdsInt64(ginx.QueryStr(c, "gids", ""), ",")
if len(gids) > 0 {
for _, gid := range gids {
rt.bgroCheck(c, gid)
}
} else {
me := c.MustGet("user").(*models.User)
if !me.IsAdmin() {
var err error
gids, err = models.MyBusiGroupIds(rt.Ctx, me.Id)
ginx.Dangerous(err)
if len(gids) == 0 {
ginx.NewRender(c).Data([]int{}, nil)
return
}
}
}
total, err := models.TaskTplTotal(rt.Ctx, gids, query)
ginx.Dangerous(err)
list, err := models.TaskTplGets(rt.Ctx, gids, query, limit, ginx.Offset(c, limit))
ginx.Dangerous(err)
ginx.NewRender(c).Data(gin.H{
@@ -61,6 +96,14 @@ func (rt *Router) taskTplGetByService(c *gin.Context) {
ginx.NewRender(c).Data(tpl, err)
}
func (rt *Router) taskTplGetsByService(c *gin.Context) {
ginx.NewRender(c).Data(models.TaskTplGetAll(rt.Ctx))
}
func (rt *Router) taskTplStatistics(c *gin.Context) {
ginx.NewRender(c).Data(models.TaskTplStatistics(rt.Ctx))
}
type taskTplForm struct {
Title string `json:"title" binding:"required"`
Batch int `json:"batch"`

View File

@@ -78,7 +78,7 @@ func (rt *Router) QueryData(c *gin.Context) {
var f models.QueryParam
ginx.BindJSON(c, &f)
var resp []*models.DataResp
var resp []models.DataResp
var err error
tdClient := rt.TdendgineClients.GetCli(f.DatasourceId)
for _, q := range f.Querys {

View File

@@ -5,25 +5,51 @@ import (
"strings"
"github.com/ccfos/nightingale/v6/models"
"github.com/ccfos/nightingale/v6/pkg/flashduty"
"github.com/ccfos/nightingale/v6/pkg/ormx"
"github.com/gin-gonic/gin"
"github.com/toolkits/pkg/ginx"
)
func (rt *Router) userBusiGroupsGets(c *gin.Context) {
userid := ginx.QueryInt64(c, "userid", 0)
username := ginx.QueryStr(c, "username", "")
if userid == 0 && username == "" {
ginx.Bomb(http.StatusBadRequest, "userid or username required")
}
var user *models.User
var err error
if userid > 0 {
user, err = models.UserGetById(rt.Ctx, userid)
} else {
user, err = models.UserGetByUsername(rt.Ctx, username)
}
ginx.Dangerous(err)
groups, err := user.BusiGroups(rt.Ctx, 10000, "")
ginx.NewRender(c).Data(groups, err)
}
func (rt *Router) userFindAll(c *gin.Context) {
list, err := models.UserGetAll(rt.Ctx)
ginx.NewRender(c).Data(list, err)
}
func (rt *Router) userGets(c *gin.Context) {
stime, etime := getTimeRange(c)
limit := ginx.QueryInt(c, "limit", 20)
query := ginx.QueryStr(c, "query", "")
order := ginx.QueryStr(c, "order", "username")
desc := ginx.QueryBool(c, "desc", false)
total, err := models.UserTotal(rt.Ctx, query)
total, err := models.UserTotal(rt.Ctx, query, stime, etime)
ginx.Dangerous(err)
list, err := models.UserGets(rt.Ctx, query, limit, ginx.Offset(c, limit))
list, err := models.UserGets(rt.Ctx, query, limit, ginx.Offset(c, limit), stime, etime, order, desc)
ginx.Dangerous(err)
user := c.MustGet("user").(*models.User)
@@ -57,7 +83,7 @@ func (rt *Router) userAddPost(c *gin.Context) {
ginx.Bomb(http.StatusBadRequest, "roles empty")
}
user := c.MustGet("user").(*models.User)
username := Username(c)
u := models.User{
Username: f.Username,
@@ -68,10 +94,11 @@ func (rt *Router) userAddPost(c *gin.Context) {
Portrait: f.Portrait,
Roles: strings.Join(f.Roles, " "),
Contacts: f.Contacts,
CreateBy: user.Username,
UpdateBy: user.Username,
CreateBy: username,
UpdateBy: username,
}
ginx.Dangerous(u.Verify())
ginx.NewRender(c).Message(u.Add(rt.Ctx))
}
@@ -88,6 +115,30 @@ type userProfileForm struct {
Contacts ormx.JSONObj `json:"contacts"`
}
func (rt *Router) userProfilePutByService(c *gin.Context) {
var f models.User
ginx.BindJSON(c, &f)
if len(f.RolesLst) == 0 {
ginx.Bomb(http.StatusBadRequest, "roles empty")
}
password, err := models.CryptoPass(rt.Ctx, f.Password)
ginx.Dangerous(err)
target := User(rt.Ctx, ginx.UrlParamInt64(c, "id"))
target.Nickname = f.Nickname
target.Password = password
target.Phone = f.Phone
target.Email = f.Email
target.Portrait = f.Portrait
target.Roles = strings.Join(f.RolesLst, " ")
target.Contacts = f.Contacts
target.UpdateBy = Username(c)
ginx.NewRender(c).Message(target.UpdateAllFields(rt.Ctx))
}
func (rt *Router) userProfilePut(c *gin.Context) {
var f userProfileForm
ginx.BindJSON(c, &f)
@@ -97,6 +148,11 @@ func (rt *Router) userProfilePut(c *gin.Context) {
}
target := User(rt.Ctx, ginx.UrlParamInt64(c, "id"))
oldInfo := models.User{
Username: target.Username,
Phone: target.Phone,
Email: target.Email,
}
target.Nickname = f.Nickname
target.Phone = f.Phone
target.Email = f.Email
@@ -104,6 +160,10 @@ func (rt *Router) userProfilePut(c *gin.Context) {
target.Contacts = f.Contacts
target.UpdateBy = c.MustGet("username").(string)
if flashduty.NeedSyncUser(rt.Ctx) {
flashduty.UpdateUser(rt.Ctx, oldInfo, f.Email, f.Phone)
}
ginx.NewRender(c).Message(target.UpdateAllFields(rt.Ctx))
}

View File

@@ -5,10 +5,12 @@ import (
"time"
"github.com/ccfos/nightingale/v6/models"
"github.com/ccfos/nightingale/v6/pkg/flashduty"
"github.com/gin-gonic/gin"
"github.com/toolkits/pkg/ginx"
"github.com/toolkits/pkg/logger"
"github.com/toolkits/pkg/str"
)
func (rt *Router) checkBusiGroupPerm(c *gin.Context) {
@@ -30,8 +32,36 @@ func (rt *Router) userGroupGets(c *gin.Context) {
}
func (rt *Router) userGroupGetsByService(c *gin.Context) {
lst, err := models.UserGroupGetAll(rt.Ctx)
ginx.NewRender(c).Data(lst, err)
ids := str.IdsInt64(ginx.QueryStr(c, "ids", ""))
if len(ids) == 0 {
lst, err := models.UserGroupGetAll(rt.Ctx)
ginx.Dangerous(err)
for i := 0; i < len(lst); i++ {
ids, err := models.MemberIds(rt.Ctx, lst[i].Id)
ginx.Dangerous(err)
lst[i].Users, err = models.UserGetsByIds(rt.Ctx, ids)
ginx.Dangerous(err)
}
ginx.NewRender(c).Data(lst, err)
return
}
lst := make([]models.UserGroup, 0)
for _, id := range ids {
ug := UserGroup(rt.Ctx, id)
ids, err := models.MemberIds(rt.Ctx, ug.Id)
ginx.Dangerous(err)
ug.Users, err = models.UserGetsByIds(rt.Ctx, ids)
ginx.Dangerous(err)
lst = append(lst, *ug)
}
ginx.NewRender(c).Data(lst, nil)
}
// user group member get by service
@@ -41,8 +71,9 @@ func (rt *Router) userGroupMemberGetsByService(c *gin.Context) {
}
type userGroupForm struct {
Name string `json:"name" binding:"required"`
Note string `json:"note"`
Name string `json:"name" binding:"required"`
Note string `json:"note"`
IsSyncToFlashDuty bool `json:"is_sync_to_flashduty"`
}
func (rt *Router) userGroupAdd(c *gin.Context) {
@@ -59,12 +90,19 @@ func (rt *Router) userGroupAdd(c *gin.Context) {
}
err := ug.Add(rt.Ctx)
if err == nil {
// Even failure is not a big deal
models.UserGroupMemberAdd(rt.Ctx, ug.Id, me.Id)
}
ginx.Dangerous(err)
// Even failure is not a big deal
models.UserGroupMemberAdd(rt.Ctx, ug.Id, me.Id)
if f.IsSyncToFlashDuty || flashduty.NeedSyncTeam(rt.Ctx) {
ugs, err := flashduty.NewUserGroupSyncer(rt.Ctx, &ug)
ginx.Dangerous(err)
err = ugs.SyncUGAdd()
ginx.Dangerous(err)
}
ginx.NewRender(c).Data(ug.Id, err)
}
func (rt *Router) userGroupPut(c *gin.Context) {
@@ -73,6 +111,7 @@ func (rt *Router) userGroupPut(c *gin.Context) {
me := c.MustGet("user").(*models.User)
ug := c.MustGet("user_group").(*models.UserGroup)
oldUGName := ug.Name
if ug.Name != f.Name {
// name changed, check duplication
@@ -88,8 +127,14 @@ func (rt *Router) userGroupPut(c *gin.Context) {
ug.Note = f.Note
ug.UpdateBy = me.Username
ug.UpdateAt = time.Now().Unix()
if f.IsSyncToFlashDuty || flashduty.NeedSyncTeam(rt.Ctx) {
ugs, err := flashduty.NewUserGroupSyncer(rt.Ctx, ug)
ginx.Dangerous(err)
err = ugs.SyncUGPut(oldUGName)
ginx.Dangerous(err)
}
ginx.NewRender(c).Message(ug.Update(rt.Ctx, "Name", "Note", "UpdateAt", "UpdateBy"))
}
// Return all members, front-end search and paging
@@ -109,8 +154,16 @@ func (rt *Router) userGroupGet(c *gin.Context) {
}
func (rt *Router) userGroupDel(c *gin.Context) {
isSyncToFlashDuty := ginx.QueryBool(c, "is_sync_to_flashduty", false)
ug := c.MustGet("user_group").(*models.UserGroup)
if isSyncToFlashDuty || flashduty.NeedSyncTeam(rt.Ctx) {
ugs, err := flashduty.NewUserGroupSyncer(rt.Ctx, ug)
ginx.Dangerous(err)
err = ugs.SyncUGDel(ug.Name)
ginx.Dangerous(err)
}
ginx.NewRender(c).Message(ug.Del(rt.Ctx))
}
func (rt *Router) userGroupMemberAdd(c *gin.Context) {
@@ -122,13 +175,21 @@ func (rt *Router) userGroupMemberAdd(c *gin.Context) {
ug := c.MustGet("user_group").(*models.UserGroup)
err := ug.AddMembers(rt.Ctx, f.Ids)
ginx.Dangerous(err)
if err == nil {
ug.UpdateAt = time.Now().Unix()
ug.UpdateBy = me.Username
ug.Update(rt.Ctx, "UpdateAt", "UpdateBy")
}
if f.IsSyncToFlashDuty || flashduty.NeedSyncTeam(rt.Ctx) {
ugs, err := flashduty.NewUserGroupSyncer(rt.Ctx, ug)
ginx.Dangerous(err)
err = ugs.SyncMembersAdd()
ginx.Dangerous(err)
}
ginx.NewRender(c).Message(err)
}
func (rt *Router) userGroupMemberDel(c *gin.Context) {
@@ -145,6 +206,12 @@ func (rt *Router) userGroupMemberDel(c *gin.Context) {
ug.UpdateBy = me.Username
ug.Update(rt.Ctx, "UpdateAt", "UpdateBy")
}
if f.IsSyncToFlashDuty || flashduty.NeedSyncTeam(rt.Ctx) {
ugs, err := flashduty.NewUserGroupSyncer(rt.Ctx, ug)
ginx.Dangerous(err)
err = ugs.SyncMembersDel()
ginx.Dangerous(err)
}
ginx.NewRender(c).Message(err)
}

View File

@@ -34,10 +34,15 @@ func (rt *Router) userVariableConfigPut(context *gin.Context) {
ginx.BindJSON(context, &f)
f.Id = ginx.UrlParamInt64(context, "id")
f.Ckey = strings.TrimSpace(f.Ckey)
//update external config. needs to make sure not plaintext for an encrypted type config
//updating with struct it will update all fields ("ckey", "cval", "note", "encrypted", "update_by", "update_at"), not non-zero fields.
f.UpdateBy = context.MustGet("username").(string)
f.UpdateAt = time.Now().Unix()
user := context.MustGet("user").(*models.User)
if !user.IsAdmin() && f.CreateBy != user.Username {
// only admin or creator can update
ginx.Bomb(403, "no permission")
}
ginx.NewRender(context).Message(models.ConfigsUserVariableUpdate(rt.Ctx, f))
}
@@ -46,6 +51,12 @@ func (rt *Router) userVariableConfigDel(context *gin.Context) {
configs, err := models.ConfigGet(rt.Ctx, id)
ginx.Dangerous(err)
user := context.MustGet("user").(*models.User)
if !user.IsAdmin() && configs.CreateBy != user.Username {
// only admin or creator can delete
ginx.Bomb(403, "no permission")
}
if configs != nil && configs.External == models.ConfigExternal {
ginx.NewRender(context).Message(models.ConfigsDel(rt.Ctx, []int64{id}))
} else {

View File

@@ -2,24 +2,30 @@ package sso
import (
"log"
"time"
"github.com/BurntSushi/toml"
"github.com/ccfos/nightingale/v6/center/cconf"
"github.com/ccfos/nightingale/v6/memsto"
"github.com/ccfos/nightingale/v6/models"
"github.com/ccfos/nightingale/v6/pkg/cas"
"github.com/ccfos/nightingale/v6/pkg/ctx"
"github.com/ccfos/nightingale/v6/pkg/ldapx"
"github.com/ccfos/nightingale/v6/pkg/oauth2x"
"github.com/ccfos/nightingale/v6/pkg/oidcx"
"github.com/ccfos/nightingale/v6/pkg/tplx"
"github.com/BurntSushi/toml"
"github.com/toolkits/pkg/logger"
)
type SsoClient struct {
OIDC *oidcx.SsoClient
LDAP *ldapx.SsoClient
CAS *cas.SsoClient
OAuth2 *oauth2x.SsoClient
OIDC *oidcx.SsoClient
LDAP *ldapx.SsoClient
CAS *cas.SsoClient
OAuth2 *oauth2x.SsoClient
LastUpdateTime int64
configCache *memsto.ConfigCache
configLastUpdateTime int64
}
const LDAP = `
@@ -29,15 +35,21 @@ Port = 389
BaseDn = 'dc=example,dc=org'
BindUser = 'cn=manager,dc=example,dc=org'
BindPass = '*******'
SyncAddUsers = false
SyncDelUsers = false
# unit: s
SyncInterval = 86400
# openldap format e.g. (&(uid=%s))
# AD format e.g. (&(sAMAccountName=%s))
AuthFilter = '(&(uid=%s))'
UserFilter = '(&(uid=*))'
CoverAttributes = true
TLS = false
StartTLS = true
DefaultRoles = ['Standard']
[Attributes]
Username = 'uid'
Nickname = 'cn'
Phone = 'mobile'
Email = 'mail'
@@ -46,8 +58,9 @@ Email = 'mail'
const OAuth2 = `
Enable = false
DisplayName = 'OAuth2登录'
RedirectURL = 'http://127.0.0.1:18000/callback/oauth'
RedirectURL = 'http://n9e.com/callback/oauth'
SsoAddr = 'https://sso.example.com/oauth2/authorize'
SsoLogoutAddr = 'https://sso.example.com/oauth2/authorize/session/end'
TokenAddr = 'https://sso.example.com/oauth2/token'
UserInfoAddr = 'https://api.example.com/api/v1/user/info'
TranTokenMethod = 'header'
@@ -60,7 +73,7 @@ UserinfoPrefix = 'data'
Scopes = ['profile', 'email', 'phone']
[Attributes]
Username = 'username'
Username = 'sub'
Nickname = 'nickname'
Phone = 'phone_number'
Email = 'email'
@@ -68,34 +81,41 @@ Email = 'email'
const CAS = `
Enable = false
SsoAddr = 'https://cas.example.com/cas/'
RedirectURL = 'http://127.0.0.1:18000/callback/cas'
DisplayName = 'CAS登录'
CoverAttributes = false
RedirectURL = 'http://n9e.com/callback/cas'
SsoAddr = 'https://cas.example.com/cas/'
SsoLogoutAddr = 'https://cas.example.com/cas/session/end'
# LoginPath = ''
CoverAttributes = true
DefaultRoles = ['Standard']
[Attributes]
Username = 'sub'
Nickname = 'nickname'
Phone = 'phone_number'
Email = 'email'
`
const OIDC = `
Enable = false
DisplayName = 'OIDC登录'
RedirectURL = 'http://n9e.com/callback'
SsoAddr = 'http://sso.example.org'
SsoLogoutAddr = 'http://sso.example.org/session/end'
ClientId = ''
ClientSecret = ''
CoverAttributes = true
DefaultRoles = ['Standard']
Scopes = ['openid', 'profile', 'email', 'phone']
[Attributes]
Username = 'sub'
Nickname = 'nickname'
Phone = 'phone_number'
Email = 'email'
`
func Init(center cconf.Center, ctx *ctx.Context) *SsoClient {
func Init(center cconf.Center, ctx *ctx.Context, configCache *memsto.ConfigCache) *SsoClient {
ssoClient := new(SsoClient)
m := make(map[string]string)
m["LDAP"] = LDAP
@@ -124,6 +144,11 @@ func Init(center cconf.Center, ctx *ctx.Context) *SsoClient {
log.Fatalln(err)
}
}
if configCache == nil {
logger.Error("configCache is nil, sso initialization failed")
}
ssoClient.configCache = configCache
userVariableMap := configCache.Get()
configs, err := models.SsoConfigGets(ctx)
if err != nil {
@@ -131,6 +156,7 @@ func Init(center cconf.Center, ctx *ctx.Context) *SsoClient {
}
for _, cfg := range configs {
cfg.Content = tplx.ReplaceTemplateUseText(cfg.Name, cfg.Content, userVariableMap)
switch cfg.Name {
case "LDAP":
var config ldapx.Config
@@ -145,6 +171,7 @@ func Init(center cconf.Center, ctx *ctx.Context) *SsoClient {
if err != nil {
log.Fatalln("init oidc failed:", err)
}
logger.Info("init oidc..")
oidcClient, err := oidcx.New(config)
if err != nil {
logger.Error("init oidc failed:", err)
@@ -167,5 +194,84 @@ func Init(center cconf.Center, ctx *ctx.Context) *SsoClient {
ssoClient.OAuth2 = oauth2x.New(config)
}
}
go ssoClient.SyncSsoUsers(ctx)
go ssoClient.Reload(ctx)
return ssoClient
}
// 定期更新sso配置
func (s *SsoClient) reload(ctx *ctx.Context) error {
lastUpdateTime, err := models.SsoConfigLastUpdateTime(ctx)
if err != nil {
return err
}
lastCacheUpdateTime := s.configCache.GetLastUpdateTime()
if lastUpdateTime == s.LastUpdateTime && lastCacheUpdateTime == s.configLastUpdateTime {
return nil
}
configs, err := models.SsoConfigGets(ctx)
if err != nil {
return err
}
userVariableMap := s.configCache.Get()
for _, cfg := range configs {
cfg.Content = tplx.ReplaceTemplateUseText(cfg.Name, cfg.Content, userVariableMap)
switch cfg.Name {
case "LDAP":
var config ldapx.Config
err := toml.Unmarshal([]byte(cfg.Content), &config)
if err != nil {
logger.Warning("reload ldap failed", err)
continue
}
s.LDAP.Reload(config)
case "OIDC":
var config oidcx.Config
err := toml.Unmarshal([]byte(cfg.Content), &config)
if err != nil {
logger.Warning("reload oidc failed:", err)
continue
}
logger.Info("reload oidc..")
err = s.OIDC.Reload(config)
if err != nil {
logger.Error("reload oidc failed:", err)
continue
}
case "CAS":
var config cas.Config
err := toml.Unmarshal([]byte(cfg.Content), &config)
if err != nil {
logger.Warning("reload cas failed:", err)
continue
}
s.CAS.Reload(config)
case "OAuth2":
var config oauth2x.Config
err := toml.Unmarshal([]byte(cfg.Content), &config)
if err != nil {
logger.Warning("reload oauth2 failed:", err)
continue
}
s.OAuth2.Reload(config)
}
}
s.LastUpdateTime = lastUpdateTime
s.configLastUpdateTime = lastCacheUpdateTime
return nil
}
func (s *SsoClient) Reload(ctx *ctx.Context) {
duration := time.Duration(9000) * time.Millisecond
for {
time.Sleep(duration)
if err := s.reload(ctx); err != nil {
logger.Warning("reload sso client err:", err)
}
}
}

37
center/sso/sync.go Normal file
View File

@@ -0,0 +1,37 @@
package sso
import (
"fmt"
"github.com/ccfos/nightingale/v6/pkg/ctx"
"github.com/toolkits/pkg/logger"
)
func (s *SsoClient) SyncSsoUsers(ctx *ctx.Context) {
if err := s.LDAP.SyncAddAndDelUsers(ctx); err != nil {
fmt.Println("failed to sync the addition and deletion of ldap users:", err)
}
if err := s.LDAP.SyncDelUsers(ctx); err != nil {
fmt.Println("failed to sync deletion of ldap users:", err)
}
go s.loopSyncSsoUsers(ctx)
}
func (s *SsoClient) loopSyncSsoUsers(ctx *ctx.Context) {
for {
select {
case <-s.LDAP.Ticker.C:
lc := s.LDAP.Copy()
if err := lc.SyncAddAndDelUsers(ctx); err != nil {
logger.Warningf("failed to sync the addition and deletion of ldap users: %v", err)
}
if err := lc.SyncDelUsers(ctx); err != nil {
logger.Warningf("failed to sync deletion of ldap users: %v", err)
}
}
}
}

View File

@@ -8,6 +8,8 @@ import (
"github.com/ccfos/nightingale/v6/alert"
"github.com/ccfos/nightingale/v6/alert/astats"
"github.com/ccfos/nightingale/v6/alert/process"
alertrt "github.com/ccfos/nightingale/v6/alert/router"
"github.com/ccfos/nightingale/v6/center/metas"
"github.com/ccfos/nightingale/v6/conf"
"github.com/ccfos/nightingale/v6/dumper"
"github.com/ccfos/nightingale/v6/memsto"
@@ -16,11 +18,12 @@ import (
"github.com/ccfos/nightingale/v6/pkg/logx"
"github.com/ccfos/nightingale/v6/prom"
"github.com/ccfos/nightingale/v6/pushgw/idents"
pushgwrt "github.com/ccfos/nightingale/v6/pushgw/router"
"github.com/ccfos/nightingale/v6/pushgw/writer"
"github.com/ccfos/nightingale/v6/storage"
"github.com/ccfos/nightingale/v6/tdengine"
alertrt "github.com/ccfos/nightingale/v6/alert/router"
pushgwrt "github.com/ccfos/nightingale/v6/pushgw/router"
"github.com/flashcatcloud/ibex/src/cmd/ibex"
)
func Initialize(configDir string, cryptoKey string) (func(), error) {
@@ -39,13 +42,20 @@ func Initialize(configDir string, cryptoKey string) (func(), error) {
}
ctx := ctx.NewContext(context.Background(), nil, false, config.CenterApi)
var redis storage.Redis
redis, err = storage.NewRedis(config.Redis)
if err != nil {
return nil, err
}
syncStats := memsto.NewSyncStats()
targetCache := memsto.NewTargetCache(ctx, syncStats, nil)
targetCache := memsto.NewTargetCache(ctx, syncStats, redis)
busiGroupCache := memsto.NewBusiGroupCache(ctx, syncStats)
idents := idents.New(ctx)
idents := idents.New(ctx, redis)
metas := metas.New(redis)
writers := writer.NewWriters(config.Pushgw)
pushgwRouter := pushgwrt.New(config.HTTP, config.Pushgw, targetCache, busiGroupCache, idents, writers, ctx)
pushgwRouter := pushgwrt.New(config.HTTP, config.Pushgw, config.Alert, targetCache, busiGroupCache, idents, metas, writers, ctx)
r := httpx.GinEngine(config.Global.RunMode, config.HTTP)
pushgwRouter.Config(r)
@@ -58,17 +68,22 @@ func Initialize(configDir string, cryptoKey string) (func(), error) {
notifyConfigCache := memsto.NewNotifyConfigCache(ctx, configCache)
userCache := memsto.NewUserCache(ctx, syncStats)
userGroupCache := memsto.NewUserGroupCache(ctx, syncStats)
taskTplsCache := memsto.NewTaskTplCache(ctx)
promClients := prom.NewPromClient(ctx, config.Alert.Heartbeat)
promClients := prom.NewPromClient(ctx)
tdengineClients := tdengine.NewTdengineClient(ctx, config.Alert.Heartbeat)
externalProcessors := process.NewExternalProcessors()
alert.Start(config.Alert, config.Pushgw, syncStats, alertStats, externalProcessors, targetCache, busiGroupCache, alertMuteCache,
alertRuleCache, notifyConfigCache, dsCache, ctx, promClients, tdengineClients, userCache, userGroupCache)
alertRuleCache, notifyConfigCache, taskTplsCache, dsCache, ctx, promClients, tdengineClients, userCache, userGroupCache)
alertrtRouter := alertrt.New(config.HTTP, config.Alert, alertMuteCache, targetCache, busiGroupCache, alertStats, ctx, externalProcessors)
alertrtRouter.Config(r)
if config.Ibex.Enable {
ibex.ServerStart(false, nil, redis, config.HTTP.APIForService.BasicAuth, config.Alert.Heartbeat, &config.CenterApi, r, nil, config.Ibex, config.HTTP.Port)
}
}
dumper.ConfigRouter(r)

View File

@@ -27,6 +27,7 @@ type ConfigType struct {
Pushgw pconf.Pushgw
Alert aconf.Alert
Center cconf.Center
Ibex Ibex
}
type CenterApi struct {
@@ -40,6 +41,17 @@ type GlobalConfig struct {
RunMode string
}
type Ibex struct {
Enable bool
RPCListen string
Output Output
}
type Output struct {
ComeFrom string
AgtdPort int
}
func InitConfig(configDir, cryptoKey string) (*ConfigType, error) {
var config = new(ConfigType)

BIN
doc/img/Nightingale_L_V.png Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 230 KiB

View File

@@ -1,12 +0,0 @@
FROM python:3-slim
#FROM ubuntu:21.04
WORKDIR /app
ADD n9e /app
ADD http://download.flashcat.cloud/wait /wait
RUN chmod +x /wait
RUN chmod +x n9e
EXPOSE 17000
CMD ["/app/n9e", "-h"]

View File

@@ -3,7 +3,7 @@ FROM --platform=$TARGETPLATFORM python:3-slim
WORKDIR /app
ADD n9e /app/
ADD etc /app/
ADD etc /app/etc/
ADD integrations /app/integrations/
RUN pip install requests

View File

@@ -3,7 +3,7 @@ FROM --platform=$TARGETPLATFORM python:3-slim
WORKDIR /app
ADD n9e /app/
ADD etc /app/
ADD etc /app/etc/
ADD integrations /app/integrations/
EXPOSE 17000

View File

@@ -69,27 +69,6 @@ services:
command:
- "--loggerTimezone=Asia/Shanghai"
ibex:
image: flashcatcloud/ibex:v1.2.0
container_name: ibex
hostname: ibex
restart: always
environment:
GIN_MODE: release
TZ: Asia/Shanghai
WAIT_HOSTS: mysql:3306
volumes:
- ./etc-ibex:/app/etc
networks:
- nightingale
ports:
- "10090:10090"
- "20090:20090"
depends_on:
- mysql
command: >
sh -c "/app/ibex server"
nightingale:
image: flashcatcloud/nightingale:latest
container_name: nightingale
@@ -105,6 +84,7 @@ services:
- nightingale
ports:
- "17000:17000"
- "20090:20090"
depends_on:
- mysql
- redis
@@ -122,7 +102,7 @@ services:
HOST_PROC: /hostfs/proc
HOST_SYS: /hostfs/sys
HOST_MOUNT_PREFIX: /hostfs
WAIT_HOSTS: nightingale:17000, ibex:20090
WAIT_HOSTS: nightingale:17000, nightingale:20090
volumes:
- ./etc-categraf:/etc/categraf/conf
- /:/hostfs

View File

@@ -78,6 +78,6 @@ enable = true
## ibex flush interval
interval = "1000ms"
## n9e ibex server rpc address
servers = ["ibex:20090"]
servers = ["nightingale:20090"]
## temp script dir
meta_dir = "./meta"

View File

@@ -1,97 +0,0 @@
# debug, release
RunMode = "release"
[Log]
# log write dir
Dir = "logs-server"
# log level: DEBUG INFO WARNING ERROR
Level = "DEBUG"
# stdout, stderr, file
Output = "stdout"
# # rotate by time
# KeepHours: 4
# # rotate by size
# RotateNum = 3
# # unit: MB
# RotateSize = 256
[HTTP]
Enable = true
# http listening address
Host = "0.0.0.0"
# http listening port
Port = 10090
# https cert file path
CertFile = ""
# https key file path
KeyFile = ""
# whether print access log
PrintAccessLog = true
# whether enable pprof
PProf = false
# http graceful shutdown timeout, unit: s
ShutdownTimeout = 30
# max content length: 64M
MaxContentLength = 67108864
# http server read timeout, unit: s
ReadTimeout = 20
# http server write timeout, unit: s
WriteTimeout = 40
# http server idle timeout, unit: s
IdleTimeout = 120
[BasicAuth]
# using when call apis
ibex = "ibex"
[RPC]
Listen = "0.0.0.0:20090"
[Heartbeat]
# auto detect if blank
IP = ""
# unit: ms
Interval = 1000
[Output]
# database | remote
ComeFrom = "database"
AgtdPort = 2090
[Gorm]
# enable debug mode or not
Debug = false
# mysql postgres
DBType = "mysql"
# unit: s
MaxLifetime = 7200
# max open connections
MaxOpenConns = 150
# max idle connections
MaxIdleConns = 50
# table prefix
TablePrefix = ""
[MySQL]
# mysql address host:port
Address = "mysql:3306"
# mysql username
User = "root"
# mysql password
Password = "1234"
# database name
DBName = "ibex"
# connection params
Parameters = "charset=utf8mb4&parseTime=True&loc=Local&allowNativePasswords=true"
[Postgres]
# pg address host:port
Address = "postgres:5432"
# pg user
User = "root"
# pg password
Password = "1234"
# database name
DBName = "ibex"
# ssl mode
SSLMode = "disable"

View File

@@ -55,8 +55,6 @@ Enable = true
user001 = "ccc26da7b9aba533cbb263a36c07dcc5"
[HTTP.JWTAuth]
# signing key
SigningKey = "5b94a0fd640fe2765af826acfe42d151"
# unit: min
AccessExpired = 1500
# unit: min
@@ -96,8 +94,6 @@ MaxLifetime = 7200
MaxOpenConns = 150
# max idle connections
MaxIdleConns = 50
# table prefix
TablePrefix = ""
# enable auto migrate or not
# EnableAutoMigrate = false
@@ -140,7 +136,7 @@ AlertDetail = false
LabelRewrite = true
# # default busigroup key name
# BusiGroupLabelKey = "busigroup"
# ForceUseServerTS = false
ForceUseServerTS = true
# [Pushgw.DebugSample]
# ident = "xx"
@@ -181,3 +177,7 @@ MaxIdleConnsPerHost = 100
# Regex = "([^:]+)(?::\\d+)?"
# Replacement = "$1:80"
# TargetLabel = "__address__"
[Ibex]
Enable = true
RPCListen = "0.0.0.0:20090"

View File

@@ -0,0 +1,143 @@
version: "3.7"
services:
mysql:
image: "mysql:8"
container_name: mysql
hostname: mysql
restart: always
environment:
TZ: Asia/Shanghai
MYSQL_ROOT_PASSWORD: 1234
volumes:
- ./mysqldata:/var/lib/mysql/
- ../initsql:/docker-entrypoint-initdb.d/
- ./etc-mysql/my.cnf:/etc/my.cnf
network_mode: host
redis:
image: "redis:6.2"
container_name: redis
hostname: redis
restart: always
environment:
TZ: Asia/Shanghai
network_mode: host
prometheus:
image: prom/prometheus
container_name: prometheus
hostname: prometheus
restart: always
environment:
TZ: Asia/Shanghai
volumes:
- ./etc-prometheus:/etc/prometheus
network_mode: host
command:
- "--config.file=/etc/prometheus/prometheus.yml"
- "--storage.tsdb.path=/prometheus"
- "--web.console.libraries=/usr/share/prometheus/console_libraries"
- "--web.console.templates=/usr/share/prometheus/consoles"
- "--enable-feature=remote-write-receiver"
- "--query.lookback-delta=2m"
n9e:
image: flashcatcloud/nightingale:latest
container_name: n9e
hostname: n9e
restart: always
environment:
GIN_MODE: release
TZ: Asia/Shanghai
WAIT_HOSTS: 127.0.0.1:3306, 127.0.0.1:6379
volumes:
- ./etc-nightingale:/app/etc
- ./n9e-logs:/app/logs
network_mode: host
depends_on:
- mysql
- redis
- prometheus
command: >
sh -c "/app/n9e"
categraf:
image: "flashcatcloud/categraf:latest"
container_name: "categraf"
hostname: "categraf01"
restart: always
environment:
TZ: Asia/Shanghai
HOST_PROC: /hostfs/proc
HOST_SYS: /hostfs/sys
HOST_MOUNT_PREFIX: /hostfs
WAIT_HOSTS: 127.0.0.1:17000, 127.0.0.1:20090, 127.0.0.1:9092
volumes:
- ./etc-categraf:/etc/categraf/conf
- ./n9e-logs:/logs
- /:/hostfs
network_mode: host
depends_on:
- n9e
- kafka
zookeeper:
image: bitnami/zookeeper:3.9
container_name: "zookeeper"
restart: always
environment:
- TZ=Asia/Shanghai
- ALLOW_ANONYMOUS_LOGIN=yes
network_mode: host
depends_on:
- n9e
kafka:
image: bitnami/kafka:3.4
container_name: "kafka"
restart: always
environment:
TZ: Asia/Shanghai
KAFKA_ADVERTISED_LISTENERS: PLAINTEXT://127.0.0.1:9092
KAFKA_LISTENERS: PLAINTEXT://0.0.0.0:9092
KAFKA_ZOOKEEPER_CONNECT: 127.0.0.1:2181
KAFKA_CFG_MESSAGE_MAX_BYTES: 2000000
network_mode: host
depends_on:
- zookeeper
elasticsearch:
image: docker.elastic.co/elasticsearch/elasticsearch:7.10.1
container_name: "elasticsearch"
restart: always
environment:
- TZ=Asia/Shanghai
- discovery.type=single-node
network_mode: host
depends_on:
- kafka
logstash:
image: docker.elastic.co/logstash/logstash:8.11.3
container_name: "logstash"
restart: always
environment:
- TZ=Asia/Shanghai
- LS_JAVA_OPTS=-Xmx256m -Xms256m
volumes:
- ./etc-logstash/logstash.yaml:/etc/logstash/conf.d/logstash.yaml
entrypoint:
- logstash
- -f
- /etc/logstash/conf.d/logstash.yaml
network_mode: host
depends_on:
- elasticsearch
- kafka
logging:
driver: "json-file"
options:
max-size: "200m"
max-file: "3"

View File

@@ -0,0 +1,83 @@
[global]
# whether print configs
print_configs = false
# add label(agent_hostname) to series
# "" -> auto detect hostname
# "xx" -> use specified string xx
# "$hostname" -> auto detect hostname
# "$ip" -> auto detect ip
# "$hostname-$ip" -> auto detect hostname and ip to replace the vars
hostname = "$HOSTNAME"
# will not add label(agent_hostname) if true
omit_hostname = false
# s | ms
precision = "ms"
# global collect interval
interval = 15
[global.labels]
source="categraf"
# region = "shanghai"
# env = "localhost"
[writer_opt]
# default: 2000
batch = 2000
# channel(as queue) size
chan_size = 10000
[[writers]]
url = "http://127.0.0.1:17000/prometheus/v1/write"
# Basic auth username
basic_auth_user = ""
# Basic auth password
basic_auth_pass = ""
# timeout settings, unit: ms
timeout = 5000
dial_timeout = 2500
max_idle_conns_per_host = 100
[http]
enable = false
address = ":9100"
print_access = false
run_mode = "release"
[heartbeat]
enable = true
# report os version cpu.util mem.util metadata
url = "http://127.0.0.1:17000/v1/n9e/heartbeat"
# interval, unit: s
interval = 10
# Basic auth username
basic_auth_user = ""
# Basic auth password
basic_auth_pass = ""
## Optional headers
# headers = ["X-From", "categraf", "X-Xyz", "abc"]
# timeout settings, unit: ms
timeout = 5000
dial_timeout = 2500
max_idle_conns_per_host = 100
[ibex]
enable = true
## ibex flush interval
interval = "1000ms"
## n9e ibex server rpc address
servers = ["127.0.0.1:20090"]
## temp script dir
meta_dir = "./meta"

View File

@@ -0,0 +1,5 @@
# # collect interval
# interval = 15
# # whether collect per cpu
# collect_per_cpu = false

View File

@@ -0,0 +1,11 @@
# # collect interval
# interval = 15
# # By default stats will be gathered for all mount points.
# # Set mount_points will restrict the stats to only the specified mount points.
# mount_points = ["/"]
# Ignore mount points by filesystem type.
ignore_fs = ["tmpfs", "devtmpfs", "devfs", "iso9660", "overlay", "aufs", "squashfs"]
ignore_mount_points = ["/boot"]

View File

@@ -0,0 +1,6 @@
# # collect interval
# interval = 15
# # By default, categraf will gather stats for all devices including disk partitions.
# # Setting devices will restrict the stats to the specified devices.
# devices = ["sda", "sdb", "vd*"]

View File

@@ -0,0 +1,2 @@
# # collect interval
# interval = 15

View File

@@ -0,0 +1,5 @@
# # collect interval
# interval = 15
# # whether collect platform specified metrics
collect_platform_fields = true

View File

@@ -0,0 +1,8 @@
# # collect interval
# interval = 15
# # whether collect protocol stats on Linux
# collect_protocol_stats = false
# # setting interfaces will tell categraf to gather these explicit interfaces
# interfaces = ["eth0"]

View File

@@ -0,0 +1,2 @@
# # collect interval
# interval = 15

View File

@@ -0,0 +1,5 @@
# # collect interval
# interval = 15
# # whether collect metric: system_n_users
# collect_user_number = false

View File

@@ -0,0 +1,73 @@
[logs]
## just a placholder
api_key = "ef4ahfbwzwwtlwfpbertgq1i6mq0ab1q"
## enable log collect or not
enable = true
## the server receive logs, http/tcp/kafka, only kafka brokers can be multiple ip:ports with concatenation character ","
send_to = "127.0.0.1:9092"
## send logs with protocol: http/tcp/kafka
send_type = "kafka"
topic = "flashcatcloud"
## send logs with compression or not
use_compress = false
## use ssl or not
send_with_tls = false
## send logs in batchs
batch_wait = 5
## save offset in this path
run_path = "/opt/categraf/run"
## max files can be open
open_files_limit = 100
## scan config file in 10 seconds
scan_period = 10
## read buffer of udp
frame_size = 9000
## channal size, default 100
## 读取日志缓冲区,行数
chan_size = 1000
## pipeline num , default 4
## 有多少线程处理日志
pipeline=4
## configuration for kafka
## 指定kafka版本
kafka_version="2.8.1"
# 默认0 表示串行,如果对日志顺序有要求,保持默认配置
batch_max_concurrence = 0
# 最大并发批次, 默认100
batch_max_size=100
# 每次最大发送的内容上限 默认1000000
batch_max_contentsize=1000000
# client timeout in seconds
producer_timeout= 10
# 是否开启sasl模式
sasl_enable = false
sasl_user = "admin"
sasl_password = "admin"
# PLAIN
sasl_mechanism= "PLAIN"
# v1
sasl_version=1
# set true
sasl_handshake = true
# optional
# sasl_auth_identity=""
#
##
# v0.3.39以上版本新增,是否开启pod日志采集
enable_collect_container=false
# 是否采集所有pod的stdout stderr
collect_container_all = false
## glog processing rules
# [[logs.Processing_rules]]
## single log configure
[[logs.items]]
## file/journald/tcp/udp
type = "file"
## type=file, path is required; type=journald/tcp/udp, port is required
path = "/logs/*"
source = "n9e"
service = "n9e_service"

View File

@@ -0,0 +1,22 @@
input {
kafka {
bootstrap_servers => "127.0.0.1:9092"
topics => ["flashcatcloud"]
codec => json
type => n9e
}
}
filter {
grok {
match => {"message" => "%{LOGLEVEL:status}"}
overwrite => ["status"]
}
}
output {
elasticsearch {
hosts => ["127.0.0.1:9200"]
index => "n9e-%{+YYYY.MM.DD}"
}
}

View File

@@ -0,0 +1,5 @@
[mysqld]
pid-file = /var/run/mysqld/mysqld.pid
socket = /var/run/mysqld/mysqld.sock
datadir = /var/lib/mysql
bind-address = 127.0.0.1

Some files were not shown because too many files have changed in this diff Show More