Compare commits

..

129 Commits

Author SHA1 Message Date
ning
fa49449588 fix: user phone encrypt 2025-11-11 16:47:49 +08:00
ning
876f1d1084 fix prom datasource update 2025-11-10 10:51:44 +08:00
ning
678830be37 refactor: add /builtin-component service api 2025-11-10 10:29:14 +08:00
ning
5e30f3a00d Merge branch 'main' of github.com:ccfos/nightingale 2025-11-06 01:39:58 +08:00
ning
7f1eefd033 fix: dsn 2025-11-05 15:08:08 +08:00
Wenyu Su
c8dd26ca4c fix(notify): http callback retry interval in millisecond (#2928) 2025-11-04 18:11:30 +08:00
ning
37c57e66ea refactor: optimize db dsn 2025-11-04 15:09:03 +08:00
ning
878e940325 docs: update migrate.sql 2025-11-03 19:16:00 +08:00
Ulric Qin
cbc715305d Merge branch 'main' of https://github.com/ccfos/nightingale 2025-10-31 18:35:39 +08:00
Ulric Qin
5011766c70 delete single config dir 2025-10-31 18:35:32 +08:00
Snowykami
b3ed8a1e8c fix: elasticsearch v9 range query compatibility (#2692) (#2922) 2025-10-31 15:05:45 +08:00
Ulric Qin
814ded90b6 remove no use configuration: EnableAutoMigrate and add etc/single/config.toml 2025-10-31 11:38:17 +08:00
ning
43e89040eb refactor: update oidc logout 2025-10-31 10:26:41 +08:00
710leo
3d339fe03c update issue-translator 2025-10-30 22:50:53 +08:00
710leo
7618858912 update issue-translator 2025-10-30 22:24:49 +08:00
710leo
15b4ef8611 update issue-translator 2025-10-30 22:19:20 +08:00
710leo
5083a5cc96 add issue-translator 2025-10-30 22:10:09 +08:00
Yening Qin
d51e83d7d4 feat: alert rule add event process (#2921) 2025-10-28 16:18:12 +08:00
ning
601d4f0c95 update built in 2025-10-28 15:42:37 +08:00
Ulric Qin
90fac12953 update readme 2025-10-26 22:25:16 +08:00
ning
19d76824d9 update user group api 2025-10-25 16:45:31 +08:00
ning
1341554bbc refactor: optimize push data log 2025-10-19 12:12:06 +08:00
Ulric Qin
fd3ce338cb Merge branch 'main' of https://github.com/ccfos/nightingale 2025-10-15 17:21:58 +08:00
Ulric Qin
b8f36ce3cb add more prometheus rule severity choices 2025-10-15 17:21:52 +08:00
ning
037112a9e6 refactor: update alert mute api 2025-10-15 15:19:01 +08:00
zjxpsetp
c6e75d31a1 update jmx dashboard to support multi gc mode 2025-10-15 12:41:01 +08:00
zjxpsetp
bd24f5b056 Merge remote-tracking branch 'origin/main'
# Conflicts:
#	integrations/Java/dashboards/jmx_by_kubernetes.json
2025-10-15 12:36:14 +08:00
zjxpsetp
89551c8edb update jmx dashboard to support multi gc mode 2025-10-15 12:29:11 +08:00
ning
042b44940d docs: update i18n 2025-10-14 17:38:25 +08:00
ning
8cd8674848 refactor: optimize alert mute match 2025-10-14 16:59:57 +08:00
ning
7bb6ac8a03 refactor: update alert mute sync 2025-10-14 16:45:15 +08:00
ning
76b35276af refactor: update event api 2025-10-13 20:24:52 +08:00
SaladDay
439a21b784 feat: add expired filter for alert mute rules (#2907) 2025-10-13 14:04:32 +08:00
Yening Qin
47e70a2dba fix: sql order (#2908) 2025-10-13 12:18:16 +08:00
Yening Qin
16b3cb1abc feat: support encrypt user phone (#2906)
* feature: add a new configuration option to encrypt user phone numbers in the database (#2902)

Co-authored-by: yuanzaiping_dxm <yuanzaiping@duxiaoman.com>
---------

Co-authored-by: zaipingY <30775871+zaipingy@users.noreply.github.com>
Co-authored-by: yuanzaiping_dxm <yuanzaiping@duxiaoman.com>
2025-10-11 14:37:14 +08:00
ning
32995c1b2d fix: event pipeline insert by PostgreSQL 2025-09-23 16:40:27 +08:00
ning
b4fa36fa0e refactor: update message tpl 2025-09-22 18:18:19 +08:00
ning
f412f82eb8 fix: event value is inf 2025-09-19 19:32:40 +08:00
ning
9da1cd506b fix: datasource sync 2025-09-19 17:41:57 +08:00
ning
99ea838863 refactor: optimize query target 2025-09-19 15:00:07 +08:00
ning
7feb003b72 refactor: change feishucard body 2025-09-19 10:37:58 +08:00
ning
b0a053361f refactor: change log 2025-09-17 20:28:22 +08:00
ning
959f75394b refactor: alert rule api 2025-09-17 12:10:43 +08:00
ning
03e95973b2 refactor: message tpl api 2025-09-16 20:13:25 +08:00
ning
e890705167 refactor: event notify 2025-09-16 19:24:05 +08:00
ning
6716f1bdf1 refactor: update event notify 2025-09-15 20:19:18 +08:00
ning
739b9406a4 Merge branch 'main' of github.com:ccfos/nightingale 2025-09-15 17:37:38 +08:00
ning
77f280d1cc fix: event delete api 2025-09-15 17:37:25 +08:00
pioneerlfn
04fe1b9dd6 for slice, when marshal, return [] instead null (#2878) 2025-09-15 17:28:57 +08:00
ning
552758e0e1 refactor: pushgw writer support async 2025-09-14 15:00:40 +08:00
ning
68bc474c1b refactor: update message tpl 2025-09-11 20:38:17 +08:00
ning
f692035deb refactor: change datasource log 2025-09-11 16:14:59 +08:00
ning
eb441353c3 refactor: message tpl 2025-09-11 15:33:02 +08:00
ning
b606b22ae6 fix: opensearch alert 2025-09-10 22:24:50 +08:00
ning
1de0428860 docs: update init.sql 2025-09-09 18:51:23 +08:00
ning
3d0c288c9f refactor: event api 2025-09-09 16:36:40 +08:00
ning
343814a802 refactor: notify rule update 2025-09-09 11:05:58 +08:00
ning
12e2761467 fix: dingtalk message tpl 2025-09-09 10:18:08 +08:00
Yening Qin
0edd5ee772 refactor: event notify (#2869) 2025-09-08 15:04:33 +08:00
ning
5e430cedc7 fix: build, router_target miss idents 2025-09-03 14:17:15 +08:00
ning
a791a9901e refactor: push ts to kafka 2025-09-03 12:17:32 +08:00
totoro
222cdd76f0 refactor : es support search_after (#2859) 2025-09-03 12:12:07 +08:00
arch3754
ed4e3937e0 feat: add target update (#2853) 2025-09-03 12:05:38 +08:00
Yening Qin
60f9e1c48e refactor: dscache sync add datasource process hook (#2792) 2025-09-02 18:12:36 +08:00
Ulric Qin
276dfe7372 update linux dash 2025-09-02 08:57:36 +08:00
Ulric Qin
4a6dacbe30 add host table ng 2025-09-02 08:54:54 +08:00
Ulric Qin
48eebba11a update linux dashboard 2025-09-02 08:52:58 +08:00
ning
eca82e5ec2 change ops update 2025-09-01 14:24:17 +08:00
Yening Qin
21478fcf3d fix: send http notify retry (#2849) 2025-08-30 01:57:32 +08:00
ulricqin
a87c856299 fix: call flashduty to push event (#2848) 2025-08-30 00:06:53 +08:00
ning
ba035a446d refactor: change some log 2025-08-29 16:32:57 +08:00
ning
bf840e6bb2 docs: update dashboard tpl 2025-08-29 10:14:59 +08:00
ning
cd01092aed refactor: update alert rule import api 2025-08-28 19:44:05 +08:00
ning
e202fd50c8 refactor: datasource api 2025-08-28 16:48:04 +08:00
ning
f0e5062485 refactor: optimize edge ident update 2025-08-28 16:24:33 +08:00
ning
861fe96de5 add UpdateDBTargetTimestampDisable 2025-08-27 19:12:56 +08:00
Ulric Qin
5b66ada96d Merge branch 'main' of https://github.com/ccfos/nightingale 2025-08-27 10:06:19 +08:00
Ulric Qin
d5a98debff upgrade ibex 2025-08-27 10:06:11 +08:00
ning
4977052a67 Merge branch 'main' of github.com:ccfos/nightingale 2025-08-22 15:03:17 +08:00
ning
dcc461e587 refactor: push writer support sync 2025-08-22 15:00:49 +08:00
Ulric Qin
f5ce1733bb update minio dashboard 2025-08-21 18:58:47 +08:00
Ulric Qin
436cf25409 Merge branch 'main' of https://github.com/ccfos/nightingale 2025-08-21 10:34:56 +08:00
Ulric Qin
038f68b0b7 add minio dashboard for new version 2025-08-21 10:34:50 +08:00
ning
96ef1895b7 refactor: event_script_notify_result log add stdin 2025-08-20 14:24:28 +08:00
zjxpsetp
eeaa7b46f1 update es dashboard for categraf version bigger than 0.3.102 2025-08-19 23:43:54 +08:00
zjxpsetp
dc525352f1 Merge remote-tracking branch 'origin/main' 2025-08-19 17:36:12 +08:00
zjxpsetp
98a3fe9375 update jmx dashboard in kubernetes 2025-08-19 17:35:55 +08:00
ning
74b0f802ec Merge branch 'main' of github.com:ccfos/nightingale 2025-08-18 11:19:07 +08:00
ning
85bd3148d5 refactor: add update db metric 2025-08-18 11:18:52 +08:00
ning
0931fa9603 fix: target update ts 2025-08-18 11:18:39 +08:00
zjxpsetp
65cdb2da9e 更新 jmx 的仪表盘,新的jmx Exporter 指标和之前有一些差别 2025-08-17 17:23:39 +08:00
ning
9ad6514af6 refactor: ds query api 2025-08-13 11:50:01 +08:00
ning
302c6549e4 refactor: ds query api 2025-08-13 11:12:57 +08:00
ning
a3122270e6 refactor: ds query api 2025-08-13 11:02:00 +08:00
Yening Qin
1245c453bb refactor: send flashduty (#2824) 2025-08-12 20:55:23 +08:00
Ulric Qin
9c5ccf0c8f fix: update update_at when batch-updating-rules 2025-08-06 20:21:57 +08:00
Ulric Qin
cd468af250 refactor batch updating rules 2025-08-06 17:16:02 +08:00
Ulric Qin
2d3449c0ec code refactor for batch updating 2025-08-06 15:45:36 +08:00
ning
e15bdbce92 refactor: optimize import prom alert rule 2025-08-06 11:09:53 +08:00
ning
3890243d42 fix: new mysql db client 2025-08-04 18:40:04 +08:00
ning
37fb4ee867 add case-insensitive search for builtin payload filtering 2025-08-04 16:31:28 +08:00
ning
6db63eafc1 refactor: change import prom rule 2025-08-01 19:00:06 +08:00
ning
1e9cbfc316 fix: event query log 2025-08-01 16:47:14 +08:00
ning
4f95554fe3 refactor: update msg tpl 2025-07-31 18:08:12 +08:00
ning
8eba9aa92f refactor: update msg tpl 2025-07-31 15:31:29 +08:00
ning
6ba74b8e21 fix: pgsql cross database query 2025-07-31 11:51:27 +08:00
ning
8ea4632681 refactor: update duty user sync 2025-07-28 14:36:08 +08:00
ning
f958f27de1 fix: AlertRuleExists 2025-07-27 13:28:46 +08:00
ning
1bdfa3e032 refactor: update TargetDel 2025-07-27 12:46:22 +08:00
ning
143880cd46 Merge branch 'main' of github.com:ccfos/nightingale 2025-07-25 13:21:55 +08:00
ning
38f0b4f1bb refactor: modify add loki api resp 2025-07-25 13:01:27 +08:00
dependabot[bot]
2bccd5be99 build(deps): bump golang.org/x/oauth2 from 0.23.0 to 0.27.0 (#2793)
Bumps [golang.org/x/oauth2](https://github.com/golang/oauth2) from 0.23.0 to 0.27.0.
- [Commits](https://github.com/golang/oauth2/compare/v0.23.0...v0.27.0)

---
updated-dependencies:
- dependency-name: golang.org/x/oauth2
  dependency-version: 0.27.0
  dependency-type: direct:production
...

Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
2025-07-24 21:21:29 +08:00
ning
7b328b3eaa refactor: update prom rule import 2025-07-24 21:17:12 +08:00
Ulric Qin
8bd5b90e94 fix: support old rule format when importing 2025-07-23 09:36:28 +08:00
ning
96629e284f refactor: add email notify record 2025-07-17 11:45:58 +08:00
ning
67d2875690 fix: update record rule datasource ids api 2025-07-15 16:29:30 +08:00
ning
238895a1f8 refactor: init tpl 2025-07-15 15:52:38 +08:00
ning
fb341b645d refactor: sub alert add host filter 2025-07-15 14:46:09 +08:00
Haobo Zhang
2d84fd8cf3 fix: ai summary customize parameter parse from interface (#2788) 2025-07-14 14:54:15 +08:00
ning
2611f87c41 refactor: drop builtin_components idx_ident 2025-07-11 19:12:24 +08:00
ning
a5b7aa7a26 refactor: drop builtin_components idx_ident 2025-07-11 18:57:03 +08:00
ning
0714a0f8f1 refactor: change log level 2025-07-11 16:41:14 +08:00
ning
063cc750e1 refactor: update notify channel api 2025-07-11 12:25:08 +08:00
ning
b2a912d72f refactor: log level 2025-07-11 12:06:18 +08:00
ning
4ba745f442 fix: alert rule batch update notify rule 2025-07-11 11:38:09 +08:00
smx_Morgan
fa7d46ecad fix: compatible user_token table with postgresql (#2785) 2025-07-10 11:03:33 +08:00
pioneerlfn
a5a43df44f refactor: doris search sql (#2778)
* doris:support search sql with macro

* Update doris.go

---------

Co-authored-by: Yening Qin <710leo@gmail.com>
2025-07-09 21:33:17 +08:00
smx_Morgan
fbf1d68b84 fix: update postgresql init sql (#2784) 2025-07-09 20:53:56 +08:00
ulricqin
ca712f62a4 fix execution of notify script (#2769) 2025-07-06 08:40:13 +08:00
100 changed files with 10307 additions and 1413 deletions

22
.github/workflows/issue-translator.yml vendored Normal file
View File

@@ -0,0 +1,22 @@
name: 'Issue Translator'
on:
issues:
types: [opened]
jobs:
translate:
runs-on: ubuntu-latest
permissions:
issues: write
contents: read
steps:
- name: Translate Issues
uses: usthe/issues-translate-action@v2.7
with:
# 是否翻译 issue 标题
IS_MODIFY_TITLE: true
# GitHub Token
BOT_GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
# 自定义翻译标注(可选)
# CUSTOM_BOT_NOTE: "Translation by bot"

4
.gitignore vendored
View File

@@ -58,6 +58,10 @@ _test
.idea
.index
.vscode
.issue
.issue/*
.cursor
.claude
.DS_Store
.cache-loader
.payload

View File

@@ -47,7 +47,7 @@ Nightingale itself does not provide monitoring data collection capabilities. We
For certain edge data centers with poor network connectivity to the central Nightingale server, we offer a distributed deployment mode for the alerting engine. In this mode, even if the network is disconnected, the alerting functionality remains unaffected.
![Edge Deployment Mode](doc/img/readme/20240222102119.png)
![Edge Deployment Mode](doc/img/readme/multi-region-arch.png)
> In the above diagram, Data Center A has a good network with the central data center, so it uses the Nightingale process in the central data center as the alerting engine. Data Center B has a poor network with the central data center, so it deploys `n9e-edge` as the alerting engine to handle alerting for its own data sources.
@@ -68,7 +68,7 @@ Then Nightingale is not suitable. It is recommended that you choose on-call prod
## 🔑 Key Features
![Nightingale Alerting rules](doc/img/readme/2025-05-23_18-43-37.png)
![Nightingale Alerting rules](doc/img/readme/alerting-rules-en.png)
- Nightingale supports alerting rules, mute rules, subscription rules, and notification rules. It natively supports 20 types of notification media and allows customization of message templates.
- It supports event pipelines for Pipeline processing of alarms, facilitating automated integration with in-house systems. For example, it can append metadata to alarms or perform relabeling on events.
@@ -76,19 +76,19 @@ Then Nightingale is not suitable. It is recommended that you choose on-call prod
- Many databases and middleware come with built-in alert rules that can be directly imported and used. It also supports direct import of Prometheus alerting rules.
- It supports alerting self-healing, which automatically triggers a script to execute predefined logic after an alarm is generated—such as cleaning up disk space or capturing the current system state.
![Nightingale Alarm Dashboard](doc/img/readme/2025-05-30_08-49-28.png)
![Nightingale Alarm Dashboard](doc/img/readme/active-events-en.png)
- Nightingale archives historical alarms and supports multi-dimensional query and statistics.
- It supports flexible aggregation grouping, allowing a clear view of the distribution of alarms across the company.
![Nightingale Integration Center](doc/img/readme/2025-05-23_18-46-06.png)
![Nightingale Integration Center](doc/img/readme/integration-components-en.png)
- Nightingale has built-in metric descriptions, dashboards, and alerting rules for common operating systems, middleware, and databases, which are contributed by the community with varying quality.
- It directly receives data via multiple protocols such as Remote Write, OpenTSDB, Datadog, and Falcon, integrates with various Agents.
- It supports data sources like Prometheus, ElasticSearch, Loki, ClickHouse, MySQL, Postgres, allowing alerting based on data from these sources.
- Nightingale can be easily embedded into internal enterprise systems (e.g. Grafana, CMDB), and even supports configuring menu visibility for these embedded systems.
![Nightingale dashboards](doc/img/readme/2025-05-23_18-49-02.png)
![Nightingale dashboards](doc/img/readme/dashboard-en.png)
- Nightingale supports dashboard functionality, including common chart types, and comes with pre-built dashboards. The image above is a screenshot of one of these dashboards.
- If you are already accustomed to Grafana, it is recommended to continue using Grafana for visualization, as Grafana has deeper expertise in this area.

View File

@@ -118,7 +118,7 @@ func Start(alertc aconf.Alert, pushgwc pconf.Pushgw, syncStats *memsto.Stats, al
eventProcessorCache := memsto.NewEventProcessorCache(ctx, syncStats)
dp := dispatch.NewDispatch(alertRuleCache, userCache, userGroupCache, alertSubscribeCache, targetCache, notifyConfigCache, taskTplsCache, notifyRuleCache, notifyChannelCache, messageTemplateCache, eventProcessorCache, alertc.Alerting, ctx, alertStats)
consumer := dispatch.NewConsumer(alertc.Alerting, ctx, dp, promClients)
consumer := dispatch.NewConsumer(alertc.Alerting, ctx, dp, promClients, alertMuteCache)
notifyRecordComsumer := sender.NewNotifyRecordConsumer(ctx)

View File

@@ -35,9 +35,9 @@ func MatchGroupsName(groupName string, groupFilter []models.TagFilter) bool {
func matchTag(value string, filter models.TagFilter) bool {
switch filter.Func {
case "==":
return strings.TrimSpace(filter.Value) == strings.TrimSpace(value)
return strings.TrimSpace(fmt.Sprintf("%v", filter.Value)) == strings.TrimSpace(value)
case "!=":
return strings.TrimSpace(filter.Value) != strings.TrimSpace(value)
return strings.TrimSpace(fmt.Sprintf("%v", filter.Value)) != strings.TrimSpace(value)
case "in":
_, has := filter.Vset[value]
return has

View File

@@ -10,6 +10,7 @@ import (
"github.com/ccfos/nightingale/v6/alert/aconf"
"github.com/ccfos/nightingale/v6/alert/common"
"github.com/ccfos/nightingale/v6/alert/queue"
"github.com/ccfos/nightingale/v6/memsto"
"github.com/ccfos/nightingale/v6/models"
"github.com/ccfos/nightingale/v6/pkg/ctx"
"github.com/ccfos/nightingale/v6/pkg/poster"
@@ -26,10 +27,15 @@ type Consumer struct {
alerting aconf.Alerting
ctx *ctx.Context
dispatch *Dispatch
promClients *prom.PromClientMap
dispatch *Dispatch
promClients *prom.PromClientMap
alertMuteCache *memsto.AlertMuteCacheType
}
type EventMuteHookFunc func(event *models.AlertCurEvent) bool
var EventMuteHook EventMuteHookFunc = func(event *models.AlertCurEvent) bool { return false }
func InitRegisterQueryFunc(promClients *prom.PromClientMap) {
tplx.RegisterQueryFunc(func(datasourceID int64, promql string) model.Value {
if promClients.IsNil(datasourceID) {
@@ -43,12 +49,14 @@ func InitRegisterQueryFunc(promClients *prom.PromClientMap) {
}
// 创建一个 Consumer 实例
func NewConsumer(alerting aconf.Alerting, ctx *ctx.Context, dispatch *Dispatch, promClients *prom.PromClientMap) *Consumer {
func NewConsumer(alerting aconf.Alerting, ctx *ctx.Context, dispatch *Dispatch, promClients *prom.PromClientMap, alertMuteCache *memsto.AlertMuteCacheType) *Consumer {
return &Consumer{
alerting: alerting,
ctx: ctx,
dispatch: dispatch,
promClients: promClients,
alertMuteCache: alertMuteCache,
}
}
@@ -110,10 +118,6 @@ func (e *Consumer) consumeOne(event *models.AlertCurEvent) {
e.persist(event)
if event.IsRecovered && event.NotifyRecovered == 0 {
return
}
e.dispatch.HandleEventNotify(event, false)
}

View File

@@ -24,6 +24,17 @@ import (
"github.com/toolkits/pkg/logger"
)
var ShouldSkipNotify func(*ctx.Context, *models.AlertCurEvent, int64) bool
var SendByNotifyRule func(*ctx.Context, *memsto.UserCacheType, *memsto.UserGroupCacheType, *memsto.NotifyChannelCacheType,
[]*models.AlertCurEvent, int64, *models.NotifyConfig, *models.NotifyChannelConfig, *models.MessageTemplate)
var EventProcessorCache *memsto.EventProcessorCacheType
func init() {
ShouldSkipNotify = shouldSkipNotify
SendByNotifyRule = SendNotifyRuleMessage
}
type Dispatch struct {
alertRuleCache *memsto.AlertRuleCacheType
userCache *memsto.UserCacheType
@@ -45,9 +56,8 @@ type Dispatch struct {
tpls map[string]*template.Template
ExtraSenders map[string]sender.Sender
BeforeSenderHook func(*models.AlertCurEvent) bool
ctx *ctx.Context
Astats *astats.Stats
ctx *ctx.Context
Astats *astats.Stats
RwLock sync.RWMutex
}
@@ -56,7 +66,7 @@ type Dispatch struct {
func NewDispatch(alertRuleCache *memsto.AlertRuleCacheType, userCache *memsto.UserCacheType, userGroupCache *memsto.UserGroupCacheType,
alertSubscribeCache *memsto.AlertSubscribeCacheType, targetCache *memsto.TargetCacheType, notifyConfigCache *memsto.NotifyConfigCacheType,
taskTplsCache *memsto.TaskTplCache, notifyRuleCache *memsto.NotifyRuleCacheType, notifyChannelCache *memsto.NotifyChannelCacheType,
messageTemplateCache *memsto.MessageTemplateCacheType, eventProcessorCache *memsto.EventProcessorCacheType, alerting aconf.Alerting, ctx *ctx.Context, astats *astats.Stats) *Dispatch {
messageTemplateCache *memsto.MessageTemplateCacheType, eventProcessorCache *memsto.EventProcessorCacheType, alerting aconf.Alerting, c *ctx.Context, astats *astats.Stats) *Dispatch {
notify := &Dispatch{
alertRuleCache: alertRuleCache,
userCache: userCache,
@@ -77,11 +87,12 @@ func NewDispatch(alertRuleCache *memsto.AlertRuleCacheType, userCache *memsto.Us
ExtraSenders: make(map[string]sender.Sender),
BeforeSenderHook: func(*models.AlertCurEvent) bool { return true },
ctx: ctx,
ctx: c,
Astats: astats,
}
pipeline.Init()
EventProcessorCache = eventProcessorCache
// 设置通知记录回调函数
notifyChannelCache.SetNotifyRecordFunc(sender.NotifyRecord)
@@ -166,41 +177,12 @@ func (e *Dispatch) HandleEventWithNotifyRule(eventOrigin *models.AlertCurEvent)
if !notifyRule.Enable {
continue
}
eventCopy.NotifyRuleId = notifyRuleId
eventCopy.NotifyRuleName = notifyRule.Name
var processors []models.Processor
for _, pipelineConfig := range notifyRule.PipelineConfigs {
if !pipelineConfig.Enable {
continue
}
eventPipeline := e.eventProcessorCache.Get(pipelineConfig.PipelineId)
if eventPipeline == nil {
logger.Warningf("notify_id: %d, event:%+v, processor not found", notifyRuleId, eventCopy)
continue
}
if !pipelineApplicable(eventPipeline, eventCopy) {
logger.Debugf("notify_id: %d, event:%+v, pipeline_id: %d, not applicable", notifyRuleId, eventCopy, pipelineConfig.PipelineId)
continue
}
processors = append(processors, e.eventProcessorCache.GetProcessorsById(pipelineConfig.PipelineId)...)
}
for _, processor := range processors {
var res string
var err error
logger.Infof("before processor notify_id: %d, event:%+v, processor:%+v", notifyRuleId, eventCopy, processor)
eventCopy, res, err = processor.Process(e.ctx, eventCopy)
if eventCopy == nil {
logger.Warningf("after processor notify_id: %d, event:%+v, processor:%+v, event is nil", notifyRuleId, eventCopy, processor)
break
}
logger.Infof("after processor notify_id: %d, event:%+v, processor:%+v, res:%v, err:%v", notifyRuleId, eventCopy, processor, res, err)
}
if eventCopy == nil {
// 如果 eventCopy 为 nil说明 eventCopy 被 processor drop 掉了, 不再发送通知
eventCopy = HandleEventPipeline(notifyRule.PipelineConfigs, eventOrigin, eventCopy, e.eventProcessorCache, e.ctx, notifyRuleId, "notify_rule")
if ShouldSkipNotify(e.ctx, eventCopy, notifyRuleId) {
logger.Infof("notify_id: %d, event:%+v, should skip notify", notifyRuleId, eventCopy)
continue
}
@@ -227,15 +209,67 @@ func (e *Dispatch) HandleEventWithNotifyRule(eventOrigin *models.AlertCurEvent)
continue
}
// todo go send
// todo 聚合 event
go e.sendV2([]*models.AlertCurEvent{eventCopy}, notifyRuleId, &notifyRule.NotifyConfigs[i], notifyChannel, messageTemplate)
go SendByNotifyRule(e.ctx, e.userCache, e.userGroupCache, e.notifyChannelCache, []*models.AlertCurEvent{eventCopy}, notifyRuleId, &notifyRule.NotifyConfigs[i], notifyChannel, messageTemplate)
}
}
}
}
func pipelineApplicable(pipeline *models.EventPipeline, event *models.AlertCurEvent) bool {
// shouldSkipNotify reports whether notification for event should be
// suppressed under the given notify rule.
//
// It returns true when the event is nil (i.e. it was dropped by an
// upstream processor) or when it is a recovery event whose rule has
// recovery notifications disabled (NotifyRecovered == 0).
// The ctx and notifyRuleId parameters are unused here; they are kept so
// the function matches the ShouldSkipNotify hook signature.
func shouldSkipNotify(ctx *ctx.Context, event *models.AlertCurEvent, notifyRuleId int64) bool {
	// Dropped by a processor: nothing left to send.
	if event == nil {
		return true
	}
	// Recovery event with recovery notifications turned off.
	return event.IsRecovered && event.NotifyRecovered == 0
}
// HandleEventPipeline runs every enabled pipeline in pipelineConfigs against
// event and returns the (possibly mutated) event, or nil when a processor
// drops it.
//
// Parameters:
//   - pipelineConfigs: pipeline bindings to apply; disabled entries are skipped.
//   - eventOrigin: the untouched original event, used only for the drop
//     notification record so the record reflects what entered the pipeline.
//   - event: the working copy that processors mutate step by step.
//   - eventProcessorCache: cache used to resolve pipelines and their processors.
//   - id: the notify-rule or alert-rule id, used only in logs/records.
//   - from: origin tag ("notify_rule" or "alert_rule") controlling whether a
//     drop is persisted via sender.NotifyRecord.
//
// On success the event is re-synced (FE2DB, FillTagsMap) before returning.
func HandleEventPipeline(pipelineConfigs []models.PipelineConfig, eventOrigin, event *models.AlertCurEvent, eventProcessorCache *memsto.EventProcessorCacheType, ctx *ctx.Context, id int64, from string) *models.AlertCurEvent {
for _, pipelineConfig := range pipelineConfigs {
if !pipelineConfig.Enable {
continue
}
// Resolve the pipeline definition; a missing pipeline is logged and skipped,
// not treated as fatal.
eventPipeline := eventProcessorCache.Get(pipelineConfig.PipelineId)
if eventPipeline == nil {
logger.Warningf("processor_by_%s_id:%d pipeline_id:%d, event pipeline not found, event: %+v", from, id, pipelineConfig.PipelineId, event)
continue
}
// Skip pipelines whose filter conditions do not match this event.
if !PipelineApplicable(eventPipeline, event) {
logger.Debugf("processor_by_%s_id:%d pipeline_id:%d, event pipeline not applicable, event: %+v", from, id, pipelineConfig.PipelineId, event)
continue
}
processors := eventProcessorCache.GetProcessorsById(pipelineConfig.PipelineId)
for _, processor := range processors {
var res string
var err error
logger.Infof("processor_by_%s_id:%d pipeline_id:%d, before processor:%+v, event: %+v", from, id, pipelineConfig.PipelineId, processor, event)
// Each processor receives the output of the previous one; a processor may
// return nil to drop the event entirely.
event, res, err = processor.Process(ctx, event)
if event == nil {
logger.Infof("processor_by_%s_id:%d pipeline_id:%d, event dropped, after processor:%+v, event: %+v", from, id, pipelineConfig.PipelineId, processor, eventOrigin)
if from == "notify_rule" {
// For the alert_rule path no eventId is available, so persisting a
// record there would be meaningless; only record drops for notify_rule.
sender.NotifyRecord(ctx, []*models.AlertCurEvent{eventOrigin}, id, "", "", res, fmt.Errorf("processor_by_%s_id:%d pipeline_id:%d, drop by processor", from, id, pipelineConfig.PipelineId))
}
return nil
}
logger.Infof("processor_by_%s_id:%d pipeline_id:%d, after processor:%+v, event: %+v, res:%v, err:%v", from, id, pipelineConfig.PipelineId, processor, event, res, err)
}
}
// NOTE(review): if event is nil on entry (e.g. with an empty pipelineConfigs),
// these calls panic — presumably callers guarantee a non-nil event; confirm.
event.FE2DB()
event.FillTagsMap()
return event
}
func PipelineApplicable(pipeline *models.EventPipeline, event *models.AlertCurEvent) bool {
if pipeline == nil {
return true
}
@@ -445,7 +479,8 @@ func GetNotifyConfigParams(notifyConfig *models.NotifyConfig, contactKey string,
return sendtos, flashDutyChannelIDs, customParams
}
func (e *Dispatch) sendV2(events []*models.AlertCurEvent, notifyRuleId int64, notifyConfig *models.NotifyConfig, notifyChannel *models.NotifyChannelConfig, messageTemplate *models.MessageTemplate) {
func SendNotifyRuleMessage(ctx *ctx.Context, userCache *memsto.UserCacheType, userGroupCache *memsto.UserGroupCacheType, notifyChannelCache *memsto.NotifyChannelCacheType,
events []*models.AlertCurEvent, notifyRuleId int64, notifyConfig *models.NotifyConfig, notifyChannel *models.NotifyChannelConfig, messageTemplate *models.MessageTemplate) {
if len(events) == 0 {
logger.Errorf("notify_id: %d events is empty", notifyRuleId)
return
@@ -461,10 +496,7 @@ func (e *Dispatch) sendV2(events []*models.AlertCurEvent, notifyRuleId int64, no
contactKey = notifyChannel.ParamConfig.UserInfo.ContactKey
}
sendtos, flashDutyChannelIDs, customParams := GetNotifyConfigParams(notifyConfig, contactKey, e.userCache, e.userGroupCache)
e.Astats.GaugeNotifyRecordQueueSize.Inc()
defer e.Astats.GaugeNotifyRecordQueueSize.Dec()
sendtos, flashDutyChannelIDs, customParams := GetNotifyConfigParams(notifyConfig, contactKey, userCache, userGroupCache)
switch notifyChannel.RequestType {
case "flashduty":
@@ -474,10 +506,10 @@ func (e *Dispatch) sendV2(events []*models.AlertCurEvent, notifyRuleId int64, no
for i := range flashDutyChannelIDs {
start := time.Now()
respBody, err := notifyChannel.SendFlashDuty(events, flashDutyChannelIDs[i], e.notifyChannelCache.GetHttpClient(notifyChannel.ID))
respBody, err := notifyChannel.SendFlashDuty(events, flashDutyChannelIDs[i], notifyChannelCache.GetHttpClient(notifyChannel.ID))
respBody = fmt.Sprintf("duration: %d ms %s", time.Since(start).Milliseconds(), respBody)
logger.Infof("notify_id: %d, channel_name: %v, event:%+v, IntegrationUrl: %v dutychannel_id: %v, respBody: %v, err: %v", notifyRuleId, notifyChannel.Name, events[0], notifyChannel.RequestConfig.FlashDutyRequestConfig.IntegrationUrl, flashDutyChannelIDs[i], respBody, err)
sender.NotifyRecord(e.ctx, events, notifyRuleId, notifyChannel.Name, strconv.FormatInt(flashDutyChannelIDs[i], 10), respBody, err)
logger.Infof("duty_sender notify_id: %d, channel_name: %v, event:%+v, IntegrationUrl: %v dutychannel_id: %v, respBody: %v, err: %v", notifyRuleId, notifyChannel.Name, events[0], notifyChannel.RequestConfig.FlashDutyRequestConfig.IntegrationUrl, flashDutyChannelIDs[i], respBody, err)
sender.NotifyRecord(ctx, events, notifyRuleId, notifyChannel.Name, strconv.FormatInt(flashDutyChannelIDs[i], 10), respBody, err)
}
case "http":
@@ -493,22 +525,22 @@ func (e *Dispatch) sendV2(events []*models.AlertCurEvent, notifyRuleId int64, no
}
// 将任务加入队列
success := e.notifyChannelCache.EnqueueNotifyTask(task)
success := notifyChannelCache.EnqueueNotifyTask(task)
if !success {
logger.Errorf("failed to enqueue notify task for channel %d, notify_id: %d", notifyChannel.ID, notifyRuleId)
// 如果入队失败,记录错误通知
sender.NotifyRecord(e.ctx, events, notifyRuleId, notifyChannel.Name, getSendTarget(customParams, sendtos), "", errors.New("failed to enqueue notify task, queue is full"))
sender.NotifyRecord(ctx, events, notifyRuleId, notifyChannel.Name, getSendTarget(customParams, sendtos), "", errors.New("failed to enqueue notify task, queue is full"))
}
case "smtp":
notifyChannel.SendEmail(notifyRuleId, events, tplContent, sendtos, e.notifyChannelCache.GetSmtpClient(notifyChannel.ID))
notifyChannel.SendEmail(notifyRuleId, events, tplContent, sendtos, notifyChannelCache.GetSmtpClient(notifyChannel.ID))
case "script":
start := time.Now()
target, res, err := notifyChannel.SendScript(events, tplContent, customParams, sendtos)
res = fmt.Sprintf("duration: %d ms %s", time.Since(start).Milliseconds(), res)
logger.Infof("notify_id: %d, channel_name: %v, event:%+v, tplContent:%s, customParams:%v, target:%s, res:%s, err:%v", notifyRuleId, notifyChannel.Name, events[0], tplContent, customParams, target, res, err)
sender.NotifyRecord(e.ctx, events, notifyRuleId, notifyChannel.Name, target, res, err)
logger.Infof("script_sender notify_id: %d, channel_name: %v, event:%+v, tplContent:%s, customParams:%v, target:%s, res:%s, err:%v", notifyRuleId, notifyChannel.Name, events[0], tplContent, customParams, target, res, err)
sender.NotifyRecord(ctx, events, notifyRuleId, notifyChannel.Name, target, res, err)
default:
logger.Warningf("notify_id: %d, channel_name: %v, event:%+v send type not found", notifyRuleId, notifyChannel.Name, events[0])
}
@@ -523,6 +555,11 @@ func NeedBatchContacts(requestConfig *models.HTTPRequestConfig) bool {
// event: 告警/恢复事件
// isSubscribe: 告警事件是否由subscribe的配置产生
func (e *Dispatch) HandleEventNotify(event *models.AlertCurEvent, isSubscribe bool) {
go e.HandleEventWithNotifyRule(event)
if event.IsRecovered && event.NotifyRecovered == 0 {
return
}
rule := e.alertRuleCache.Get(event.RuleId)
if rule == nil {
return
@@ -555,7 +592,6 @@ func (e *Dispatch) HandleEventNotify(event *models.AlertCurEvent, isSubscribe bo
notifyTarget.AndMerge(handler(rule, event, notifyTarget, e))
}
go e.HandleEventWithNotifyRule(event)
go e.Send(rule, event, notifyTarget, isSubscribe)
// 如果是不是订阅规则出现的event, 则需要处理订阅规则的event
@@ -595,6 +631,10 @@ func (e *Dispatch) handleSub(sub *models.AlertSubscribe, event models.AlertCurEv
return
}
if !sub.MatchCate(event.Cate) {
return
}
if !common.MatchTags(event.TagsMap, sub.ITags) {
return
}

View File

@@ -286,7 +286,7 @@ func (arw *AlertRuleWorker) GetPromAnomalyPoint(ruleConfig string) ([]models.Ano
continue
}
if query.VarEnabled {
if query.VarEnabled && strings.Contains(query.PromQl, "$") {
var anomalyPoints []models.AnomalyPoint
if hasLabelLossAggregator(query) || notExactMatch(query) {
// 若有聚合函数或非精确匹配则需要先填充变量然后查询,这个方式效率较低

View File

@@ -1,6 +1,7 @@
package mute
import (
"slices"
"strconv"
"strings"
"time"
@@ -153,13 +154,7 @@ func MatchMute(event *models.AlertCurEvent, mute *models.AlertMute, clock ...int
// 如果不是全局的,判断 匹配的 datasource id
if len(mute.DatasourceIdsJson) != 0 && mute.DatasourceIdsJson[0] != 0 && event.DatasourceId != 0 {
idm := make(map[int64]struct{}, len(mute.DatasourceIdsJson))
for i := 0; i < len(mute.DatasourceIdsJson); i++ {
idm[mute.DatasourceIdsJson[i]] = struct{}{}
}
// 判断 event.datasourceId 是否包含在 idm 中
if _, has := idm[event.DatasourceId]; !has {
if !slices.Contains(mute.DatasourceIdsJson, event.DatasourceId) {
return false, errors.New("datasource id not match")
}
}
@@ -198,7 +193,7 @@ func MatchMute(event *models.AlertCurEvent, mute *models.AlertMute, clock ...int
return false, errors.New("event severity not match mute severity")
}
if mute.ITags == nil || len(mute.ITags) == 0 {
if len(mute.ITags) == 0 {
return true, nil
}
if !common.MatchTags(event.TagsMap, mute.ITags) {

View File

@@ -8,6 +8,7 @@ import (
"io"
"net/http"
"net/url"
"strconv"
"strings"
"text/template"
"time"
@@ -143,7 +144,11 @@ func (c *AISummaryConfig) generateAISummary(eventInfo string) (string, error) {
// 合并自定义参数
for k, v := range c.CustomParams {
reqParams[k] = v
converted, err := convertCustomParam(v)
if err != nil {
return "", fmt.Errorf("failed to convert custom param %s: %v", k, err)
}
reqParams[k] = converted
}
// 序列化请求体
@@ -196,3 +201,44 @@ func (c *AISummaryConfig) generateAISummary(eventInfo string) (string, error) {
return chatResp.Choices[0].Message.Content, nil
}
// convertCustomParam converts a parameter supplied by the frontend into its
// most specific Go type, since custom params typically arrive as strings.
//
// String inputs are tried in order as: integer, float, boolean, JSON array,
// JSON object; when nothing matches, the original string is returned.
// Non-string values pass through unchanged. The error return is reserved for
// future conversions — the current implementation never fails.
func convertCustomParam(value interface{}) (interface{}, error) {
	if value == nil {
		return nil, nil
	}

	str, ok := value.(string)
	if !ok {
		// Already a concrete non-string type (number, bool, object, ...).
		return value, nil
	}

	// Try a base-10 integer first so large values such as
	// "9007199254740993" keep full int64 precision; routing them through
	// float64 (as the branch below does) would silently round them.
	if i, err := strconv.ParseInt(str, 10, 64); err == nil {
		return i, nil
	}

	// Fall back to float; values that are mathematically integral (e.g.
	// "1e3", "123.0") are still normalized to int64, as before.
	if f, err := strconv.ParseFloat(str, 64); err == nil {
		if f == float64(int64(f)) {
			return int64(f), nil
		}
		return f, nil
	}

	if b, err := strconv.ParseBool(str); err == nil {
		return b, nil
	}

	trimmed := strings.TrimSpace(str)

	// JSON array, e.g. `["a","b"]`.
	if strings.HasPrefix(trimmed, "[") {
		var arr []interface{}
		if err := json.Unmarshal([]byte(str), &arr); err == nil {
			return arr, nil
		}
	}

	// JSON object, e.g. `{"k":"v"}`.
	if strings.HasPrefix(trimmed, "{") {
		var obj map[string]interface{}
		if err := json.Unmarshal([]byte(str), &obj); err == nil {
			return obj, nil
		}
	}

	// Plain string — no conversion applies.
	return str, nil
}

View File

@@ -67,3 +67,73 @@ func TestAISummaryConfig_Process(t *testing.T) {
t.Logf("原始注释: %v", result.AnnotationsJSON["description"])
t.Logf("AI总结: %s", result.AnnotationsJSON["ai_summary"])
}
// TestConvertCustomParam exercises convertCustomParam against every supported
// conversion: nil passthrough, numeric strings, booleans, JSON collections,
// and plain strings that should stay untouched.
func TestConvertCustomParam(t *testing.T) {
	type testCase struct {
		name     string
		input    interface{}
		expected interface{}
		hasError bool
	}

	cases := []testCase{
		{name: "nil value", input: nil, expected: nil, hasError: false},
		{name: "string number to int64", input: "123", expected: int64(123), hasError: false},
		{name: "string float to float64", input: "123.45", expected: 123.45, hasError: false},
		{name: "string boolean to bool", input: "true", expected: true, hasError: false},
		{name: "string false to bool", input: "false", expected: false, hasError: false},
		{name: "JSON array string to slice", input: `["a", "b", "c"]`, expected: []interface{}{"a", "b", "c"}, hasError: false},
		{name: "JSON object string to map", input: `{"key": "value", "num": 123}`, expected: map[string]interface{}{"key": "value", "num": float64(123)}, hasError: false},
		{name: "plain string remains string", input: "hello world", expected: "hello world", hasError: false},
	}

	for _, tc := range cases {
		t.Run(tc.name, func(t *testing.T) {
			got, err := convertCustomParam(tc.input)
			if tc.hasError {
				assert.Error(t, err)
				return
			}
			assert.NoError(t, err)
			assert.Equal(t, tc.expected, got)
		})
	}
}

View File

@@ -26,8 +26,6 @@ import (
"github.com/toolkits/pkg/str"
)
type EventMuteHookFunc func(event *models.AlertCurEvent) bool
type ExternalProcessorsType struct {
ExternalLock sync.RWMutex
Processors map[string]*Processor
@@ -76,7 +74,6 @@ type Processor struct {
HandleFireEventHook HandleEventFunc
HandleRecoverEventHook HandleEventFunc
EventMuteHook EventMuteHookFunc
ScheduleEntry cron.Entry
PromEvalInterval int
@@ -121,7 +118,6 @@ func NewProcessor(engineName string, rule *models.AlertRule, datasourceId int64,
HandleFireEventHook: func(event *models.AlertCurEvent) {},
HandleRecoverEventHook: func(event *models.AlertCurEvent) {},
EventMuteHook: func(event *models.AlertCurEvent) bool { return false },
}
p.mayHandleGroup()
@@ -155,9 +151,19 @@ func (p *Processor) Handle(anomalyPoints []models.AnomalyPoint, from string, inh
// 如果 event 被 mute 了,本质也是 fire 的状态,这里无论如何都添加到 alertingKeys 中,防止 fire 的事件自动恢复了
hash := event.Hash
alertingKeys[hash] = struct{}{}
// event processor
eventCopy := event.DeepCopy()
event = dispatch.HandleEventPipeline(cachedRule.PipelineConfigs, eventCopy, event, dispatch.EventProcessorCache, p.ctx, cachedRule.Id, "alert_rule")
if event == nil {
logger.Infof("rule_eval:%s is muted drop by pipeline event:%v", p.Key(), eventCopy)
continue
}
// event mute
isMuted, detail, muteId := mute.IsMuted(cachedRule, event, p.TargetCache, p.alertMuteCache)
if isMuted {
logger.Debugf("rule_eval:%s event:%v is muted, detail:%s", p.Key(), event, detail)
logger.Infof("rule_eval:%s is muted, detail:%s event:%v", p.Key(), detail, event)
p.Stats.CounterMuteTotal.WithLabelValues(
fmt.Sprintf("%v", event.GroupName),
fmt.Sprintf("%v", p.rule.Id),
@@ -167,8 +173,8 @@ func (p *Processor) Handle(anomalyPoints []models.AnomalyPoint, from string, inh
continue
}
if p.EventMuteHook(event) {
logger.Debugf("rule_eval:%s event:%v is muted by hook", p.Key(), event)
if dispatch.EventMuteHook(event) {
logger.Infof("rule_eval:%s is muted by hook event:%v", p.Key(), event)
p.Stats.CounterMuteTotal.WithLabelValues(
fmt.Sprintf("%v", event.GroupName),
fmt.Sprintf("%v", p.rule.Id),

View File

@@ -25,6 +25,7 @@ func (rt *Router) pushEventToQueue(c *gin.Context) {
if event.RuleId == 0 {
ginx.Bomb(200, "event is illegal")
}
event.FE2DB()
event.TagsMap = make(map[string]string)
for i := 0; i < len(event.TagsJSON); i++ {
@@ -40,7 +41,7 @@ func (rt *Router) pushEventToQueue(c *gin.Context) {
event.TagsMap[arr[0]] = arr[1]
}
hit, _ := mute.EventMuteStrategy(event, rt.AlertMuteCache)
hit, _ := mute.EventMuteStrategy(event, rt.AlertMuteCache)
if hit {
logger.Infof("event_muted: rule_id=%d %s", event.RuleId, event.Hash)
ginx.NewRender(c).Message(nil)

View File

@@ -141,7 +141,7 @@ func updateSmtp(ctx *ctx.Context, ncc *memsto.NotifyConfigCacheType) {
func startEmailSender(ctx *ctx.Context, smtp aconf.SMTPConfig) {
conf := smtp
if conf.Host == "" || conf.Port == 0 {
logger.Warning("SMTP configurations invalid")
logger.Debug("SMTP configurations invalid")
<-mailQuit
return
}

View File

@@ -187,7 +187,7 @@ func Init(ctx *ctx.Context, builtinIntegrationsDir string) {
CreatedBy: SYSTEM,
UpdatedBy: SYSTEM,
}
BuiltinPayloadInFile.addBuiltinPayload(&builtinAlert)
BuiltinPayloadInFile.AddBuiltinPayload(&builtinAlert)
}
}
@@ -245,7 +245,7 @@ func Init(ctx *ctx.Context, builtinIntegrationsDir string) {
CreatedBy: SYSTEM,
UpdatedBy: SYSTEM,
}
BuiltinPayloadInFile.addBuiltinPayload(&builtinDashboard)
BuiltinPayloadInFile.AddBuiltinPayload(&builtinDashboard)
}
} else if err != nil {
logger.Warningf("read builtin component dash dir fail %s %v", component.Ident, err)
@@ -314,7 +314,7 @@ func NewBuiltinPayloadInFileType() *BuiltinPayloadInFileType {
}
}
func (b *BuiltinPayloadInFileType) addBuiltinPayload(bp *models.BuiltinPayload) {
func (b *BuiltinPayloadInFileType) AddBuiltinPayload(bp *models.BuiltinPayload) {
if _, exists := b.Data[bp.ComponentID]; !exists {
b.Data[bp.ComponentID] = make(map[string]map[string][]*models.BuiltinPayload)
}
@@ -331,6 +331,30 @@ func (b *BuiltinPayloadInFileType) addBuiltinPayload(bp *models.BuiltinPayload)
b.IndexData[bp.UUID] = bp
}
// GetComponentIdentByCate scans every component's in-file payload data and
// returns the Component ident of the first payload found under the given
// type and category. It returns an empty string when nothing matches.
func (b *BuiltinPayloadInFileType) GetComponentIdentByCate(typ, cate string) string {
	for _, byType := range b.Data {
		// Indexing a nil map is safe in Go and yields the zero value, so
		// the nil/exists checks collapse into one nested lookup.
		if payloads := byType[typ][cate]; len(payloads) > 0 {
			return payloads[0].Component
		}
	}
	return ""
}
func (b *BuiltinPayloadInFileType) GetBuiltinPayload(typ, cate, query string, componentId uint64) ([]*models.BuiltinPayload, error) {
var result []*models.BuiltinPayload
@@ -390,9 +414,10 @@ func filterByQuery(payloads []*models.BuiltinPayload, query string) []*models.Bu
return payloads
}
queryLower := strings.ToLower(query)
var filtered []*models.BuiltinPayload
for _, p := range payloads {
if strings.Contains(p.Name, query) || strings.Contains(p.Tags, query) {
if strings.Contains(strings.ToLower(p.Name), queryLower) || strings.Contains(strings.ToLower(p.Tags), queryLower) {
filtered = append(filtered, p)
}
}

View File

@@ -316,6 +316,7 @@ func (rt *Router) Config(r *gin.Engine) {
pages.GET("/busi-groups/tags", rt.auth(), rt.user(), rt.busiGroupsGetTags)
pages.GET("/targets", rt.auth(), rt.user(), rt.targetGets)
pages.POST("/target-update", rt.auth(), rt.targetUpdate)
pages.GET("/target/extra-meta", rt.auth(), rt.user(), rt.targetExtendInfoByIdent)
pages.POST("/target/list", rt.auth(), rt.user(), rt.targetGetsByHostFilter)
pages.DELETE("/targets", rt.auth(), rt.user(), rt.perm("/targets/del"), rt.targetDel)
@@ -543,6 +544,9 @@ func (rt *Router) Config(r *gin.Engine) {
pages.GET("/notify-rule/custom-params", rt.auth(), rt.user(), rt.perm("/notification-rules"), rt.notifyRuleCustomParamsGet)
pages.POST("/notify-rule/event-pipelines-tryrun", rt.auth(), rt.user(), rt.perm("/notification-rules/add"), rt.tryRunEventProcessorByNotifyRule)
pages.GET("/event-tagkeys", rt.auth(), rt.user(), rt.eventTagKeys)
pages.GET("/event-tagvalues", rt.auth(), rt.user(), rt.eventTagValues)
// 事件Pipeline相关路由
pages.GET("/event-pipelines", rt.auth(), rt.user(), rt.perm("/event-pipelines"), rt.eventPipelinesList)
pages.POST("/event-pipeline", rt.auth(), rt.user(), rt.perm("/event-pipelines/add"), rt.addEventPipeline)
@@ -624,6 +628,7 @@ func (rt *Router) Config(r *gin.Engine) {
service.GET("/recording-rules", rt.recordingRuleGetsByService)
service.GET("/alert-mutes", rt.alertMuteGets)
service.GET("/active-alert-mutes", rt.activeAlertMuteGets)
service.POST("/alert-mutes", rt.alertMuteAddByService)
service.DELETE("/alert-mutes", rt.alertMuteDel)
@@ -672,6 +677,14 @@ func (rt *Router) Config(r *gin.Engine) {
service.GET("/message-templates", rt.messageTemplateGets)
service.GET("/event-pipelines", rt.eventPipelinesListByService)
// 手机号加密存储配置接口
service.POST("/users/phone/encrypt", rt.usersPhoneEncrypt)
service.POST("/users/phone/decrypt", rt.usersPhoneDecrypt)
service.POST("/users/phone/refresh-encryption-config", rt.usersPhoneDecryptRefresh)
service.GET("/builtin-components", rt.builtinComponentsGets)
service.GET("/builtin-payloads", rt.builtinPayloadsGets)
}
}

View File

@@ -13,6 +13,7 @@ import (
"github.com/gin-gonic/gin"
"github.com/toolkits/pkg/ginx"
"github.com/toolkits/pkg/logger"
)
func getUserGroupIds(ctx *gin.Context, rt *Router, myGroups bool) ([]int64, error) {
@@ -305,3 +306,123 @@ func (rt *Router) alertCurEventDelByHash(c *gin.Context) {
hash := ginx.QueryStr(c, "hash")
ginx.NewRender(c).Message(models.AlertCurEventDelByHash(rt.Ctx, hash))
}
// eventTagKeys returns the distinct tag keys seen on active alert events from
// the last 24 hours, scoped to the business groups visible to the requester.
// On any lookup failure, or when no tag keys can be collected, a default key
// set is returned so the frontend always has suggestions to offer.
func (rt *Router) eventTagKeys(c *gin.Context) {
	// Fallback suggestions, previously duplicated at every early return.
	defaultKeys := []string{"ident", "app", "service", "instance"}

	// Only consider active events from the last day.
	now := time.Now().Unix()
	stime := now - 24*3600
	etime := now

	// Business groups the current user is allowed to see.
	bgids, err := GetBusinessGroupIds(c, rt.Ctx, rt.Center.EventHistoryGroupView, false)
	if err != nil {
		logger.Warningf("failed to get business group ids: %v", err)
		ginx.NewRender(c).Data(defaultKeys, nil)
		return
	}

	// Cap at 200 events to keep the query cheap; this is a suggestion API,
	// not an exhaustive listing.
	events, err := models.AlertCurEventsGet(rt.Ctx, []string{}, bgids, stime, etime, []int64{}, []int64{}, []string{}, 0, "", 200, 0, []int64{})
	if err != nil {
		logger.Warningf("failed to get current alert events: %v", err)
		ginx.NewRender(c).Data(defaultKeys, nil)
		return
	}

	if len(events) == 0 {
		ginx.NewRender(c).Data(defaultKeys, nil)
		return
	}

	// Deduplicate tag keys across all events.
	tagKeys := make(map[string]struct{})
	for _, event := range events {
		for key := range event.TagsMap {
			tagKeys[key] = struct{}{}
		}
	}

	result := make([]string, 0, len(tagKeys))
	for key := range tagKeys {
		result = append(result, key)
	}
	// Map iteration order is random; sort for a deterministic response.
	sort.Strings(result)

	if len(result) == 0 {
		result = defaultKeys
	}

	ginx.NewRender(c).Data(result, nil)
}
// eventTagValues returns the top 20 most frequent values of the given tag key
// among active alert events from the last 24 hours, scoped to the business
// groups visible to the requester. All failure paths degrade to an empty list.
func (rt *Router) eventTagValues(c *gin.Context) {
	// The tag key whose values should be suggested.
	tagKey := ginx.QueryStr(c, "key")

	// Only consider active events from the last day.
	now := time.Now().Unix()
	stime := now - 24*3600
	etime := now

	// Business groups the current user is allowed to see.
	bgids, err := GetBusinessGroupIds(c, rt.Ctx, rt.Center.EventHistoryGroupView, false)
	if err != nil {
		logger.Warningf("failed to get business group ids: %v", err)
		ginx.NewRender(c).Data([]string{}, nil)
		return
	}

	// Fetch more events (up to 1000) than eventTagKeys does so the value
	// frequency ranking is reasonably accurate.
	events, err := models.AlertCurEventsGet(rt.Ctx, []string{}, bgids, stime, etime, []int64{}, []int64{}, []string{}, 0, "", 1000, 0, []int64{})
	if err != nil {
		logger.Warningf("failed to get current alert events: %v", err)
		ginx.NewRender(c).Data([]string{}, nil)
		return
	}

	if len(events) == 0 {
		ginx.NewRender(c).Data([]string{}, nil)
		return
	}

	// Count occurrences of each non-empty value of the requested key.
	// TagsMap is already populated by AlertCurEventsGet.
	valueCount := make(map[string]int)
	for _, event := range events {
		if value, exists := event.TagsMap[tagKey]; exists && value != "" {
			valueCount[value]++
		}
	}

	type tagValue struct {
		value string
		count int
	}
	tagValues := make([]tagValue, 0, len(valueCount))
	for value, count := range valueCount {
		tagValues = append(tagValues, tagValue{value, count})
	}

	// Order by descending frequency; break ties lexically so the response
	// is deterministic (map iteration order is random otherwise, which
	// made the top-20 cut-off unstable among equal counts).
	sort.Slice(tagValues, func(i, j int) bool {
		if tagValues[i].count != tagValues[j].count {
			return tagValues[i].count > tagValues[j].count
		}
		return tagValues[i].value < tagValues[j].value
	})

	// Keep only the top 20 values.
	limit := 20
	if len(tagValues) < limit {
		limit = len(tagValues)
	}
	result := make([]string, 0, limit)
	for i := 0; i < limit; i++ {
		result = append(result, tagValues[i].value)
	}

	ginx.NewRender(c).Data(result, nil)
}

View File

@@ -62,11 +62,11 @@ func (rt *Router) alertHisEventsList(c *gin.Context) {
ginx.Dangerous(err)
total, err := models.AlertHisEventTotal(rt.Ctx, prods, bgids, stime, etime, severity,
recovered, dsIds, cates, ruleId, query)
recovered, dsIds, cates, ruleId, query, []int64{})
ginx.Dangerous(err)
list, err := models.AlertHisEventGets(rt.Ctx, prods, bgids, stime, etime, severity, recovered,
dsIds, cates, ruleId, query, limit, ginx.Offset(c, limit))
dsIds, cates, ruleId, query, limit, ginx.Offset(c, limit), []int64{})
ginx.Dangerous(err)
cache := make(map[int64]*models.UserGroup)
@@ -115,7 +115,18 @@ func (rt *Router) alertHisEventsDelete(c *gin.Context) {
time.Sleep(100 * time.Millisecond) // 防止锁表
}
}()
ginx.NewRender(c).Message("Alert history events deletion started")
ginx.NewRender(c).Data("Alert history events deletion started", nil)
}
// TransferEventToCur converts a historical alert event into the current-event
// shape returned to the UI. It is exposed as a package-level hook so other
// builds can override the conversion; the default implementation is
// transferEventToCur below.
var TransferEventToCur func(*ctx.Context, *models.AlertHisEvent) *models.AlertCurEvent

func init() {
	// Install the default implementation.
	TransferEventToCur = transferEventToCur
}

// transferEventToCur is the default TransferEventToCur implementation: a thin
// wrapper around AlertHisEvent.ToCur (the ctx parameter is currently unused).
func transferEventToCur(ctx *ctx.Context, event *models.AlertHisEvent) *models.AlertCurEvent {
	cur := event.ToCur()
	return cur
}
func (rt *Router) alertHisEventGet(c *gin.Context) {
@@ -142,7 +153,7 @@ func (rt *Router) alertHisEventGet(c *gin.Context) {
ginx.Dangerous(err)
event.NotifyRules, err = GetEventNorifyRuleNames(rt.Ctx, event.NotifyRuleIds)
ginx.NewRender(c).Data(event, err)
ginx.NewRender(c).Data(TransferEventToCur(rt.Ctx, event), err)
}
func GetBusinessGroupIds(c *gin.Context, ctx *ctx.Context, onlySelfGroupView bool, myGroups bool) ([]int64, error) {

View File

@@ -35,13 +35,12 @@ func (rt *Router) alertRuleGets(c *gin.Context) {
cache := make(map[int64]*models.UserGroup)
for i := 0; i < len(ars); i++ {
ars[i].FillNotifyGroups(rt.Ctx, cache)
ars[i].FillSeverities()
}
}
ginx.NewRender(c).Data(ars, err)
}
func getAlertCueEventTimeRange(c *gin.Context) (stime, etime int64) {
func GetAlertCueEventTimeRange(c *gin.Context) (stime, etime int64) {
stime = ginx.QueryInt64(c, "stime", 0)
etime = ginx.QueryInt64(c, "etime", 0)
if etime == 0 {
@@ -80,7 +79,6 @@ func (rt *Router) alertRuleGetsByGids(c *gin.Context) {
names := make([]string, 0, len(ars))
for i := 0; i < len(ars); i++ {
ars[i].FillNotifyGroups(rt.Ctx, cache)
ars[i].FillSeverities()
if len(ars[i].DatasourceQueries) != 0 {
ars[i].DatasourceIdsJson = rt.DatasourceCache.GetIDsByDsCateAndQueries(ars[i].Cate, ars[i].DatasourceQueries)
@@ -90,7 +88,7 @@ func (rt *Router) alertRuleGetsByGids(c *gin.Context) {
names = append(names, ars[i].UpdateBy)
}
stime, etime := getAlertCueEventTimeRange(c)
stime, etime := GetAlertCueEventTimeRange(c)
cnt := models.AlertCurEventCountByRuleId(rt.Ctx, rids, stime, etime)
if cnt != nil {
for i := 0; i < len(ars); i++ {
@@ -290,6 +288,15 @@ func (rt *Router) alertRuleAddByImport(c *gin.Context) {
models.DataSourceQueryAll,
}
}
// 将导入的规则统一转为新版本的通知规则配置
lst[i].NotifyVersion = 1
lst[i].NotifyChannelsJSON = []string{}
lst[i].NotifyGroupsJSON = []string{}
lst[i].NotifyChannels = ""
lst[i].NotifyGroups = ""
lst[i].Callbacks = ""
lst[i].CallbacksJSON = []string{}
}
bgid := ginx.UrlParamInt64(c, "id")
@@ -308,19 +315,52 @@ func (rt *Router) alertRuleAddByImportPromRule(c *gin.Context) {
var f promRuleForm
ginx.Dangerous(c.BindJSON(&f))
// 首先尝试解析带 groups 的格式
var pr struct {
Groups []models.PromRuleGroup `yaml:"groups"`
}
err := yaml.Unmarshal([]byte(f.Payload), &pr)
if err != nil {
ginx.Bomb(http.StatusBadRequest, "invalid yaml format, please use the example format. err: %v", err)
var groups []models.PromRuleGroup
if err != nil || len(pr.Groups) == 0 {
// 如果解析失败或没有 groups尝试解析规则数组格式
var rules []models.PromRule
err = yaml.Unmarshal([]byte(f.Payload), &rules)
if err != nil {
// 最后尝试解析单个规则格式
var singleRule models.PromRule
err = yaml.Unmarshal([]byte(f.Payload), &singleRule)
if err != nil {
ginx.Bomb(http.StatusBadRequest, "invalid yaml format. err: %v", err)
}
// 验证单个规则是否有效
if singleRule.Alert == "" && singleRule.Record == "" {
ginx.Bomb(http.StatusBadRequest, "input yaml is empty or invalid")
}
rules = []models.PromRule{singleRule}
}
// 验证规则数组是否为空
if len(rules) == 0 {
ginx.Bomb(http.StatusBadRequest, "input yaml contains no rules")
}
// 将规则数组包装成 group
groups = []models.PromRuleGroup{
{
Name: "imported_rules",
Rules: rules,
},
}
} else {
// 使用已解析的 groups
groups = pr.Groups
}
if len(pr.Groups) == 0 {
ginx.Bomb(http.StatusBadRequest, "input yaml is empty")
}
lst := models.DealPromGroup(pr.Groups, f.DatasourceQueries, f.Disabled)
lst := models.DealPromGroup(groups, f.DatasourceQueries, f.Disabled)
username := c.MustGet("username").(string)
bgid := ginx.UrlParamInt64(c, "id")
ginx.NewRender(c).Data(rt.alertRuleAdd(lst, username, bgid, c.GetHeader("X-Language")), nil)
@@ -465,8 +505,8 @@ func (rt *Router) alertRulePutFields(c *gin.Context) {
ginx.Bomb(http.StatusBadRequest, "fields empty")
}
f.Fields["update_by"] = c.MustGet("username").(string)
f.Fields["update_at"] = time.Now().Unix()
updateBy := c.MustGet("username").(string)
updateAt := time.Now().Unix()
for i := 0; i < len(f.Ids); i++ {
ar, err := models.AlertRuleGetById(rt.Ctx, f.Ids[i])
@@ -483,7 +523,6 @@ func (rt *Router) alertRulePutFields(c *gin.Context) {
b, err := json.Marshal(originRule)
ginx.Dangerous(err)
ginx.Dangerous(ar.UpdateFieldsMap(rt.Ctx, map[string]interface{}{"rule_config": string(b)}))
continue
}
}
@@ -496,7 +535,6 @@ func (rt *Router) alertRulePutFields(c *gin.Context) {
b, err := json.Marshal(ar.AnnotationsJSON)
ginx.Dangerous(err)
ginx.Dangerous(ar.UpdateFieldsMap(rt.Ctx, map[string]interface{}{"annotations": string(b)}))
continue
}
}
@@ -509,7 +547,6 @@ func (rt *Router) alertRulePutFields(c *gin.Context) {
b, err := json.Marshal(ar.AnnotationsJSON)
ginx.Dangerous(err)
ginx.Dangerous(ar.UpdateFieldsMap(rt.Ctx, map[string]interface{}{"annotations": string(b)}))
continue
}
}
@@ -519,7 +556,6 @@ func (rt *Router) alertRulePutFields(c *gin.Context) {
callback := callbacks.(string)
if !strings.Contains(ar.Callbacks, callback) {
ginx.Dangerous(ar.UpdateFieldsMap(rt.Ctx, map[string]interface{}{"callbacks": ar.Callbacks + " " + callback}))
continue
}
}
}
@@ -529,7 +565,6 @@ func (rt *Router) alertRulePutFields(c *gin.Context) {
if callbacks, has := f.Fields["callbacks"]; has {
callback := callbacks.(string)
ginx.Dangerous(ar.UpdateFieldsMap(rt.Ctx, map[string]interface{}{"callbacks": strings.ReplaceAll(ar.Callbacks, callback, "")}))
continue
}
}
@@ -539,7 +574,6 @@ func (rt *Router) alertRulePutFields(c *gin.Context) {
bytes, err := json.Marshal(datasourceQueries)
ginx.Dangerous(err)
ginx.Dangerous(ar.UpdateFieldsMap(rt.Ctx, map[string]interface{}{"datasource_queries": bytes}))
continue
}
}
@@ -555,6 +589,12 @@ func (rt *Router) alertRulePutFields(c *gin.Context) {
ginx.Dangerous(ar.UpdateColumn(rt.Ctx, k, v))
}
}
// 统一更新更新时间和更新人,只有更新时间变了,告警规则才会被引擎拉取
ginx.Dangerous(ar.UpdateFieldsMap(rt.Ctx, map[string]interface{}{
"update_by": updateBy,
"update_at": updateAt,
}))
}
ginx.NewRender(c).Message(nil)

View File

@@ -288,6 +288,7 @@ func (rt *Router) alertSubscribePut(c *gin.Context) {
"busi_groups",
"note",
"notify_rule_ids",
"notify_version",
))
}

View File

@@ -14,6 +14,7 @@ import (
"github.com/gin-gonic/gin"
"github.com/toolkits/pkg/ginx"
"github.com/toolkits/pkg/i18n"
"github.com/toolkits/pkg/logger"
)
@@ -102,7 +103,7 @@ func (rt *Router) datasourceUpsert(c *gin.Context) {
if !req.ForceSave {
if req.PluginType == models.PROMETHEUS || req.PluginType == models.LOKI || req.PluginType == models.TDENGINE {
err = DatasourceCheck(req)
err = DatasourceCheck(c, req)
if err != nil {
Dangerous(c, err)
return
@@ -173,7 +174,7 @@ func (rt *Router) datasourceUpsert(c *gin.Context) {
Render(c, nil, err)
}
func DatasourceCheck(ds models.Datasource) error {
func DatasourceCheck(c *gin.Context, ds models.Datasource) error {
if ds.PluginType == models.PROMETHEUS || ds.PluginType == models.LOKI || ds.PluginType == models.TDENGINE {
if ds.HTTPJson.Url == "" {
return fmt.Errorf("url is empty")
@@ -232,6 +233,10 @@ func DatasourceCheck(ds models.Datasource) error {
req, err = http.NewRequest("GET", fullURL, nil)
if err != nil {
logger.Errorf("Error creating request: %v", err)
if !strings.Contains(ds.HTTPJson.Url, "/loki") {
lang := c.GetHeader("X-Language")
return fmt.Errorf(i18n.Sprintf(lang, "/loki suffix is miss, please add /loki to the url: %s", ds.HTTPJson.Url+"/loki"))
}
return fmt.Errorf("request url:%s failed: %v", fullURL, err)
}
}
@@ -253,6 +258,10 @@ func DatasourceCheck(ds models.Datasource) error {
if resp.StatusCode != 200 {
logger.Errorf("Error making request: %v\n", resp.StatusCode)
if resp.StatusCode == 404 && ds.PluginType == models.LOKI && !strings.Contains(ds.HTTPJson.Url, "/loki") {
lang := c.GetHeader("X-Language")
return fmt.Errorf(i18n.Sprintf(lang, "/loki suffix is miss, please add /loki to the url: %s", ds.HTTPJson.Url+"/loki"))
}
body, _ := io.ReadAll(resp.Body)
return fmt.Errorf("request url:%s failed code:%d body:%s", fullURL, resp.StatusCode, string(body))
}

View File

@@ -8,6 +8,7 @@ import (
"github.com/gin-gonic/gin"
"github.com/toolkits/pkg/ginx"
"github.com/toolkits/pkg/i18n"
)
// 获取事件Pipeline列表
@@ -139,12 +140,14 @@ func (rt *Router) tryRunEventPipeline(c *gin.Context) {
}
event := hisEvent.ToCur()
lang := c.GetHeader("X-Language")
var result string
for _, p := range f.PipelineConfig.ProcessorConfigs {
processor, err := models.GetProcessorByType(p.Typ, p.Config)
if err != nil {
ginx.Bomb(http.StatusBadRequest, "get processor: %+v err: %+v", p, err)
}
event, _, err = processor.Process(rt.Ctx, event)
event, result, err = processor.Process(rt.Ctx, event)
if err != nil {
ginx.Bomb(http.StatusBadRequest, "processor: %+v err: %+v", p, err)
}
@@ -152,7 +155,7 @@ func (rt *Router) tryRunEventPipeline(c *gin.Context) {
if event == nil {
ginx.NewRender(c).Data(map[string]interface{}{
"event": event,
"result": "event is dropped",
"result": i18n.Sprintf(lang, "event is dropped"),
}, nil)
return
}
@@ -160,7 +163,7 @@ func (rt *Router) tryRunEventPipeline(c *gin.Context) {
m := map[string]interface{}{
"event": event,
"result": "",
"result": i18n.Sprintf(lang, result),
}
ginx.NewRender(c).Data(m, nil)
}
@@ -188,9 +191,10 @@ func (rt *Router) tryRunEventProcessor(c *gin.Context) {
ginx.Bomb(200, "processor err: %+v", err)
}
lang := c.GetHeader("X-Language")
ginx.NewRender(c).Data(map[string]interface{}{
"event": event,
"result": res,
"result": i18n.Sprintf(lang, res),
}, nil)
}
@@ -231,9 +235,10 @@ func (rt *Router) tryRunEventProcessorByNotifyRule(c *gin.Context) {
ginx.Bomb(http.StatusBadRequest, "processor: %+v err: %+v", p, err)
}
if event == nil {
lang := c.GetHeader("X-Language")
ginx.NewRender(c).Data(map[string]interface{}{
"event": event,
"result": "event is dropped",
"result": i18n.Sprintf(lang, "event is dropped"),
}, nil)
return
}

View File

@@ -107,9 +107,20 @@ func (rt *Router) logoutPost(c *gin.Context) {
var logoutAddr string
user := c.MustGet("user").(*models.User)
// 获取用户的 id_token
idToken, err := rt.fetchIdToken(c.Request.Context(), user.Id)
if err != nil {
logger.Debugf("fetch id_token failed: %v, user_id: %d", err, user.Id)
idToken = "" // 如果获取失败,使用空字符串
}
// 删除 id_token
rt.deleteIdToken(c.Request.Context(), user.Id)
switch user.Belong {
case "oidc":
logoutAddr = rt.Sso.OIDC.GetSsoLogoutAddr()
logoutAddr = rt.Sso.OIDC.GetSsoLogoutAddr(idToken)
case "cas":
logoutAddr = rt.Sso.CAS.GetSsoLogoutAddr()
case "oauth2":
@@ -199,6 +210,14 @@ func (rt *Router) refreshPost(c *gin.Context) {
ginx.Dangerous(err)
ginx.Dangerous(rt.createAuth(c.Request.Context(), userIdentity, ts))
// 延长 id_token 的过期时间,使其与新的 refresh token 生命周期保持一致
// 注意:这里不会获取新的 id_token只是延长 Redis 中现有 id_token 的 TTL
if idToken, err := rt.fetchIdToken(c.Request.Context(), userid); err == nil && idToken != "" {
if err := rt.saveIdToken(c.Request.Context(), userid, idToken); err != nil {
logger.Debugf("refresh id_token ttl failed: %v, user_id: %d", err, userid)
}
}
ginx.NewRender(c).Data(gin.H{
"access_token": ts.AccessToken,
"refresh_token": ts.RefreshToken,
@@ -286,6 +305,13 @@ func (rt *Router) loginCallback(c *gin.Context) {
ginx.Dangerous(err)
ginx.Dangerous(rt.createAuth(c.Request.Context(), userIdentity, ts))
// 保存 id_token 到 Redis用于登出时使用
if ret.IdToken != "" {
if err := rt.saveIdToken(c.Request.Context(), user.Id, ret.IdToken); err != nil {
logger.Errorf("save id_token failed: %v, user_id: %d", err, user.Id)
}
}
redirect := "/"
if ret.Redirect != "/login" {
redirect = ret.Redirect

View File

@@ -193,10 +193,9 @@ func (rt *Router) eventsMessage(c *gin.Context) {
events[i] = he.ToCur()
}
var defs = []string{
"{{$events := .}}",
"{{$event := index . 0}}",
}
renderData := make(map[string]interface{})
renderData["events"] = events
defs := models.GetDefs(renderData)
ret := make(map[string]string, len(req.Tpl.Content))
for k, v := range req.Tpl.Content {
text := strings.Join(append(defs, v), "")
@@ -207,7 +206,7 @@ func (rt *Router) eventsMessage(c *gin.Context) {
}
var buf bytes.Buffer
err = tpl.Execute(&buf, events)
err = tpl.Execute(&buf, renderData)
if err != nil {
ret[k] = err.Error()
continue

View File

@@ -18,7 +18,10 @@ import (
// Return all, front-end search and paging
func (rt *Router) alertMuteGetsByBG(c *gin.Context) {
bgid := ginx.UrlParamInt64(c, "id")
lst, err := models.AlertMuteGetsByBG(rt.Ctx, bgid)
prods := strings.Fields(ginx.QueryStr(c, "prods", ""))
query := ginx.QueryStr(c, "query", "")
expired := ginx.QueryInt(c, "expired", -1)
lst, err := models.AlertMuteGets(rt.Ctx, prods, bgid, -1, expired, query)
ginx.NewRender(c).Data(lst, err)
}
@@ -53,11 +56,17 @@ func (rt *Router) alertMuteGets(c *gin.Context) {
bgid := ginx.QueryInt64(c, "bgid", -1)
query := ginx.QueryStr(c, "query", "")
disabled := ginx.QueryInt(c, "disabled", -1)
lst, err := models.AlertMuteGets(rt.Ctx, prods, bgid, disabled, query)
expired := ginx.QueryInt(c, "expired", -1)
lst, err := models.AlertMuteGets(rt.Ctx, prods, bgid, disabled, expired, query)
ginx.NewRender(c).Data(lst, err)
}
// activeAlertMuteGets lists alert-mute rules for service-side consumers.
// NOTE(review): despite the handler name, AlertMuteGetsAll looks unscoped —
// confirm whether expired mutes are filtered inside the model layer.
func (rt *Router) activeAlertMuteGets(c *gin.Context) {
	mutes, err := models.AlertMuteGetsAll(rt.Ctx)
	ginx.NewRender(c).Data(mutes, err)
}
func (rt *Router) alertMuteAdd(c *gin.Context) {
var f models.AlertMute
@@ -67,7 +76,9 @@ func (rt *Router) alertMuteAdd(c *gin.Context) {
f.CreateBy = username
f.UpdateBy = username
f.GroupId = ginx.UrlParamInt64(c, "id")
ginx.NewRender(c).Message(f.Add(rt.Ctx))
ginx.Dangerous(f.Add(rt.Ctx))
ginx.NewRender(c).Data(f.Id, nil)
}
type MuteTestForm struct {

View File

@@ -453,6 +453,30 @@ func (rt *Router) wrapJwtKey(key string) string {
return rt.HTTP.JWTAuth.RedisKeyPrefix + key
}
// wrapIdTokenKey builds the Redis key under which a user's OIDC id_token is stored.
func (rt *Router) wrapIdTokenKey(userId int64) string {
	const idTokenKeyPrefix = "n9e_id_token_"
	return fmt.Sprintf("%s%d", idTokenKeyPrefix, userId)
}
// saveIdToken stores the user's OIDC id_token in Redis so it can later be
// handed to the identity provider on logout. Saving an empty token is a no-op.
func (rt *Router) saveIdToken(ctx context.Context, userId int64, idToken string) error {
	if idToken == "" {
		return nil
	}

	// Give the id_token the same lifetime as the refresh token so it stays
	// available for logout during the entire session.
	expiration := time.Minute * time.Duration(rt.HTTP.JWTAuth.RefreshExpired)
	return rt.Redis.Set(ctx, rt.wrapIdTokenKey(userId), idToken, expiration).Err()
}
// fetchIdToken retrieves the user's stored id_token from Redis. A missing key
// surfaces as an error from the Redis client (presumably redis.Nil — confirm
// against the client in use); callers treat any error as "no token".
func (rt *Router) fetchIdToken(ctx context.Context, userId int64) (string, error) {
	return rt.Redis.Get(ctx, rt.wrapIdTokenKey(userId)).Result()
}
// deleteIdToken removes the user's id_token from Redis; called on logout.
func (rt *Router) deleteIdToken(ctx context.Context, userId int64) error {
	return rt.Redis.Del(ctx, rt.wrapIdTokenKey(userId)).Err()
}
type TokenDetails struct {
AccessToken string
RefreshToken string

View File

@@ -161,6 +161,9 @@ func (rt *Router) notifyTest(c *gin.Context) {
}
resp, err := SendNotifyChannelMessage(rt.Ctx, rt.UserCache, rt.UserGroupCache, f.NotifyConfig, events)
if resp == "" {
resp = "success"
}
ginx.NewRender(c).Data(resp, err)
}

View File

@@ -148,6 +148,8 @@ func (rt *Router) dsProxy(c *gin.Context) {
if ds.AuthJson.BasicAuthUser != "" {
req.SetBasicAuth(ds.AuthJson.BasicAuthUser, ds.AuthJson.BasicAuthPassword)
} else {
req.Header.Del("Authorization")
}
headerCount := len(ds.HTTPJson.Headers)

View File

@@ -149,6 +149,12 @@ func (rt *Router) recordingRulePutFields(c *gin.Context) {
f.Fields["datasource_queries"] = string(bytes)
}
if datasourceIds, ok := f.Fields["datasource_ids"]; ok {
bytes, err := json.Marshal(datasourceIds)
ginx.Dangerous(err)
f.Fields["datasource_ids"] = string(bytes)
}
for i := 0; i < len(f.Ids); i++ {
ar, err := models.RecordingRuleGetById(rt.Ctx, f.Ids[i])
ginx.Dangerous(err)

View File

@@ -11,6 +11,7 @@ import (
"github.com/ccfos/nightingale/v6/models"
"github.com/ccfos/nightingale/v6/pkg/ctx"
"github.com/ccfos/nightingale/v6/pkg/strx"
"github.com/ccfos/nightingale/v6/pushgw/idents"
"github.com/ccfos/nightingale/v6/storage"
"github.com/gin-gonic/gin"
@@ -601,3 +602,10 @@ func (rt *Router) targetsOfHostQuery(c *gin.Context) {
ginx.NewRender(c).Data(lst, nil)
}
// targetUpdate accepts a batch of target updates in the request body and
// applies them through the idents set. Presumably this upserts extended
// target metadata keyed by ident — confirm in pushgw/idents.UpdateTargets.
func (rt *Router) targetUpdate(c *gin.Context) {
	var f idents.TargetUpdate
	ginx.BindJSON(c, &f)

	ginx.NewRender(c).Message(rt.IdentSet.UpdateTargets(f.Lst, f.Now))
}

View File

@@ -1,6 +1,7 @@
package router
import (
"fmt"
"net/http"
"strings"
@@ -12,6 +13,7 @@ import (
"github.com/gin-gonic/gin"
"github.com/toolkits/pkg/ginx"
"github.com/toolkits/pkg/logger"
"gorm.io/gorm"
)
func (rt *Router) userBusiGroupsGets(c *gin.Context) {
@@ -233,6 +235,16 @@ func (rt *Router) userDel(c *gin.Context) {
return
}
// 如果要删除的用户是 admin 角色,检查是否是最后一个 admin
if target.IsAdmin() {
adminCount, err := models.CountAdminUsers(rt.Ctx)
ginx.Dangerous(err)
if adminCount <= 1 {
ginx.Bomb(http.StatusBadRequest, "Cannot delete the last admin user")
}
}
ginx.NewRender(c).Message(target.Del(rt.Ctx))
}
@@ -252,3 +264,210 @@ func (rt *Router) installDateGet(c *gin.Context) {
ginx.NewRender(c).Data(rootUser.CreateAt, nil)
}
// usersPhoneEncrypt turns on the phone-encryption feature and bulk-encrypts
// every existing user's phone number with the configured RSA public key.
//
// Flow: enable the switch and refresh its cache first (so reads performed
// during the migration use the new setting), then encrypt all plaintext
// phones inside a single transaction. Any failure rolls back both the
// transaction and the configuration switch.
func (rt *Router) usersPhoneEncrypt(c *gin.Context) {
	users, err := models.UserGetAll(rt.Ctx)
	if err != nil {
		ginx.NewRender(c).Message(fmt.Errorf("get users failed: %v", err))
		return
	}

	// Fetch the RSA key material; only the public key is needed to encrypt.
	_, publicKey, _, err := models.GetRSAKeys(rt.Ctx)
	if err != nil {
		ginx.NewRender(c).Message(fmt.Errorf("get RSA keys failed: %v", err))
		return
	}

	// Enable the phone-encryption feature before migrating the data.
	err = models.SetPhoneEncryptionEnabled(rt.Ctx, true)
	if err != nil {
		ginx.NewRender(c).Message(fmt.Errorf("enable phone encryption failed: %v", err))
		return
	}

	// Refresh the config cache so the new setting takes effect immediately.
	err = models.RefreshPhoneEncryptionCache(rt.Ctx)
	if err != nil {
		logger.Errorf("Failed to refresh phone encryption cache: %v", err)
		// Roll the switch back; the migration has not started yet.
		models.SetPhoneEncryptionEnabled(rt.Ctx, false)
		ginx.NewRender(c).Message(fmt.Errorf("refresh cache failed: %v", err))
		return
	}

	successCount := 0
	failCount := 0
	var failedUsers []string

	// Encrypt every user's phone inside one transaction so a partial
	// failure leaves no half-migrated data behind.
	err = models.DB(rt.Ctx).Transaction(func(tx *gorm.DB) error {
		for _, user := range users {
			if user.Phone == "" {
				continue
			}

			// Skip phones that already look encrypted (keeps re-runs idempotent).
			if isPhoneEncrypted(user.Phone) {
				continue
			}

			encryptedPhone, err := secu.EncryptValue(user.Phone, publicKey)
			if err != nil {
				logger.Errorf("Failed to encrypt phone for user %s: %v", user.Username, err)
				failCount++
				failedUsers = append(failedUsers, user.Username)
				continue
			}

			err = tx.Model(&models.User{}).Where("id = ?", user.Id).Update("phone", encryptedPhone).Error
			if err != nil {
				logger.Errorf("Failed to update phone for user %s: %v", user.Username, err)
				failCount++
				failedUsers = append(failedUsers, user.Username)
				continue
			}

			successCount++
			logger.Debugf("Successfully encrypted phone for user %s", user.Username)
		}

		// Returning an error here aborts and rolls back the whole
		// transaction when any user failed.
		if failCount > 0 {
			return fmt.Errorf("encrypt failed users: %d, failed users: %v", failCount, failedUsers)
		}
		return nil
	})

	if err != nil {
		// Migration failed: disable the feature switch again and re-sync the cache.
		models.SetPhoneEncryptionEnabled(rt.Ctx, false)
		models.RefreshPhoneEncryptionCache(rt.Ctx)
		ginx.NewRender(c).Message(fmt.Errorf("encrypt phone failed: %v", err))
		return
	}

	ginx.NewRender(c).Data(gin.H{
		"success_count": successCount,
		"fail_count":    failCount,
	}, nil)
}
// usersPhoneDecryptRefresh forces a reload of the phone-encryption
// configuration cache and reports the outcome to the caller.
func (rt *Router) usersPhoneDecryptRefresh(c *gin.Context) {
	if err := models.RefreshPhoneEncryptionCache(rt.Ctx); err != nil {
		ginx.NewRender(c).Message(fmt.Errorf("refresh phone encryption cache failed: %v", err))
		return
	}
	ginx.NewRender(c).Message(nil)
}
// usersPhoneDecrypt decrypts the phone number of every user in one shot.
// It first turns off the phone-encryption switch and refreshes the config
// cache, then RSA-decrypts each stored ciphertext inside a single
// transaction; any failure rolls back both the data and the switch.
func (rt *Router) usersPhoneDecrypt(c *gin.Context) {
	// Disable the feature first so subsequent reads treat the column as
	// raw database content.
	err := models.SetPhoneEncryptionEnabled(rt.Ctx, false)
	if err != nil {
		ginx.NewRender(c).Message(fmt.Errorf("disable phone encryption failed: %v", err))
		return
	}

	// Refresh the config cache; on failure roll the switch back.
	err = models.RefreshPhoneEncryptionCache(rt.Ctx)
	if err != nil {
		logger.Errorf("Failed to refresh phone encryption cache: %v", err)
		// Log rollback failures instead of silently dropping them so
		// operators can detect a switch left in the wrong state.
		if rbErr := models.SetPhoneEncryptionEnabled(rt.Ctx, true); rbErr != nil {
			logger.Errorf("Failed to roll back phone encryption switch: %v", rbErr)
		}
		ginx.NewRender(c).Message(fmt.Errorf("refresh cache failed: %v", err))
		return
	}

	// Load all users directly from the database (the encryption switch is
	// already off, so we see the raw stored values).
	var users []*models.User
	err = models.DB(rt.Ctx).Find(&users).Error
	if err != nil {
		if rbErr := models.SetPhoneEncryptionEnabled(rt.Ctx, true); rbErr != nil {
			logger.Errorf("Failed to roll back phone encryption switch: %v", rbErr)
		}
		if rbErr := models.RefreshPhoneEncryptionCache(rt.Ctx); rbErr != nil {
			logger.Errorf("Failed to refresh phone encryption cache after rollback: %v", rbErr)
		}
		ginx.NewRender(c).Message(fmt.Errorf("get users failed: %v", err))
		return
	}

	// Fetch the RSA material; decryption needs the private key and its password.
	privateKey, _, password, err := models.GetRSAKeys(rt.Ctx)
	if err != nil {
		if rbErr := models.SetPhoneEncryptionEnabled(rt.Ctx, true); rbErr != nil {
			logger.Errorf("Failed to roll back phone encryption switch: %v", rbErr)
		}
		if rbErr := models.RefreshPhoneEncryptionCache(rt.Ctx); rbErr != nil {
			logger.Errorf("Failed to refresh phone encryption cache after rollback: %v", rbErr)
		}
		ginx.NewRender(c).Message(fmt.Errorf("get RSA keys failed: %v", err))
		return
	}

	successCount := 0
	failCount := 0
	var failedUsers []string

	// Decrypt every user's phone inside one transaction so a partial
	// failure leaves no half-decrypted data behind.
	err = models.DB(rt.Ctx).Transaction(func(tx *gorm.DB) error {
		for _, user := range users {
			if user.Phone == "" {
				continue
			}
			// Only values carrying the "enc:" marker are ciphertext.
			if !isPhoneEncrypted(user.Phone) {
				continue
			}
			decryptedPhone, err := secu.Decrypt(user.Phone, privateKey, password)
			if err != nil {
				logger.Errorf("Failed to decrypt phone for user %s: %v", user.Username, err)
				failCount++
				failedUsers = append(failedUsers, user.Username)
				continue
			}
			// Update the column directly so GORM hooks do not re-process it.
			err = tx.Model(&models.User{}).Where("id = ?", user.Id).Update("phone", decryptedPhone).Error
			if err != nil {
				logger.Errorf("Failed to update phone for user %s: %v", user.Username, err)
				failCount++
				failedUsers = append(failedUsers, user.Username)
				continue
			}
			successCount++
			logger.Debugf("Successfully decrypted phone for user %s", user.Username)
		}
		// Any failure aborts the transaction and rolls every row back.
		if failCount > 0 {
			return fmt.Errorf("decrypt failed users: %d, failed users: %v", failCount, failedUsers)
		}
		return nil
	})
	if err != nil {
		// Data rollback already happened; also roll back the feature switch.
		if rbErr := models.SetPhoneEncryptionEnabled(rt.Ctx, true); rbErr != nil {
			logger.Errorf("Failed to roll back phone encryption switch: %v", rbErr)
		}
		if rbErr := models.RefreshPhoneEncryptionCache(rt.Ctx); rbErr != nil {
			logger.Errorf("Failed to refresh phone encryption cache after rollback: %v", rbErr)
		}
		ginx.NewRender(c).Message(fmt.Errorf("decrypt phone failed: %v", err))
		return
	}

	ginx.NewRender(c).Data(gin.H{
		"success_count": successCount,
		"fail_count":    failCount,
	}, nil)
}
// isPhoneEncrypted reports whether the stored phone value carries the "enc:"
// marker followed by at least one character of ciphertext. A bare "enc:"
// (empty payload) is not considered encrypted.
func isPhoneEncrypted(phone string) bool {
	const marker = "enc:"
	return len(phone) > len(marker) && phone[:len(marker)] == marker
}

View File

@@ -27,6 +27,40 @@ func (rt *Router) userGroupGets(c *gin.Context) {
me := c.MustGet("user").(*models.User)
lst, err := me.UserGroups(rt.Ctx, limit, query)
if err != nil {
ginx.Dangerous(err)
}
allBusiGroups, err := models.BusiGroupGetAll(rt.Ctx)
if err != nil {
ginx.Dangerous(err)
}
var allMembers []models.BusiGroupMember
if err = models.DB(rt.Ctx).Model(&models.BusiGroupMember{}).Find(&allMembers).Error; err != nil {
ginx.Dangerous(err)
}
busiGroupMap := make(map[int64]*models.BusiGroup)
for _, bg := range allBusiGroups {
busiGroupMap[bg.Id] = bg
}
userGroupToBusiGroupsMap := make(map[int64][]*models.BusiGroupRes)
for _, member := range allMembers {
if bg, ok := busiGroupMap[member.BusiGroupId]; ok {
userGroupToBusiGroupsMap[member.UserGroupId] = append(userGroupToBusiGroupsMap[member.UserGroupId], &models.BusiGroupRes{
Id: bg.Id,
Name: bg.Name,
})
}
}
for i := 0; i < len(lst); i++ {
if busiGroups, ok := userGroupToBusiGroupsMap[lst[i].Id]; ok {
lst[i].BusiGroupsRes = busiGroups
}
}
ginx.NewRender(c).Data(lst, err)
}

View File

@@ -10,12 +10,20 @@ import (
"github.com/araddon/dateparse"
"github.com/bitly/go-simplejson"
"github.com/ccfos/nightingale/v6/memsto"
"github.com/ccfos/nightingale/v6/models"
"github.com/mitchellh/mapstructure"
"github.com/olivere/elastic/v7"
"github.com/prometheus/common/model"
"github.com/toolkits/pkg/logger"
"github.com/ccfos/nightingale/v6/memsto"
"github.com/ccfos/nightingale/v6/models"
)
// FixedField names an Elasticsearch built-in document metadata field that can
// be used as a sort key (e.g. as a tie-breaker for search_after pagination).
type FixedField string

const (
	// FieldIndex is the built-in "_index" metadata field.
	FieldIndex FixedField = "_index"
	// FieldId is the built-in "_id" metadata field.
	FieldId FixedField = "_id"
)
type Query struct {
@@ -37,6 +45,18 @@ type Query struct {
Timeout int `json:"timeout" mapstructure:"timeout"`
MaxShard int `json:"max_shard" mapstructure:"max_shard"`
SearchAfter *SearchAfter `json:"search_after" mapstructure:"search_after"`
}
// SortField describes a single sort criterion for a log query.
type SortField struct {
	// Field is the document field (or metadata field such as _index/_id) to sort by.
	Field string `json:"field" mapstructure:"field"`
	// Ascending selects ascending order when true, descending when false.
	Ascending bool `json:"ascending" mapstructure:"ascending"`
}

// SearchAfter carries the cursor state for search_after pagination.
type SearchAfter struct {
	// SortFields lists the sort keys; typically a combination such as
	// timestamp:desc, _index:asc, _id:asc so the ordering is unique.
	SortFields []SortField `json:"sort_fields" mapstructure:"sort_fields"`
	// SearchAfter holds the sort values of the last hit of the previous
	// query, in the same order as SortFields.
	SearchAfter []interface{} `json:"search_after" mapstructure:"search_after"`
}
type MetricAggr struct {
@@ -271,7 +291,10 @@ func MakeLogQuery(ctx context.Context, query interface{}, eventTags []string, st
}
for i := 0; i < len(eventTags); i++ {
eventTags[i] = strings.Replace(eventTags[i], "=", ":", 1)
arr := strings.SplitN(eventTags[i], "=", 2)
if len(arr) == 2 {
eventTags[i] = fmt.Sprintf("%s:%s", arr[0], strconv.Quote(arr[1]))
}
}
if len(eventTags) > 0 {
@@ -295,7 +318,10 @@ func MakeTSQuery(ctx context.Context, query interface{}, eventTags []string, sta
}
for i := 0; i < len(eventTags); i++ {
eventTags[i] = strings.Replace(eventTags[i], "=", ":", 1)
arr := strings.SplitN(eventTags[i], "=", 2)
if len(arr) == 2 {
eventTags[i] = fmt.Sprintf("%s:%s", arr[0], strconv.Quote(arr[1]))
}
}
if len(eventTags) > 0 {
@@ -415,10 +441,32 @@ func QueryData(ctx context.Context, queryParam interface{}, cliTimeout int64, ve
Field(param.DateField).
MinDocCount(1)
if strings.HasPrefix(version, "7") {
versionParts := strings.Split(version, ".")
major := 0
if len(versionParts) > 0 {
if m, err := strconv.Atoi(versionParts[0]); err == nil {
major = m
}
}
minor := 0
if len(versionParts) > 1 {
if m, err := strconv.Atoi(versionParts[1]); err == nil {
minor = m
}
}
if major >= 7 {
// 添加偏移量使第一个分桶bucket的左边界对齐为 start 时间
offset := (start % param.Interval) - param.Interval
tsAggr.FixedInterval(fmt.Sprintf("%ds", param.Interval)).Offset(fmt.Sprintf("%ds", offset))
// 使用 fixed_interval 的条件ES 7.2+ 或者任何 major > 7例如 ES8
if (major > 7) || (major == 7 && minor >= 2) {
// ES 7.2+ 以及 ES8+ 使用 fixed_interval
tsAggr.FixedInterval(fmt.Sprintf("%ds", param.Interval)).Offset(fmt.Sprintf("%ds", offset))
} else {
// 7.0-7.1 使用 interval带 offset
tsAggr.Interval(fmt.Sprintf("%ds", param.Interval)).Offset(fmt.Sprintf("%ds", offset))
}
} else {
// 兼容 7.0 以下的版本
// OpenSearch 也使用这个字段
@@ -605,14 +653,27 @@ func QueryLog(ctx context.Context, queryParam interface{}, timeout int64, versio
if param.MaxShard < 1 {
param.MaxShard = maxShard
}
// from+size 分页方式获取日志受es 的max_result_window参数限制默认最多返回1w条日志, 可以使用search_after方式获取更多日志
source := elastic.NewSearchSource().
TrackTotalHits(true).
Query(queryString).
From(param.P).
Size(param.Limit).
Sort(param.DateField, param.Ascending)
Size(param.Limit)
// 是否使用search_after方式
if param.SearchAfter != nil {
// 设置默认排序字段
if len(param.SearchAfter.SortFields) == 0 {
source = source.Sort(param.DateField, param.Ascending).Sort(string(FieldIndex), true).Sort(string(FieldId), true)
} else {
for _, field := range param.SearchAfter.SortFields {
source = source.Sort(field.Field, field.Ascending)
}
}
if len(param.SearchAfter.SearchAfter) > 0 {
source = source.SearchAfter(param.SearchAfter.SearchAfter...)
}
} else {
source = source.From(param.P).Sort(param.DateField, param.Ascending)
}
result, err := search(ctx, indexArr, source, param.Timeout, param.MaxShard)
if err != nil {
logger.Warningf("query data error:%v", err)

View File

@@ -8,6 +8,7 @@ import (
"github.com/ccfos/nightingale/v6/datasource"
"github.com/ccfos/nightingale/v6/dskit/doris"
"github.com/ccfos/nightingale/v6/dskit/types"
"github.com/ccfos/nightingale/v6/pkg/macros"
"github.com/ccfos/nightingale/v6/models"
"github.com/mitchellh/mapstructure"
@@ -27,11 +28,16 @@ type Doris struct {
}
type QueryParam struct {
Ref string `json:"ref" mapstructure:"ref"`
Database string `json:"database" mapstructure:"database"`
Table string `json:"table" mapstructure:"table"`
SQL string `json:"sql" mapstructure:"sql"`
Keys datasource.Keys `json:"keys" mapstructure:"keys"`
Ref string `json:"ref" mapstructure:"ref"`
Database string `json:"database" mapstructure:"database"`
Table string `json:"table" mapstructure:"table"`
SQL string `json:"sql" mapstructure:"sql"`
Keys datasource.Keys `json:"keys" mapstructure:"keys"`
Limit int `json:"limit" mapstructure:"limit"`
From int64 `json:"from" mapstructure:"from"`
To int64 `json:"to" mapstructure:"to"`
TimeField string `json:"time_field" mapstructure:"time_field"`
TimeFormat string `json:"time_format" mapstructure:"time_format"`
}
func (d *Doris) InitClient() error {
@@ -66,7 +72,7 @@ func (d *Doris) Validate(ctx context.Context) error {
func (d *Doris) Equal(p datasource.Datasource) bool {
newest, ok := p.(*Doris)
if !ok {
logger.Errorf("unexpected plugin type, expected is ck")
logger.Errorf("unexpected plugin type, expected is doris")
return false
}
@@ -174,6 +180,14 @@ func (d *Doris) QueryLog(ctx context.Context, query interface{}) ([]interface{},
return nil, 0, err
}
if strings.Contains(dorisQueryParam.SQL, "$__") {
var err error
dorisQueryParam.SQL, err = macros.Macro(dorisQueryParam.SQL, dorisQueryParam.From, dorisQueryParam.To)
if err != nil {
return nil, 0, err
}
}
items, err := d.QueryLogs(ctx, &doris.QueryParam{
Database: dorisQueryParam.Database,
Sql: dorisQueryParam.SQL,
@@ -187,7 +201,7 @@ func (d *Doris) QueryLog(ctx context.Context, query interface{}) ([]interface{},
logs = append(logs, items[i])
}
return logs, 0, nil
return logs, int64(len(logs)), nil
}
func (d *Doris) DescribeTable(ctx context.Context, query interface{}) ([]*types.ColumnProperty, error) {

View File

@@ -106,6 +106,29 @@ func (e *Elasticsearch) InitClient() error {
options = append(options, elastic.SetHealthcheck(false))
e.Client, err = elastic.NewClient(options...)
if err != nil {
return err
}
if e.Client != nil {
for _, addr := range e.Nodes {
if addr == "" {
continue
}
if ver, verr := e.Client.ElasticsearchVersion(addr); verr == nil {
logger.Infof("detected elasticsearch version from %s: %s", addr, ver)
e.Version = ver
e.Addr = addr
break
} else {
logger.Debugf("detect version failed from %s: %v", addr, verr)
}
}
if e.Version == "" {
logger.Warning("failed to detect elasticsearch version from configured nodes, keep configured version")
}
}
return err
}
@@ -183,7 +206,6 @@ func (e *Elasticsearch) MakeTSQuery(ctx context.Context, query interface{}, even
}
func (e *Elasticsearch) QueryData(ctx context.Context, queryParam interface{}) ([]models.DataResp, error) {
search := func(ctx context.Context, indices []string, source interface{}, timeout int, maxShard int) (*elastic.SearchResult, error) {
return e.Client.Search().
Index(indices...).
@@ -193,7 +215,6 @@ func (e *Elasticsearch) QueryData(ctx context.Context, queryParam interface{}) (
MaxConcurrentShardRequests(maxShard).
Do(ctx)
}
return eslike.QueryData(ctx, queryParam, e.Timeout, e.Version, search)
}

View File

@@ -100,7 +100,8 @@ func (os *OpenSearch) InitClient() error {
Header: headers,
}
if os.Basic.Enable && os.Basic.Username != "" {
// 只要有用户名就添加认证,不依赖 Enable 字段
if os.Basic.Username != "" {
options.Username = os.Basic.Username
options.Password = os.Basic.Password
}
@@ -154,8 +155,9 @@ func (os *OpenSearch) Validate(ctx context.Context) (err error) {
}
}
if os.Basic.Enable && (len(os.Basic.Username) == 0 || len(os.Basic.Password) == 0) {
return fmt.Errorf("need a valid user, password")
// 如果提供了用户名,必须同时提供密码
if len(os.Basic.Username) > 0 && len(os.Basic.Password) == 0 {
return fmt.Errorf("password is required when username is provided")
}
if os.MaxShard == 0 {

View File

@@ -23,7 +23,7 @@ const (
)
var (
regx = "(?i)from\\s+([a-zA-Z0-9_]+)\\.([a-zA-Z0-9_]+)\\.([a-zA-Z0-9_]+)"
regx = `(?i)from\s+((?:"[^"]+"|[a-zA-Z0-9_]+))\.((?:"[^"]+"|[a-zA-Z0-9_]+))\.((?:"[^"]+"|[a-zA-Z0-9_]+))`
)
func init() {
@@ -162,6 +162,7 @@ func (p *PostgreSQL) QueryData(ctx context.Context, query interface{}) ([]models
return nil, err
}
postgresqlQueryParam.SQL = formatSQLDatabaseNameWithRegex(postgresqlQueryParam.SQL)
if strings.Contains(postgresqlQueryParam.SQL, "$__") {
var err error
postgresqlQueryParam.SQL, err = macros.Macro(postgresqlQueryParam.SQL, postgresqlQueryParam.From, postgresqlQueryParam.To)
@@ -229,6 +230,7 @@ func (p *PostgreSQL) QueryLog(ctx context.Context, query interface{}) ([]interfa
p.Shards[0].DB = db
}
postgresqlQueryParam.SQL = formatSQLDatabaseNameWithRegex(postgresqlQueryParam.SQL)
if strings.Contains(postgresqlQueryParam.SQL, "$__") {
var err error
postgresqlQueryParam.SQL, err = macros.Macro(postgresqlQueryParam.SQL, postgresqlQueryParam.From, postgresqlQueryParam.To)
@@ -280,7 +282,17 @@ func parseDBName(sql string) (db string, err error) {
if len(matches) != 4 {
return "", fmt.Errorf("no valid table name in format database.schema.table found")
}
return matches[1], nil
return strings.Trim(matches[1], `"`), nil
}
// dbNameQualifierRe matches `from dbname.scheme.tabname` (case-insensitive),
// capturing the three identifier parts. Compiled once at package level so the
// query path does not pay a regexp compilation on every call.
var dbNameQualifierRe = regexp.MustCompile(`(?i)\bfrom\s+([a-zA-Z_][a-zA-Z0-9_]*)\s*\.\s*([a-zA-Z_][a-zA-Z0-9_]*)\s*\.\s*([a-zA-Z_][a-zA-Z0-9_]*)`)

// formatSQLDatabaseNameWithRegex quotes fully-qualified table references of
// the form dbname.scheme.tabname as "dbname"."scheme"."tabname".
// PostgreSQL folds unquoted identifiers to lower case, so double-quoting
// preserves the original casing of each part.
func formatSQLDatabaseNameWithRegex(sql string) string {
	return dbNameQualifierRe.ReplaceAllString(sql, `from "$1"."$2"."$3"`)
}
func extractColumns(sql string) ([]string, error) {

Binary file not shown.

After

Width:  |  Height:  |  Size: 481 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 508 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 386 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 424 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 78 KiB

View File

@@ -89,8 +89,6 @@ MaxLifetime = 7200
MaxOpenConns = 150
# max idle connections
MaxIdleConns = 50
# enable auto migrate or not
# EnableAutoMigrate = false
[Redis]
# address, ip:port or ip1:port,ip2:port for cluster and sentinel(SentinelAddrs)

View File

@@ -86,8 +86,6 @@ MaxLifetime = 7200
MaxOpenConns = 150
# max idle connections
MaxIdleConns = 50
# enable auto migrate or not
# EnableAutoMigrate = false
[Redis]
# address, ip:port or ip1:port,ip2:port for cluster and sentinel(SentinelAddrs)

View File

@@ -86,8 +86,6 @@ MaxLifetime = 7200
MaxOpenConns = 150
# max idle connections
MaxIdleConns = 50
# enable auto migrate or not
# EnableAutoMigrate = false
[Redis]
# address, ip:port or ip1:port,ip2:port for cluster and sentinel(SentinelAddrs)

View File

@@ -956,7 +956,7 @@ CREATE TABLE notify_rule (
id bigserial PRIMARY KEY,
name varchar(255) NOT NULL,
description text,
enable smallint NOT NULL DEFAULT 0,
enable boolean DEFAULT false,
user_group_ids varchar(255) NOT NULL DEFAULT '',
notify_configs text,
pipeline_configs text,
@@ -971,7 +971,7 @@ CREATE TABLE notify_channel (
name varchar(255) NOT NULL,
ident varchar(255) NOT NULL,
description text,
enable smallint NOT NULL DEFAULT 0,
enable boolean DEFAULT false,
param_config text,
request_type varchar(50) NOT NULL,
request_config text,

View File

@@ -90,8 +90,6 @@ MaxLifetime = 7200
MaxOpenConns = 150
# max idle connections
MaxIdleConns = 50
# enable auto migrate or not
# EnableAutoMigrate = false
[Redis]
# address, ip:port or ip1:port,ip2:port for cluster and sentinel(SentinelAddrs)

View File

@@ -836,8 +836,8 @@ CREATE TABLE `event_pipeline` (
`description` varchar(255) not null default '',
`filter_enable` tinyint(1) not null default 0,
`label_filters` text,
`attribute_filters` text,
`processors` text,
`attr_filters` text,
`processor_configs` text,
`create_at` bigint not null default 0,
`create_by` varchar(64) not null default '',
`update_at` bigint not null default 0,

View File

@@ -235,9 +235,8 @@ CREATE TABLE `event_pipeline` (
`team_ids` text,
`description` varchar(255) not null default '',
`filter_enable` tinyint(1) not null default 0,
`label_filters` text,
`attribute_filters` text,
`processors` text,
`attr_filters` text,
`processor_configs` text,
`create_at` bigint not null default 0,
`create_by` varchar(64) not null default '',
`update_at` bigint not null default 0,
@@ -273,8 +272,6 @@ CREATE TABLE `source_token` (
) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4;
/* Add translation column for builtin metrics */
ALTER TABLE `builtin_metrics` ADD COLUMN `translation` TEXT COMMENT 'translation of metric' AFTER `lang`;
/* v8.0.0-beta.12 2025-06-03 */
ALTER TABLE `alert_his_event` ADD COLUMN `notify_rule_ids` text COMMENT 'notify rule ids';
@@ -284,3 +281,11 @@ ALTER TABLE `alert_cur_event` ADD COLUMN `notify_rule_ids` text COMMENT 'notify
-- 删除 builtin_metrics 表的 idx_collector_typ_name 唯一索引
DROP INDEX IF EXISTS `idx_collector_typ_name` ON `builtin_metrics`;
/* v8.0.0 2025-07-03 */
ALTER TABLE `builtin_metrics` ADD COLUMN `translation` TEXT COMMENT 'translation of metric' AFTER `lang`;
/* v8.4.0 2025-10-15 */
ALTER TABLE `notify_rule` ADD COLUMN `extra_config` text COMMENT 'extra config';
/* v8.4.1 2025-11-10 */
ALTER TABLE `alert_rule` ADD COLUMN `pipeline_configs` text COMMENT 'pipeline configs';

View File

@@ -22,6 +22,8 @@ import (
var FromAPIHook func()
var DatasourceProcessHook func(items []datasource.DatasourceInfo) []datasource.DatasourceInfo
func Init(ctx *ctx.Context, fromAPI bool) {
go getDatasourcesFromDBLoop(ctx, fromAPI)
}
@@ -100,6 +102,10 @@ func getDatasourcesFromDBLoop(ctx *ctx.Context, fromAPI bool) {
atomic.StoreInt64(&PromDefaultDatasourceId, 0)
}
if DatasourceProcessHook != nil {
dss = DatasourceProcessHook(dss)
}
PutDatasources(dss)
} else {
FromAPIHook()
@@ -163,7 +169,7 @@ func PutDatasources(items []datasource.DatasourceInfo) {
ds, err := datasource.GetDatasourceByType(typ, item.Settings)
if err != nil {
logger.Warningf("get plugin:%+v fail: %v", item, err)
logger.Debugf("get plugin:%+v fail: %v", item, err)
continue
}

View File

@@ -129,9 +129,7 @@ func (c *Clickhouse) QueryRows(ctx context.Context, query string) (*sql.Rows, er
// ShowDatabases lists all databases in Clickhouse
func (c *Clickhouse) ShowDatabases(ctx context.Context) ([]string, error) {
var (
res []string
)
res := make([]string, 0)
rows, err := c.QueryRows(ctx, ShowDatabases)
if err != nil {
@@ -151,9 +149,7 @@ func (c *Clickhouse) ShowDatabases(ctx context.Context) ([]string, error) {
// ShowTables lists all tables in a given database
func (c *Clickhouse) ShowTables(ctx context.Context, database string) ([]string, error) {
var (
res []string
)
res := make([]string, 0)
showTables := fmt.Sprintf(ShowTables, database)
rows, err := c.QueryRows(ctx, showTables)

View File

@@ -20,8 +20,8 @@ import (
// Doris struct to hold connection details and the connection object
type Doris struct {
Addr string `json:"doris.addr" mapstructure:"doris.addr"` // be node
FeAddr string `json:"doris.fe_addr" mapstructure:"doris.fe_addr"` // fe node
Addr string `json:"doris.addr" mapstructure:"doris.addr"` // fe mysql endpoint
FeAddr string `json:"doris.fe_addr" mapstructure:"doris.fe_addr"` // fe http endpoint
User string `json:"doris.user" mapstructure:"doris.user"` //
Password string `json:"doris.password" mapstructure:"doris.password"` //
Timeout int `json:"doris.timeout" mapstructure:"doris.timeout"`
@@ -138,7 +138,7 @@ func (d *Doris) ShowDatabases(ctx context.Context) ([]string, error) {
}
defer rows.Close()
var databases []string
databases := make([]string, 0)
for rows.Next() {
var dbName string
if err := rows.Scan(&dbName); err != nil {
@@ -201,7 +201,7 @@ func (d *Doris) ShowResources(ctx context.Context, resourceType string) ([]strin
}
// 将 map 转换为切片
var resources []string
resources := make([]string, 0)
for name := range distinctName {
resources = append(resources, name)
}
@@ -226,7 +226,7 @@ func (d *Doris) ShowTables(ctx context.Context, database string) ([]string, erro
}
defer rows.Close()
var tables []string
tables := make([]string, 0)
for rows.Next() {
var tableName string
if err := rows.Scan(&tableName); err != nil {

View File

@@ -115,14 +115,14 @@ func (m *MySQL) NewConn(ctx context.Context, database string) (*gorm.DB, error)
}()
dsn := fmt.Sprintf("%s:%s@tcp(%s)/%s?charset=utf8&parseTime=True", shard.User, shard.Password, shard.Addr, database)
return sqlbase.NewDB(
db, err = sqlbase.NewDB(
ctx,
mysql.Open(dsn),
shard.MaxIdleConns,
shard.MaxOpenConns,
time.Duration(shard.ConnMaxLifetime)*time.Second,
)
return db, err
}
func (m *MySQL) ShowDatabases(ctx context.Context) ([]string, error) {

View File

@@ -7,6 +7,7 @@ import (
"encoding/json"
"errors"
"fmt"
"net/url"
"strings"
"time"
@@ -117,7 +118,8 @@ func (p *PostgreSQL) NewConn(ctx context.Context, database string) (*gorm.DB, er
}()
// Simplified connection logic for PostgreSQL
dsn := fmt.Sprintf("postgres://%s:%s@%s/%s?sslmode=disable&TimeZone=Asia/Shanghai", p.Shard.User, p.Shard.Password, p.Shard.Addr, database)
dsn := fmt.Sprintf("postgres://%s:%s@%s/%s?sslmode=disable&TimeZone=Asia/Shanghai", url.QueryEscape(p.Shard.User), url.QueryEscape(p.Shard.Password), p.Shard.Addr, database)
db, err = sqlbase.NewDB(
ctx,
postgres.Open(dsn),

View File

@@ -48,7 +48,7 @@ func CloseDB(db *gorm.DB) error {
// ShowTables retrieves a list of all tables in the specified database
func ShowTables(ctx context.Context, db *gorm.DB, query string) ([]string, error) {
var tables []string
tables := make([]string, 0)
rows, err := db.WithContext(ctx).Raw(query).Rows()
if err != nil {

View File

@@ -122,7 +122,7 @@ func (tc *Tdengine) QueryTable(query string) (APIResponse, error) {
}
func (tc *Tdengine) ShowDatabases(context.Context) ([]string, error) {
var databases []string
databases := make([]string, 0)
data, err := tc.QueryTable("show databases")
if err != nil {
return databases, err
@@ -135,7 +135,7 @@ func (tc *Tdengine) ShowDatabases(context.Context) ([]string, error) {
}
func (tc *Tdengine) ShowTables(ctx context.Context, database string) ([]string, error) {
var tables []string
tables := make([]string, 0)
sql := fmt.Sprintf("show %s", database)
data, err := tc.QueryTable(sql)
if err != nil {

View File

@@ -90,8 +90,6 @@ MaxLifetime = 7200
MaxOpenConns = 150
# max idle connections
MaxIdleConns = 50
# enable auto migrate or not
# EnableAutoMigrate = false
[Redis]
# address, ip:port or ip1:port,ip2:port for cluster and sentinel(SentinelAddrs)

10
go.mod
View File

@@ -1,6 +1,6 @@
module github.com/ccfos/nightingale/v6
go 1.22
go 1.23.0
require (
github.com/BurntSushi/toml v1.4.0
@@ -13,7 +13,7 @@ require (
github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc
github.com/dgrijalva/jwt-go v3.2.0+incompatible
github.com/expr-lang/expr v1.16.1
github.com/flashcatcloud/ibex v1.3.5
github.com/flashcatcloud/ibex v1.3.6
github.com/gin-contrib/pprof v1.4.0
github.com/gin-gonic/gin v1.9.1
github.com/glebarez/sqlite v1.11.0
@@ -47,7 +47,7 @@ require (
github.com/tidwall/gjson v1.14.2
github.com/toolkits/pkg v1.3.8
golang.org/x/exp v0.0.0-20231006140011-7918f672742d
golang.org/x/oauth2 v0.23.0
golang.org/x/oauth2 v0.27.0
gopkg.in/gomail.v2 v2.0.0-20160411212932-81ebce5c23df
gopkg.in/yaml.v2 v2.4.0
gorm.io/driver/clickhouse v0.6.1
@@ -160,3 +160,7 @@ require (
)
replace golang.org/x/exp v0.0.0-20231006140011-7918f672742d => golang.org/x/exp v0.0.0-20230713183714-613f0c0eb8a1
replace github.com/olivere/elastic/v7 => github.com/n9e/elastic/v7 v7.0.33-0.20251031061708-f480a2dfcfa7
// replace github.com/flashcatcloud/ibex => ../github.com/flashcatcloud/ibex

12
go.sum
View File

@@ -89,8 +89,8 @@ github.com/fatih/camelcase v1.0.0 h1:hxNvNX/xYBp0ovncs8WyWZrOrpBNub/JfaMvbURyft8
github.com/fatih/camelcase v1.0.0/go.mod h1:yN2Sb0lFhZJUdVvtELVWefmrXpuZESvPmqwoZc+/fpc=
github.com/fatih/structs v1.1.0 h1:Q7juDM0QtcnhCpeyLGQKyg4TOIghuNXrkL32pHAUMxo=
github.com/fatih/structs v1.1.0/go.mod h1:9NiDSp5zOcgEDl+j00MP/WkGVPOlPRLejGD8Ga6PJ7M=
github.com/flashcatcloud/ibex v1.3.5 h1:8GOOf5+aJT0TP/MC6izz7CO5JKJSdKVFBwL0vQp93Nc=
github.com/flashcatcloud/ibex v1.3.5/go.mod h1:T8hbMUySK2q6cXUaYp0AUVeKkU9Od2LjzwmB5lmTRBM=
github.com/flashcatcloud/ibex v1.3.6 h1:lJShPFxcZksmkB0w99a3uROGB+Fie1NsqOlkAdar12A=
github.com/flashcatcloud/ibex v1.3.6/go.mod h1:iTU1dKT9TnDNllRPRHUOjXe+HDTQkPH2TeaucHtSuh4=
github.com/fortytw2/leaktest v1.3.0 h1:u8491cBMTQ8ft8aeV+adlcytMZylmA5nnwwkRZjI8vw=
github.com/fortytw2/leaktest v1.3.0/go.mod h1:jDsjWgpAGjm2CA7WthBh/CdZYEPF31XHquHwclZch5g=
github.com/gabriel-vasile/mimetype v1.4.2 h1:w5qFW6JKBz9Y393Y4q372O9A7cUSequkh1Q7OhCmWKU=
@@ -259,10 +259,10 @@ github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 h1:C3w9PqII01/Oq
github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822/go.mod h1:+n7T8mK8HuQTcFwEeznm/DIxMOiR9yIdICNftLE1DvQ=
github.com/mwitkow/go-conntrack v0.0.0-20190716064945-2f068394615f h1:KUppIJq7/+SVif2QVs3tOP0zanoHgBEVAwHxUSIzRqU=
github.com/mwitkow/go-conntrack v0.0.0-20190716064945-2f068394615f/go.mod h1:qRWi+5nqEBWmkhHvq77mSJWrCKwh8bxhgT7d/eI7P4U=
github.com/n9e/elastic/v7 v7.0.33-0.20251031061708-f480a2dfcfa7 h1:fPs1GClmnQZ6E/nzrJCieQKJNM46eqMkHaBg3SoHcgY=
github.com/n9e/elastic/v7 v7.0.33-0.20251031061708-f480a2dfcfa7/go.mod h1:/kVskIy0Pd8nAiKtPtcI4XnzOM+pM6MWQ+zP6YqPVFI=
github.com/oklog/ulid v1.3.1 h1:EGfNDEx6MqHz8B3uNV6QAib1UR2Lm97sHi3ocA6ESJ4=
github.com/oklog/ulid v1.3.1/go.mod h1:CirwcVhetQ6Lv90oh/F+FBtV6XMibvdAFo93nm5qn4U=
github.com/olivere/elastic/v7 v7.0.32 h1:R7CXvbu8Eq+WlsLgxmKVKPox0oOwAE/2T9Si5BnvK6E=
github.com/olivere/elastic/v7 v7.0.32/go.mod h1:c7PVmLe3Fxq77PIfY/bZmxY/TAamBhCzZ8xDOE09a9k=
github.com/opensearch-project/opensearch-go/v2 v2.3.0 h1:nQIEMr+A92CkhHrZgUhcfsrZjibvB3APXf2a1VwCmMQ=
github.com/opensearch-project/opensearch-go/v2 v2.3.0/go.mod h1:8LDr9FCgUTVoT+5ESjc2+iaZuldqE+23Iq0r1XeNue8=
github.com/patrickmn/go-cache v2.1.0+incompatible h1:HRMgzkcYKYpi3C8ajMPV8OFXaaRUnok+kx1WdO15EQc=
@@ -416,8 +416,8 @@ golang.org/x/net v0.8.0/go.mod h1:QVkue5JL9kW//ek3r6jTKnTFis1tRmNAW2P1shuFdJc=
golang.org/x/net v0.10.0/go.mod h1:0qNGK6F8kojg2nk9dLZ2mShWaEBan6FAoqfSigmmuDg=
golang.org/x/net v0.34.0 h1:Mb7Mrk043xzHgnRM88suvJFwzVrRfHEHJEl5/71CKw0=
golang.org/x/net v0.34.0/go.mod h1:di0qlW3YNM5oh6GqDGQr92MyTozJPmybPK4Ev/Gm31k=
golang.org/x/oauth2 v0.23.0 h1:PbgcYx2W7i4LvjJWEbf0ngHV6qJYr86PkAV3bXdLEbs=
golang.org/x/oauth2 v0.23.0/go.mod h1:XYTD2NtWslqkgxebSiOHnXEap4TF09sJSc7H1sXbhtI=
golang.org/x/oauth2 v0.27.0 h1:da9Vo7/tDv5RH/7nZDz1eMGS/q1Vv1N/7FCrBhI9I3M=
golang.org/x/oauth2 v0.27.0/go.mod h1:onh5ek6nERTohokkhCD/y2cV4Do3fxFHFuAejCkRWT8=
golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
golang.org/x/sync v0.0.0-20190911185100-cd5d95a43a6e/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
golang.org/x/sync v0.0.0-20201020160332-67f06af15bc9/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@@ -1,5 +1,5 @@
{
"name": "机器常用指标(使用 Categraf 作为采集器,如果只想看当前业务组内的机器修改大盘变量 ident 的变量类型为机器标识即可)",
"name": "机器常用指标(如果只想看当前业务组内的机器修改大盘变量 ident 的变量类型为机器标识即可)",
"tags": "Categraf",
"ident": "",
"uuid": 1737103014612000,

View File

@@ -0,0 +1,300 @@
{
"name": "Host Table NG",
"tags": "Categraf",
"ident": "",
"uuid": 1756720567064000,
"configs": {
"var": [
{
"name": "prom",
"label": "PROM",
"type": "datasource",
"hide": false,
"definition": "prometheus"
},
{
"name": "ident",
"label": "机器",
"type": "query",
"hide": false,
"datasource": {
"cate": "prometheus",
"value": "${prom}"
},
"definition": "label_values(mem_free, ident)",
"multi": true,
"allOption": true
}
],
"panels": [
{
"type": "tableNG",
"id": "306cab0d-f643-4d86-94d0-248fc05fd8a8",
"layout": {
"h": 10,
"w": 24,
"x": 0,
"y": 0,
"i": "306cab0d-f643-4d86-94d0-248fc05fd8a8",
"isResizable": true
},
"version": "3.1.0",
"datasourceCate": "prometheus",
"datasourceValue": "${prom}",
"targets": [
{
"refId": "A",
"expr": "cpu_usage_active{ident=~\"$ident\"}",
"instant": true
},
{
"expr": "100 - mem_available_percent{ident=~\"$ident\"}",
"__mode__": "__query__",
"refId": "B",
"instant": true
},
{
"expr": "disk_used_percent{path=\"/\", ident=~\"$ident\"}",
"__mode__": "__query__",
"refId": "C",
"instant": true
},
{
"expr": "categraf_info{ident=~\"$ident\"}",
"__mode__": "__query__",
"refId": "D",
"instant": true
}
],
"transformationsNG": [
{
"id": "joinByField",
"options": {
"mode": "outer",
"byField": "ident"
}
},
{
"id": "organize",
"options": {
"fields": [
"ident",
"__time_0",
"__name___0",
"cpu",
"__value_#A",
"__time_1",
"__value_#B",
"__time_2",
"__name___2",
"device",
"fstype",
"mode",
"path",
"__value_#C",
"__time_3",
"__name___3",
"version",
"__value_#D"
],
"renameByName": {
"ident": "机器",
"__value_#A": "CPU利用率%",
"__value_#B": "内存利用率%",
"__value_#C": "根分区利用率%",
"version": "Categraf Version"
},
"excludeByName": {
"__time_0": true,
"__name__": true,
"agent_isp": true,
"agent_region": true,
"cpu": true,
"env": true,
"myenv": true,
"__time_1": true,
"__time_2": true,
"__name___2": true,
"device": true,
"fstype": true,
"mode": true,
"path": true,
"__name___0": true,
"__value_#D": true,
"__time_3": true,
"__name___3": true
},
"indexByName": {
"ident": 0,
"version": 1,
"__time_0": 2,
"__name___0": 3,
"agent_isp": 4,
"agent_region": 5,
"cpu": 6,
"env": 7,
"myenv": 8,
"__value_#A": 9,
"__time_1": 10,
"__value_#B": 11,
"__time_2": 12,
"__name___2": 13,
"device": 14,
"fstype": 15,
"mode": 16,
"path": 17,
"__value_#C": 18,
"__time_3": 19,
"__name___3": 20,
"__value_#D": 21
}
}
}
],
"name": "机器表格样例",
"maxPerRow": 4,
"custom": {
"showHeader": true,
"filterable": true,
"cellOptions": {
"type": "none",
"wrapText": false
}
},
"options": {
"links": [
{
"title": "详情",
"url": "/components/dashboard/detail?__uuid__=1737103014612000&ident=${ident}&prom=${prom}",
"targetBlank": true
}
],
"standardOptions": {
"decimals": 2
}
},
"overrides": [
{
"matcher": {
"id": "byName",
"value": "CPU利用率%"
},
"properties": {
"cellOptions": {
"type": "color-background",
"mode": "lcd",
"valueDisplayMode": "text"
},
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "rgb(255, 101, 107)",
"value": 85,
"type": ""
},
{
"color": "rgba(236, 210, 69, 1)",
"value": 70,
"type": ""
},
{
"color": "rgb(44, 157, 61)",
"value": null,
"type": "base"
}
]
},
"valueMappings": [],
"standardOptions": {
"util": "percent",
"decimals": 2,
"min": 0,
"max": 100
}
}
},
{
"matcher": {
"id": "byName",
"value": "内存利用率%"
},
"properties": {
"cellOptions": {
"type": "gauge",
"mode": "lcd",
"valueDisplayMode": "text"
},
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "rgb(255, 101, 107)",
"value": 80,
"type": ""
},
{
"color": "rgba(236, 210, 69, 1)",
"value": 60,
"type": ""
},
{
"color": "rgb(44, 157, 61)",
"value": null,
"type": "base"
}
]
},
"standardOptions": {
"util": "percent",
"decimals": 2,
"min": 0,
"max": 100
}
}
},
{
"matcher": {
"id": "byName",
"value": "根分区利用率%"
},
"properties": {
"cellOptions": {
"type": "gauge",
"mode": "basic",
"valueDisplayMode": "text"
},
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "rgb(255, 101, 107)",
"value": 90,
"type": ""
},
{
"color": "rgba(236, 210, 69, 1)",
"value": 60,
"type": ""
},
{
"color": "rgb(44, 157, 61)",
"value": null,
"type": "base"
}
]
},
"standardOptions": {
"util": "percent",
"decimals": 2,
"min": 0,
"max": 100
}
}
}
]
}
],
"version": "3.1.0"
}
}

File diff suppressed because it is too large Load Diff

View File

@@ -42,6 +42,11 @@ func (c *CvalCache) initSyncConfigs() {
log.Fatalln("failed to sync configs:", err)
}
err = models.RefreshPhoneEncryptionCache(c.ctx)
if err != nil {
logger.Errorf("failed to refresh phone encryption cache: %v", err)
}
go c.loopSyncConfigs()
}

View File

@@ -240,17 +240,17 @@ func (ncc *NotifyChannelCacheType) startHttpChannel(chID int64, channel *models.
go ncc.startNotifyConsumer(chID, queue, quitCh)
}
logger.Infof("started %d notify consumers for channel %d", concurrency, chID)
logger.Debugf("started %d notify consumers for channel %d", concurrency, chID)
}
// 启动通知消费者协程
func (ncc *NotifyChannelCacheType) startNotifyConsumer(channelID int64, queue *list.SafeListLimited, quitCh chan struct{}) {
logger.Infof("starting notify consumer for channel %d", channelID)
logger.Debugf("starting notify consumer for channel %d", channelID)
for {
select {
case <-quitCh:
logger.Infof("notify consumer for channel %d stopped", channelID)
logger.Debugf("notify consumer for channel %d stopped", channelID)
return
default:
// 从队列中取出任务
@@ -276,6 +276,7 @@ func (ncc *NotifyChannelCacheType) startNotifyConsumer(channelID int64, queue *l
// processNotifyTask 处理通知任务(仅处理 http 类型)
func (ncc *NotifyChannelCacheType) processNotifyTask(task *NotifyTask) {
httpClient := ncc.GetHttpClient(task.NotifyChannel.ID)
logger.Debugf("processNotifyTask: task: %+v", task)
// 现在只处理 http 类型flashduty 保持直接发送
if task.NotifyChannel.RequestType == "http" {
@@ -283,7 +284,7 @@ func (ncc *NotifyChannelCacheType) processNotifyTask(task *NotifyTask) {
start := time.Now()
resp, err := task.NotifyChannel.SendHTTP(task.Events, task.TplContent, task.CustomParams, task.Sendtos, httpClient)
resp = fmt.Sprintf("duration: %d ms %s", time.Since(start).Milliseconds(), resp)
logger.Infof("notify_id: %d, channel_name: %v, event:%+v, tplContent:%v, customParams:%v, userInfo:%+v, respBody: %v, err: %v",
logger.Infof("http_sendernotify_id: %d, channel_name: %v, event:%+v, tplContent:%v, customParams:%v, userInfo:%+v, respBody: %v, err: %v",
task.NotifyRuleId, task.NotifyChannel.Name, task.Events[0], task.TplContent, task.CustomParams, task.Sendtos, resp, err)
// 调用通知记录回调函数
@@ -294,8 +295,8 @@ func (ncc *NotifyChannelCacheType) processNotifyTask(task *NotifyTask) {
for i := range task.Sendtos {
start := time.Now()
resp, err := task.NotifyChannel.SendHTTP(task.Events, task.TplContent, task.CustomParams, []string{task.Sendtos[i]}, httpClient)
resp = fmt.Sprintf("duration: %d ms %s", time.Since(start).Milliseconds(), resp)
logger.Infof("notify_id: %d, channel_name: %v, event:%+v, tplContent:%v, customParams:%v, userInfo:%+v, respBody: %v, err: %v",
resp = fmt.Sprintf("send_time: %s duration: %d ms %s", time.Now().Format("2006-01-02 15:04:05"), time.Since(start).Milliseconds(), resp)
logger.Infof("http_sender notify_id: %d, channel_name: %v, event:%+v, tplContent:%v, customParams:%v, userInfo:%+v, respBody: %v, err: %v",
task.NotifyRuleId, task.NotifyChannel.Name, task.Events[0], task.TplContent, task.CustomParams, task.Sendtos[i], resp, err)
// 调用通知记录回调函数
@@ -448,7 +449,7 @@ func (ncc *NotifyChannelCacheType) startEmailSender(chID int64, smtp *models.SMT
logger.Warning("SMTP configurations invalid")
return
}
logger.Infof("start email sender... conf.Host:%+v,conf.Port:%+v", conf.Host, conf.Port)
logger.Debugf("start email sender... conf.Host:%+v,conf.Port:%+v", conf.Host, conf.Port)
d := gomail.NewDialer(conf.Host, conf.Port, conf.Username, conf.Password)
if conf.InsecureSkipVerify {
@@ -502,7 +503,11 @@ func (ncc *NotifyChannelCacheType) startEmailSender(chID int64, smtp *models.SMT
m.Mail.GetHeader("Subject"), m.Mail.GetHeader("To"))
}
// sender.NotifyRecord(ncc.ctx, m.Events, m.NotifyRuleId, models.Email, strings.Join(m.Mail.GetHeader("To"), ","), "", err)
// 记录通知详情
if ncc.notifyRecordFunc != nil {
target := strings.Join(m.Mail.GetHeader("To"), ",")
ncc.notifyRecordFunc(ncc.ctx, m.Events, m.NotifyRuleId, "Email", target, "success", err)
}
size++
if size >= conf.Batch {

View File

@@ -78,9 +78,12 @@ type AlertCurEvent struct {
RuleHash string `json:"rule_hash" gorm:"-"`
ExtraInfoMap []map[string]string `json:"extra_info_map" gorm:"-"`
NotifyRuleIds []int64 `json:"notify_rule_ids" gorm:"serializer:json"`
NotifyRuleId int64 `json:"notify_rule_id" gorm:"-"`
NotifyRuleName string `json:"notify_rule_name" gorm:"-"`
NotifyVersion int `json:"notify_version" gorm:"-"` // 0: old, 1: new
NotifyRules []*EventNotifyRule `json:"notify_rules" gorm:"-"`
RecoverTime int64 `json:"recover_time" gorm:"-"`
}
type EventNotifyRule struct {
@@ -495,6 +498,23 @@ func (e *AlertCurEvent) FE2DB() {
}
// FillTagsMap rebuilds e.TagsMap from the "key=value" pairs held in
// e.TagsJSON. Blank entries and entries lacking an '=' separator are
// silently skipped; values may themselves contain '=' characters.
func (e *AlertCurEvent) FillTagsMap() {
	e.TagsMap = make(map[string]string, len(e.TagsJSON))
	for _, raw := range e.TagsJSON {
		kv := strings.TrimSpace(raw)
		if kv == "" {
			continue
		}
		// Split on the first '=' only, so "a=b=c" yields key "a", value "b=c".
		parts := strings.SplitN(kv, "=", 2)
		if len(parts) != 2 {
			continue
		}
		e.TagsMap[parts[0]] = parts[1]
	}
}
func (e *AlertCurEvent) DB2Mem() {
e.IsRecovered = false
e.NotifyGroupsJSON = strings.Fields(e.NotifyGroups)

View File

@@ -127,7 +127,7 @@ func (e *AlertHisEvent) FillNotifyGroups(ctx *ctx.Context, cache map[int64]*User
func AlertHisEventTotal(
ctx *ctx.Context, prods []string, bgids []int64, stime, etime int64, severity int,
recovered int, dsIds []int64, cates []string, ruleId int64, query string) (int64, error) {
recovered int, dsIds []int64, cates []string, ruleId int64, query string, eventIds []int64) (int64, error) {
session := DB(ctx).Model(&AlertHisEvent{}).Where("last_eval_time between ? and ?", stime, etime)
if len(prods) > 0 {
@@ -158,6 +158,10 @@ func AlertHisEventTotal(
session = session.Where("rule_id = ?", ruleId)
}
if len(eventIds) > 0 {
session = session.Where("id in ?", eventIds)
}
if query != "" {
arr := strings.Fields(query)
for i := 0; i < len(arr); i++ {
@@ -171,7 +175,7 @@ func AlertHisEventTotal(
func AlertHisEventGets(ctx *ctx.Context, prods []string, bgids []int64, stime, etime int64,
severity int, recovered int, dsIds []int64, cates []string, ruleId int64, query string,
limit, offset int) ([]AlertHisEvent, error) {
limit, offset int, eventIds []int64) ([]AlertHisEvent, error) {
session := DB(ctx).Where("last_eval_time between ? and ?", stime, etime)
if len(prods) != 0 {
@@ -202,6 +206,10 @@ func AlertHisEventGets(ctx *ctx.Context, prods []string, bgids []int64, stime, e
session = session.Where("rule_id = ?", ruleId)
}
if len(eventIds) > 0 {
session = session.Where("id in ?", eventIds)
}
if query != "" {
arr := strings.Fields(query)
for i := 0; i < len(arr); i++ {
@@ -415,6 +423,10 @@ func (e *AlertHisEvent) ToCur() *AlertCurEvent {
NotifyChannelsJSON: e.NotifyChannelsJSON,
NotifyGroupsJSON: e.NotifyGroupsJSON,
OriginalTagsJSON: e.OriginalTagsJSON,
NotifyRuleIds: e.NotifyRuleIds,
NotifyRules: e.NotifyRules,
NotifyVersion: e.NotifyVersion,
RecoverTime: e.RecoverTime,
}
cur.SetTagsMap()

View File

@@ -20,7 +20,7 @@ type TagFilter struct {
Key string `json:"key"` // tag key
Func string `json:"func"` // `==` | `=~` | `in` | `!=` | `!~` | `not in`
Op string `json:"op"` // `==` | `=~` | `in` | `!=` | `!~` | `not in`
Value string `json:"value"` // tag value
Value interface{} `json:"value"` // tag value
Regexp *regexp.Regexp // parse value to regexp if func = '=~' or '!~'
Vset map[string]struct{} // parse value to regexp if func = 'in' or 'not in'
}
@@ -46,15 +46,59 @@ func ParseTagFilter(bFilters []TagFilter) ([]TagFilter, error) {
var err error
for i := 0; i < len(bFilters); i++ {
if bFilters[i].Func == "=~" || bFilters[i].Func == "!~" {
bFilters[i].Regexp, err = regexp.Compile(bFilters[i].Value)
// 这里存在两个情况,一个是 string 一个是 int
var pattern string
switch v := bFilters[i].Value.(type) {
case string:
pattern = v
case int:
pattern = strconv.Itoa(v)
default:
return nil, fmt.Errorf("unsupported value type for regex: %T", v)
}
bFilters[i].Regexp, err = regexp.Compile(pattern)
if err != nil {
return nil, err
}
} else if bFilters[i].Func == "in" || bFilters[i].Func == "not in" {
arr := strings.Fields(bFilters[i].Value)
// 这里存在两个情况,一个是 string 一个是[]int
bFilters[i].Vset = make(map[string]struct{})
for j := 0; j < len(arr); j++ {
bFilters[i].Vset[arr[j]] = struct{}{}
switch v := bFilters[i].Value.(type) {
case string:
// 处理字符串情况
arr := strings.Fields(v)
for j := 0; j < len(arr); j++ {
bFilters[i].Vset[arr[j]] = struct{}{}
}
case []int:
// 处理[]int情况
for j := 0; j < len(v); j++ {
bFilters[i].Vset[strconv.Itoa(v[j])] = struct{}{}
}
case []string:
for j := 0; j < len(v); j++ {
bFilters[i].Vset[v[j]] = struct{}{}
}
case []interface{}:
// 处理[]interface{}情况JSON解析可能产生
for j := 0; j < len(v); j++ {
switch item := v[j].(type) {
case string:
bFilters[i].Vset[item] = struct{}{}
case int:
bFilters[i].Vset[strconv.Itoa(item)] = struct{}{}
case float64:
bFilters[i].Vset[strconv.Itoa(int(item))] = struct{}{}
}
}
default:
// 兜底处理,转为字符串
str := fmt.Sprintf("%v", v)
arr := strings.Fields(str)
for j := 0; j < len(arr); j++ {
bFilters[i].Vset[arr[j]] = struct{}{}
}
}
}
}
@@ -73,15 +117,54 @@ func GetTagFilters(jsonArr ormx.JSONArr) ([]TagFilter, error) {
}
for i := 0; i < len(bFilters); i++ {
if bFilters[i].Func == "=~" || bFilters[i].Func == "!~" {
bFilters[i].Regexp, err = regexp.Compile(bFilters[i].Value)
var pattern string
switch v := bFilters[i].Value.(type) {
case string:
pattern = v
case int:
pattern = strconv.Itoa(v)
default:
return nil, fmt.Errorf("unsupported value type for regex: %T", v)
}
bFilters[i].Regexp, err = regexp.Compile(pattern)
if err != nil {
return nil, err
}
} else if bFilters[i].Func == "in" || bFilters[i].Func == "not in" {
arr := strings.Fields(bFilters[i].Value)
bFilters[i].Vset = make(map[string]struct{})
for j := 0; j < len(arr); j++ {
bFilters[i].Vset[arr[j]] = struct{}{}
// 在GetTagFilters中Value通常是string类型但也要处理其他可能的类型
switch v := bFilters[i].Value.(type) {
case string:
// 处理字符串情况
arr := strings.Fields(v)
for j := 0; j < len(arr); j++ {
bFilters[i].Vset[arr[j]] = struct{}{}
}
case []int:
// 处理[]int情况
for j := 0; j < len(v); j++ {
bFilters[i].Vset[strconv.Itoa(v[j])] = struct{}{}
}
case []interface{}:
// 处理[]interface{}情况JSON解析可能产生
for j := 0; j < len(v); j++ {
switch item := v[j].(type) {
case string:
bFilters[i].Vset[item] = struct{}{}
case int:
bFilters[i].Vset[strconv.Itoa(item)] = struct{}{}
case float64:
bFilters[i].Vset[strconv.Itoa(int(item))] = struct{}{}
}
}
default:
// 兜底处理,转为字符串
str := fmt.Sprintf("%v", v)
arr := strings.Fields(str)
for j := 0; j < len(arr); j++ {
bFilters[i].Vset[arr[j]] = struct{}{}
}
}
}
}
@@ -147,7 +230,7 @@ func AlertMuteGet(ctx *ctx.Context, where string, args ...interface{}) (*AlertMu
return lst[0], err
}
func AlertMuteGets(ctx *ctx.Context, prods []string, bgid int64, disabled int, query string) (lst []AlertMute, err error) {
func AlertMuteGets(ctx *ctx.Context, prods []string, bgid int64, disabled int, expired int, query string) (lst []AlertMute, err error) {
session := DB(ctx)
if bgid != -1 {
@@ -166,6 +249,15 @@ func AlertMuteGets(ctx *ctx.Context, prods []string, bgid int64, disabled int, q
}
}
if expired != -1 {
now := time.Now().Unix()
if expired == 1 {
session = session.Where("mute_time_type = ? AND etime < ?", TimeRange, now)
} else {
session = session.Where("(mute_time_type = ? AND etime >= ?) OR mute_time_type = ?", TimeRange, now, Periodic)
}
}
if query != "" {
arr := strings.Fields(query)
for i := 0; i < len(arr); i++ {
@@ -395,7 +487,7 @@ func AlertMuteGetsAll(ctx *ctx.Context) ([]*AlertMute, error) {
// get my cluster's mutes
var lst []*AlertMute
if !ctx.IsCenter {
lst, err := poster.GetByUrls[[]*AlertMute](ctx, "/v1/n9e/alert-mutes?disabled=0")
lst, err := poster.GetByUrls[[]*AlertMute](ctx, "/v1/n9e/active-alert-mutes")
if err != nil {
return nil, err
}
@@ -407,6 +499,10 @@ func AlertMuteGetsAll(ctx *ctx.Context) ([]*AlertMute, error) {
session := DB(ctx).Model(&AlertMute{}).Where("disabled = 0")
// 只筛选在生效时间内的屏蔽规则, 这里 btime < now+10 是为了避免同步期间有规则满足了生效时间条件
now := time.Now().Unix()
session = session.Where("(mute_time_type = ? AND btime <= ? AND etime >= ?) OR mute_time_type = ?", TimeRange, now+10, now, Periodic)
err := session.Find(&lst).Error
if err != nil {
return nil, err

View File

@@ -119,6 +119,7 @@ type AlertRule struct {
UpdateByNickname string `json:"update_by_nickname" gorm:"-"` // for fe
CronPattern string `json:"cron_pattern"`
NotifyRuleIds []int64 `json:"notify_rule_ids" gorm:"serializer:json"`
PipelineConfigs []PipelineConfig `json:"pipeline_configs" gorm:"serializer:json"`
NotifyVersion int `json:"notify_version"` // 0: old, 1: new
}
@@ -380,8 +381,7 @@ func GetHostsQuery(queries []HostQuery) []map[string]interface{} {
if q.Op == "==" {
m["target_busi_group.group_id in (?)"] = ids
} else {
m["target.ident not in (select target_ident "+
"from target_busi_group where group_id in (?))"] = ids
m["NOT EXISTS (SELECT 1 FROM target_busi_group tbg WHERE tbg.target_ident = target.ident AND tbg.group_id IN (?))"] = ids
}
case "tags":
lst := []string{}
@@ -735,6 +735,15 @@ func (ar *AlertRule) UpdateColumn(ctx *ctx.Context, column string, value interfa
return DB(ctx).Model(ar).Updates(updates).Error
}
if column == "notify_groups" || column == "notify_channels" {
updates := map[string]interface{}{
column: value,
"notify_version": 0,
"notify_rule_ids": []int64{},
}
return DB(ctx).Model(ar).Updates(updates).Error
}
return DB(ctx).Model(ar).UpdateColumn(column, value).Error
}
@@ -892,7 +901,8 @@ func (ar *AlertRule) FE2DB() error {
}
ar.AlgoParams = string(algoParamsByte)
if ar.RuleConfigJson == nil {
// 老的规则,是 PromQl 和 Severity 字段,新版的规则,使用 RuleConfig 字段
if ar.RuleConfigJson == nil || len(ar.PromQl) > 0 {
query := PromQuery{
PromQl: ar.PromQl,
Severity: ar.Severity,
@@ -977,6 +987,8 @@ func (ar *AlertRule) DB2FE() error {
return err
}
ar.FillSeverities()
return nil
}
@@ -1008,11 +1020,8 @@ func AlertRuleExists(ctx *ctx.Context, id, groupId int64, name string) (bool, er
if err != nil {
return false, err
}
if len(lst) == 0 {
return false, nil
}
return false, nil
return len(lst) > 0, nil
}
func GetAlertRuleIdsByTaskId(ctx *ctx.Context, taskId int64) ([]int64, error) {

View File

@@ -116,7 +116,18 @@ func (s *AlertSubscribe) Verify() error {
return errors.New("severities is required")
}
if len(s.NotifyRuleIds) > 0 {
if s.NotifyVersion == 1 {
if len(s.NotifyRuleIds) == 0 {
return errors.New("no notify rules selected")
}
s.UserGroupIds = ""
s.RedefineChannels = 0
s.NewChannels = ""
s.RedefineWebhooks = 0
s.Webhooks = ""
s.RedefineSeverity = 0
s.NewSeverity = 0
return nil
}
@@ -132,8 +143,8 @@ func (s *AlertSubscribe) Verify() error {
}
}
if s.NotifyVersion == 1 && len(s.NotifyRuleIds) == 0 {
return errors.New("no notify rules selected")
if s.NotifyVersion == 0 {
s.NotifyRuleIds = []int64{}
}
return nil
@@ -381,6 +392,17 @@ func (s *AlertSubscribe) MatchProd(prod string) bool {
return s.Prod == prod
}
// MatchCate reports whether the event category matches this subscription.
// Only a subscription whose Cate is exactly "host" restricts matching (to
// host events); an empty or any other Cate matches every category.
func (s *AlertSubscribe) MatchCate(cate string) bool {
	switch s.Cate {
	case "host":
		return cate == "host"
	default:
		return true
	}
}
func (s *AlertSubscribe) MatchCluster(dsId int64) bool {
// 没有配置数据源, 或者事件不需要关联数据源
// do not match any datasource or event not related to datasource

View File

@@ -5,6 +5,7 @@ import (
"log"
"os"
"regexp"
"sync"
"time"
"github.com/ccfos/nightingale/v6/pkg/ctx"
@@ -40,13 +41,68 @@ var (
)
const (
SALT = "salt"
RSA_PRIVATE_KEY = "rsa_private_key"
RSA_PUBLIC_KEY = "rsa_public_key"
RSA_PASSWORD = "rsa_password"
JWT_SIGNING_KEY = "jwt_signing_key"
SALT = "salt"
RSA_PRIVATE_KEY = "rsa_private_key"
RSA_PUBLIC_KEY = "rsa_public_key"
RSA_PASSWORD = "rsa_password"
JWT_SIGNING_KEY = "jwt_signing_key"
PHONE_ENCRYPTION_ENABLED = "phone_encryption_enabled" // 手机号加密开关
)
// Package-level cache for the phone-number encryption configuration.
// Guarded by its embedded RWMutex; populated by LoadPhoneEncryptionConfig
// and read via GetPhoneEncryptionConfigFromCache.
var (
	phoneEncryptionCache struct {
		sync.RWMutex
		enabled    bool   // whether phone encryption is switched on
		privateKey []byte // RSA private key (PEM bytes, per GetRSAKeys)
		publicKey  []byte // RSA public key (PEM bytes, per GetRSAKeys)
		password   string // password protecting the private key
		loaded     bool   // true once LoadPhoneEncryptionConfig has run successfully
	}
)
// LoadPhoneEncryptionConfig reads the phone-encryption switch and the RSA
// key material from the configs table and stores them in the package-level
// cache, marking the cache as loaded.
func LoadPhoneEncryptionConfig(ctx *ctx.Context) error {
	on, err := GetPhoneEncryptionEnabled(ctx)
	if err != nil {
		return errors.WithMessage(err, "failed to get phone encryption enabled")
	}

	priv, pub, pwd, err := GetRSAKeys(ctx)
	if err != nil {
		return errors.WithMessage(err, "failed to get RSA keys")
	}

	phoneEncryptionCache.Lock()
	defer phoneEncryptionCache.Unlock()
	phoneEncryptionCache.loaded = true
	phoneEncryptionCache.enabled = on
	phoneEncryptionCache.privateKey = priv
	phoneEncryptionCache.publicKey = pub
	phoneEncryptionCache.password = pwd

	logger.Debugf("Phone encryption config loaded: enabled=%v", on)
	return nil
}
// GetPhoneEncryptionConfigFromCache returns a snapshot of the cached
// phone-encryption configuration. loaded is false until
// LoadPhoneEncryptionConfig has completed successfully at least once.
func GetPhoneEncryptionConfigFromCache() (enabled bool, publicKey []byte, privateKey []byte, password string, loaded bool) {
	phoneEncryptionCache.RLock()
	defer phoneEncryptionCache.RUnlock()
	enabled = phoneEncryptionCache.enabled
	publicKey = phoneEncryptionCache.publicKey
	privateKey = phoneEncryptionCache.privateKey
	password = phoneEncryptionCache.password
	loaded = phoneEncryptionCache.loaded
	return enabled, publicKey, privateKey, password, loaded
}
// RefreshPhoneEncryptionCache reloads the phone-encryption cache from the
// configs table. Call it after the related configs have been modified so
// readers observe the new values. Currently a thin alias for
// LoadPhoneEncryptionConfig.
func RefreshPhoneEncryptionCache(ctx *ctx.Context) error {
	return LoadPhoneEncryptionConfig(ctx)
}
func InitJWTSigningKey(ctx *ctx.Context) string {
val, err := ConfigsGet(ctx, JWT_SIGNING_KEY)
if err != nil {
@@ -198,6 +254,41 @@ func ConfigsGetFlashDutyAppKey(ctx *ctx.Context) (string, error) {
return configs[0].Cval, nil
}
// GetPhoneEncryptionEnabled reports whether phone-number encryption is
// switched on, reading the flag directly from the configs table. The stored
// values "true" and "1" both count as enabled; anything else is disabled.
func GetPhoneEncryptionEnabled(ctx *ctx.Context) (bool, error) {
	v, err := ConfigsGet(ctx, PHONE_ENCRYPTION_ENABLED)
	if err != nil {
		return false, err
	}
	switch v {
	case "true", "1":
		return true, nil
	}
	return false, nil
}
// SetPhoneEncryptionEnabled persists the phone-encryption switch to the
// configs table, storing the literal strings "true" or "false".
func SetPhoneEncryptionEnabled(ctx *ctx.Context, enabled bool) error {
	value := "true"
	if !enabled {
		value = "false"
	}
	return ConfigsSet(ctx, PHONE_ENCRYPTION_ENABLED, value)
}
// GetRSAKeys fetches the RSA private key, public key, and key password from
// the configs table. The key values are returned as raw bytes of the stored
// config strings; any missing entry surfaces as a wrapped error.
func GetRSAKeys(ctx *ctx.Context) (privateKey []byte, publicKey []byte, password string, err error) {
	priv, err := ConfigsGet(ctx, RSA_PRIVATE_KEY)
	if err != nil {
		return nil, nil, "", errors.WithMessage(err, "failed to get RSA private key")
	}

	pub, err := ConfigsGet(ctx, RSA_PUBLIC_KEY)
	if err != nil {
		return nil, nil, "", errors.WithMessage(err, "failed to get RSA public key")
	}

	pwd, err := ConfigsGet(ctx, RSA_PASSWORD)
	if err != nil {
		return nil, nil, "", errors.WithMessage(err, "failed to get RSA password")
	}

	return []byte(priv), []byte(pub), pwd, nil
}
func ConfigsSelectByCkey(ctx *ctx.Context, ckey string) ([]Configs, error) {
if !ctx.IsCenter {
return []Configs{}, nil

View File

@@ -13,6 +13,7 @@ import (
"github.com/pkg/errors"
"github.com/toolkits/pkg/logger"
"github.com/toolkits/pkg/str"
"gorm.io/gorm"
)
type Datasource struct {
@@ -140,7 +141,7 @@ func (ds *Datasource) Update(ctx *ctx.Context, selectField interface{}, selectFi
if ds.UpdatedAt == 0 {
ds.UpdatedAt = time.Now().Unix()
}
return DB(ctx).Model(ds).Select(selectField, selectFields...).Updates(ds).Error
return DB(ctx).Model(ds).Session(&gorm.Session{SkipHooks: true}).Select(selectField, selectFields...).Updates(ds).Error
}
func (ds *Datasource) Add(ctx *ctx.Context) error {

View File

@@ -16,7 +16,7 @@ type EventPipeline struct {
TeamIds []int64 `json:"team_ids" gorm:"type:text;serializer:json"`
TeamNames []string `json:"team_names" gorm:"-"`
Description string `json:"description" gorm:"type:varchar(255)"`
FilterEnable bool `json:"filter_enable" gorm:"type:bigint"`
FilterEnable bool `json:"filter_enable" gorm:"type:boolean"`
LabelFilters []TagFilter `json:"label_filters" gorm:"type:text;serializer:json"`
AttrFilters []TagFilter `json:"attribute_filters" gorm:"type:text;serializer:json"`
ProcessorConfigs []ProcessorConfig `json:"processors" gorm:"type:text;serializer:json"`

View File

@@ -209,7 +209,6 @@ func (t MsgTplList) IfUsed(nr *NotifyRule) bool {
const (
DingtalkTitle = `{{if $event.IsRecovered}} Recovered {{else}}Triggered{{end}}: {{$event.RuleName}}`
FeishuCardTitle = `🔔 {{$event.RuleName}}`
FeishuAppTitle = `{{- if $event.IsRecovered }}🔔 ﹝恢复﹞ {{$event.RuleName}}{{- else }}🔔 ﹝告警﹞ {{$event.RuleName}}{{- end -}}`
LarkCardTitle = `🔔 {{$event.RuleName}}`
)
@@ -249,12 +248,7 @@ var NewTplMap = map[string]string{
{{- end}}
{{end}}
{{$domain := "http://127.0.0.1:17000" }}
{{$mutelink := print $domain "/alert-mutes/add?busiGroup=" $event.GroupId "&cate=" $event.Cate "&datasource_ids=" $event.DatasourceId "&prod=" $event.RuleProd}}
{{- range $key, $value := $event.TagsMap}}
{{- $encodedValue := $value | urlquery }}
{{- $mutelink = print $mutelink "&tags=" $key "%3D" $encodedValue}}
{{- end}}
[事件详情]({{$domain}}/alert-his-events/{{$event.Id}}) | [屏蔽1小时]({{$mutelink}}) | [查看曲线]({{$domain}}/metric/explorer?data_source_id={{$event.DatasourceId}}&data_source_name=prometheus&mode=graph&prom_ql={{$event.PromQl|urlquery}})`,
[事件详情]({{$domain}}/alert-his-events/{{$event.Id}}) | [屏蔽1小时]({{$domain}}/alert-mutes/add?__event_id={{$event.Id}}){{if eq $event.Cate "prometheus"}} | [查看曲线]({{$domain}}/metric/explorer?__event_id={{$event.Id}}&mode=graph){{end}}`,
Email: `<!DOCTYPE html>
<html lang="en">
<head>
@@ -483,8 +477,8 @@ var NewTplMap = map[string]string{
{{if $event.IsRecovered}}恢复时间:{{timeformat $event.LastEvalTime}}{{else}}触发时间: {{timeformat $event.TriggerTime}}
触发时值: {{$event.TriggerValue}}{{end}}
发送时间: {{timestamp}}{{$domain := "http://127.0.0.1:17000" }}
事件详情: {{$domain}}/alert-his-events/{{$event.Id}}{{$muteUrl := print $domain "/alert-mutes/add?busiGroup=" $event.GroupId "&cate=" $event.Cate "&datasource_ids=" $event.DatasourceId "&prod=" $event.RuleProd}}{{range $key, $value := $event.TagsMap}}{{$muteUrl = print $muteUrl "&tags=" $key "%3D" $value}}{{end}}
屏蔽1小时: {{ unescaped $muteUrl }}`,
事件详情: {{$domain}}/alert-his-events/{{$event.Id}}
屏蔽1小时: {{$domain}}/alert-mutes/add?__event_id={{$event.Id}}`,
FeishuCard: `{{- if $event.IsRecovered -}}
{{- if ne $event.Cate "host" -}}
**告警集群:** {{$event.Cluster}}{{end}}
@@ -511,7 +505,7 @@ var NewTplMap = map[string]string{
{{- end}}
{{- end}}
{{$domain := "http://请联系管理员修改通知模板将域名替换为实际的域名" }}
[事件详情]({{$domain}}/alert-his-events/{{$event.Id}})|[屏蔽1小时]({{$domain}}/alert-mutes/add?busiGroup={{$event.GroupId}}&cate={{$event.Cate}}&datasource_ids={{$event.DatasourceId}}&prod={{$event.RuleProd}}{{range $key, $value := $event.TagsMap}}&tags={{$key}}%3D{{$value}}{{end}})|[查看曲线]({{$domain}}/metric/explorer?data_source_id={{$event.DatasourceId}}&data_source_name=prometheus&mode=graph&prom_ql={{$event.PromQl|escape}})`,
[事件详情]({{$domain}}/alert-his-events/{{$event.Id}})|[屏蔽1小时]({{$domain}}/alert-mutes/add?__event_id={{$event.Id}}){{if eq $event.Cate "prometheus"}}|[查看曲线]({{$domain}}/metric/explorer?__event_id={{$event.Id}}&mode=graph){{end}}`,
EmailSubject: `{{if $event.IsRecovered}}Recovered{{else}}Triggered{{end}}: {{$event.RuleName}} {{$event.TagsJSON}}`,
Mm: `级别状态: S{{$event.Severity}} {{if $event.IsRecovered}}Recovered{{else}}Triggered{{end}}
规则名称: {{$event.RuleName}}{{if $event.RuleNote}}
@@ -540,7 +534,7 @@ var NewTplMap = map[string]string{
{{$time_duration := sub now.Unix $event.FirstTriggerTime }}{{if $event.IsRecovered}}{{$time_duration = sub $event.LastEvalTime $event.FirstTriggerTime }}{{end}}**距离首次告警**: {{humanizeDurationInterface $time_duration}}
**发送时间**: {{timestamp}}
{{$domain := "http://请联系管理员修改通知模板将域名替换为实际的域名" }}
[事件详情]({{$domain}}/alert-his-events/{{$event.Id}})|[屏蔽1小时]({{$domain}}/alert-mutes/add?busiGroup={{$event.GroupId}}&cate={{$event.Cate}}&datasource_ids={{$event.DatasourceId}}&prod={{$event.RuleProd}}{{range $key, $value := $event.TagsMap}}&tags={{$key}}%3D{{$value}}{{end}})|[查看曲线]({{$domain}}/metric/explorer?data_source_id={{$event.DatasourceId}}&data_source_name=prometheus&mode=graph&prom_ql={{$event.PromQl|escape}})`,
[事件详情]({{$domain}}/alert-his-events/{{$event.Id}})|[屏蔽1小时]({{$domain}}/alert-mutes/add?__event_id={{$event.Id}}){{if eq $event.Cate "prometheus"}}|[查看曲线]({{$domain}}/metric/explorer?__event_id={{$event.Id}}&mode=graph){{end}}`,
Lark: `级别状态: S{{$event.Severity}} {{if $event.IsRecovered}}Recovered{{else}}Triggered{{end}}
规则名称: {{$event.RuleName}}{{if $event.RuleNote}}
规则备注: {{$event.RuleNote}}{{end}}
@@ -550,7 +544,7 @@ var NewTplMap = map[string]string{
发送时间: {{timestamp}}
{{$domain := "http://请联系管理员修改通知模板将域名替换为实际的域名" }}
事件详情: {{$domain}}/alert-his-events/{{$event.Id}}
屏蔽1小时: {{$domain}}/alert-mutes/add?busiGroup={{$event.GroupId}}&cate={{$event.Cate}}&datasource_ids={{$event.DatasourceId}}&prod={{$event.RuleProd}}{{range $key, $value := $event.TagsMap}}&tags={{$key}}%3D{{$value}}{{end}}`,
屏蔽1小时: {{$domain}}/alert-mutes/add?__event_id={{$event.Id}}`,
LarkCard: `{{ if $event.IsRecovered }}
{{- if ne $event.Cate "host"}}
**告警集群:** {{$event.Cluster}}{{end}}
@@ -573,7 +567,7 @@ var NewTplMap = map[string]string{
{{if $event.RuleNote }}**告警描述:** **{{$event.RuleNote}}**{{end}}
{{- end -}}
{{$domain := "http://请联系管理员修改通知模板将域名替换为实际的域名" }}
[事件详情]({{$domain}}/alert-his-events/{{$event.Id}})|[屏蔽1小时]({{$domain}}/alert-mutes/add?busiGroup={{$event.GroupId}}&cate={{$event.Cate}}&datasource_ids={{$event.DatasourceId}}&prod={{$event.RuleProd}}{{range $key, $value := $event.TagsMap}}&tags={{$key}}%3D{{$value}}{{end}})|[查看曲线]({{$domain}}/metric/explorer?data_source_id={{$event.DatasourceId}}&data_source_name=prometheus&mode=graph&prom_ql={{$event.PromQl|escape}})`,
[事件详情]({{$domain}}/alert-his-events/{{$event.Id}})|[屏蔽1小时]({{$domain}}/alert-mutes/add?__event_id={{$event.Id}}){{if eq $event.Cate "prometheus"}}|[查看曲线]({{$domain}}/metric/explorer?__event_id={{$event.Id}}&mode=graph){{end}}`,
SlackWebhook: `{{ if $event.IsRecovered }}
{{- if ne $event.Cate "host"}}
*Alarm cluster:* {{$event.Cluster}}{{end}}
@@ -600,8 +594,8 @@ var NewTplMap = map[string]string{
{{$domain := "http://127.0.0.1:17000" }}
<{{$domain}}/alert-his-events/{{$event.Id}}|Event Details>
<{{$domain}}/alert-mutes/add?busiGroup={{$event.GroupId}}&cate={{$event.Cate}}&datasource_ids={{$event.DatasourceId}}&prod={{$event.RuleProd}}{{range $key, $value := $event.TagsMap}}&tags={{$key}}%3D{{$value}}{{end}}|Block for 1 hour>
<{{$domain}}/metric/explorer?data_source_id={{$event.DatasourceId}}&data_source_name=prometheus&mode=graph&prom_ql={{$event.PromQl|escape}}|View Curve>`,
<{{$domain}}/alert-mutes/add?__event_id={{$event.Id}}|Block for 1 hour>
<{{$domain}}/metric/explorer?__event_id={{$event.Id}}&mode=graph|View Curve>`,
Discord: `**Level Status**: {{if $event.IsRecovered}}S{{$event.Severity}} Recovered{{else}}S{{$event.Severity}} Triggered{{end}}
**Rule Title**: {{$event.RuleName}}{{if $event.RuleNote}}
**Rule Note**: {{$event.RuleNote}}{{end}}{{if $event.TargetIdent}}
@@ -613,12 +607,7 @@ var NewTplMap = map[string]string{
**Send Time**: {{timestamp}}
{{$domain := "http://127.0.0.1:17000" }}
{{$mutelink := print $domain "/alert-mutes/add?busiGroup=" $event.GroupId "&cate=" $event.Cate "&datasource_ids=" $event.DatasourceId "&prod=" $event.RuleProd}}
{{- range $key, $value := $event.TagsMap}}
{{- $encodedValue := $value | urlquery }}
{{- $mutelink = print $mutelink "&tags=" $key "%3D" $encodedValue}}
{{- end}}
[Event Details]({{$domain}}/alert-his-events/{{$event.Id}}) | [Silence 1h]({{$mutelink}}) | [View Graph]({{$domain}}/metric/explorer?data_source_id={{$event.DatasourceId}}&data_source_name=prometheus&mode=graph&prom_ql={{$event.PromQl|urlquery}})`,
[Event Details]({{$domain}}/alert-his-events/{{$event.Id}}) | [Silence 1h]({{$domain}}/alert-mutes/add?__event_id={{$event.Id}}) | [View Graph]({{$domain}}/metric/explorer?__event_id={{$event.Id}}&mode=graph)`,
MattermostWebhook: `{{ if $event.IsRecovered }}
{{- if ne $event.Cate "host"}}
@@ -640,26 +629,7 @@ var NewTplMap = map[string]string{
{{if $event.RuleNote }}**Alarm description:** **{{$event.RuleNote}}**{{end}}
{{- end -}}
{{$domain := "http://127.0.0.1:17000" }}
[Event Details]({{$domain}}/alert-his-events/{{$event.Id}})|[Block for 1 hour]({{$domain}}/alert-mutes/add?busiGroup={{$event.GroupId}}&cate={{$event.Cate}}&datasource_ids={{$event.DatasourceId}}&prod={{$event.RuleProd}}{{range $key, $value := $event.TagsMap}}&tags={{$key}}%3D{{$value}}{{end}})|[View Curve]({{$domain}}/metric/explorer?data_source_id={{$event.DatasourceId}}&data_source_name=prometheus&mode=graph&prom_ql={{$event.PromQl|escape}})`,
FeishuApp: `{{- if $event.IsRecovered -}}
{{- if ne $event.Cate "host" -}}
**告警集群:** {{$event.Cluster}}{{end}}
**级别状态:** S{{$event.Severity}} Recovered
**告警名称:** {{$event.RuleName}}
**事件标签:** {{$event.TagsJSON}}
**恢复时间:** {{timeformat $event.LastEvalTime}}
**告警描述:** **服务已恢复**
{{- else }}
{{- if ne $event.Cate "host"}}
**告警集群:** {{$event.Cluster}}{{end}}
**级别状态:** S{{$event.Severity}} Triggered
**告警名称:** {{$event.RuleName}}
**事件标签:** {{$event.TagsJSON}}
**触发时间:** {{timeformat $event.TriggerTime}}
**发送时间:** {{timestamp}}
**触发时值:** {{$event.TriggerValue}}
{{if $event.RuleNote }}**告警描述:** **{{$event.RuleNote}}**{{end}}
{{- end -}}`,
[Event Details]({{$domain}}/alert-his-events/{{$event.Id}})|[Block for 1 hour]({{$domain}}/alert-mutes/add?__event_id={{$event.Id}})|[View Curve]({{$domain}}/metric/explorer?__event_id={{$event.Id}}&mode=graph)`,
}
var MsgTplMap = []MessageTemplate{
@@ -678,7 +648,6 @@ var MsgTplMap = []MessageTemplate{
{Name: "Lark", Ident: Lark, Weight: 5, Content: map[string]string{"content": NewTplMap[Lark]}},
{Name: "Feishu", Ident: Feishu, Weight: 4, Content: map[string]string{"content": NewTplMap[Feishu]}},
{Name: "FeishuCard", Ident: FeishuCard, Weight: 4, Content: map[string]string{"title": FeishuCardTitle, "content": NewTplMap[FeishuCard]}},
{Name: "FeishuApp", Ident: FeishuApp, Weight: 4, Content: map[string]string{"title": FeishuAppTitle, "content": NewTplMap[FeishuApp]}},
{Name: "Wecom", Ident: Wecom, Weight: 3, Content: map[string]string{"content": NewTplMap[Wecom]}},
{Name: "Dingtalk", Ident: Dingtalk, Weight: 2, Content: map[string]string{"title": NewTplMap[EmailSubject], "content": NewTplMap[Dingtalk]}},
{Name: "Email", Ident: Email, Weight: 1, Content: map[string]string{"subject": NewTplMap[EmailSubject], "content": NewTplMap[Email]}},
@@ -724,19 +693,33 @@ func (t *MessageTemplate) Upsert(ctx *ctx.Context, ident string) error {
return tpl.Update(ctx, *t)
}
var GetDefs func(map[string]interface{}) []string
// getDefs returns the default template preamble: the variable definitions
// ($events, $event, $labels, $value) that message templates rely on. The
// renderData argument exists for hook compatibility (see the GetDefs
// package variable) and is not consulted by this default implementation.
func getDefs(renderData map[string]interface{}) []string {
	defs := make([]string, 0, 4)
	defs = append(defs,
		"{{ $events := .events }}",
		"{{ $event := index $events 0 }}",
		"{{ $labels := $event.TagsMap }}",
		"{{ $value := $event.TriggerValue }}",
	)
	return defs
}
// init wires the package-level GetDefs hook to the default implementation,
// allowing other packages (or tests) to swap in a custom definition list.
func init() {
	GetDefs = getDefs
}
func (t *MessageTemplate) RenderEvent(events []*AlertCurEvent) map[string]interface{} {
if t == nil {
return nil
}
renderData := make(map[string]interface{})
renderData["events"] = events
// event 内容渲染到 messageTemplate
tplContent := make(map[string]interface{})
for key, msgTpl := range t.Content {
var defs = []string{
"{{ $events := . }}",
"{{ $event := index $events 0 }}",
"{{ $labels := $event.TagsMap }}",
"{{ $value := $event.TriggerValue }}",
}
defs := GetDefs(renderData)
var body bytes.Buffer
if t.NotifyChannelIdent == "email" {
@@ -749,7 +732,7 @@ func (t *MessageTemplate) RenderEvent(events []*AlertCurEvent) map[string]interf
}
var body bytes.Buffer
if err = tpl.Execute(&body, events); err != nil {
if err = tpl.Execute(&body, renderData); err != nil {
logger.Errorf("failed to execute template: %v", err)
tplContent[key] = fmt.Sprintf("failed to execute template: %v", err)
continue
@@ -764,7 +747,7 @@ func (t *MessageTemplate) RenderEvent(events []*AlertCurEvent) map[string]interf
continue
}
if err = tpl.Execute(&body, events); err != nil {
if err = tpl.Execute(&body, renderData); err != nil {
logger.Errorf("failed to execute template: %v events: %v", err, events)
continue
}
@@ -785,7 +768,7 @@ func (t *MessageTemplate) RenderEvent(events []*AlertCurEvent) map[string]interf
continue
}
if err = tpl.Execute(&body, events); err != nil {
if err = tpl.Execute(&body, renderData); err != nil {
logger.Errorf("failed to execute template: %v events: %v", err, events)
tplContent[key] = fmt.Sprintf("failed to execute template: %v", err)
continue

View File

@@ -72,8 +72,10 @@ func MigrateTables(db *gorm.DB) error {
if isPostgres(db) {
dts = append(dts, &models.PostgresBuiltinComponent{})
DropUniqueFiledLimit(db, &models.PostgresBuiltinComponent{}, "idx_ident", "idx_ident")
} else {
dts = append(dts, &models.BuiltinComponent{})
DropUniqueFiledLimit(db, &models.BuiltinComponent{}, "idx_ident", "idx_ident")
}
if !db.Migrator().HasColumn(&imodels.TaskSchedulerHealth{}, "scheduler") {
@@ -124,11 +126,16 @@ func MigrateTables(db *gorm.DB) error {
DropUniqueFiledLimit(db, &Configs{}, "ckey", "configs_ckey_key")
// 删除 builtin_metrics 表的 idx_collector_typ_name 唯一索引
DropUniqueFiledLimit(db, &models.BuiltinMetric{}, "idx_collector_typ_name", "idx_collector_typ_name")
InsertPermPoints(db)
return nil
}
func DropUniqueFiledLimit(db *gorm.DB, dst interface{}, uniqueFiled string, pgUniqueFiled string) { // UNIQUE KEY (`ckey`)
// 先检查表是否存在,如果不存在则直接返回
if !db.Migrator().HasTable(dst) {
return
}
if db.Migrator().HasIndex(dst, uniqueFiled) {
err := db.Migrator().DropIndex(dst, uniqueFiled) //mysql DROP INDEX
if err != nil {
@@ -159,116 +166,13 @@ func columnHasIndex(db *gorm.DB, dst interface{}, indexColumn string) bool {
return false
}
// InsertPermPoints grants the Standard role the operation permission
// points introduced by recent features (notification templates/rules,
// event pipelines, etc.). Each (role, operation) pair is inserted only
// if it does not already exist, so the function is idempotent and safe
// to run on every startup. Failures are logged and skipped so one bad
// row does not abort the rest of the migration.
func InsertPermPoints(db *gorm.DB) {
	// Operations newly granted to the Standard role. Kept as a flat
	// list instead of 16 repeated struct literals.
	operations := []string{
		"/alert-mutes/put",
		"/log/index-patterns",
		"/help/variable-configs",
		"/ibex-settings",
		"/notification-templates",
		"/notification-templates/add",
		"/notification-templates/put",
		"/notification-templates/del",
		"/notification-rules",
		"/notification-rules/add",
		"/notification-rules/put",
		"/notification-rules/del",
		"/event-pipelines",
		"/event-pipelines/add",
		"/event-pipelines/put",
		"/event-pipelines/del",
	}

	ops := make([]models.RoleOperation, 0, len(operations))
	for _, operation := range operations {
		ops = append(ops, models.RoleOperation{
			RoleName:  "Standard",
			Operation: operation,
		})
	}

	for _, op := range ops {
		var count int64
		session := db.Session(&gorm.Session{}).Model(&models.RoleOperation{})
		err := session.Where("operation = ? AND role_name = ?", op.Operation, op.RoleName).Count(&count).Error
		if err != nil {
			logger.Errorf("check role operation exists failed, %v", err)
			continue
		}

		// Already present: nothing to do for this pair.
		if count > 0 {
			continue
		}

		err = session.Create(&op).Error
		if err != nil {
			logger.Errorf("insert role operation failed, %v", err)
		}
	}
}
// AlertRule carries the columns added to the alert_rule table by this
// migration; gorm AutoMigrate uses it to create the missing fields.
type AlertRule struct {
	ExtraConfig string `gorm:"type:text;column:extra_config"`
	CronPattern string `gorm:"type:varchar(64);column:cron_pattern"`
	// NOTE(review): the original tag started with a bare
	// "datasource_queries" token, which is not a valid gorm setting and
	// was silently ignored (the column name only matched by gorm's
	// default snake_case naming). "column:" makes the intent explicit
	// and consistent with the sibling fields.
	DatasourceQueries []models.DatasourceQuery `gorm:"column:datasource_queries;type:text;serializer:json"` // datasource queries
	NotifyRuleIds     []int64                  `gorm:"column:notify_rule_ids;type:varchar(1024)"`
	NotifyVersion     int                      `gorm:"column:notify_version;type:int;default:0"`
	PipelineConfigs   []models.PipelineConfig  `gorm:"column:pipeline_configs;type:text;serializer:json"`
}
type AlertSubscribe struct {
@@ -356,8 +260,9 @@ type BoardBusigroup struct {
}
type Users struct {
Belong string `gorm:"column:belong;varchar(16);default:'';comment:belong"`
Belong string `gorm:"column:belong;type:varchar(16);default:'';comment:belong"`
LastActiveTime int64 `gorm:"column:last_active_time;type:int;default:0;comment:last_active_time"`
Phone string `gorm:"column:phone;type:varchar(1024);not null;default:''"`
}
type SsoConfig struct {
@@ -436,6 +341,7 @@ type NotifyRule struct {
UserGroupIds []int64 `gorm:"column:user_group_ids;type:varchar(255)"`
NotifyConfigs []models.NotifyConfig `gorm:"column:notify_configs;type:text"`
PipelineConfigs []models.PipelineConfig `gorm:"column:pipeline_configs;type:text"`
ExtraConfig interface{} `gorm:"column:extra_config;type:text"`
CreateAt int64 `gorm:"column:create_at;not null;default:0"`
CreateBy string `gorm:"column:create_by;type:varchar(64);not null;default:''"`
UpdateAt int64 `gorm:"column:update_at;not null;default:0"`

View File

@@ -2,7 +2,6 @@ package models
import (
"bytes"
"context"
"crypto/hmac"
"crypto/sha256"
"crypto/tls"
@@ -94,6 +93,9 @@ type UserInfo struct {
type FlashDutyRequestConfig struct {
Proxy string `json:"proxy"`
IntegrationUrl string `json:"integration_url"`
Timeout int `json:"timeout"` // 超时时间(毫秒)
RetryTimes int `json:"retry_times"` // 重试次数
RetrySleep int `json:"retry_sleep"` // 重试等待时间(毫秒)
}
// ParamItem 自定义参数项
@@ -196,7 +198,7 @@ func (ncc *NotifyChannelConfig) SendScript(events []*AlertCurEvent, tpl map[stri
cmd.Stderr = &buf
err, isTimeout := cmdx.RunTimeout(cmd, time.Duration(config.Timeout)*time.Millisecond)
logger.Infof("event_script_notify_result: exec %s output: %s isTimeout: %v err: %v", fpath, buf.String(), isTimeout, err)
logger.Infof("event_script_notify_result: exec %s output: %s isTimeout: %v err: %v stdin: %s", fpath, buf.String(), isTimeout, err, string(getStdinBytes(events, tpl, params, sendtos)))
res := buf.String()
@@ -315,9 +317,20 @@ func GetHTTPClient(nc *NotifyChannelConfig) (*http.Client, error) {
}
httpConfig := nc.RequestConfig.HTTPRequestConfig
if httpConfig.Timeout == 0 {
httpConfig.Timeout = 10000
// 对于 FlashDuty 类型,优先使用 FlashDuty 配置中的超时时间
timeout := httpConfig.Timeout
if nc.RequestType == "flashduty" && nc.RequestConfig.FlashDutyRequestConfig != nil {
flashDutyTimeout := nc.RequestConfig.FlashDutyRequestConfig.Timeout
if flashDutyTimeout > 0 {
timeout = flashDutyTimeout
}
}
if timeout == 0 {
timeout = 10000 // HTTP 默认 10 秒
}
if httpConfig.Concurrency == 0 {
httpConfig.Concurrency = 5
}
@@ -347,18 +360,78 @@ func GetHTTPClient(nc *NotifyChannelConfig) (*http.Client, error) {
Proxy: proxyFunc,
TLSClientConfig: tlsConfig,
DialContext: (&net.Dialer{
Timeout: time.Duration(httpConfig.Timeout) * time.Millisecond,
Timeout: time.Duration(timeout) * time.Millisecond,
}).DialContext,
}
client := &http.Client{
Transport: transport,
Timeout: time.Duration(httpConfig.Timeout) * time.Millisecond,
Timeout: time.Duration(timeout) * time.Millisecond,
}
return client, nil
}
// makeHTTPRequest builds the outgoing *http.Request for a notification
// channel: it applies per-vendor header/signature handling (Tencent and
// Alibaba SMS/voice endpoints get special treatment) and merges query
// parameters into the URL.
//
// Parameters:
//   httpConfig - HTTP method and request details configured for the channel
//   url        - fully rendered target URL
//   headers    - rendered header key/value pairs
//   parameters - rendered query parameter key/value pairs
//   body       - rendered request body bytes
func (ncc *NotifyChannelConfig) makeHTTPRequest(httpConfig *HTTPRequestConfig, url string, headers map[string]string, parameters map[string]string, body []byte) (*http.Request, error) {
	req, err := http.NewRequest(httpConfig.Method, url, bytes.NewBuffer(body))
	if err != nil {
		logger.Errorf("failed to create request: %v", err)
		return nil, err
	}

	query := req.URL.Query()

	// Set request headers; Tencent Cloud SMS/voice need signed headers
	// derived from the request body (setTxHeader).
	if ncc.Ident == "tx-sms" || ncc.Ident == "tx-voice" {
		headers = ncc.setTxHeader(headers, body)

		for key, value := range headers {
			req.Header.Add(key, value)
		}
	} else if ncc.Ident == "ali-sms" || ncc.Ident == "ali-voice" {
		// Alibaba Cloud signs the query string rather than the body, so
		// the request is rebuilt without a body and the signed
		// query/headers come from getAliQuery.
		req, err = http.NewRequest(httpConfig.Method, url, nil)
		if err != nil {
			return nil, err
		}
		query, headers = ncc.getAliQuery(ncc.Ident, query, httpConfig.Request.Parameters["AccessKeyId"], httpConfig.Request.Parameters["AccessKeySecret"], parameters)
		for key, value := range headers {
			req.Header.Set(key, value)
		}
	} else {
		for key, value := range headers {
			req.Header.Add(key, value)
		}
	}

	// For Alibaba endpoints the parameters were already folded into the
	// signed query above; for everyone else append them here.
	if ncc.Ident != "ali-sms" && ncc.Ident != "ali-voice" {
		for key, value := range parameters {
			query.Add(key, value)
		}
	}

	req.URL.RawQuery = query.Encode()

	// Log the complete request for debugging.
	logger.Debugf("URL: %v, Method: %s, Headers: %+v, params: %+v, Body: %s", req.URL, req.Method, req.Header, query, string(body))

	return req, nil
}
// makeFlashDutyRequest assembles a JSON POST request towards the
// FlashDuty integration endpoint. When flashDutyChannelID is non-zero
// it is forwarded as the channel_id query parameter so the event lands
// in the configured collaboration space.
func (ncc *NotifyChannelConfig) makeFlashDutyRequest(url string, bodyBytes []byte, flashDutyChannelID int64) (*http.Request, error) {
	request, err := http.NewRequest(http.MethodPost, url, bytes.NewBuffer(bodyBytes))
	if err != nil {
		return nil, err
	}

	values := request.URL.Query()
	if flashDutyChannelID != 0 {
		// A configured collaboration space (channel_id) is passed along
		// as a query parameter.
		values.Add("channel_id", strconv.FormatInt(flashDutyChannelID, 10))
	}
	request.URL.RawQuery = values.Encode()

	request.Header.Add("Content-Type", "application/json")

	return request, nil
}
func (ncc *NotifyChannelConfig) SendFlashDuty(events []*AlertCurEvent, flashDutyChannelID int64, client *http.Client) (string, error) {
// todo 每一个 channel 批量发送事件
if client == nil {
@@ -370,46 +443,57 @@ func (ncc *NotifyChannelConfig) SendFlashDuty(events []*AlertCurEvent, flashDuty
return "", err
}
req, err := http.NewRequest("POST", ncc.RequestConfig.FlashDutyRequestConfig.IntegrationUrl, bytes.NewBuffer(body))
if err != nil {
logger.Errorf("failed to create request: %v, event: %v", err, events)
return "", err
url := ncc.RequestConfig.FlashDutyRequestConfig.IntegrationUrl
retrySleep := time.Second
if ncc.RequestConfig.FlashDutyRequestConfig.RetrySleep > 0 {
retrySleep = time.Duration(ncc.RequestConfig.FlashDutyRequestConfig.RetrySleep) * time.Millisecond
}
// 设置 URL 参数
query := req.URL.Query()
if flashDutyChannelID != 0 {
// 如果 flashduty 有配置协作空间(channel_id),则传入 channel_id 参数
query.Add("channel_id", strconv.FormatInt(flashDutyChannelID, 10))
retryTimes := 3
if ncc.RequestConfig.FlashDutyRequestConfig.RetryTimes > 0 {
retryTimes = ncc.RequestConfig.FlashDutyRequestConfig.RetryTimes
}
req.URL.RawQuery = query.Encode()
req.Header.Add("Content-Type", "application/json")
// 重试机制
for i := 0; i <= 3; i++ {
logger.Infof("send flashduty req:%+v body:%+v", req, string(body))
// 把最后一次错误保存下来,后面返回,让用户在页面上也可以看到
var lastErrorMessage string
for i := 0; i <= retryTimes; i++ {
req, err := ncc.makeFlashDutyRequest(url, body, flashDutyChannelID)
if err != nil {
logger.Errorf("send_flashduty: failed to create request. url=%s request_body=%s error=%v", url, string(body), err)
return fmt.Sprintf("failed to create request. error: %v", err), err
}
// 直接使用客户端发送请求,超时时间已经在 client 中设置
resp, err := client.Do(req)
if err != nil {
logger.Errorf("send flashduty req:%+v err:%v", req, err)
time.Sleep(time.Duration(100) * time.Millisecond)
logger.Errorf("send_flashduty: http_call=fail url=%s request_body=%s error=%v times=%d", url, string(body), err, i+1)
if i < retryTimes {
// 重试等待时间,后面要放到页面上配置
time.Sleep(retrySleep)
}
lastErrorMessage = err.Error()
continue
}
defer resp.Body.Close()
// 读取响应
body, err := io.ReadAll(resp.Body)
if err != nil {
logger.Errorf("failed to read response: %v, event: %v", err, events)
// 走到这里,说明请求 Flashduty 成功,不管 Flashduty 返回了什么结果,都不判断,仅保存,给用户查看即可
// 比如服务端返回 5xx也不要重试重试可能会导致服务端数据有问题。告警事件这样的东西没有那么关键只要最终能在 UI 上看到调用结果就行
var resBody []byte
if resp.Body != nil {
defer resp.Body.Close()
resBody, err = io.ReadAll(resp.Body)
if err != nil {
logger.Errorf("send_flashduty: failed to read response. request_body=%s, error=%v", string(body), err)
resBody = []byte("failed to read response. error: " + err.Error())
}
}
logger.Infof("send flashduty req:%+v resp:%+v body:%+v err:%v", req, resp, string(body), err)
if resp.StatusCode == http.StatusOK {
return string(body), nil
}
time.Sleep(time.Duration(100) * time.Millisecond)
logger.Infof("send_flashduty: http_call=succ url=%s request_body=%s response_code=%d response_body=%s times=%d", url, string(body), resp.StatusCode, string(resBody), i+1)
return fmt.Sprintf("status_code:%d, response:%s", resp.StatusCode, string(resBody)), nil
}
return "", errors.New("failed to send request")
return lastErrorMessage, errors.New("failed to send request")
}
func (ncc *NotifyChannelConfig) SendHTTP(events []*AlertCurEvent, tpl map[string]interface{}, params map[string]string, sendtos []string, client *http.Client) (string, error) {
@@ -447,54 +531,21 @@ func (ncc *NotifyChannelConfig) SendHTTP(events []*AlertCurEvent, tpl map[string
url, headers, parameters := ncc.replaceVariables(fullTpl)
logger.Infof("url: %v, headers: %v, parameters: %v", url, headers, parameters)
req, err := http.NewRequest(httpConfig.Method, url, bytes.NewBuffer(body))
if err != nil {
logger.Errorf("failed to create request: %v, event: %v", err, events)
return "", err
}
query := req.URL.Query()
// 设置请求头 腾讯云短信、语音特殊处理
if ncc.Ident == "tx-sms" || ncc.Ident == "tx-voice" {
headers = ncc.setTxHeader(headers, body)
for key, value := range headers {
req.Header.Add(key, value)
}
} else if ncc.Ident == "ali-sms" || ncc.Ident == "ali-voice" {
req, err = http.NewRequest(httpConfig.Method, url, nil)
if err != nil {
return "", err
}
query, headers = ncc.getAliQuery(ncc.Ident, query, httpConfig.Request.Parameters["AccessKeyId"], httpConfig.Request.Parameters["AccessKeySecret"], parameters)
for key, value := range headers {
req.Header.Set(key, value)
}
} else {
for key, value := range headers {
req.Header.Add(key, value)
}
}
if ncc.Ident != "ali-sms" && ncc.Ident != "ali-voice" {
for key, value := range parameters {
query.Add(key, value)
}
}
req.URL.RawQuery = query.Encode()
// 记录完整的请求信息
logger.Debugf("URL: %v, Method: %s, Headers: %+v, params: %+v, Body: %s", req.URL, req.Method, req.Header, query, string(body))
// 重试机制
for i := 0; i <= httpConfig.RetryTimes; i++ {
var lastErrorMessage string
for i := 0; i < httpConfig.RetryTimes; i++ {
var resp *http.Response
ctx, cancel := context.WithTimeout(context.Background(), time.Duration(httpConfig.Timeout)*time.Millisecond)
resp, err = client.Do(req.WithContext(ctx))
cancel() // 确保释放资源
req, err := ncc.makeHTTPRequest(httpConfig, url, headers, parameters, body)
if err != nil {
time.Sleep(time.Duration(httpConfig.RetryInterval) * time.Second)
logger.Errorf("send http request failed to send http notify: %v", err)
logger.Errorf("send_http: failed to create request. url=%s request_body=%s error=%v", url, string(body), err)
return fmt.Sprintf("failed to create request. error: %v", err), err
}
resp, err = client.Do(req)
if err != nil {
logger.Errorf("send_http: failed to send http notify. url=%s request_body=%s error=%v", url, string(body), err)
lastErrorMessage = err.Error()
time.Sleep(time.Duration(httpConfig.RetryInterval) * time.Millisecond)
continue
}
defer resp.Body.Close()
@@ -502,11 +553,9 @@ func (ncc *NotifyChannelConfig) SendHTTP(events []*AlertCurEvent, tpl map[string
// 读取响应
body, err := io.ReadAll(resp.Body)
logger.Debugf("send http request: %+v, response: %+v, body: %+v", req, resp, string(body))
if err != nil {
logger.Errorf("failed to send http notify: %v", err)
logger.Errorf("send_http: failed to read response. url=%s request_body=%s error=%v", url, string(body), err)
}
if resp.StatusCode == http.StatusOK {
return string(body), nil
}
@@ -514,8 +563,7 @@ func (ncc *NotifyChannelConfig) SendHTTP(events []*AlertCurEvent, tpl map[string
return "", fmt.Errorf("failed to send request, status code: %d, body: %s", resp.StatusCode, string(body))
}
return "", err
return lastErrorMessage, errors.New("all retries failed, last error: " + lastErrorMessage)
}
// getAliQuery 获取阿里云API的查询参数和请求头
@@ -922,11 +970,6 @@ func (ncc *NotifyChannelConfig) ValidateFlashDutyRequestConfig() error {
}
func (ncc *NotifyChannelConfig) Update(ctx *ctx.Context, ref NotifyChannelConfig) error {
// ref.FE2DB()
if ncc.Ident != ref.Ident {
return errors.New("cannot update ident")
}
ref.ID = ncc.ID
ref.CreateAt = ncc.CreateAt
ref.CreateBy = ncc.CreateBy
@@ -1253,8 +1296,7 @@ var NotiChMap = []*NotifyChannelConfig{
Method: "POST", Headers: map[string]string{"Content-Type": "application/json"},
Timeout: 10000, Concurrency: 5, RetryTimes: 3, RetryInterval: 100,
Request: RequestDetail{
Parameters: map[string]string{"token": "{{$params.token}}"},
Body: `{"msg_type": "text", "content": {"text": "{{$tpl.content}}"}}`,
Body: `{"msg_type": "text", "content": {"text": "{{$tpl.content}}"}}`,
},
},
},
@@ -1276,8 +1318,7 @@ var NotiChMap = []*NotifyChannelConfig{
Method: "POST", Headers: map[string]string{"Content-Type": "application/json"},
Timeout: 10000, Concurrency: 5, RetryTimes: 3, RetryInterval: 100,
Request: RequestDetail{
Parameters: map[string]string{"token": "{{$params.token}}"},
Body: `{"msg_type": "interactive", "card": {"config": {"wide_screen_mode": true}, "header": {"title": {"content": "{{$tpl.title}}", "tag": "plain_text"}, "template": "{{if $event.IsRecovered}}green{{else}}red{{end}}"}, "elements": [{"tag": "div", "text": {"tag": "lark_md","content": "{{$tpl.content}}"}}]}}`,
Body: `{"msg_type": "interactive", "card": {"config": {"wide_screen_mode": true}, "header": {"title": {"content": "{{$tpl.title}}", "tag": "plain_text"}, "template": "{{if $event.IsRecovered}}green{{else}}red{{end}}"}, "elements": [{"tag": "markdown", "content": "{{$tpl.content}}"}]}}`,
},
},
},
@@ -1290,27 +1331,6 @@ var NotiChMap = []*NotifyChannelConfig{
},
},
},
{
Name: "FeishuApp", Ident: FeishuApp, RequestType: "script", Weight: 5, Enable: false,
RequestConfig: &RequestConfig{
ScriptRequestConfig: &ScriptRequestConfig{
Timeout: 10000,
ScriptType: "script",
Script: FeishuAppBody,
},
},
ParamConfig: &NotifyParamConfig{
UserInfo: &UserInfo{
ContactKey: "email",
},
Custom: Params{
Params: []ParamItem{
{Key: "feishuapp_id", CName: "FeiShuAppID", Type: "string"},
{Key: "feishuapp_secret", CName: "FeiShuAppSecret", Type: "string"},
},
},
},
},
{
Name: "Feishu", Ident: Feishu, RequestType: "http", Weight: 5, Enable: true,
RequestConfig: &RequestConfig{
@@ -1340,7 +1360,7 @@ var NotiChMap = []*NotifyChannelConfig{
Method: "POST", Headers: map[string]string{"Content-Type": "application/json"},
Timeout: 10000, Concurrency: 5, RetryTimes: 3, RetryInterval: 100,
Request: RequestDetail{
Body: `{"msg_type": "interactive", "card": {"config": {"wide_screen_mode": true}, "header": {"title": {"content": "{{$tpl.title}}", "tag": "plain_text"}, "template": "{{if $event.IsRecovered}}green{{else}}red{{end}}"}, "elements": [{"tag": "div", "text": {"tag": "lark_md","content": "{{$tpl.content}}"}}]}}`,
Body: `{"msg_type": "interactive", "card": {"config": {"wide_screen_mode": true}, "header": {"title": {"content": "{{$tpl.title}}", "tag": "plain_text"}, "template": "{{if $event.IsRecovered}}green{{else}}red{{end}}"}, "elements": [{"tag": "markdown", "content": "{{$tpl.content}}"}]}}`,
},
},
},
@@ -1426,6 +1446,8 @@ var NotiChMap = []*NotifyChannelConfig{
},
FlashDutyRequestConfig: &FlashDutyRequestConfig{
IntegrationUrl: "flashduty integration url",
Timeout: 5000, // 默认5秒超时
RetryTimes: 3, // 默认重试3次
},
},
},
@@ -1463,460 +1485,3 @@ func (ncc *NotifyChannelConfig) Upsert(ctx *ctx.Context) error {
}
return ch.Update(ctx, *ncc)
}
var FeishuAppBody = `#!/usr/bin/env python
# -*- coding: UTF-8 -*-
import sys
import json
import requests
import logging
import re
import traceback
import os
import copy
from typing import Dict, Any, Optional
# 配置日志
logging.basicConfig(
level=logging.INFO,
format='%(asctime)s - %(name)s - %(levelname)s - %(message)s'
)
logger = logging.getLogger(__name__)
# 常量
RECOVERED = "recovered"
TRIGGERED = "triggered"
def get_access_token(app_id: str, app_secret: str) -> str:
"""获取飞书访问令牌"""
logger.info(f"正在获取飞书访问令牌... AppID: {app_id}")
url = "https://open.feishu.cn/open-apis/auth/v3/tenant_access_token/internal"
data = {
"app_id": app_id,
"app_secret": app_secret
}
try:
logger.info(f"发送请求到 {url}")
response = requests.post(
url,
json=data,
timeout=30,
headers={"Content-Type": "application/json"}
)
logger.info(f"收到响应: 状态码={response.status_code}")
if response.status_code != 200:
logger.error(f"获取令牌失败,状态码: {response.status_code}")
logger.error(f"响应内容: {response.text}")
return ""
resp_json = response.json()
if resp_json.get("msg") != "ok":
logger.error(f"飞书获取AccessToken失败: error={resp_json.get('msg')}")
logger.error(f"完整响应: {resp_json}")
return ""
else:
token = resp_json.get("tenant_access_token", "")
logger.info(f"飞书获取AccessToken成功Token长度: {len(token)}")
return token
except Exception as e:
logger.error(f"飞书获取AccessToken异常: error={e}")
logger.error(f"错误详情: {traceback.format_exc()}")
return ""
def get_user_info(token: str, emails: list) -> dict:
"""从飞书API获取用户信息"""
url = "https://open.feishu.cn/open-apis/contact/v3/users/batch_get_id?user_id_type=user_id"
data = {"emails": emails}
try:
response = requests.post(
url,
json=data,
timeout=30,
headers={
"Content-Type": "application/json",
"Authorization": f"Bearer {token}"
}
)
return response.json()
except Exception as e:
logger.error(f"获取用户信息失败: {e}")
return {}
def send_urgent_app(message_id: str, user_id: str, title: str, token: str) -> Optional[Exception]:
"""发送紧急应用通知"""
if not message_id:
return Exception("消息ID为空")
if not user_id:
return Exception("用户ID为空")
user_body = {
"user_id_list": [user_id],
"content": {
"text": f"紧急告警: {title}",
"type": "text"
}
}
url = f"https://open.feishu.cn/open-apis/im/v1/messages/{message_id}/urgent_app?user_id_type=user_id"
try:
response = requests.patch(
url,
json=user_body,
timeout=30,
headers={
"Content-Type": "application/json",
"Authorization": f"Bearer {token}"
}
)
res = response.json()
if response.status_code != 200:
return Exception(f"请求失败,状态码 {response.status_code}")
if res.get("code", -1) != 0:
return Exception(f"飞书拒绝请求: {res.get('msg')}")
return None
except Exception as e:
return e
def create_feishu_app_body(title: str, content: str, color: str, event_url: str) -> dict:
"""创建飞书应用卡片消息体
直接使用传入的模板内容确保使用markdown解析
"""
# 修复双重转义的换行符问题
# 1. 将 \\n 转换为真正的换行符
markdown_content = content.replace('\\n', '\n')
# 2. 处理HTML转义字符
markdown_content = markdown_content.replace('&#34;', '"').replace('&gt;', '>')
# 3. 修复特殊格式问题
# 处理可能导致错误的大括号表达式
markdown_content = markdown_content.replace('{map[', '{ map[')
markdown_content = markdown_content.replace('map[v:{', 'map[v:{ ')
# 创建消息卡片
app_body = {
"config": {
"wide_screen_mode": True,
"enable_forward": True
},
"header": {
"title": {
"tag": "plain_text",
"content": title
},
"template": color
},
"elements": [
{
"tag": "markdown",
"content": markdown_content
},
{
"tag": "hr"
},
{
"tag": "action",
"actions": [
{
"tag": "button",
"text": {
"content": "告警详情",
"tag": "plain_text"
},
"type": "primary",
"url": event_url
}
]
},
{
"tag": "hr"
},
{
"tag": "note",
"elements": [
{
"tag": "lark_md",
"content": title
}
]
}
]
}
# 记录修复后的内容
logger.info(f"飞书卡片标题: {title}")
logger.info(f"飞书卡片颜色: {color}")
logger.info(f"修复后的内容预览: {markdown_content[:100]}...")
return app_body
def extract_data_from_string(stdin_data):
"""从字符串中提取关键数据返回构建的payload"""
payload = {"tpl": {}, "params": {}, "sendto": []}
# 提取tplContent
content_match = re.search(r'tplContent:map\[content:(.*?) title:(.*?)\]', stdin_data)
if content_match:
payload["tpl"]["content"] = content_match.group(1)
payload["tpl"]["title"] = content_match.group(2)
# 提取customParams
params_match = re.search(r'customParams:map\[(.*?)\]', stdin_data)
if params_match:
params_str = params_match.group(1)
# 提取domain_url
domain_match = re.search(r'domain_url:(.*?)(?: |$)', params_str)
if domain_match:
payload["params"]["domain_url"] = domain_match.group(1)
# 提取feishuapp_id
app_id_match = re.search(r'feishuapp_id:(.*?)(?: |$)', params_str)
if app_id_match:
payload["params"]["feishuapp_id"] = app_id_match.group(1)
# 提取feishuapp_secret
secret_match = re.search(r'feishuapp_secret:(.*?)(?:\s|$)', params_str)
if secret_match:
payload["params"]["feishuapp_secret"] = secret_match.group(1)
# 检查是否有err字段
err_match = re.search(r'err:(.*?)(?:,|\s|$)', stdin_data)
if err_match:
error_msg = err_match.group(1)
logger.error(f"检测到脚本错误: {error_msg}")
# 不设置默认发送目标,允许为空
return payload
def send_feishu_app(payload) -> None:
"""
发送飞书应用通知
Args:
payload: 包含告警信息的字典
"""
try:
# 提取必要参数
app_id = payload.get('params', {}).get('feishuapp_id')
app_secret = payload.get('params', {}).get('feishuapp_secret')
domain_url = "https://your-n9e-addr.com"
# 从sendto获取通知人列表
sendtos = payload.get('sendtos', [])
if isinstance(sendtos, str):
# 如果sendto是字符串按逗号分割
sendtos = [s.strip() for s in sendtos.split(',') if s.strip()]
# 检查必要参数
if not app_id or not app_secret:
logger.error("未提供有效的飞书应用凭证 (app_id 或 app_secret)")
return
# 检查发送目标,如果为空则直接返回
if not sendtos:
logger.warning("未提供发送目标,无法发送消息")
return
logger.info(f"发送目标: {sendtos}")
# 提取事件信息 - 优先使用单个event如果没有则使用events中的第一个
event = payload.get('event', {})
if not event and payload.get('events') and len(payload.get('events', [])) > 0:
event = payload.get('events')[0]
# 获取通知内容 - 使用已渲染的模板内容
content = payload.get('tpl', {}).get('content', '未找到告警内容')
title = payload.get('tpl', {}).get('title', '告警通知')
# 获取访问令牌
token = get_access_token(app_id, app_secret)
if not token:
logger.error("获取飞书访问令牌失败,无法继续")
return
# 获取用户信息
user_info = get_user_info(token, sendtos)
# 创建邮箱到用户ID的映射
user_id_map = {}
if user_info and "data" in user_info and "user_list" in user_info["data"]:
for user in user_info["data"]["user_list"]:
if user.get("email"):
user_id_map[user["email"]] = user.get("user_id", "")
# 提取事件信息
event_id = event.get('id', 0)
rule_name = event.get('rule_name', title)
severity = event.get('severity', 1) # 默认为严重级别
# 设置颜色和标题 - 根据事件是否已恢复或严重性级别
color = "red" # 默认严重告警使用红色
send_title = title
# 根据事件状态确定颜色
if "Recovered" in content:
color = "green" # 已恢复告警使用绿色
elif severity == 1:
color = "red" # 严重告警
elif severity == 2:
color = "orange" # 警告
elif severity == 3:
color = "blue" # 信息
event_url = f"{domain_url}/alert-his-events/{event_id}"
# 为每个接收者发送消息
for recipient in sendtos:
if not recipient:
continue
# 确定receive_id_type
if recipient.startswith("ou_"):
receive_type = "open_id"
elif recipient.startswith("on_"):
receive_type = "union_id"
elif recipient.startswith("oc_"):
receive_type = "chat_id"
elif "@" in recipient:
receive_type = "email"
else:
receive_type = "user_id"
fs_url = f"https://open.feishu.cn/open-apis/im/v1/messages?receive_id_type={receive_type}"
# 创建卡片消息体 - 直接使用模板生成的内容在create_feishu_app_body中处理转义问题
app_body = create_feishu_app_body(send_title, content, color, event_url)
content_str = json.dumps(app_body)
body = {
"msg_type": "interactive",
"receive_id": recipient,
"content": content_str
}
# 发送消息
try:
response = requests.post(
fs_url,
json=body,
timeout=30,
headers={
"Content-Type": "application/json",
"Authorization": f"Bearer {token}"
}
)
send_res = response.json()
if response.status_code != 200 or send_res.get("code", -1) != 0:
logger.error(f"飞书消息发送失败: 状态码={response.status_code}, 错误={send_res.get('msg', '未知错误')}")
continue
message_id = send_res.get("data", {}).get("message_id", "")
logger.info(f"发送成功 → {recipient} [消息ID: {message_id}]")
# 如果是高严重性,发送紧急消息
if severity == 1 and "Recovered" not in content:
user_id = user_id_map.get(recipient, "")
if user_id:
err = send_urgent_app(message_id, user_id, send_title, token)
if err:
logger.error(f"加急通知失败: {err}")
else:
logger.info(f"已发送加急通知 → {recipient}")
else:
logger.warning(f"无法发送加急: 未找到用户ID (email={recipient})")
except Exception as e:
logger.error(f"发送异常: {e}")
logger.error(f"错误详情: {traceback.format_exc()}")
except Exception as e:
logger.error(f"处理异常: {e}")
logger.error(f"错误详情: {traceback.format_exc()}")
def main():
"""主函数:读取输入数据,解析并发送飞书通知"""
try:
logger.info("开始执行飞书应用告警脚本")
payload = None
# 读取标准输入
try:
stdin_data = sys.stdin.read()
# 保存安全处理后的原始输入
try:
with open(".raw_input", 'w') as f:
sanitized_data = stdin_data.replace(r'feishuapp_secret:[^ ]*', 'feishuapp_secret:[REDACTED]')
f.write(sanitized_data)
except:
pass
# 优先尝试解析JSON
try:
payload = json.loads(stdin_data)
except json.JSONDecodeError:
# JSON解析失败尝试字符串提取
if "tplContent" in stdin_data:
payload = extract_data_from_string(stdin_data)
logger.info("从原始文本提取数据成功")
else:
logger.error("无法识别的数据格式")
payload = {
"tpl": {"content": "无法解析输入数据", "title": "告警通知"},
"params": {},
"sendto": []
}
except Exception as e:
logger.error(f"读取输入失败: {e}")
payload = {
"tpl": {"content": "读取输入失败", "title": "告警通知"},
"params": {},
"sendto": []
}
# 保存处理后的payload隐藏敏感信息
try:
with open(".payload", 'w') as f:
safe_payload = copy.deepcopy(payload)
if 'params' in safe_payload and 'feishuapp_secret' in safe_payload['params']:
safe_payload['params']['feishuapp_secret'] = '[REDACTED]'
f.write(json.dumps(safe_payload, indent=4))
except:
pass
# 处理发送
send_feishu_app(payload)
except Exception as e:
logger.error(f"处理异常: {e}")
logger.error(f"错误详情: {traceback.format_exc()}")
sys.exit(1) # 确保错误状态正确传递
# 脚本入口点 - 只有一个入口点
if __name__ == "__main__":
main()
`

View File

@@ -1,7 +1,10 @@
package models
import (
"crypto/sha256"
"encoding/hex"
"errors"
"fmt"
"time"
"github.com/ccfos/nightingale/v6/pkg/ctx"
@@ -19,6 +22,7 @@ type NotifyRule struct {
// 通知配置
NotifyConfigs []NotifyConfig `json:"notify_configs" gorm:"serializer:json"`
ExtraConfig interface{} `json:"extra_config,omitempty" gorm:"serializer:json"`
CreateAt int64 `json:"create_at"`
CreateBy string `json:"create_by"`
@@ -39,6 +43,7 @@ type NotifyConfig struct {
ChannelID int64 `json:"channel_id"` // 通知媒介(如:阿里云短信)
TemplateID int64 `json:"template_id"` // 通知模板
Params map[string]interface{} `json:"params"` // 通知参数
Type string `json:"type"`
Severities []int `json:"severities"` // 适用级别(一级告警、二级告警、三级告警)
TimeRanges []TimeRanges `json:"time_ranges"` // 适用时段
@@ -46,6 +51,12 @@ type NotifyConfig struct {
Attributes []TagFilter `json:"attributes"` // 适用属性
}
// Hash returns a hex-encoded SHA-256 digest computed over the notify
// config's identifying fields (channel, template, params, type,
// severities, time ranges, label keys and attributes), usable to detect
// duplicate or changed configurations.
func (n *NotifyConfig) Hash() string {
	fingerprint := fmt.Sprintf("%d%d%v%s%v%v%v%v", n.ChannelID, n.TemplateID, n.Params, n.Type, n.Severities, n.TimeRanges, n.LabelKeys, n.Attributes)
	sum := sha256.Sum256([]byte(fingerprint))
	return hex.EncodeToString(sum[:])
}
type CustomParams struct {
UserIDs []int64 `json:"user_ids"`
UserGroupIDs []int64 `json:"user_group_ids"`
@@ -75,11 +86,6 @@ func GetNotifyRule(c *ctx.Context, id int64) (*NotifyRule, error) {
return &rule, nil
}
// 更新 NotifyRule
func UpdateNotifyRule(c *ctx.Context, rule *NotifyRule) error {
return DB(c).Save(rule).Error
}
// 删除 NotifyRule
func DeleteNotifyRule(c *ctx.Context, id int64) error {
return DB(c).Delete(&NotifyRule{}, id).Error
@@ -190,7 +196,12 @@ func (r *NotifyRule) Update(ctx *ctx.Context, ref NotifyRule) error {
if err != nil {
return err
}
return DB(ctx).Model(r).Select("*").Updates(ref).Error
db := DB(ctx).Model(r).Select("*")
if ref.ExtraConfig == nil {
db = db.Omit("ExtraConfig")
}
return db.Updates(ref).Error
}
func (r *NotifyRule) DB2FE() {

View File

@@ -232,7 +232,7 @@ var TplMap = map[string]string{
{{- end}}
{{- end}}
{{$domain := "http://请联系管理员修改通知模板将域名替换为实际的域名" }}
[事件详情]({{$domain}}/alert-his-events/{{.Id}})|[屏蔽1小时]({{$domain}}/alert-mutes/add?busiGroup={{.GroupId}}&cate={{.Cate}}&datasource_ids={{.DatasourceId}}&prod={{.RuleProd}}{{range $key, $value := .TagsMap}}&tags={{$key}}%3D{{$value}}{{end}})|[查看曲线]({{$domain}}/metric/explorer?data_source_id={{.DatasourceId}}&data_source_name=prometheus&mode=graph&prom_ql={{.PromQl|escape}})`,
[事件详情]({{$domain}}/alert-his-events/{{.Id}})|[屏蔽1小时]({{$domain}}/alert-mutes/add?__event_id={{.Id}}){{if eq .Cate "prometheus"}}|[查看曲线]({{$domain}}/metric/explorer?__event_id={{.Id}}&mode=graph}}){{end}}`,
Email: `<!DOCTYPE html>
<html lang="en">
<head>
@@ -459,7 +459,7 @@ var TplMap = map[string]string{
发送时间: {{timestamp}}
{{$domain := "http://请联系管理员修改通知模板将域名替换为实际的域名" }}
事件详情: {{$domain}}/alert-his-events/{{.Id}}
屏蔽1小时: {{$domain}}/alert-mutes/add?busiGroup={{.GroupId}}&cate={{.Cate}}&datasource_ids={{.DatasourceId}}&prod={{.RuleProd}}{{range $key, $value := .TagsMap}}&tags={{$key}}%3D{{$value}}{{end}}`,
屏蔽1小时: {{$domain}}/alert-mutes/add?__event_id={{.Id}}`,
FeishuCard: `{{ if .IsRecovered }}
{{- if ne .Cate "host"}}
**告警集群:** {{.Cluster}}{{end}}
@@ -478,7 +478,7 @@ var TplMap = map[string]string{
{{if .RuleNote }}**告警描述:** **{{.RuleNote}}**{{end}}
{{- end -}}
{{$domain := "http://请联系管理员修改通知模板将域名替换为实际的域名" }}
[事件详情]({{$domain}}/alert-his-events/{{.Id}})|[屏蔽1小时]({{$domain}}/alert-mutes/add?busiGroup={{.GroupId}}&cate={{.Cate}}&datasource_ids={{.DatasourceId}}&prod={{.RuleProd}}{{range $key, $value := .TagsMap}}&tags={{$key}}%3D{{$value}}{{end}})|[查看曲线]({{$domain}}/metric/explorer?data_source_id={{.DatasourceId}}&data_source_name=prometheus&mode=graph&prom_ql={{.PromQl|escape}})`,
[事件详情]({{$domain}}/alert-his-events/{{.Id}})|[屏蔽1小时]({{$domain}}/alert-mutes/add?__event_id={{.Id}}){{if eq .Cate "prometheus"}}|[查看曲线]({{$domain}}/metric/explorer?__event_id={{.Id}}&mode=graph}}){{end}}`,
EmailSubject: `{{if .IsRecovered}}Recovered{{else}}Triggered{{end}}: {{.RuleName}} {{.TagsJSON}}`,
Mm: `级别状态: S{{.Severity}} {{if .IsRecovered}}Recovered{{else}}Triggered{{end}}
规则名称: {{.RuleName}}{{if .RuleNote}}
@@ -506,7 +506,7 @@ var TplMap = map[string]string{
{{$time_duration := sub now.Unix .FirstTriggerTime }}{{if .IsRecovered}}{{$time_duration = sub .LastEvalTime .FirstTriggerTime }}{{end}}**距离首次告警**: {{humanizeDurationInterface $time_duration}}
**发送时间**: {{timestamp}}
{{$domain := "http://请联系管理员修改通知模板将域名替换为实际的域名" }}
[事件详情]({{$domain}}/alert-his-events/{{.Id}})|[屏蔽1小时]({{$domain}}/alert-mutes/add?busiGroup={{.GroupId}}&cate={{.Cate}}&datasource_ids={{.DatasourceId}}&prod={{.RuleProd}}{{range $key, $value := .TagsMap}}&tags={{$key}}%3D{{$value}}{{end}})|[查看曲线]({{$domain}}/metric/explorer?data_source_id={{.DatasourceId}}&data_source_name=prometheus&mode=graph&prom_ql={{.PromQl|escape}})`,
[事件详情]({{$domain}}/alert-his-events/{{.Id}})|[屏蔽1小时]({{$domain}}/alert-mutes/add?__event_id={{.Id}}){{if eq .Cate "prometheus"}}|[查看曲线]({{$domain}}/metric/explorer?__event_id={{.Id}}&mode=graph}}){{end}}`,
Lark: `级别状态: S{{.Severity}} {{if .IsRecovered}}Recovered{{else}}Triggered{{end}}
规则名称: {{.RuleName}}{{if .RuleNote}}
规则备注: {{.RuleNote}}{{end}}
@@ -516,7 +516,7 @@ var TplMap = map[string]string{
发送时间: {{timestamp}}
{{$domain := "http://请联系管理员修改通知模板将域名替换为实际的域名" }}
事件详情: {{$domain}}/alert-his-events/{{.Id}}
屏蔽1小时: {{$domain}}/alert-mutes/add?busiGroup={{.GroupId}}&cate={{.Cate}}&datasource_ids={{.DatasourceId}}&prod={{.RuleProd}}{{range $key, $value := .TagsMap}}&tags={{$key}}%3D{{$value}}{{end}}`,
屏蔽1小时: {{$domain}}/alert-mutes/add?__event_id={{.Id}}`,
LarkCard: `{{ if .IsRecovered }}
{{- if ne .Cate "host"}}
**告警集群:** {{.Cluster}}{{end}}
@@ -537,5 +537,5 @@ var TplMap = map[string]string{
{{if .RuleNote }}**告警描述:** **{{.RuleNote}}**{{end}}
{{- end -}}
{{$domain := "http://请联系管理员修改通知模板将域名替换为实际的域名" }}
[事件详情]({{$domain}}/alert-his-events/{{.Id}})|[屏蔽1小时]({{$domain}}/alert-mutes/add?busiGroup={{.GroupId}}&cate={{.Cate}}&datasource_ids={{.DatasourceId}}&prod={{.RuleProd}}{{range $key, $value := .TagsMap}}&tags={{$key}}%3D{{$value}}{{end}})|[查看曲线]({{$domain}}/metric/explorer?data_source_id={{.DatasourceId}}&data_source_name=prometheus&mode=graph&prom_ql={{.PromQl|escape}})`,
[事件详情]({{$domain}}/alert-his-events/{{.Id}})|[屏蔽1小时]({{$domain}}/alert-mutes/add?__event_id={{.Id}}){{if eq .Cate "prometheus"}}|[查看曲线]({{$domain}}/metric/explorer?__event_id={{.Id}}&mode=graph}}){{end}}`,
}

View File

@@ -27,8 +27,13 @@ func convertInterval(interval string) int {
duration, err := time.ParseDuration(interval)
if err != nil {
logger.Errorf("Error parsing interval `%s`, err: %v", interval, err)
return 0
return 60
}
if duration.Seconds() == 0 {
duration = 60 * time.Second
}
return int(duration.Seconds())
}
@@ -44,11 +49,11 @@ func ConvertAlert(rule PromRule, interval string, datasouceQueries []DatasourceQ
appendTags = append(appendTags, fmt.Sprintf("%s=%s", strings.ReplaceAll(k, " ", ""), strings.ReplaceAll(v, " ", "")))
} else {
switch v {
case "critical":
case "critical", "Critical", "CRITICAL", "error", "Error", "ERROR", "fatal", "Fatal", "FATAL", "page", "Page", "PAGE", "sev1", "SEV1", "Severity1", "severity1", "SEVERITY1":
severity = 1
case "warning":
case "warning", "Warning", "WARNING", "warn", "Warn", "WARN", "sev2", "SEV2", "Severity2", "severity2", "SEVERITY2":
severity = 2
case "info":
case "info", "Info", "INFO", "notice", "Notice", "NOTICE", "sev3", "SEV3", "Severity3", "severity3", "SEVERITY3":
severity = 3
}
ruleName += "-" + v
@@ -57,17 +62,12 @@ func ConvertAlert(rule PromRule, interval string, datasouceQueries []DatasourceQ
}
ar := AlertRule{
Name: rule.Alert,
Severity: severity,
Disabled: disabled,
PromForDuration: convertInterval(rule.For),
PromQl: rule.Expr,
PromEvalInterval: convertInterval(interval),
EnableStimeJSON: "00:00",
EnableEtimeJSON: "23:59",
EnableDaysOfWeekJSON: []string{
"1", "2", "3", "4", "5", "6", "0",
},
Name: rule.Alert,
Severity: severity,
Disabled: disabled,
PromForDuration: convertInterval(rule.For),
PromQl: rule.Expr,
CronPattern: fmt.Sprintf("@every %ds", convertInterval(interval)),
EnableInBG: AlertRuleEnableInGlobalBG,
NotifyRecovered: AlertRuleNotifyRecovered,
NotifyRepeatStep: AlertRuleNotifyRepeatStep60Min,
@@ -75,6 +75,8 @@ func ConvertAlert(rule PromRule, interval string, datasouceQueries []DatasourceQ
AnnotationsJSON: annotations,
AppendTagsJSON: appendTags,
DatasourceQueries: datasouceQueries,
NotifyVersion: 1,
NotifyRuleIds: []int64{},
}
return ar
@@ -86,7 +88,7 @@ func DealPromGroup(promRule []PromRuleGroup, dataSourceQueries []DatasourceQuery
for _, group := range promRule {
interval := group.Interval
if interval == "" {
interval = "15s"
interval = "60s"
}
for _, rule := range group.Rules {
if rule.Alert != "" {

View File

@@ -12,8 +12,8 @@ import (
"github.com/pkg/errors"
"github.com/toolkits/pkg/container/set"
"github.com/toolkits/pkg/slice"
"github.com/toolkits/pkg/logger"
"github.com/toolkits/pkg/slice"
"gorm.io/gorm"
)
@@ -124,7 +124,7 @@ func TargetStatistics(ctx *ctx.Context) (*Statistics, error) {
func TargetDel(ctx *ctx.Context, idents []string, deleteHook TargetDeleteHookFunc) error {
if len(idents) == 0 {
panic("idents empty")
return errors.New("idents cannot be empty")
}
return DB(ctx).Transaction(func(tx *gorm.DB) error {
@@ -228,6 +228,9 @@ func TargetTotal(ctx *ctx.Context, options ...BuildTargetWhereOption) (int64, er
func TargetGets(ctx *ctx.Context, limit, offset int, order string, desc bool, options ...BuildTargetWhereOption) ([]*Target, error) {
var lst []*Target
order = validateOrderField(order, "ident")
if desc {
order += " desc"
} else {
@@ -661,7 +664,7 @@ func CanMigrateBg(ctx *ctx.Context) bool {
return false
}
if cnt == 0 {
log.Println("target table is empty, skip migration.")
logger.Debug("target table is empty, skip migration.")
return false
}

View File

@@ -3,6 +3,7 @@ package models
import (
"fmt"
"os"
"regexp"
"strconv"
"strings"
"time"
@@ -10,6 +11,7 @@ import (
"github.com/ccfos/nightingale/v6/pkg/ctx"
"github.com/ccfos/nightingale/v6/pkg/ormx"
"github.com/ccfos/nightingale/v6/pkg/poster"
"github.com/ccfos/nightingale/v6/pkg/secu"
"github.com/ccfos/nightingale/v6/storage"
"github.com/redis/go-redis/v9"
@@ -26,7 +28,6 @@ const (
Wecom = "wecom"
Feishu = "feishu"
FeishuCard = "feishucard"
FeishuApp = "feishuapp"
Discord = "discord"
MattermostWebhook = "mattermostwebhook"
MattermostBot = "mattermostbot"
@@ -163,6 +164,10 @@ func (u *User) Verify() error {
return errors.New("Email invalid")
}
if u.Phone != "" {
return u.EncryptPhone()
}
return nil
}
@@ -322,6 +327,7 @@ func UserGet(ctx *ctx.Context, where string, args ...interface{}) (*User, error)
lst[0].RolesLst = strings.Fields(lst[0].Roles)
lst[0].Admin = lst[0].IsAdmin()
lst[0].DecryptPhone() // 解密手机号
return lst[0], nil
}
@@ -336,6 +342,7 @@ func UsersGet(ctx *ctx.Context, where string, args ...interface{}) ([]*User, err
for _, user := range lst {
user.RolesLst = strings.Fields(user.Roles)
user.Admin = user.IsAdmin()
user.DecryptPhone() // 解密手机号
}
return lst, nil
@@ -362,6 +369,12 @@ func UserGetById(ctx *ctx.Context, id int64) (*User, error) {
return UserGet(ctx, "id=?", id)
}
func CountAdminUsers(ctx *ctx.Context) (int64, error) {
var count int64
err := DB(ctx).Model(&User{}).Where("roles LIKE ?", "%"+AdminRole+"%").Count(&count).Error
return count, err
}
func UsersGetByGroupIds(ctx *ctx.Context, groupIds []int64) ([]User, error) {
if len(groupIds) == 0 {
return nil, nil
@@ -549,6 +562,47 @@ func UserTotal(ctx *ctx.Context, query string, stime, etime int64) (num int64, e
return num, nil
}
var (
// 预编译正则表达式,避免重复编译
whitespaceRegex = regexp.MustCompile(`\s+`)
validOrderRegex = regexp.MustCompile(`^[a-zA-Z_][a-zA-Z0-9_]*(\.[a-zA-Z_][a-zA-Z0-9_]*)?$`)
)
func validateOrderField(order string, defaultField string) string {
// 空值检查
if order == "" {
return defaultField
}
// 长度检查
if len(order) > 64 {
logger.Warningf("SQL injection attempt detected: order field too long (%d chars)", len(order))
return defaultField
}
// 移除所有空白字符
order = whitespaceRegex.ReplaceAllString(order, "")
if order == "" {
return defaultField
}
// 检查危险字符
orderLower := strings.ToLower(order)
if strings.ContainsAny(order, "();,'\"` --/*\\=+-*/><|&^~") ||
strings.Contains(orderLower, "0x") || strings.Contains(orderLower, "0b") {
logger.Warningf("SQL injection attempt detected: contains dangerous characters")
return defaultField
}
// 使用正则表达式验证格式:只允许字母开头的字段名,可选择性包含表名
if !validOrderRegex.MatchString(order) {
logger.Warningf("SQL injection attempt detected: invalid order field format")
return defaultField
}
return order
}
func UserGets(ctx *ctx.Context, query string, limit, offset int, stime, etime int64,
order string, desc bool, usernames, phones, emails []string) ([]User, error) {
@@ -558,6 +612,8 @@ func UserGets(ctx *ctx.Context, query string, limit, offset int, stime, etime in
session = session.Where("last_active_time between ? and ?", stime, etime)
}
order = validateOrderField(order, "username")
if desc {
order = order + " desc"
} else {
@@ -593,6 +649,7 @@ func UserGets(ctx *ctx.Context, query string, limit, offset int, stime, etime in
users[i].RolesLst = strings.Fields(users[i].Roles)
users[i].Admin = users[i].IsAdmin()
users[i].Password = ""
users[i].DecryptPhone() // 解密手机号
// query for user group information
var userGroupIDs []int64
@@ -634,6 +691,7 @@ func UserGetAll(ctx *ctx.Context) ([]*User, error) {
for i := 0; i < len(lst); i++ {
lst[i].RolesLst = strings.Fields(lst[i].Roles)
lst[i].Admin = lst[i].IsAdmin()
lst[i].DecryptPhone() // 解密手机号
}
}
return lst, err
@@ -650,6 +708,7 @@ func UserGetsByIds(ctx *ctx.Context, ids []int64) ([]User, error) {
for i := 0; i < len(lst); i++ {
lst[i].RolesLst = strings.Fields(lst[i].Roles)
lst[i].Admin = lst[i].IsAdmin()
lst[i].DecryptPhone() // 解密手机号
}
}
@@ -971,3 +1030,60 @@ func (u *User) AddUserAndGroups(ctx *ctx.Context, coverTeams bool) error {
return nil
}
func (u *User) EncryptPhone() (err error) {
// 从缓存获取手机号加密配置
enabled, publicKey, _, _, loaded := GetPhoneEncryptionConfigFromCache()
if !loaded {
// 如果缓存未加载,记录日志但不阻止保存
logger.Infof("Phone encryption config cache not loaded, user: %s", u.Username)
return nil
}
// 检查是否启用了手机号加密
if enabled && u.Phone != "" {
// 检查手机号是否已经加密(避免重复加密)
if len(u.Phone) > 4 && u.Phone[:4] == "enc:" {
// 已经加密,跳过
return nil
}
encryptedPhone, err := secu.EncryptValue(u.Phone, publicKey)
if err != nil {
logger.Warningf("Failed to encrypt phone: %v, user: %s", err, u.Username)
return nil
}
u.Phone = encryptedPhone
}
return nil
}
// DecryptPhone 解密用户手机号(如果已加密)
func (u *User) DecryptPhone() {
if u.Phone == "" {
return
}
// 检查手机号是否是加密格式(有 "enc:" 前缀)
if len(u.Phone) <= 4 || u.Phone[:4] != "enc:" {
// 不是加密格式,不需要解密
return
}
// 从缓存获取手机号加密配置
enabled, _, privateKey, password, loaded := GetPhoneEncryptionConfigFromCache()
if !loaded || !enabled {
// 如果缓存未加载或未启用加密,不解密
return
}
// 对手机号进行解密
decryptedPhone, err := secu.Decrypt(u.Phone, privateKey, password)
if err != nil {
// 如果解密失败,记录错误但保持原样
logger.Warningf("Failed to decrypt phone for user %s: %v", u.Username, err)
return
}
u.Phone = decryptedPhone
}

View File

@@ -12,15 +12,16 @@ import (
)
type UserGroup struct {
Id int64 `json:"id" gorm:"primaryKey"`
Name string `json:"name"`
Note string `json:"note"`
CreateAt int64 `json:"create_at"`
CreateBy string `json:"create_by"`
UpdateAt int64 `json:"update_at"`
UpdateBy string `json:"update_by"`
UserIds []int64 `json:"-" gorm:"-"`
Users []User `json:"users" gorm:"-"`
Id int64 `json:"id" gorm:"primaryKey"`
Name string `json:"name"`
Note string `json:"note"`
CreateAt int64 `json:"create_at"`
CreateBy string `json:"create_by"`
UpdateAt int64 `json:"update_at"`
UpdateBy string `json:"update_by"`
UserIds []int64 `json:"-" gorm:"-"`
Users []User `json:"users" gorm:"-"`
BusiGroupsRes []*BusiGroupRes `json:"busi_groups" gorm:"-"`
}
func (ug *UserGroup) TableName() string {

View File

@@ -8,11 +8,11 @@ import (
type UserToken struct {
Id int64 `json:"id" gorm:"primaryKey"`
Username string `json:"username" gorm:"type:varchar(255) not null default ''"`
TokenName string `json:"token_name" gorm:"type:varchar(255) not null default ''"`
Token string `json:"token" gorm:"type:varchar(255) not null default ''"`
CreateAt int64 `json:"create_at" gorm:"type:bigint not null default 0"`
LastUsed int64 `json:"last_used" gorm:"type:bigint not null default 0"`
Username string `json:"username" gorm:"type:varchar(255); not null; default ''"`
TokenName string `json:"token_name" gorm:"type:varchar(255); not null; default ''"`
Token string `json:"token" gorm:"type:varchar(255); not null; default ''"`
CreateAt int64 `json:"create_at" gorm:"type:bigint; not null; default 0"`
LastUsed int64 `json:"last_used" gorm:"type:bigint; not null; default 0"`
}
func (UserToken) TableName() string {

View File

@@ -89,7 +89,7 @@ func diffMap(m1, m2 map[int64]*models.User) []models.User {
func updateUser(appKey string, m1, m2 map[int64]*models.User) {
for i := range m1 {
if _, ok := m2[i]; ok {
if m1[i].Email != m2[i].Email || m1[i].Phone != m2[i].Phone || m1[i].Username != m2[i].Username {
if m1[i].Email != m2[i].Email || !PhoneIsSame(m1[i].Phone, m2[i].Phone) || m1[i].Username != m2[i].Username {
var flashdutyUser User
flashdutyUser = User{
@@ -110,6 +110,30 @@ func updateUser(appKey string, m1, m2 map[int64]*models.User) {
}
}
func PhoneIsSame(phone1, phone2 string) bool {
// 兼容不同国家/地区前缀,例如 +86、+1、+44 等,以及包含空格或短横线的格式
normalize := func(p string) string {
p = strings.TrimSpace(p)
p = strings.ReplaceAll(p, " ", "")
p = strings.ReplaceAll(p, "-", "")
p = strings.TrimPrefix(p, "+")
return p
}
p1 := normalize(phone1)
p2 := normalize(phone2)
if p1 == p2 {
return true
}
// 如果长度相差不超过 3 且较长的以较短的结尾,则认为是相同号码(忽略最多 3 位国家区号差异)
if len(p1) > len(p2) {
return len(p1)-len(p2) <= 3 && strings.HasSuffix(p1, p2)
}
return len(p2)-len(p1) <= 3 && strings.HasSuffix(p2, p1)
}
type User struct {
Email string `json:"email,omitempty"`
Phone string `json:"phone,omitempty"`

View File

@@ -0,0 +1,67 @@
package flashduty
import "testing"
func TestPhoneIsSame(t *testing.T) {
tests := []struct {
name string
phone1 string
phone2 string
same bool
}{
{
name: "blank",
phone1: "",
phone2: "",
same: true,
},
{
name: "China +86 prefix",
phone1: "+8613812345678",
phone2: "13812345678",
same: true,
},
{
name: "China +86 with spaces and hyphens",
phone1: "+86 138-1234-5678",
phone2: "13812345678",
same: true,
},
{
name: "USA +1 prefix",
phone1: "+1 234-567-8900",
phone2: "2345678900",
same: true,
},
{
name: "UK +44 prefix",
phone1: "+442078765432",
phone2: "2078765432",
same: true,
},
{
name: "India +91 prefix",
phone1: "+919876543210",
phone2: "9876543210",
same: true,
},
{
name: "Germany +49 prefix",
phone1: "+4915123456789",
phone2: "15123456789",
same: true,
},
{
name: "Different numbers",
phone1: "+8613812345678",
phone2: "13812345679",
same: false,
},
}
for _, tt := range tests {
if got := PhoneIsSame(tt.phone1, tt.phone2); got != tt.same {
t.Errorf("%s: expected %v, got %v", tt.name, tt.same, got)
}
}
}

View File

@@ -71,6 +71,7 @@ var I18N = `{
"no notify groups selected": "未选择通知组",
"all users missing notify channel configurations: %v": "所有用户缺少通知渠道配置: %v",
"event match subscribe and notify settings ok": "事件匹配订阅规则,通知设置正常",
"/loki suffix is miss, please add /loki to the url: %s": "缺少/loki后缀请在URL中添加/loki%s",
"event time not match time filter": "事件时间不匹配时间过滤器",
"event severity not match severity filter": "事件等级不匹配等级过滤器",
@@ -78,6 +79,11 @@ var I18N = `{
"event attributes not match attributes filter": "事件属性不匹配属性过滤器",
"failed to parse tag filter: %v": "解析标签过滤器失败: %v",
"event is dropped": "事件已被丢弃,不会进行通知",
"drop event success": "丢弃事件成功",
"drop event failed": "丢弃事件失败",
"callback success": "回调成功",
"Infrastructure": "基础设施",
"Host - View": "机器 - 查看",
"Host - Modify": "机器 - 修改",
@@ -186,6 +192,15 @@ var I18N = `{
"View Alerting Engines": "查看告警引擎列表",
"View Product Version": "查看产品版本",
"Some alert rules still in the BusiGroup": "业务组中仍有告警规则",
"Some alert mutes still in the BusiGroup": "业务组中仍有屏蔽规则",
"Some alert subscribes still in the BusiGroup": "业务组中仍有订阅规则",
"Some Board still in the BusiGroup": "业务组中仍有仪表盘",
"Some targets still in the BusiGroup": "业务组中仍有监控对象",
"Some recording rules still in the BusiGroup": "业务组中仍有记录规则",
"Some recovery scripts still in the BusiGroup": "业务组中仍有自愈脚本",
"Some target busigroups still in the BusiGroup": "业务组中仍有监控对象",
"---------zh_CN--------": "---------zh_CN--------"
},
"zh_HK": {
@@ -261,13 +276,18 @@ var I18N = `{
"no notify groups selected": "未選擇通知組",
"all users missing notify channel configurations: %v": "所有用戶缺少通知渠道配置: %v",
"event match subscribe and notify settings ok": "事件匹配訂閱規則,通知設置正常",
"/loki suffix is miss, please add /loki to the url: %s": "缺少/loki後綴請在URL中添加/loki%s",
"event time not match time filter": "事件時間不匹配時間過濾器",
"event severity not match severity filter": "事件等級不匹配等級過濾器",
"event tag not match tag filter": "事件標籤不匹配標籤過濾器",
"event attributes not match attributes filter": "事件屬性不匹配屬性過濾器",
"failed to parse tag filter: %v": "解析標籤過濾器失敗: %v",
"event is dropped": "事件已被丟棄,不會進行通知",
"drop event success": "丟棄事件成功",
"drop event failed": "丟棄事件失敗",
"callback success": "回調成功",
"Infrastructure": "基礎設施",
"Host - View": "機器 - 查看",
"Host - Modify": "機器 - 修改",
@@ -376,6 +396,15 @@ var I18N = `{
"View Alerting Engines": "查看告警引擎列表",
"View Product Version": "查看產品版本",
"Some alert rules still in the BusiGroup": "業務組中仍有告警規則",
"Some alert mutes still in the BusiGroup": "業務組中仍有屏蔽規則",
"Some alert subscribes still in the BusiGroup": "業務組中仍有訂閱規則",
"Some Board still in the BusiGroup": "業務組中仍有儀表板",
"Some targets still in the BusiGroup": "業務組中仍有監控對象",
"Some recording rules still in the BusiGroup": "業務組中仍有記錄規則",
"Some recovery scripts still in the BusiGroup": "業務組中仍有自愈腳本",
"Some target busigroups still in the BusiGroup": "業務組中仍有監控對象",
"---------zh_HK--------": "---------zh_HK--------"
},
"ja_JP": {
@@ -448,13 +477,18 @@ var I18N = `{
"no notify groups selected": "通知グループが選択されていません",
"all users missing notify channel configurations: %v": "すべてのユーザーに通知チャンネル設定がありません: %v",
"event match subscribe and notify settings ok": "イベントがサブスクライブルールに一致し、通知設定が正常です",
"/loki suffix is miss, please add /loki to the url: %s": "/lokiサフィックスがありません。URLに/lokiを追加してください: %s",
"event time not match time filter": "イベント時間が時間フィルタと一致しません",
"event severity not match severity filter": "イベント等級が等級フィルタと一致しません",
"event tag not match tag filter": "イベントタグがタグフィルタと一致しません",
"event attributes not match attributes filter": "イベント属性が属性フィルタと一致しません",
"failed to parse tag filter: %v": "タグフィルタの解析に失敗しました: %v",
"event is dropped": "イベントが破棄されました,通知は行われません",
"drop event success": "イベント破棄成功",
"drop event failed": "イベント破棄失敗",
"callback success": "コールバック成功",
"Infrastructure": "インフラストラクチャ",
"Host - View": "機器 - 閲覧",
"Host - Modify": "機器 - 修正",
@@ -563,6 +597,15 @@ var I18N = `{
"View Alerting Engines": "アラートエンジンの表示",
"View Product Version": "製品のバージョンを見る",
"Some alert rules still in the BusiGroup": "ビジネスグループにまだアラートルールがあります",
"Some alert mutes still in the BusiGroup": "ビジネスグループにまだミュートルールがあります",
"Some alert subscribes still in the BusiGroup": "ビジネスグループにまだサブスクライブルールがあります",
"Some Board still in the BusiGroup": "ビジネスグループにまだダッシュボードがあります",
"Some targets still in the BusiGroup": "ビジネスグループにまだ監視対象があります",
"Some recording rules still in the BusiGroup": "ビジネスグループにまだ記録ルールがあります",
"Some recovery scripts still in the BusiGroup": "ビジネスグループにまだ自己回復スクリプトがあります",
"Some target busigroups still in the BusiGroup": "ビジネスグループにまだ監視対象があります",
"---------ja_JP--------": "---------ja_JP--------"
},
"ru_RU": {
@@ -635,13 +678,18 @@ var I18N = `{
"no notify groups selected": "Группы уведомлений не выбраны",
"all users missing notify channel configurations: %v": "У всех пользователей отсутствуют настройки каналов уведомлений: %v",
"event match subscribe and notify settings ok": "Событие соответствует правилу подписки, настройки уведомлений в порядке",
"/loki suffix is miss, please add /loki to the url: %s": "Отсутствует суффикс /loki, пожалуйста, добавьте /loki к URL: %s",
"event time not match time filter": "Время события не соответствует временному фильтру",
"event severity not match severity filter": "Уровень события не соответствует фильтру уровня",
"event tag not match tag filter": "Теги события не соответствуют фильтру тегов",
"event attributes not match attributes filter": "Атрибуты события не соответствуют фильтру атрибутов",
"failed to parse tag filter: %v": "Не удалось разобрать фильтр тегов: %v",
"event is dropped": "Событие отброшено, уведомление не будет отправлено",
"drop event success": "Событие успешно отброшено",
"drop event failed": "Не удалось отбросить событие",
"callback success": "Обратный вызов успешен",
"Infrastructure": "Инфраструктура",
"Host - View": "Хост - Просмотр",
"Host - Modify": "Хост - Изменить",
@@ -750,6 +798,15 @@ var I18N = `{
"View Alerting Engines": "Просмотр списка алертинг-инженеров",
"View Product Version": "Просмотр версии продукта",
"Some alert rules still in the BusiGroup": "В бизнес-группе еще есть правила оповещений",
"Some alert mutes still in the BusiGroup": "В бизнес-группе еще есть правила отключения оповещений",
"Some alert subscribes still in the BusiGroup": "В бизнес-группе еще есть правила подписки",
"Some Board still in the BusiGroup": "В бизнес-группе еще есть панели мониторинга",
"Some targets still in the BusiGroup": "В бизнес-группе еще есть объекты мониторинга",
"Some recording rules still in the BusiGroup": "В бизнес-группе еще есть правила записи",
"Some recovery scripts still in the BusiGroup": "В бизнес-группе еще есть скрипты самоисцеления",
"Some target busigroups still in the BusiGroup": "В бизнес-группе еще есть объекты мониторинга",
"---------ru_RU--------": "---------ru_RU--------"
}
}`

View File

@@ -5,6 +5,7 @@ import (
"crypto/tls"
"fmt"
"net/http"
"strings"
"sync"
"time"
@@ -140,14 +141,22 @@ func (s *SsoClient) GetDisplayName() string {
return s.DisplayName
}
func (s *SsoClient) GetSsoLogoutAddr() string {
func (s *SsoClient) GetSsoLogoutAddr(idToken string) string {
s.RLock()
defer s.RUnlock()
if !s.Enable {
return ""
}
return s.SsoLogoutAddr
return s.replaceIdTokenTemplate(s.SsoLogoutAddr, idToken)
}
// replaceIdTokenTemplate 替换登出 URL 中的 {{$__id_token__}} 模板变量
func (s *SsoClient) replaceIdTokenTemplate(logoutAddr, idToken string) string {
if idToken == "" {
return logoutAddr
}
return strings.ReplaceAll(logoutAddr, "{{$__id_token__}}", idToken)
}
func wrapStateKey(key string) string {
@@ -201,6 +210,7 @@ type CallbackOutput struct {
Redirect string `json:"redirect"`
Msg string `json:"msg"`
AccessToken string `json:"accessToken"`
IdToken string `json:"idToken"`
Username string `json:"username"`
Nickname string `json:"nickname"`
Phone string `yaml:"phone"`
@@ -245,6 +255,7 @@ func (s *SsoClient) exchangeUser(code string) (*CallbackOutput, error) {
output := &CallbackOutput{
AccessToken: oauth2Token.AccessToken,
IdToken: rawIDToken,
Username: extractClaim(data, s.Attributes.Username),
Nickname: extractClaim(data, s.Attributes.Nickname),
Phone: extractClaim(data, s.Attributes.Phone),

View File

@@ -12,6 +12,11 @@ import (
)
func Decrypt(cipherText string, privateKeyByte []byte, password string) (decrypted string, err error) {
// 移除 "enc:" 前缀(如果存在)
if len(cipherText) > 4 && cipherText[:4] == "enc:" {
cipherText = cipherText[4:]
}
decodeCipher, _ := base64.StdEncoding.DecodeString(cipherText)
//pem解码
block, _ := pem.Decode(privateKeyByte)
@@ -53,7 +58,8 @@ func EncryptValue(value string, publicKeyData []byte) (string, error) {
if err != nil {
return "", fmt.Errorf("failed to encrypt value: %v", err)
}
return BASE64StdEncode(ciphertext), nil
// 添加 "enc:" 前缀标记这是加密数据
return "enc:" + BASE64StdEncode(ciphertext), nil
}
func GenerateRsaKeyPair(password string) (privateByte, publicByte []byte, err error) {

View File

@@ -739,3 +739,25 @@ func JsonMarshal(v interface{}) template.HTML {
}
return template.HTML(string(json))
}
func MapDifference(firstMap, secondMap map[string]string) (map[string]string, error) {
// 创建结果 map
result := make(map[string]string)
// 遍历第一个 map将不在第二个 map 中的键值对添加到结果中
for key, value := range firstMap {
if _, exists := secondMap[key]; !exists {
result[key] = value
}
}
return result, nil
}
func TagsMapToStr(m map[string]string) string {
strs := []string{}
for key, value := range m {
strs = append(strs, key+"="+value)
}
sort.Strings(strs)
return strings.Join(strs, ",")
}

View File

@@ -62,6 +62,8 @@ var TemplateFuncMap = template.FuncMap{
"batchContactsAtsInFeishuEmail": BatchContactsAtsInFeishuEmail,
"batchContactsAtsInFeishuId": BatchContactsAtsInFeishuId,
"jsonMarshal": JsonMarshal,
"mapDifference": MapDifference,
"tagsMapToStr": TagsMapToStr,
}
// NewTemplateFuncMap copy on write for TemplateFuncMap

View File

@@ -71,6 +71,24 @@ func ValueFormatter(unit string, decimals int, value float64) FormattedValue {
}
}
// Handle positive and negative infinity
if math.IsInf(value, 1) {
return FormattedValue{
Value: 9999999999,
Unit: "",
Text: "+Inf",
Stat: 9999999999,
}
}
if math.IsInf(value, -1) {
return FormattedValue{
Value: -9999999999,
Unit: "",
Text: "-Inf",
Stat: -9999999999,
}
}
// 处理时间单位
switch unit {
case "none":

View File

@@ -52,6 +52,10 @@ func (po *PromOption) Equal(target PromOption) bool {
return false
}
if po.InsecureSkipVerify != target.InsecureSkipVerify {
return false
}
if len(po.Headers) != len(target.Headers) {
return false
}

View File

@@ -117,7 +117,7 @@ func (pc *PromClientMap) loadFromDatabase() {
continue
}
logger.Info("setClientFromPromOption success: ", dsId)
logger.Infof("setClientFromPromOption success, datasourceId: %d", dsId)
PromOptions.Set(dsId, po)
continue
}

View File

@@ -13,6 +13,7 @@ import (
"github.com/ccfos/nightingale/v6/pushgw/pstat"
"github.com/ccfos/nightingale/v6/storage"
"github.com/toolkits/pkg/concurrent/semaphore"
"github.com/toolkits/pkg/logger"
"github.com/toolkits/pkg/slice"
)
@@ -23,6 +24,7 @@ type Set struct {
redis storage.Redis
ctx *ctx.Context
configs pconf.Pushgw
sema *semaphore.Semaphore
}
func New(ctx *ctx.Context, redis storage.Redis, configs pconf.Pushgw) *Set {
@@ -32,6 +34,7 @@ func New(ctx *ctx.Context, redis storage.Redis, configs pconf.Pushgw) *Set {
ctx: ctx,
configs: configs,
}
set.sema = semaphore.NewSemaphore(configs.UpdateTargetByUrlConcurrency)
set.Init()
return set
@@ -113,8 +116,26 @@ func (s *Set) UpdateTargets(lst []string, now int64) error {
Lst: lst,
Now: now,
}
err := poster.PostByUrls(s.ctx, "/v1/n9e/target-update", t)
return err
if !s.sema.TryAcquire() {
logger.Warningf("update_targets: update target by url concurrency limit, skip update target: %v", lst)
return nil // 达到并发上限,放弃请求,只是页面上的机器时间不更新,不影响机器失联告警,降级处理下
}
go func() {
defer s.sema.Release()
// 修改为异步发送,防止机器太多,每个请求耗时比较长导致机器心跳时间更新不及时
err := poster.PostByUrls(s.ctx, "/v1/n9e/target-update", t)
if err != nil {
logger.Errorf("failed to post target update: %v", err)
}
}()
return nil
}
if s.configs.UpdateDBTargetTimestampDisable {
// 如果 mysql 压力太大,关闭更新 db 的操作
return nil
}
// there are some idents not found in db, so insert them
@@ -133,16 +154,34 @@ func (s *Set) UpdateTargets(lst []string, now int64) error {
}
// 从批量更新一批机器的时间戳改成逐台更新是为了避免批量更新时mysql的锁竞争问题
for i := 0; i < len(exists); i++ {
err = s.ctx.DB.Exec("UPDATE target SET update_at = ? WHERE ident = ?", now, exists[i]).Error
if err != nil {
logger.Error("upsert_target: failed to update target:", exists[i], "error:", err)
start := time.Now()
duration := time.Since(start).Seconds()
if len(exists) > 0 {
sema := semaphore.NewSemaphore(s.configs.UpdateDBTargetConcurrency)
wg := sync.WaitGroup{}
for i := 0; i < len(exists); i++ {
sema.Acquire()
wg.Add(1)
go func(ident string) {
defer sema.Release()
defer wg.Done()
s.updateDBTargetTs(ident, now)
}(exists[i])
}
wg.Wait()
}
pstat.DBOperationLatency.WithLabelValues("update_targets_ts").Observe(duration)
return nil
}
func (s *Set) updateDBTargetTs(ident string, now int64) {
err := s.ctx.DB.Exec("UPDATE target SET update_at = ? WHERE ident = ?", now, ident).Error
if err != nil {
logger.Error("update_target: failed to update target:", ident, "error:", err)
}
}
func (s *Set) updateTargetsUpdateTs(lst []string, now int64, redis storage.Redis) error {
if redis == nil {
return fmt.Errorf("redis is nil")

View File

@@ -18,6 +18,10 @@ type Pushgw struct {
UpdateTargetRetryIntervalMills int64
UpdateTargetTimeoutMills int64
UpdateTargetBatchSize int
UpdateDBTargetConcurrency int
UpdateDBTargetTimestampDisable bool
PushConcurrency int
UpdateTargetByUrlConcurrency int
BusiGroupLabelKey string
IdentMetrics []string
@@ -49,6 +53,7 @@ type WriterOptions struct {
Url string
BasicAuthUser string
BasicAuthPass string
AsyncWrite bool // 如果有多个转发 writer对应不重要的 writer可以设置为 true异步转发提供转发效率
Timeout int64
DialTimeout int64
@@ -124,6 +129,18 @@ func (p *Pushgw) PreCheck() {
p.UpdateTargetBatchSize = 20
}
if p.UpdateDBTargetConcurrency <= 0 {
p.UpdateDBTargetConcurrency = 16
}
if p.PushConcurrency <= 0 {
p.PushConcurrency = 16
}
if p.UpdateTargetByUrlConcurrency <= 0 {
p.UpdateTargetByUrlConcurrency = 10
}
if p.BusiGroupLabelKey == "" {
p.BusiGroupLabelKey = "busigroup"
}

View File

@@ -105,6 +105,17 @@ var (
},
[]string{"operation", "status"},
)
DBOperationLatency = prometheus.NewHistogramVec(
prometheus.HistogramOpts{
Namespace: namespace,
Subsystem: subsystem,
Name: "db_operation_latency_seconds",
Help: "Histogram of latencies for DB operations",
Buckets: []float64{.005, .01, .025, .05, .1, .25, .5, 1, 2.5, 5},
},
[]string{"operation"},
)
)
func init() {
@@ -121,5 +132,6 @@ func init() {
GaugeSampleQueueSize,
CounterPushQueueOverLimitTotal,
RedisOperationLatency,
DBOperationLatency,
)
}

View File

@@ -1,27 +0,0 @@
package json
import (
"math"
"unsafe"
jsoniter "github.com/json-iterator/go"
)
func init() {
// 为了处理prom数据中的NaN值
jsoniter.RegisterTypeEncoderFunc("float64", func(ptr unsafe.Pointer, stream *jsoniter.Stream) {
f := *(*float64)(ptr)
if math.IsNaN(f) {
stream.WriteString("null")
} else {
stream.WriteFloat64(f)
}
}, func(ptr unsafe.Pointer) bool {
return true
})
}
func MarshalWithCustomFloat(items interface{}) ([]byte, error) {
var json = jsoniter.ConfigCompatibleWithStandardLibrary
return json.Marshal(items)
}

View File

@@ -3,7 +3,9 @@ package writer
import (
"bytes"
"context"
"encoding/json"
"fmt"
"math"
"net/http"
"strings"
"sync"
@@ -15,7 +17,6 @@ import (
"github.com/ccfos/nightingale/v6/pushgw/kafka"
"github.com/ccfos/nightingale/v6/pushgw/pconf"
"github.com/ccfos/nightingale/v6/pushgw/pstat"
"github.com/ccfos/nightingale/v6/pushgw/writer/json"
"github.com/golang/protobuf/proto"
"github.com/golang/snappy"
@@ -52,8 +53,49 @@ func beforeWrite(key string, items []prompb.TimeSeries, forceUseServerTS bool, e
return proto.Marshal(req)
}
// 如果是 json 格式,将 NaN 值的数据丢弃掉
return json.Marshal(filterNaNSamples(items))
}
return json.MarshalWithCustomFloat(items)
func filterNaNSamples(items []prompb.TimeSeries) []prompb.TimeSeries {
// 早期检查如果没有NaN值直接返回原始数据
hasNaN := false
for i := range items {
for j := range items[i].Samples {
if math.IsNaN(items[i].Samples[j].Value) {
hasNaN = true
break
}
}
if hasNaN {
break
}
}
if !hasNaN {
return items
}
// 有NaN值时进行过滤原地修改以减少内存分配
for i := range items {
samples := items[i].Samples
validCount := 0
// 原地过滤 samples避免额外的内存分配
for j := range samples {
if !math.IsNaN(samples[j].Value) {
if validCount != j {
samples[validCount] = samples[j]
}
validCount++
}
}
// 保留所有时间序列即使没有有效样本此时Samples为空
items[i].Samples = samples[:validCount]
}
return items
}
func (w WriterType) Write(key string, items []prompb.TimeSeries, headers ...map[string]string) {
@@ -139,7 +181,30 @@ func (w WriterType) Post(req []byte, headers ...map[string]string) error {
}
if resp.StatusCode >= 400 && resp.StatusCode < 500 {
logger.Warningf("push data with remote write:%s request got status code: %v, response body: %s", url, resp.StatusCode, string(body))
// 解码并解析 req 以便打印指标信息
decoded, decodeErr := snappy.Decode(nil, req)
metricsInfo := "failed to decode request"
if decodeErr == nil {
var writeReq prompb.WriteRequest
if unmarshalErr := proto.Unmarshal(decoded, &writeReq); unmarshalErr == nil {
metricsInfo = fmt.Sprintf("timeseries count: %d", len(writeReq.Timeseries))
logger.Warningf("push data with remote write:%s request got status code: %v, response body: %s, %s", url, resp.StatusCode, string(body), metricsInfo)
// 只打印前几条样本,避免日志泛滥
sampleCount := 5
if sampleCount > len(writeReq.Timeseries) {
sampleCount = len(writeReq.Timeseries)
}
for i := 0; i < sampleCount; i++ {
logger.Warningf("push data with remote write:%s timeseries: [%d] %s", url, i, writeReq.Timeseries[i].String())
}
} else {
metricsInfo = fmt.Sprintf("failed to unmarshal: %v", unmarshalErr)
logger.Warningf("push data with remote write:%s request got status code: %v, response body: %s, metrics: %s", url, resp.StatusCode, string(body), metricsInfo)
}
} else {
metricsInfo = fmt.Sprintf("failed to decode: %v", decodeErr)
logger.Warningf("push data with remote write:%s request got status code: %v, response body: %s, metrics: %s", url, resp.StatusCode, string(body), metricsInfo)
}
continue
}
@@ -157,10 +222,11 @@ func (w WriterType) Post(req []byte, headers ...map[string]string) error {
}
type WritersType struct {
pushgw pconf.Pushgw
backends map[string]Writer
queues map[string]*IdentQueue
AllQueueLen atomic.Value
pushgw pconf.Pushgw
backends map[string]Writer
queues map[string]*IdentQueue
AllQueueLen atomic.Value
PushConcurrency atomic.Int64
sync.RWMutex
}
@@ -210,6 +276,31 @@ func (ws *WritersType) Put(name string, writer Writer) {
ws.backends[name] = writer
}
// isCriticalBackend reports whether the backend registered under key must
// be written synchronously on the consumer goroutine. HTTP remote-write
// backends (WriterType) are critical unless they are configured for async
// writes; Kafka backends are always non-critical and written
// asynchronously. Unknown writer implementations are conservatively
// treated as critical.
func (ws *WritersType) isCriticalBackend(key string) bool {
	backend, exists := ws.backends[key]
	if !exists {
		return false
	}

	// Bind the concrete value in the type switch instead of re-asserting
	// inside the case (the original did backend.(WriterType) redundantly).
	switch b := backend.(type) {
	case WriterType:
		// HTTP remote-write: critical unless async writing is enabled.
		return !b.Opts.AsyncWrite
	case KafkaWriterType:
		// Kafka writes are decoupled from the main flow.
		return false
	default:
		// Unknown writer type: be conservative and keep it on the
		// synchronous path so its data is not silently deferred.
		logger.Warningf("Unknown backend type: %T, treating as critical", backend)
		return true
	}
}
func (ws *WritersType) CleanExpQueue() {
for {
ws.Lock()
@@ -278,12 +369,64 @@ func (ws *WritersType) StartConsumer(identQueue *IdentQueue) {
continue
}
for key := range ws.backends {
ws.backends[key].Write(key, series)
if ws.isCriticalBackend(key) {
ws.backends[key].Write(key, series)
} else {
// 像 kafka 这种 writer 使用异步写入,防止因为写入太慢影响主流程
ws.writeToNonCriticalBackend(key, series)
}
}
}
}
}
// writeToNonCriticalBackend hands the series to a non-critical backend
// (e.g. Kafka) on a freshly spawned goroutine so a slow writer cannot
// stall the consumer loop. In-flight goroutines are capped by
// pushgw.PushConcurrency; when the cap is exceeded the batch is dropped
// and counted in the write-error counter.
func (ws *WritersType) writeToNonCriticalBackend(key string, series []prompb.TimeSeries) {
	// Reserve a slot atomically; back out immediately if over the limit.
	inFlight := ws.PushConcurrency.Add(1)
	if inFlight > int64(ws.pushgw.PushConcurrency) {
		ws.PushConcurrency.Add(-1)
		logger.Warningf("push concurrency limit exceeded, current: %d, limit: %d, dropping %d series for backend: %s",
			inFlight-1, ws.pushgw.PushConcurrency, len(series), key)
		pstat.CounterWirteErrorTotal.WithLabelValues(key).Add(float64(len(series)))
		return
	}

	// Resolve the writer before launching the goroutine so the goroutine
	// never reads the backends map after this point. NOTE(review): the
	// original read ws.backends inside the goroutine while the map is
	// mutated under ws.Lock elsewhere (e.g. Put), widening the race
	// window; a missing key would also have panicked on a nil Writer.
	backend, ok := ws.backends[key]
	if !ok {
		ws.PushConcurrency.Add(-1)
		logger.Warningf("backend %s not found, dropping %d series", key, len(series))
		pstat.CounterWirteErrorTotal.WithLabelValues(key).Add(float64(len(series)))
		return
	}

	// Deep-copy the samples: the caller may reuse the series slice once
	// this function returns, while the goroutine is still writing.
	seriesCopy := ws.deepCopySeries(series)

	go func(backendKey string, w Writer, data []prompb.TimeSeries) {
		defer func() {
			ws.PushConcurrency.Add(-1)
			if r := recover(); r != nil {
				logger.Errorf("panic in non-critical backend %s: %v", backendKey, r)
			}
		}()
		w.Write(backendKey, data)
	}(key, backend, seriesCopy)
}
// deepCopySeries returns a copy of series whose Samples slices are
// detached from the input, so the copy can be handed to another goroutine
// while the caller reuses the originals. Only the Samples backing arrays
// are duplicated; Labels (and any other reference fields) are still
// shared with the input.
func (ws *WritersType) deepCopySeries(series []prompb.TimeSeries) []prompb.TimeSeries {
	out := make([]prompb.TimeSeries, len(series))
	copy(out, series)
	for i := range out {
		// Replace the shared Samples slice with a freshly allocated one.
		if n := len(out[i].Samples); n > 0 {
			out[i].Samples = append(make([]prompb.Sample, 0, n), out[i].Samples...)
		}
	}
	return out
}
func (ws *WritersType) Init() error {
ws.AllQueueLen.Store(int64(0))