Compare commits

...

112 Commits

Author SHA1 Message Date
ning
15c2eadda6 refactor: optimize webhook send 2024-12-11 16:25:35 +08:00
ning
0aea38e564 refactor: write queue limit 2024-12-10 21:03:55 +08:00
CRISPpp
45e9253b2a feat: add global metric write rate control (#2347) 2024-12-10 20:43:02 +08:00
CRISPpp
9385ca9931 feat: add pre check for deleting busi_group (#2346) 2024-12-09 20:32:46 +08:00
ning
fdd3d14871 docs: change default db type to sqlite 2024-12-06 21:04:10 +08:00
Yening Qin
e890034c19 feat: auto init db (#2345)
Co-authored-by: CRISPpp <78430796+CRISPpp@users.noreply.github.com>
2024-12-06 20:32:17 +08:00
Yening Qin
3aaab9e6ad fix: event prom eval interval (#2343) 2024-12-06 20:24:49 +08:00
CRISPpp
7f7d707cfc fix: role_operation abnormal count (#2338) 2024-12-06 16:31:47 +08:00
Xu Bin
98402e9f8a fix: quotation mark for alert rule var (#2339) 2024-12-06 16:07:47 +08:00
Xu Bin
017094fd78 fix: var support for aggregate function (#2334) 2024-12-06 11:57:51 +08:00
Yening Qin
8b6b896362 feat: redis support miniredis type (#2337)
Co-authored-by: CRISPpp <78430796+CRISPpp@users.noreply.github.com>
2024-12-06 10:46:05 +08:00
ning
acaa00cfb6 refactor: migrate add more log 2024-12-05 17:55:27 +08:00
flashbo
87f3d8595d fix: targets filter logic (#2333) 2024-12-05 14:31:57 +08:00
flashbo
42791a374d feat: targets support sorting by time (#2331) 2024-12-05 14:20:30 +08:00
kongfei605
3855c25805 chore: update dashboards for mongodb (#2332) 2024-12-04 16:19:21 +08:00
Xu Bin
10ec0ccbd1 fix: alert rule cron eval (#2330) 2024-12-03 16:58:30 +08:00
flashbo
94cf304222 refactor: improve target bind bg logic (#2327) 2024-12-03 15:00:25 +08:00
ning
994de4635a docs: update n9e.sql 2024-12-02 20:18:32 +08:00
ning
9a0013a406 docs: update n9e.sql 2024-12-02 09:16:50 +08:00
CRISPpp
6dcd5dd01e docs: complete initialization n9e.sql (#2325) 2024-11-29 18:52:00 +08:00
ning
70126e3aec refactor: sync.map clear 2024-11-29 18:23:45 +08:00
ning
767482d358 refactor: optimize prom query 2024-11-28 15:58:31 +08:00
YangHgRi
9a46106cc0 refactor: specify parameter type in function to improve type safety and clarity (#2317) 2024-11-26 20:55:49 +08:00
Yening Qin
da9ea67cee feat: alert rule annotation support prom query template func (#2314)
Co-authored-by: Xu Bin <140785332+Reditiny@users.noreply.github.com>
2024-11-22 16:58:00 +08:00
6666walnut
c13ecd780b fix: get busi-groups api err (#2313)
Co-authored-by: wangjing17 <wangjing17@foundersc.com>
2024-11-22 16:55:32 +08:00
ning
cab37c796a refactor: target_busi_group set utf8mb4_general_ci 2024-11-22 13:28:36 +08:00
ning
078578772b refactor: change builtin component logo type 2024-11-21 20:22:37 +08:00
Yening Qin
31883ec844 refactor: alert rule support cron (#2309)
Co-authored-by: Xu Bin <140785332+Reditiny@users.noreply.github.com>
2024-11-21 13:14:19 +08:00
ning
6100cd084a refactor: event recovery notify 2024-11-21 11:00:30 +08:00
Yening Qin
b82e260d65 feat: alert rule support var (#2307)
Co-authored-by: Xu Bin <140785332+Reditiny@users.noreply.github.com>
2024-11-20 20:34:33 +08:00
ning
3983386af3 fix: get notify-record api err 2024-11-20 20:02:38 +08:00
Ulric Qin
83f2054062 lock version of prom 2024-11-20 17:49:21 +08:00
Ulric Qin
83e0b3cb98 Merge branch 'main' of github.com:ccfos/nightingale 2024-11-20 17:23:29 +08:00
Ulric Qin
f6bfa17e2e update init sql 2024-11-20 17:23:17 +08:00
flashbo
3d8019b738 refactor: event notify record (#2296) 2024-11-19 20:25:17 +08:00
Ulric Qin
ee1be71be6 Merge branch 'main' of github.com:ccfos/nightingale 2024-11-19 19:47:14 +08:00
Ulric Qin
7f2fb459bb update mysql and redis dashboard 2024-11-19 19:47:00 +08:00
ning
fde6a9c75e refactor: change is_recovered type 2024-11-19 19:23:21 +08:00
Ulric Qin
a2b506e263 add input.redis in docker-compose 2024-11-19 17:28:56 +08:00
Ulric Qin
30024a4951 add redis dashboard 2024-11-19 17:26:33 +08:00
Ulric Qin
2c3996812a remove global labels: source=categraf 2024-11-19 17:24:10 +08:00
Ulric Qin
51d35900f2 add input.mysql in docker-compose/etc-categraf 2024-11-19 17:05:46 +08:00
Ulric Qin
852fd2ea6e add field is_recovered when call ibex 2024-11-19 16:49:28 +08:00
Ulric Qin
e1a57217ab update mysql dashboard 2024-11-19 11:28:22 +08:00
Ulric Qin
1e7dad1a67 Merge branch 'main' of github.com:ccfos/nightingale 2024-11-19 11:26:40 +08:00
Ulric Qin
534e40ad63 add mysql dashboard 2024-11-19 11:26:27 +08:00
CRISPpp
15daa3826c feat: add console log with n9e address and root username/passwd when init root (#2302) 2024-11-19 10:31:41 +08:00
ning
d5efb5b6d4 update go.mod 2024-11-19 10:29:39 +08:00
ning
7ebd776881 docs: update doris dashboard tpl 2024-11-18 20:03:29 +08:00
Ulric Qin
0e5cda1cee support proxy when call center 2024-11-18 16:04:15 +08:00
Ulric Qin
64dad19377 Merge branch 'main' of github.com:ccfos/nightingale 2024-11-18 16:01:12 +08:00
Ulric Qin
48f199f8f5 sender support ProxyFromEnvironment 2024-11-18 16:00:56 +08:00
Yening Qin
f7e4df7415 refactor: self monitor metric (#2285)
Co-authored-by: Xu Bin <140785332+Reditiny@users.noreply.github.com>
2024-11-18 11:46:34 +08:00
ning
37fe01ab54 docs: update workflows 2024-11-15 17:45:17 +08:00
ning
cbfe661bce docs: update go version in mod 2024-11-15 17:35:54 +08:00
Yening Qin
890c12f0d4 feat: alert rule query add unit (#2299)
Co-authored-by: Xu Bin <140785332+Reditiny@users.noreply.github.com>
2024-11-15 17:09:41 +08:00
Yening Qin
643c6c997c fix: proxy parse url (#2297) 2024-11-15 16:37:01 +08:00
robin
b201836b40 fix:create user info for notify_tpl (#2292) 2024-11-15 11:57:23 +08:00
robin
b5eced1540 docs: add doris template (#2260) 2024-11-14 22:50:45 +08:00
Yening Qin
a13004eab7 feat: allow override global webhook (#2257)
Co-authored-by: flashbo <36443248+lwb0214@users.noreply.github.com>
2024-11-14 22:35:23 +08:00
Yening Qin
a0c56548e5 refactor: migrate label (#2293) 2024-11-14 22:25:20 +08:00
ning
e3d97386a8 refactor: dash tpl uuid 2024-11-14 22:14:18 +08:00
ning
051b0ca045 Merge branch 'main' of github.com:ccfos/nightingale 2024-11-14 19:18:20 +08:00
ning
2941ced011 fix: import builtin board 2024-11-14 19:15:02 +08:00
Ulric Qin
97d6908edd fix mongodb dashboard 2024-11-14 18:28:52 +08:00
710leo
c7117b9461 fix: proxy api parse url 2024-11-13 23:00:49 +08:00
Yening Qin
78417b1d5b refactor: optimize rule datasource set (#2288)
Co-authored-by: Xu Bin <140785332+Reditiny@users.noreply.github.com>
2024-11-13 20:28:50 +08:00
Yening Qin
79f3404810 refactor event notify (#2287) 2024-11-13 19:50:11 +08:00
ning
81e51c60eb refactor: subscribe add check 2024-11-12 20:26:56 +08:00
shardingHe
af9cd55ca5 docs: add metrics config for oracle (#2276)
Co-authored-by: shardingHe <wangzihe@flashcat.cloud>
2024-11-12 13:59:35 +08:00
710leo
d4afdb2b6e refactor: change log 2024-11-06 22:34:30 +08:00
flashbo
2befc8b0f1 refactor: migrate bg label (#2269) 2024-11-06 21:48:29 +08:00
Yening Qin
14fd2eb26d refactor: update tdengine query (#2270) 2024-11-06 20:27:21 +08:00
ning
0a938518d7 refactor: target_busi_group table name 2024-11-06 13:00:35 +08:00
ning
0eed5afa7e refactor: update target_busi_group character 2024-11-05 14:46:41 +08:00
Yening Qin
f82eaf0a1f refactor: optimize tdentine (#2262) 2024-11-04 17:33:18 +08:00
ning
f03278d68d refactor: append tags 2024-11-04 16:43:39 +08:00
shardingHe
7d1e143f60 docs: sync configurations for bind & ldap (#2253)
Co-authored-by: shardingHe <wangzihe@flashcat.cloud>
2024-11-02 16:49:49 +08:00
ning
078a0c7b1c refactor: prom query log 2024-11-01 15:28:23 +08:00
flashbo
d9cac65a18 refactor: improve prom_rule import (#2251) 2024-10-30 14:28:00 +08:00
ning
dd025ca87c refactor: migrate db and host_miss tag append 2024-10-30 14:20:16 +08:00
ning
04734b8940 Merge branch 'main' of github.com:ccfos/nightingale 2024-10-29 12:09:50 +08:00
ning
bf7bcf4196 docs: update notify tpl 2024-10-29 12:09:26 +08:00
ulricqin
16195abb89 Update docker-compose.yaml 2024-10-29 12:08:40 +08:00
ning
3f4891d65d refactor: event queue push 2024-10-28 20:51:21 +08:00
Yening Qin
102549c6a1 refactor: webhook send event (#2248)
Co-authored-by: Xu Bin <140785332+Reditiny@users.noreply.github.com>
2024-10-28 20:33:29 +08:00
Yening Qin
5213b1d7f1 refactor: es update config (#2247)
Co-authored-by: flashbo <36443248+lwb0214@users.noreply.github.com>
2024-10-28 20:32:45 +08:00
Yening Qin
24de97fb1e refactor: update default engine name (#2245) 2024-10-28 15:50:52 +08:00
ning
9c2cf679e0 refactor: center set default engine_name 2024-10-28 13:37:55 +08:00
Yening Qin
2aa4941010 refactor: optimize recover notify(#2242)
Co-authored-by: Xu Bin <140785332+Reditiny@users.noreply.github.com>
2024-10-25 16:53:44 +08:00
flashbo
a812f14442 refactor: record notify for callback (#2231) 2024-10-25 16:50:12 +08:00
flashbo
4fb7e8e2b5 refactor: fill group names in target (#2241) 2024-10-25 16:30:09 +08:00
ulricqin
113ad67104 Update README.md 2024-10-25 12:10:28 +08:00
flashbo
49d843540a refactor: add ExtraInfoMap in alert event (#2240) 2024-10-25 11:03:56 +08:00
Yening Qin
21f0e3310f fix: event relabel when target_label is blank (#2228)
Co-authored-by: Xu Bin <140785332+Reditiny@users.noreply.github.com>
2024-10-24 14:09:41 +08:00
ulricqin
31b3434e87 Update README.md 2024-10-22 14:19:33 +08:00
ning
2576a0f815 fix: edge get all configs 2024-10-21 19:30:13 +08:00
ning
0ac4bc7421 docs: update linux dashboard tpl 2024-10-21 18:07:52 +08:00
ning
95e6ea98f4 refactor: prom client query api add retry 2024-10-21 17:57:31 +08:00
ning
dc60c74c0d docs: update automq dashboard tpl 2024-10-21 16:50:36 +08:00
ning
a15adc196d docs: update linux dashboard tpl 2024-10-21 16:35:53 +08:00
ning
f89ef04e85 refactor: optimize code robustness 2024-10-21 14:54:48 +08:00
Yening Qin
f55cd9b32e feat: config access log in web (#2227) 2024-10-21 12:11:19 +08:00
Xu Bin
305a898f8b feat: alert recover ckeck (#2226) 2024-10-21 12:07:54 +08:00
Yening Qin
60c31d8eb2 feat: support query set opration (#2225) 2024-10-20 21:18:12 +08:00
ning
7da49a8c68 refactor: update go.mod 2024-10-20 14:04:31 +08:00
flashbo
65b1410b09 refactor: support output logs to one file (#2209) 2024-10-20 14:02:44 +08:00
ning
3901671c0e docs: update n9e.sql 2024-10-18 15:24:33 +08:00
Xu Bin
9c02937e81 refactor: alert mute retain (#2223) 2024-10-18 12:08:31 +08:00
flashbo
0a255ee33a fix: unbind bgids when delete target (#2219) 2024-10-16 10:00:08 +08:00
Xu Bin
8dc198b4b1 fix: smtp update (#2213) 2024-10-12 11:37:14 +08:00
Yening Qin
9696f63a71 rename tpl name 2024-10-11 16:23:57 +08:00
130 changed files with 17260 additions and 1382 deletions

View File

@@ -5,7 +5,7 @@ on:
tags:
- 'v*'
env:
GO_VERSION: 1.18
GO_VERSION: 1.23
jobs:
goreleaser:

1
.gitignore vendored
View File

@@ -9,6 +9,7 @@
*.o
*.a
*.so
*.db
*.sw[po]
*.tar.gz
*.[568vq]

View File

@@ -38,6 +38,7 @@
- 👉 [文档中心](https://flashcat.cloud/docs/) | [下载中心](https://flashcat.cloud/download/nightingale/)
- ❤️ [报告 Bug](https://github.com/ccfos/nightingale/issues/new?assignees=&labels=&projects=&template=question.yml)
- 为了提供更快速的访问体验,上述文档和下载站点托管于 [FlashcatCloud](https://flashcat.cloud)
- 💡 前后端代码分离,前端代码仓库:[https://github.com/n9e/fe](https://github.com/n9e/fe)
## 功能特点
@@ -90,7 +91,7 @@
- 推荐搜索关注夜莺公众号,第一时间获取社区动态:`夜莺监控Nightingale`
- 日常问题交流:
- QQ群730841964
- [加入微信群](https://download.flashcat.cloud/ulric/20241008153952.png),如果二维码过期了,可以联系我(我的微信:`picobyte`)拉群,备注: `夜莺互助群`
- [加入微信群](https://download.flashcat.cloud/ulric/20241022141621.png),如果二维码过期了,可以联系我(我的微信:`picobyte`)拉群,备注: `夜莺互助群`
## 广受关注
[![Stargazers over time](https://api.star-history.com/svg?repos=ccfos/nightingale&type=Date)](https://star-history.com/#ccfos/nightingale&Date)

View File

@@ -62,15 +62,18 @@ func Initialize(configDir string, cryptoKey string) (func(), error) {
userCache := memsto.NewUserCache(ctx, syncStats)
userGroupCache := memsto.NewUserGroupCache(ctx, syncStats)
taskTplsCache := memsto.NewTaskTplCache(ctx)
configCvalCache := memsto.NewCvalCache(ctx, syncStats)
promClients := prom.NewPromClient(ctx)
dispatch.InitRegisterQueryFunc(promClients)
tdengineClients := tdengine.NewTdengineClient(ctx, config.Alert.Heartbeat)
externalProcessors := process.NewExternalProcessors()
Start(config.Alert, config.Pushgw, syncStats, alertStats, externalProcessors, targetCache, busiGroupCache, alertMuteCache, alertRuleCache, notifyConfigCache, taskTplsCache, dsCache, ctx, promClients, tdengineClients, userCache, userGroupCache)
r := httpx.GinEngine(config.Global.RunMode, config.HTTP)
r := httpx.GinEngine(config.Global.RunMode, config.HTTP,
configCvalCache.PrintBodyPaths, configCvalCache.PrintAccessLog)
rt := router.New(config.HTTP, config.Alert, alertMuteCache, targetCache, busiGroupCache, alertStats, ctx, externalProcessors)
if config.Ibex.Enable {
@@ -100,7 +103,7 @@ func Start(alertc aconf.Alert, pushgwc pconf.Pushgw, syncStats *memsto.Stats, al
naming := naming.NewNaming(ctx, alertc.Heartbeat, alertStats)
writers := writer.NewWriters(pushgwc)
record.NewScheduler(alertc, recordingRuleCache, promClients, writers, alertStats)
record.NewScheduler(alertc, recordingRuleCache, promClients, writers, alertStats, datasourceCache)
eval.NewScheduler(alertc, externalProcessors, alertRuleCache, targetCache, targetsOfAlertRulesCache,
busiGroupCache, alertMuteCache, datasourceCache, promClients, tdendgineClients, naming, ctx, alertStats)

View File

@@ -38,7 +38,7 @@ func NewSyncStats() *Stats {
Subsystem: subsystem,
Name: "rule_eval_error_total",
Help: "Number of rule eval error.",
}, []string{"datasource", "stage"})
}, []string{"datasource", "stage", "busi_group", "rule_id"})
CounterQueryDataErrorTotal := prometheus.NewCounterVec(prometheus.CounterOpts{
Namespace: namespace,

View File

@@ -1,6 +1,7 @@
package dispatch
import (
"context"
"encoding/json"
"fmt"
"strings"
@@ -13,8 +14,10 @@ import (
"github.com/ccfos/nightingale/v6/pkg/ctx"
"github.com/ccfos/nightingale/v6/pkg/poster"
promsdk "github.com/ccfos/nightingale/v6/pkg/prom"
"github.com/ccfos/nightingale/v6/pkg/tplx"
"github.com/ccfos/nightingale/v6/prom"
"github.com/prometheus/common/model"
"github.com/toolkits/pkg/concurrent/semaphore"
"github.com/toolkits/pkg/logger"
)
@@ -27,6 +30,18 @@ type Consumer struct {
promClients *prom.PromClientMap
}
// InitRegisterQueryFunc registers a PromQL query callback with the tplx
// package. The callback runs an instant query (evaluated at time.Now()) against
// the client that promClients holds for the given datasource ID.
//
// The callback returns nil when promClients reports the datasource as nil or
// when the query itself fails. Query errors and warnings are logged instead of
// being silently discarded, so a broken query can be diagnosed from the logs.
func InitRegisterQueryFunc(promClients *prom.PromClientMap) {
	tplx.RegisterQueryFunc(func(datasourceID int64, promql string) model.Value {
		if promClients.IsNil(datasourceID) {
			return nil
		}

		readerClient := promClients.GetCli(datasourceID)
		value, warnings, err := readerClient.Query(context.Background(), promql, time.Now())
		if err != nil {
			logger.Errorf("tpl_query: datasource_id:%d promql:%s err:%v", datasourceID, promql, err)
			// Do not hand a possibly partial value to the caller after a failure.
			return nil
		}

		if len(warnings) > 0 {
			logger.Warningf("tpl_query: datasource_id:%d promql:%s warnings:%v", datasourceID, promql, warnings)
		}

		return value
	})
}
// 创建一个 Consumer 实例
func NewConsumer(alerting aconf.Alerting, ctx *ctx.Context, dispatch *Dispatch, promClients *prom.PromClientMap) *Consumer {
return &Consumer{
@@ -113,7 +128,7 @@ func (e *Consumer) persist(event *models.AlertCurEvent) {
event.Id, err = poster.PostByUrlsWithResp[int64](e.ctx, "/v1/n9e/event-persist", event)
if err != nil {
logger.Errorf("event:%+v persist err:%v", event, err)
e.dispatch.Astats.CounterRuleEvalErrorTotal.WithLabelValues(fmt.Sprintf("%v", event.DatasourceId), "persist_event").Inc()
e.dispatch.Astats.CounterRuleEvalErrorTotal.WithLabelValues(fmt.Sprintf("%v", event.DatasourceId), "persist_event", event.GroupName, fmt.Sprintf("%v", event.RuleId)).Inc()
}
return
}
@@ -121,7 +136,7 @@ func (e *Consumer) persist(event *models.AlertCurEvent) {
err := models.EventPersist(e.ctx, event)
if err != nil {
logger.Errorf("event%+v persist err:%v", event, err)
e.dispatch.Astats.CounterRuleEvalErrorTotal.WithLabelValues(fmt.Sprintf("%v", event.DatasourceId), "persist_event").Inc()
e.dispatch.Astats.CounterRuleEvalErrorTotal.WithLabelValues(fmt.Sprintf("%v", event.DatasourceId), "persist_event", event.GroupName, fmt.Sprintf("%v", event.RuleId)).Inc()
}
}
@@ -169,7 +184,7 @@ func (e *Consumer) queryRecoveryVal(event *models.AlertCurEvent) {
logger.Errorf("rule_eval:%s promql:%s, warnings:%v", getKey(event), promql, warnings)
}
anomalyPoints := common.ConvertAnomalyPoints(value)
anomalyPoints := models.ConvertAnomalyPoints(value)
if len(anomalyPoints) == 0 {
logger.Warningf("rule_eval:%s promql:%s, result is empty", getKey(event), promql)
event.AnnotationsJSON["recovery_promql_error"] = fmt.Sprintf("promql:%s error:%s", promql, "result is empty")

View File

@@ -139,6 +139,12 @@ func (e *Dispatch) HandleEventNotify(event *models.AlertCurEvent, isSubscribe bo
if rule == nil {
return
}
if e.blockEventNotify(rule, event) {
logger.Infof("block event notify: rule_id:%d event:%+v", rule.Id, event)
return
}
fillUsers(event, e.userCache, e.userGroupCache)
var (
@@ -175,6 +181,25 @@ func (e *Dispatch) HandleEventNotify(event *models.AlertCurEvent, isSubscribe bo
}
}
// blockEventNotify reports whether the notification for event must be
// suppressed. For HOST rules it returns true when the target named by the
// event's "ident" tag is absent from the target cache or cached as nil; in
// every other case it returns false.
func (e *Dispatch) blockEventNotify(rule *models.AlertRule, event *models.AlertCurEvent) bool {
	// For host rules, first check whether the host has been deleted.
	if rule.GetRuleType() == models.HOST {
		if target, found := e.targetCache.Get(event.TagsMap["ident"]); !found || target == nil {
			return true
		}
	}

	// Disabled check: for recovery notifications, detect whether the rule
	// configuration has changed since the event fired.
	// if event.IsRecovered && event.RuleHash != rule.Hash() {
	// 	return true
	// }

	return false
}
func (e *Dispatch) handleSubs(event *models.AlertCurEvent) {
// handle alert subscribes
subscribes := make([]*models.AlertSubscribe, 0)
@@ -266,10 +291,12 @@ func (e *Dispatch) Send(rule *models.AlertRule, event *models.AlertCurEvent, not
e.SendCallbacks(rule, notifyTarget, event)
// handle global webhooks
if e.alerting.WebhookBatchSend {
sender.BatchSendWebhooks(e.ctx, notifyTarget.ToWebhookList(), event, e.Astats)
} else {
sender.SingleSendWebhooks(e.ctx, notifyTarget.ToWebhookList(), event, e.Astats)
if !event.OverrideGlobalWebhook() {
if e.alerting.WebhookBatchSend {
sender.BatchSendWebhooks(e.ctx, notifyTarget.ToWebhookMap(), event, e.Astats)
} else {
sender.SingleSendWebhooks(e.ctx, notifyTarget.ToWebhookMap(), event, e.Astats)
}
}
// handle plugin call
@@ -283,10 +310,10 @@ func (e *Dispatch) Send(rule *models.AlertRule, event *models.AlertCurEvent, not
}
func (e *Dispatch) SendCallbacks(rule *models.AlertRule, notifyTarget *NotifyTarget, event *models.AlertCurEvent) {
uids := notifyTarget.ToUidList()
urls := notifyTarget.ToCallbackList()
whMap := notifyTarget.ToWebhookMap()
ogw := event.OverrideGlobalWebhook()
for _, urlStr := range urls {
if len(urlStr) == 0 {
continue
@@ -294,7 +321,7 @@ func (e *Dispatch) SendCallbacks(rule *models.AlertRule, notifyTarget *NotifyTar
cbCtx := sender.BuildCallBackContext(e.ctx, urlStr, rule, []*models.AlertCurEvent{event}, uids, e.userCache, e.alerting.WebhookBatchSend, e.Astats)
if wh, ok := whMap[cbCtx.CallBackURL]; ok && wh.Enable {
if wh, ok := whMap[cbCtx.CallBackURL]; !ogw && ok && wh.Enable {
logger.Debugf("SendCallbacks: webhook[%s] is in global conf.", cbCtx.CallBackURL)
continue
}

View File

@@ -76,52 +76,8 @@ func (s *NotifyTarget) ToCallbackList() []string {
return callbacks
}
func (s *NotifyTarget) ToWebhookList() []*models.Webhook {
webhooks := make([]*models.Webhook, 0, len(s.webhooks))
for _, wh := range s.webhooks {
if wh.Batch == 0 {
wh.Batch = 1000
}
if wh.Timeout == 0 {
wh.Timeout = 10
}
if wh.RetryCount == 0 {
wh.RetryCount = 10
}
if wh.RetryInterval == 0 {
wh.RetryInterval = 10
}
webhooks = append(webhooks, wh)
}
return webhooks
}
func (s *NotifyTarget) ToWebhookMap() map[string]*models.Webhook {
webhookMap := make(map[string]*models.Webhook, len(s.webhooks))
for _, wh := range s.webhooks {
if wh.Batch == 0 {
wh.Batch = 1000
}
if wh.Timeout == 0 {
wh.Timeout = 10
}
if wh.RetryCount == 0 {
wh.RetryCount = 10
}
if wh.RetryInterval == 0 {
wh.RetryInterval = 10
}
webhookMap[wh.Url] = wh
}
return webhookMap
return s.webhooks
}
func (s *NotifyTarget) ToUidList() []int64 {

View File

@@ -96,8 +96,7 @@ func (s *Scheduler) syncAlertRules() {
ruleType := rule.GetRuleType()
if rule.IsPrometheusRule() || rule.IsLokiRule() || rule.IsTdengineRule() {
datasourceIds := s.promClients.Hit(rule.DatasourceIdsJson)
datasourceIds = append(datasourceIds, s.tdengineClients.Hit(rule.DatasourceIdsJson)...)
datasourceIds := s.datasourceCache.GetIDsByDsCateAndQueries(rule.Cate, rule.DatasourceQueries)
for _, dsId := range datasourceIds {
if !naming.DatasourceHashRing.IsHit(strconv.FormatInt(dsId, 10), fmt.Sprintf("%d", rule.Id), s.aconf.Heartbeat.Endpoint) {
continue
@@ -133,7 +132,8 @@ func (s *Scheduler) syncAlertRules() {
} else {
// 如果 rule 不是通过 prometheus engine 来告警的,则创建为 externalRule
// if rule is not processed by prometheus engine, create it as externalRule
for _, dsId := range rule.DatasourceIdsJson {
dsIds := s.datasourceCache.GetIDsByDsCateAndQueries(rule.Cate, rule.DatasourceQueries)
for _, dsId := range dsIds {
ds := s.datasourceCache.GetById(dsId)
if ds == nil {
logger.Debugf("datasource %d not found", dsId)

File diff suppressed because it is too large Load Diff

View File

@@ -269,3 +269,190 @@ func allValueDeepEqual(got, want map[uint64][]uint64) bool {
}
return true
}
// allValueDeepEqualOmitOrder reports whether got and want hold the same
// strings with the same multiplicities, ignoring element order.
//
// The comparison works on sorted copies, so the callers' slices keep their
// original order; sorting the arguments in place would silently reorder data
// that a test may still inspect or print afterwards.
func allValueDeepEqualOmitOrder(got, want []string) bool {
	if len(got) != len(want) {
		return false
	}

	gotSorted := slices.Clone(got)
	wantSorted := slices.Clone(want)
	slices.Sort(gotSorted)
	slices.Sort(wantSorted)

	return slices.Equal(gotSorted, wantSorted)
}
// Test_removeVal runs removeVal over a table of PromQL expressions. The
// expected outputs drop every label matcher whose value is a "$variable"
// placeholder (double- or single-quoted) while leaving literal matchers and
// the "$val" thresholds untouched.
func Test_removeVal(t *testing.T) {
	cases := []struct {
		name   string
		promql string
		want   string
	}{
		{
			// Every matcher is a placeholder: the selector becomes empty.
			name:   "removeVal1",
			promql: "mem{test1=\"$test1\",test2=\"$test2\",test3=\"$test3\"} > $val",
			want:   "mem{} > $val",
		},
		{
			name:   "removeVal2",
			promql: "mem{test1=\"test1\",test2=\"$test2\",test3=\"$test3\"} > $val",
			want:   "mem{test1=\"test1\"} > $val",
		},
		{
			name:   "removeVal3",
			promql: "mem{test1=\"$test1\",test2=\"test2\",test3=\"$test3\"} > $val",
			want:   "mem{test2=\"test2\"} > $val",
		},
		{
			name:   "removeVal4",
			promql: "mem{test1=\"$test1\",test2=\"$test2\",test3=\"test3\"} > $val",
			want:   "mem{test3=\"test3\"} > $val",
		},
		{
			name:   "removeVal5",
			promql: "mem{test1=\"$test1\",test2=\"test2\",test3=\"test3\"} > $val",
			want:   "mem{test2=\"test2\",test3=\"test3\"} > $val",
		},
		{
			name:   "removeVal6",
			promql: "mem{test1=\"test1\",test2=\"$test2\",test3=\"test3\"} > $val",
			want:   "mem{test1=\"test1\",test3=\"test3\"} > $val",
		},
		{
			// Single-quoted placeholder.
			name:   "removeVal7",
			promql: "mem{test1=\"test1\",test2=\"test2\",test3='$test3'} > $val",
			want:   "mem{test1=\"test1\",test2=\"test2\"} > $val",
		},
		{
			// No placeholders: the expression is expected back unchanged.
			name:   "removeVal8",
			promql: "mem{test1=\"test1\",test2=\"test2\",test3=\"test3\"} > $val",
			want:   "mem{test1=\"test1\",test2=\"test2\",test3=\"test3\"} > $val",
		},
		{
			// Two selectors joined by "and".
			name:   "removeVal9",
			promql: "mem{test1=\"$test1\",test2=\"test2\"} > $val1 and mem{test3=\"test3\",test4=\"test4\"} > $val2",
			want:   "mem{test2=\"test2\"} > $val1 and mem{test3=\"test3\",test4=\"test4\"} > $val2",
		},
		{
			name:   "removeVal10",
			promql: "mem{test1=\"test1\",test2='$test2'} > $val1 and mem{test3=\"test3\",test4=\"test4\"} > $val2",
			want:   "mem{test1=\"test1\"} > $val1 and mem{test3=\"test3\",test4=\"test4\"} > $val2",
		},
		{
			name:   "removeVal11",
			promql: "mem{test1='test1',test2=\"test2\"} > $val1 and mem{test3=\"$test3\",test4=\"test4\"} > $val2",
			want:   "mem{test1='test1',test2=\"test2\"} > $val1 and mem{test4=\"test4\"} > $val2",
		},
		{
			name:   "removeVal12",
			promql: "mem{test1=\"test1\",test2=\"test2\"} > $val1 and mem{test3=\"test3\",test4=\"$test4\"} > $val2",
			want:   "mem{test1=\"test1\",test2=\"test2\"} > $val1 and mem{test3=\"test3\"} > $val2",
		},
	}

	for _, tc := range cases {
		t.Run(tc.name, func(t *testing.T) {
			got := removeVal(tc.promql)
			if got != tc.want {
				t.Errorf("removeVal() = %v, want %v", got, tc.want)
			}
		})
	}
}
// TestExtractVarMapping runs ExtractVarMapping over a table of PromQL
// expressions and compares the result with the expected map, which is keyed by
// variable name (without the "$") and holds the label the variable is bound to.
func TestExtractVarMapping(t *testing.T) {
	cases := []struct {
		name   string
		promql string
		want   map[string]string
	}{
		{
			// Single brace pair, single variable.
			name:   "单个花括号单个变量",
			promql: `mem_used_percent{host="$my_host"} > $val`,
			want:   map[string]string{"my_host": "host"},
		},
		{
			// Single brace pair, multiple variables.
			name:   "单个花括号多个变量",
			promql: `mem_used_percent{host="$my_host",region="$region",env="prod"} > $val`,
			want:   map[string]string{"my_host": "host", "region": "region"},
		},
		{
			// Multiple brace pairs, multiple variables.
			name:   "多个花括号多个变量",
			promql: `sum(rate(mem_used_percent{host="$my_host"})) by (instance) + avg(node_load1{region="$region"}) > $val`,
			want:   map[string]string{"my_host": "host", "region": "region"},
		},
		{
			// The same variable appears more than once.
			name:   "相同变量出现多次",
			promql: `sum(rate(mem_used_percent{host="$my_host"})) + avg(node_load1{host="$my_host"}) > $val`,
			want:   map[string]string{"my_host": "host"},
		},
		{
			// No variables.
			name:   "没有变量",
			promql: `mem_used_percent{host="localhost",region="cn"} > 80`,
			want:   map[string]string{},
		},
		{
			// No braces.
			name:   "没有花括号",
			promql: `80 > $val`,
			want:   map[string]string{},
		},
		{
			// Irregularly formatted labels (unquoted values, extra spaces).
			name:   "格式不规范的标签",
			promql: `mem_used_percent{host=$my_host,region = $region} > $val`,
			want:   map[string]string{"my_host": "host", "region": "region"},
		},
		{
			// Empty braces.
			name:   "空花括号",
			promql: `mem_used_percent{} > $val`,
			want:   map[string]string{},
		},
		{
			// Unclosed brace.
			name:   "不完整的花括号",
			promql: `mem_used_percent{host="$my_host"`,
			want:   map[string]string{},
		},
		{
			// Complex expression.
			name:   "复杂表达式",
			promql: `sum(rate(http_requests_total{handler="$handler",code="$code"}[5m])) by (handler) / sum(rate(http_requests_total{handler="$handler"}[5m])) by (handler) * 100 > $threshold`,
			want:   map[string]string{"handler": "handler", "code": "code"},
		},
	}

	for _, tc := range cases {
		t.Run(tc.name, func(t *testing.T) {
			if got := ExtractVarMapping(tc.promql); !reflect.DeepEqual(got, tc.want) {
				t.Errorf("ExtractVarMapping() = %v, want %v", got, tc.want)
			}
		})
	}
}

View File

@@ -21,6 +21,7 @@ import (
"github.com/ccfos/nightingale/v6/pushgw/writer"
"github.com/prometheus/prometheus/prompb"
"github.com/robfig/cron/v3"
"github.com/toolkits/pkg/logger"
"github.com/toolkits/pkg/str"
)
@@ -78,6 +79,9 @@ type Processor struct {
HandleFireEventHook HandleEventFunc
HandleRecoverEventHook HandleEventFunc
EventMuteHook EventMuteHookFunc
ScheduleEntry cron.Entry
PromEvalInterval int
}
func (p *Processor) Key() string {
@@ -89,9 +93,9 @@ func (p *Processor) DatasourceId() int64 {
}
func (p *Processor) Hash() string {
return str.MD5(fmt.Sprintf("%d_%d_%s_%d",
return str.MD5(fmt.Sprintf("%d_%s_%s_%d",
p.rule.Id,
p.rule.PromEvalInterval,
p.rule.CronPattern,
p.rule.RuleConfig,
p.datasourceId,
))
@@ -126,7 +130,7 @@ func NewProcessor(engineName string, rule *models.AlertRule, datasourceId int64,
return p
}
func (p *Processor) Handle(anomalyPoints []common.AnomalyPoint, from string, inhibit bool) {
func (p *Processor) Handle(anomalyPoints []models.AnomalyPoint, from string, inhibit bool) {
// 有可能rule的一些配置已经发生变化比如告警接收人、callbacks等
// 这些信息的修改是不会引起worker restart的但是确实会影响告警处理逻辑
// 所以这里直接从memsto.AlertRuleCache中获取并覆盖
@@ -134,10 +138,13 @@ func (p *Processor) Handle(anomalyPoints []common.AnomalyPoint, from string, inh
cachedRule := p.alertRuleCache.Get(p.rule.Id)
if cachedRule == nil {
logger.Errorf("rule not found %+v", anomalyPoints)
p.Stats.CounterRuleEvalErrorTotal.WithLabelValues(fmt.Sprintf("%v", p.DatasourceId()), "handle_event").Inc()
p.Stats.CounterRuleEvalErrorTotal.WithLabelValues(fmt.Sprintf("%v", p.DatasourceId()), "handle_event", p.BusiGroupCache.GetNameByBusiGroupId(p.rule.GroupId), fmt.Sprintf("%v", p.rule.Id)).Inc()
return
}
// 在 rule 变化之前取到 ruleHash
ruleHash := p.rule.Hash()
p.rule = cachedRule
now := time.Now().Unix()
alertingKeys := map[string]struct{}{}
@@ -145,7 +152,7 @@ func (p *Processor) Handle(anomalyPoints []common.AnomalyPoint, from string, inh
// 根据 event 的 tag 将 events 分组,处理告警抑制的情况
eventsMap := make(map[string][]*models.AlertCurEvent)
for _, anomalyPoint := range anomalyPoints {
event := p.BuildEvent(anomalyPoint, from, now)
event := p.BuildEvent(anomalyPoint, from, now, ruleHash)
// 如果 event 被 mute 了,本质也是 fire 的状态,这里无论如何都添加到 alertingKeys 中,防止 fire 的事件自动恢复了
hash := event.Hash
alertingKeys[hash] = struct{}{}
@@ -170,10 +177,12 @@ func (p *Processor) Handle(anomalyPoints []common.AnomalyPoint, from string, inh
p.handleEvent(events)
}
p.HandleRecover(alertingKeys, now, inhibit)
if from == "inner" {
p.HandleRecover(alertingKeys, now, inhibit)
}
}
func (p *Processor) BuildEvent(anomalyPoint common.AnomalyPoint, from string, now int64) *models.AlertCurEvent {
func (p *Processor) BuildEvent(anomalyPoint models.AnomalyPoint, from string, now int64, ruleHash string) *models.AlertCurEvent {
p.fillTags(anomalyPoint)
p.mayHandleIdent()
hash := Hash(p.rule.Id, p.datasourceId, anomalyPoint)
@@ -199,6 +208,7 @@ func (p *Processor) BuildEvent(anomalyPoint common.AnomalyPoint, from string, no
event.TargetNote = p.targetNote
event.TriggerValue = anomalyPoint.ReadableValue()
event.TriggerValues = anomalyPoint.Values
event.TriggerValuesJson = models.EventTriggerValues{ValuesWithUnit: anomalyPoint.ValuesUnit}
event.TagsJSON = p.tagsArr
event.Tags = strings.Join(p.tagsArr, ",,")
event.IsRecovered = false
@@ -211,9 +221,12 @@ func (p *Processor) BuildEvent(anomalyPoint common.AnomalyPoint, from string, no
event.Severity = anomalyPoint.Severity
event.ExtraConfig = p.rule.ExtraConfigJSON
event.PromQl = anomalyPoint.Query
event.RecoverConfig = anomalyPoint.RecoverConfig
event.RuleHash = ruleHash
if p.target != "" {
if pt, exist := p.TargetCache.Get(p.target); exist {
pt.GroupNames = p.BusiGroupCache.GetNamesByBusiGroupIds(pt.GroupIds)
event.Target = pt
} else {
logger.Infof("Target[ident: %s] doesn't exist in cache.", p.target)
@@ -290,7 +303,7 @@ func (p *Processor) HandleRecover(alertingKeys map[string]struct{}, now int64, i
}
hashArr := make([]string, 0, len(alertingKeys))
for hash := range p.fires.GetAll() {
for hash, _ := range p.fires.GetAll() {
if _, has := alertingKeys[hash]; has {
continue
}
@@ -309,7 +322,7 @@ func (p *Processor) HandleRecoverEvent(hashArr []string, now int64, inhibit bool
if !inhibit {
for _, hash := range hashArr {
p.RecoverSingle(hash, now, nil)
p.RecoverSingle(false, hash, now, nil)
}
return
}
@@ -337,11 +350,11 @@ func (p *Processor) HandleRecoverEvent(hashArr []string, now int64, inhibit bool
}
for _, event := range eventMap {
p.RecoverSingle(event.Hash, now, nil)
p.RecoverSingle(false, event.Hash, now, nil)
}
}
func (p *Processor) RecoverSingle(hash string, now int64, value *string, values ...string) {
func (p *Processor) RecoverSingle(byRecover bool, hash string, now int64, value *string, values ...string) {
cachedRule := p.rule
if cachedRule == nil {
return
@@ -367,6 +380,12 @@ func (p *Processor) RecoverSingle(hash string, now int64, value *string, values
}
}
// 如果设置了恢复条件,则不能在此处恢复,必须依靠 recoverPoint 来恢复
if event.RecoverConfig.JudgeType != models.Origin && !byRecover {
logger.Debugf("rule_eval:%s event:%v not recover", p.Key(), event)
return
}
if value != nil {
event.TriggerValue = *value
if len(values) > 0 {
@@ -405,6 +424,7 @@ func (p *Processor) handleEvent(events []*models.AlertCurEvent) {
p.pendingsUseByRecover.Set(event.Hash, event)
}
event.PromEvalInterval = p.PromEvalInterval
if p.rule.PromForDuration == 0 {
fireEvents = append(fireEvents, event)
if severity > event.Severity {
@@ -499,7 +519,7 @@ func (p *Processor) pushEventToQueue(e *models.AlertCurEvent) {
dispatch.LogEvent(e, "push_queue")
if !queue.EventQueue.PushFront(e) {
logger.Warningf("event_push_queue: queue is full, event:%+v", e)
p.Stats.CounterRuleEvalErrorTotal.WithLabelValues(fmt.Sprintf("%v", p.DatasourceId()), "push_event_queue").Inc()
p.Stats.CounterRuleEvalErrorTotal.WithLabelValues(fmt.Sprintf("%v", p.DatasourceId()), "push_event_queue", p.BusiGroupCache.GetNameByBusiGroupId(p.rule.GroupId), fmt.Sprintf("%v", p.rule.Id)).Inc()
}
}
@@ -510,7 +530,7 @@ func (p *Processor) RecoverAlertCurEventFromDb() {
curEvents, err := models.AlertCurEventGetByRuleIdAndDsId(p.ctx, p.rule.Id, p.datasourceId)
if err != nil {
logger.Errorf("recover event from db for rule:%s failed, err:%s", p.Key(), err)
p.Stats.CounterRuleEvalErrorTotal.WithLabelValues(fmt.Sprintf("%v", p.DatasourceId()), "get_recover_event").Inc()
p.Stats.CounterRuleEvalErrorTotal.WithLabelValues(fmt.Sprintf("%v", p.DatasourceId()), "get_recover_event", p.BusiGroupCache.GetNameByBusiGroupId(p.rule.GroupId), fmt.Sprintf("%v", p.rule.Id)).Inc()
p.fires = NewAlertCurEventMap(nil)
return
}
@@ -529,6 +549,7 @@ func (p *Processor) RecoverAlertCurEventFromDb() {
event.DB2Mem()
target, exists := p.TargetCache.Get(event.TargetIdent)
if exists {
target.GroupNames = p.BusiGroupCache.GetNamesByBusiGroupIds(target.GroupIds)
event.Target = target
}
@@ -543,7 +564,7 @@ func (p *Processor) RecoverAlertCurEventFromDb() {
p.pendingsUseByRecover = NewAlertCurEventMap(pendingsUseByRecoverMap)
}
func (p *Processor) fillTags(anomalyPoint common.AnomalyPoint) {
func (p *Processor) fillTags(anomalyPoint models.AnomalyPoint) {
// handle series tags
tagsMap := make(map[string]string)
for label, value := range anomalyPoint.Labels {
@@ -633,10 +654,10 @@ func labelMapToArr(m map[string]string) []string {
return labelStrings
}
func Hash(ruleId, datasourceId int64, vector common.AnomalyPoint) string {
func Hash(ruleId, datasourceId int64, vector models.AnomalyPoint) string {
return str.MD5(fmt.Sprintf("%d_%s_%d_%d_%s", ruleId, vector.Labels.String(), datasourceId, vector.Severity, vector.Query))
}
func TagHash(vector common.AnomalyPoint) string {
func TagHash(vector models.AnomalyPoint) string {
return str.MD5(vector.Labels.String())
}

View File

@@ -26,9 +26,11 @@ type Scheduler struct {
writers *writer.WritersType
stats *astats.Stats
datasourceCache *memsto.DatasourceCacheType
}
func NewScheduler(aconf aconf.Alert, rrc *memsto.RecordingRuleCacheType, promClients *prom.PromClientMap, writers *writer.WritersType, stats *astats.Stats) *Scheduler {
func NewScheduler(aconf aconf.Alert, rrc *memsto.RecordingRuleCacheType, promClients *prom.PromClientMap, writers *writer.WritersType, stats *astats.Stats, datasourceCache *memsto.DatasourceCacheType) *Scheduler {
scheduler := &Scheduler{
aconf: aconf,
recordRules: make(map[string]*RecordRuleContext),
@@ -39,6 +41,8 @@ func NewScheduler(aconf aconf.Alert, rrc *memsto.RecordingRuleCacheType, promCli
writers: writers,
stats: stats,
datasourceCache: datasourceCache,
}
go scheduler.LoopSyncRules(context.Background())
@@ -67,7 +71,7 @@ func (s *Scheduler) syncRecordRules() {
continue
}
datasourceIds := s.promClients.Hit(rule.DatasourceIdsJson)
datasourceIds := s.datasourceCache.GetIDsByDsCateAndQueries("prometheus", rule.DatasourceQueries)
for _, dsId := range datasourceIds {
if !naming.DatasourceHashRing.IsHit(strconv.FormatInt(dsId, 10), fmt.Sprintf("%d", rule.Id), s.aconf.Heartbeat.Endpoint) {
continue

View File

@@ -6,7 +6,6 @@ import (
"strings"
"time"
"github.com/ccfos/nightingale/v6/alert/common"
"github.com/ccfos/nightingale/v6/alert/dispatch"
"github.com/ccfos/nightingale/v6/alert/mute"
"github.com/ccfos/nightingale/v6/alert/naming"
@@ -92,7 +91,7 @@ func (rt *Router) eventPersist(c *gin.Context) {
type eventForm struct {
Alert bool `json:"alert"`
AnomalyPoints []common.AnomalyPoint `json:"vectors"`
AnomalyPoints []models.AnomalyPoint `json:"vectors"`
RuleId int64 `json:"rule_id"`
DatasourceId int64 `json:"datasource_id"`
Inhibit bool `json:"inhibit"`
@@ -129,7 +128,7 @@ func (rt *Router) makeEvent(c *gin.Context) {
} else {
for _, vector := range events[i].AnomalyPoints {
readableString := vector.ReadableValue()
go ruleWorker.RecoverSingle(process.Hash(events[i].RuleId, events[i].DatasourceId, vector), vector.Timestamp, &readableString)
go ruleWorker.RecoverSingle(false, process.Hash(events[i].RuleId, events[i].DatasourceId, vector), vector.Timestamp, &readableString)
}
}
}

View File

@@ -129,43 +129,39 @@ func (c *DefaultCallBacker) CallBack(ctx CallBackContext) {
return
}
ctx.Stats.AlertNotifyTotal.WithLabelValues("rule_callback").Inc()
resp, code, err := poster.PostJSON(ctx.CallBackURL, 5*time.Second, event, 3)
if err != nil {
logger.Errorf("event_callback_fail(rule_id=%d url=%s), event:%+v, resp: %s, err: %v, code: %d",
event.RuleId, ctx.CallBackURL, event, string(resp), err, code)
ctx.Stats.AlertNotifyErrorTotal.WithLabelValues("rule_callback").Inc()
} else {
logger.Infof("event_callback_succ(rule_id=%d url=%s), event:%+v, resp: %s, code: %d",
event.RuleId, ctx.CallBackURL, event, string(resp), code)
}
doSendAndRecord(ctx.Ctx, ctx.CallBackURL, ctx.CallBackURL, event, "callback", ctx.Stats, ctx.Events)
}
func doSendAndRecord(ctx *ctx.Context, url, token string, body interface{}, channel string,
stats *astats.Stats, event *models.AlertCurEvent) {
stats *astats.Stats, events []*models.AlertCurEvent) {
res, err := doSend(url, body, channel, stats)
NotifyRecord(ctx, event, channel, token, res, err)
NotifyRecord(ctx, events, channel, token, res, err)
}
func NotifyRecord(ctx *ctx.Context, evt *models.AlertCurEvent, channel, target, res string, err error) {
noti := models.NewNotificationRecord(evt, channel, target)
if err != nil {
noti.SetStatus(models.NotiStatusFailure)
noti.SetDetails(err.Error())
} else if res != "" {
noti.SetDetails(string(res))
func NotifyRecord(ctx *ctx.Context, evts []*models.AlertCurEvent, channel, target, res string, err error) {
// 一个通知可能对应多个 event都需要记录
notis := make([]*models.NotificaitonRecord, 0, len(evts))
for _, evt := range evts {
noti := models.NewNotificationRecord(evt, channel, target)
if err != nil {
noti.SetStatus(models.NotiStatusFailure)
noti.SetDetails(err.Error())
} else if res != "" {
noti.SetDetails(string(res))
}
notis = append(notis, noti)
}
if !ctx.IsCenter {
_, err := poster.PostByUrlsWithResp[int64](ctx, "/v1/n9e/notify-record", noti)
_, err := poster.PostByUrlsWithResp[[]int64](ctx, "/v1/n9e/notify-record", notis)
if err != nil {
logger.Errorf("add noti:%v failed, err: %v", noti, err)
logger.Errorf("add notis:%v failed, err: %v", notis, err)
}
return
}
if err := noti.Add(ctx); err != nil {
logger.Errorf("add noti:%v failed, err: %v", noti, err)
if err := models.DB(ctx).CreateInBatches(notis, 100).Error; err != nil {
logger.Errorf("add notis:%v failed, err: %v", notis, err)
}
}
@@ -195,8 +191,8 @@ func PushCallbackEvent(ctx *ctx.Context, webhook *models.Webhook, event *models.
if queue == nil {
queue = &WebhookQueue{
list: NewSafeListLimited(QueueMaxSize),
closeCh: make(chan struct{}),
eventQueue: NewSafeEventQueue(QueueMaxSize),
closeCh: make(chan struct{}),
}
CallbackEventQueueLock.Lock()
@@ -206,8 +202,8 @@ func PushCallbackEvent(ctx *ctx.Context, webhook *models.Webhook, event *models.
StartConsumer(ctx, queue, webhook.Batch, webhook, stats)
}
succ := queue.list.PushFront(event)
succ := queue.eventQueue.Push(event)
if !succ {
logger.Warningf("Write channel(%s) full, current channel size: %d event:%v", webhook.Url, queue.list.Len(), event)
logger.Warningf("Write channel(%s) full, current channel size: %d event:%v", webhook.Url, queue.eventQueue.Len(), event)
}
}

View File

@@ -67,7 +67,7 @@ func (ds *DingtalkSender) Send(ctx MessageContext) {
}
}
doSendAndRecord(ctx.Ctx, url, tokens[i], body, models.Dingtalk, ctx.Stats, ctx.Events[0])
doSendAndRecord(ctx.Ctx, url, tokens[i], body, models.Dingtalk, ctx.Stats, ctx.Events)
}
}
@@ -97,10 +97,7 @@ func (ds *DingtalkSender) CallBack(ctx CallBackContext) {
body.Markdown.Text = message
}
doSendAndRecord(ctx.Ctx, ctx.CallBackURL, ctx.CallBackURL, body,
"callback", ctx.Stats, ctx.Events[0])
ctx.Stats.AlertNotifyTotal.WithLabelValues("rule_callback").Inc()
doSendAndRecord(ctx.Ctx, ctx.CallBackURL, ctx.CallBackURL, body, "callback", ctx.Stats, ctx.Events)
}
// extract urls and ats from Users

View File

@@ -25,8 +25,8 @@ type EmailSender struct {
}
type EmailContext struct {
event *models.AlertCurEvent
mail *gomail.Message
events []*models.AlertCurEvent
mail *gomail.Message
}
func (es *EmailSender) Send(ctx MessageContext) {
@@ -42,7 +42,7 @@ func (es *EmailSender) Send(ctx MessageContext) {
subject = ctx.Events[0].RuleName
}
content := BuildTplMessage(models.Email, es.contentTpl, ctx.Events)
es.WriteEmail(subject, content, tos, ctx.Events[0])
es.WriteEmail(subject, content, tos, ctx.Events)
ctx.Stats.AlertNotifyTotal.WithLabelValues(models.Email).Add(float64(len(tos)))
}
@@ -79,8 +79,7 @@ func SendEmail(subject, content string, tos []string, stmp aconf.SMTPConfig) err
return nil
}
func (es *EmailSender) WriteEmail(subject, content string, tos []string,
event *models.AlertCurEvent) {
func (es *EmailSender) WriteEmail(subject, content string, tos []string, events []*models.AlertCurEvent) {
m := gomail.NewMessage()
m.SetHeader("From", es.smtp.From)
@@ -88,7 +87,7 @@ func (es *EmailSender) WriteEmail(subject, content string, tos []string,
m.SetHeader("Subject", subject)
m.SetBody("text/html", content)
mailch <- &EmailContext{event, m}
mailch <- &EmailContext{events, m}
}
func dialSmtp(d *gomail.Dialer) gomail.SendCloser {
@@ -123,7 +122,7 @@ func InitEmailSender(ctx *ctx.Context, ncc *memsto.NotifyConfigCacheType) {
mailch = make(chan *EmailContext, 100000)
go updateSmtp(ctx, ncc)
smtpConfig = ncc.GetSMTP()
startEmailSender(ctx, smtpConfig)
go startEmailSender(ctx, smtpConfig)
}
func updateSmtp(ctx *ctx.Context, ncc *memsto.NotifyConfigCacheType) {
@@ -143,6 +142,7 @@ func startEmailSender(ctx *ctx.Context, smtp aconf.SMTPConfig) {
conf := smtp
if conf.Host == "" || conf.Port == 0 {
logger.Warning("SMTP configurations invalid")
<-mailQuit
return
}
logger.Infof("start email sender... conf.Host:%+v,conf.Port:%+v", conf.Host, conf.Port)
@@ -201,7 +201,11 @@ func startEmailSender(ctx *ctx.Context, smtp aconf.SMTPConfig) {
}
for _, to := range m.mail.GetHeader("To") {
NotifyRecord(ctx, m.event, models.Email, to, "", err)
msg := ""
if err == nil {
msg = "ok"
}
NotifyRecord(ctx, m.events, models.Email, to, msg, err)
}
size++

View File

@@ -54,9 +54,7 @@ func (fs *FeishuSender) CallBack(ctx CallBackContext) {
},
}
doSendAndRecord(ctx.Ctx, ctx.CallBackURL, ctx.CallBackURL, body, "callback",
ctx.Stats, ctx.Events[0])
ctx.Stats.AlertNotifyTotal.WithLabelValues("rule_callback").Inc()
doSendAndRecord(ctx.Ctx, ctx.CallBackURL, ctx.CallBackURL, body, "callback", ctx.Stats, ctx.Events)
}
func (fs *FeishuSender) Send(ctx MessageContext) {
@@ -78,7 +76,7 @@ func (fs *FeishuSender) Send(ctx MessageContext) {
IsAtAll: false,
}
}
doSendAndRecord(ctx.Ctx, url, tokens[i], body, models.Feishu, ctx.Stats, ctx.Events[0])
doSendAndRecord(ctx.Ctx, url, tokens[i], body, models.Feishu, ctx.Stats, ctx.Events)
}
}

View File

@@ -135,8 +135,7 @@ func (fs *FeishuCardSender) CallBack(ctx CallBackContext) {
}
parsedURL.RawQuery = ""
doSendAndRecord(ctx.Ctx, parsedURL.String(), parsedURL.String(), body, "callback",
ctx.Stats, ctx.Events[0])
doSendAndRecord(ctx.Ctx, parsedURL.String(), parsedURL.String(), body, "callback", ctx.Stats, ctx.Events)
}
func (fs *FeishuCardSender) Send(ctx MessageContext) {
@@ -160,8 +159,7 @@ func (fs *FeishuCardSender) Send(ctx MessageContext) {
body.Card.Elements[0].Text.Content = message
body.Card.Elements[2].Elements[0].Content = SendTitle
for i, url := range urls {
doSendAndRecord(ctx.Ctx, url, tokens[i], body, models.FeishuCard,
ctx.Stats, ctx.Events[0])
doSendAndRecord(ctx.Ctx, url, tokens[i], body, models.FeishuCard, ctx.Stats, ctx.Events)
}
}

View File

@@ -118,6 +118,7 @@ func CallIbex(ctx *ctx.Context, id int64, host string,
// 附加告警级别 告警触发值标签
tagsMap["alert_severity"] = strconv.Itoa(event.Severity)
tagsMap["alert_trigger_value"] = event.TriggerValue
tagsMap["is_recovered"] = strconv.FormatBool(event.IsRecovered)
tags, err := json.Marshal(tagsMap)
if err != nil {
@@ -185,6 +186,11 @@ func canDoIbex(username string, tpl *models.TaskTpl, host string, targetCache *m
}
func TaskAdd(f models.TaskForm, authUser string, isCenter bool) (int64, error) {
if storage.Cache == nil {
logger.Warning("event_callback_ibex: redis cache is nil")
return 0, fmt.Errorf("redis cache is nil")
}
hosts := cleanHosts(f.Hosts)
if len(hosts) == 0 {
return 0, fmt.Errorf("arg(hosts) empty")

View File

@@ -27,30 +27,29 @@ func (lk *LarkSender) CallBack(ctx CallBackContext) {
},
}
doSendAndRecord(ctx.Ctx, ctx.CallBackURL, ctx.CallBackURL, body, "callback",
ctx.Stats, ctx.Events[0])
ctx.Stats.AlertNotifyTotal.WithLabelValues("rule_callback").Inc()
doSendAndRecord(ctx.Ctx, ctx.CallBackURL, ctx.CallBackURL, body, "callback", ctx.Stats, ctx.Events)
}
func (lk *LarkSender) Send(ctx MessageContext) {
if len(ctx.Users) == 0 || len(ctx.Events) == 0 {
return
}
urls := lk.extract(ctx.Users)
urls, tokens := lk.extract(ctx.Users)
message := BuildTplMessage(models.Lark, lk.tpl, ctx.Events)
for _, url := range urls {
for i, url := range urls {
body := feishu{
Msgtype: "text",
Content: feishuContent{
Text: message,
},
}
doSend(url, body, models.Lark, ctx.Stats)
doSendAndRecord(ctx.Ctx, url, tokens[i], body, models.Lark, ctx.Stats, ctx.Events)
}
}
func (lk *LarkSender) extract(users []*models.User) []string {
func (lk *LarkSender) extract(users []*models.User) ([]string, []string) {
urls := make([]string, 0, len(users))
tokens := make([]string, 0, len(users))
for _, user := range users {
if token, has := user.ExtractToken(models.Lark); has {
@@ -59,7 +58,8 @@ func (lk *LarkSender) extract(users []*models.User) []string {
url = "https://open.larksuite.com/open-apis/bot/v2/hook/" + token
}
urls = append(urls, url)
tokens = append(tokens, token)
}
}
return urls
return urls, tokens
}

View File

@@ -56,15 +56,14 @@ func (fs *LarkCardSender) CallBack(ctx CallBackContext) {
}
parsedURL.RawQuery = ""
doSendAndRecord(ctx.Ctx, ctx.CallBackURL, ctx.CallBackURL, body, "callback",
ctx.Stats, ctx.Events[0])
doSendAndRecord(ctx.Ctx, ctx.CallBackURL, ctx.CallBackURL, body, "callback", ctx.Stats, ctx.Events)
}
func (fs *LarkCardSender) Send(ctx MessageContext) {
if len(ctx.Users) == 0 || len(ctx.Events) == 0 {
return
}
urls, _ := fs.extract(ctx.Users)
urls, tokens := fs.extract(ctx.Users)
message := BuildTplMessage(models.LarkCard, fs.tpl, ctx.Events)
color := "red"
lowerUnicode := strings.ToLower(message)
@@ -80,14 +79,14 @@ func (fs *LarkCardSender) Send(ctx MessageContext) {
body.Card.Header.Template = color
body.Card.Elements[0].Text.Content = message
body.Card.Elements[2].Elements[0].Content = SendTitle
for _, url := range urls {
doSend(url, body, models.LarkCard, ctx.Stats)
for i, url := range urls {
doSendAndRecord(ctx.Ctx, url, tokens[i], body, models.LarkCard, ctx.Stats, ctx.Events)
}
}
func (fs *LarkCardSender) extract(users []*models.User) ([]string, []string) {
urls := make([]string, 0, len(users))
ats := make([]string, 0)
tokens := make([]string, 0)
for i := range users {
if token, has := users[i].ExtractToken(models.Lark); has {
url := token
@@ -95,7 +94,8 @@ func (fs *LarkCardSender) extract(users []*models.User) ([]string, []string) {
url = "https://open.larksuite.com/open-apis/bot/v2/hook/" + strings.TrimSpace(token)
}
urls = append(urls, url)
tokens = append(tokens, token)
}
}
return urls, ats
return urls, tokens
}

View File

@@ -43,7 +43,7 @@ func (ms *MmSender) Send(ctx MessageContext) {
Text: message,
Tokens: urls,
Stats: ctx.Stats,
}, ctx.Events[0])
}, ctx.Events, models.Mm)
}
func (ms *MmSender) CallBack(ctx CallBackContext) {
@@ -56,9 +56,7 @@ func (ms *MmSender) CallBack(ctx CallBackContext) {
Text: message,
Tokens: []string{ctx.CallBackURL},
Stats: ctx.Stats,
}, ctx.Events[0])
ctx.Stats.AlertNotifyTotal.WithLabelValues("rule_callback").Inc()
}, ctx.Events, "callback")
}
func (ms *MmSender) extract(users []*models.User) []string {
@@ -71,11 +69,12 @@ func (ms *MmSender) extract(users []*models.User) []string {
return tokens
}
func SendMM(ctx *ctx.Context, message MatterMostMessage, event *models.AlertCurEvent) {
func SendMM(ctx *ctx.Context, message MatterMostMessage, events []*models.AlertCurEvent, channel string) {
for i := 0; i < len(message.Tokens); i++ {
u, err := url.Parse(message.Tokens[i])
if err != nil {
logger.Errorf("mm_sender: failed to parse error=%v", err)
NotifyRecord(ctx, events, channel, message.Tokens[i], "", err)
continue
}
@@ -104,7 +103,7 @@ func SendMM(ctx *ctx.Context, message MatterMostMessage, event *models.AlertCurE
Username: username,
Text: txt + message.Text,
}
doSendAndRecord(ctx, ur, message.Tokens[i], body, models.Mm, message.Stats, event)
doSendAndRecord(ctx, ur, message.Tokens[i], body, channel, message.Stats, events)
}
}
}

View File

@@ -85,7 +85,7 @@ func alertingCallScript(ctx *ctx.Context, stdinBytes []byte, notifyScript models
}
err, isTimeout := sys.WrapTimeout(cmd, time.Duration(config.Timeout)*time.Second)
NotifyRecord(ctx, event, channel, cmd.String(), "", buildErr(err, isTimeout))
NotifyRecord(ctx, []*models.AlertCurEvent{event}, channel, cmd.String(), "", buildErr(err, isTimeout))
if isTimeout {
if err == nil {

View File

@@ -1,6 +1,7 @@
package sender
import (
"errors"
"html/template"
"strings"
@@ -40,9 +41,7 @@ func (ts *TelegramSender) CallBack(ctx CallBackContext) {
Text: message,
Tokens: []string{ctx.CallBackURL},
Stats: ctx.Stats,
}, ctx.Events[0])
ctx.Stats.AlertNotifyTotal.WithLabelValues("rule_callback").Inc()
}, ctx.Events, "callback")
}
func (ts *TelegramSender) Send(ctx MessageContext) {
@@ -56,7 +55,7 @@ func (ts *TelegramSender) Send(ctx MessageContext) {
Text: message,
Tokens: tokens,
Stats: ctx.Stats,
}, ctx.Events[0])
}, ctx.Events, models.Telegram)
}
func (ts *TelegramSender) extract(users []*models.User) []string {
@@ -69,10 +68,11 @@ func (ts *TelegramSender) extract(users []*models.User) []string {
return tokens
}
func SendTelegram(ctx *ctx.Context, message TelegramMessage, event *models.AlertCurEvent) {
func SendTelegram(ctx *ctx.Context, message TelegramMessage, events []*models.AlertCurEvent, channel string) {
for i := 0; i < len(message.Tokens); i++ {
if !strings.Contains(message.Tokens[i], "/") && !strings.HasPrefix(message.Tokens[i], "https://") {
logger.Errorf("telegram_sender: result=fail invalid token=%s", message.Tokens[i])
NotifyRecord(ctx, events, channel, message.Tokens[i], "", errors.New("invalid token"))
continue
}
var url string
@@ -93,6 +93,6 @@ func SendTelegram(ctx *ctx.Context, message TelegramMessage, event *models.Alert
Text: message.Text,
}
doSendAndRecord(ctx, url, message.Tokens[i], body, models.Telegram, message.Stats, event)
doSendAndRecord(ctx, url, message.Tokens[i], body, channel, message.Stats, events)
}
}

View File

@@ -59,17 +59,21 @@ func sendWebhook(webhook *models.Webhook, event interface{}, stats *astats.Stats
if webhook != nil {
insecureSkipVerify = webhook.SkipVerify
}
client := http.Client{
Timeout: time.Duration(conf.Timeout) * time.Second,
Transport: &http.Transport{
TLSClientConfig: &tls.Config{InsecureSkipVerify: insecureSkipVerify},
},
if conf.Client == nil {
logger.Warningf("event_%s, event:%s, url: [%s], error: [%s]", channel, string(bs), conf.Url, "client is nil")
conf.Client = &http.Client{
Timeout: time.Duration(conf.Timeout) * time.Second,
Transport: &http.Transport{
TLSClientConfig: &tls.Config{InsecureSkipVerify: insecureSkipVerify},
},
}
}
stats.AlertNotifyTotal.WithLabelValues(channel).Inc()
var resp *http.Response
var body []byte
resp, err = client.Do(req)
resp, err = conf.Client.Do(req)
if err != nil {
stats.AlertNotifyErrorTotal.WithLabelValues(channel).Inc()
@@ -91,12 +95,12 @@ func sendWebhook(webhook *models.Webhook, event interface{}, stats *astats.Stats
return false, string(body), nil
}
func SingleSendWebhooks(ctx *ctx.Context, webhooks []*models.Webhook, event *models.AlertCurEvent, stats *astats.Stats) {
func SingleSendWebhooks(ctx *ctx.Context, webhooks map[string]*models.Webhook, event *models.AlertCurEvent, stats *astats.Stats) {
for _, conf := range webhooks {
retryCount := 0
for retryCount < 3 {
needRetry, res, err := sendWebhook(conf, event, stats)
NotifyRecord(ctx, event, "webhook", conf.Url, res, err)
NotifyRecord(ctx, []*models.AlertCurEvent{event}, "webhook", conf.Url, res, err)
if !needRetry {
break
}
@@ -106,7 +110,7 @@ func SingleSendWebhooks(ctx *ctx.Context, webhooks []*models.Webhook, event *mod
}
}
func BatchSendWebhooks(ctx *ctx.Context, webhooks []*models.Webhook, event *models.AlertCurEvent, stats *astats.Stats) {
func BatchSendWebhooks(ctx *ctx.Context, webhooks map[string]*models.Webhook, event *models.AlertCurEvent, stats *astats.Stats) {
for _, conf := range webhooks {
logger.Infof("push event:%+v to queue:%v", event, conf)
PushEvent(ctx, conf, event, stats)
@@ -121,8 +125,8 @@ var EventQueueLock sync.RWMutex
const QueueMaxSize = 100000
type WebhookQueue struct {
list *SafeListLimited
closeCh chan struct{}
eventQueue *SafeEventQueue
closeCh chan struct{}
}
func PushEvent(ctx *ctx.Context, webhook *models.Webhook, event *models.AlertCurEvent, stats *astats.Stats) {
@@ -132,8 +136,8 @@ func PushEvent(ctx *ctx.Context, webhook *models.Webhook, event *models.AlertCur
if queue == nil {
queue = &WebhookQueue{
list: NewSafeListLimited(QueueMaxSize),
closeCh: make(chan struct{}),
eventQueue: NewSafeEventQueue(QueueMaxSize),
closeCh: make(chan struct{}),
}
EventQueueLock.Lock()
@@ -143,10 +147,10 @@ func PushEvent(ctx *ctx.Context, webhook *models.Webhook, event *models.AlertCur
StartConsumer(ctx, queue, webhook.Batch, webhook, stats)
}
succ := queue.list.PushFront(event)
succ := queue.eventQueue.Push(event)
if !succ {
stats.AlertNotifyErrorTotal.WithLabelValues("push_event_queue").Inc()
logger.Warningf("Write channel(%s) full, current channel size: %d event:%v", webhook.Url, queue.list.Len(), event)
logger.Warningf("Write channel(%s) full, current channel size: %d event:%v", webhook.Url, queue.eventQueue.Len(), event)
}
}
@@ -157,7 +161,7 @@ func StartConsumer(ctx *ctx.Context, queue *WebhookQueue, popSize int, webhook *
logger.Infof("event queue:%v closed", queue)
return
default:
events := queue.list.PopBack(popSize)
events := queue.eventQueue.PopN(popSize)
if len(events) == 0 {
time.Sleep(time.Millisecond * 400)
continue
@@ -166,7 +170,7 @@ func StartConsumer(ctx *ctx.Context, queue *WebhookQueue, popSize int, webhook *
retryCount := 0
for retryCount < webhook.RetryCount {
needRetry, res, err := sendWebhook(webhook, events, stats)
go RecordEvents(ctx, webhook, events, stats, res, err)
go NotifyRecord(ctx, events, "webhook", webhook.Url, res, err)
if !needRetry {
break
}
@@ -176,10 +180,3 @@ func StartConsumer(ctx *ctx.Context, queue *WebhookQueue, popSize int, webhook *
}
}
}
func RecordEvents(ctx *ctx.Context, webhook *models.Webhook, events []*models.AlertCurEvent, stats *astats.Stats, res string, err error) {
for _, event := range events {
time.Sleep(time.Millisecond * 10)
NotifyRecord(ctx, event, "webhook", webhook.Url, res, err)
}
}

View File

@@ -0,0 +1,109 @@
package sender
import (
"container/list"
"sync"
"github.com/ccfos/nightingale/v6/models"
)
// SafeEventQueue is a lock-guarded queue of alert events split into three
// lists, one per severity level. Push files an event into the list matching
// its Severity; pop drains queueHigh first, then queueMiddle, then queueLow.
type SafeEventQueue struct {
// lock guards every field below; the exported methods take it, the
// unexported len/pop helpers expect the caller to already hold it.
lock sync.RWMutex
// maxSize is the size limit Push compares the combined length against.
maxSize int
// queueHigh holds events pushed with Severity == High.
queueHigh *list.List
// queueMiddle holds events pushed with Severity == Middle.
queueMiddle *list.List
// queueLow holds events pushed with Severity == Low.
queueLow *list.List
}
// Severity levels understood by SafeEventQueue. Push selects the target list
// by comparing an event's Severity with these values, and pop serves High
// before Middle before Low.
const (
	High = iota + 1
	Middle
	Low
)
// NewSafeEventQueue returns an empty queue whose three severity lists are
// initialised and whose size limit is maxSize.
func NewSafeEventQueue(maxSize int) *SafeEventQueue {
	// The zero value of the embedded RWMutex is an unlocked mutex, so only
	// the limit and the lists need explicit initialisation.
	q := new(SafeEventQueue)
	q.maxSize = maxSize
	q.queueHigh = list.New()
	q.queueMiddle = list.New()
	q.queueLow = list.New()
	return q
}
// Len returns the number of events currently held across all three severity
// lists. It takes the read lock, so it is safe to call concurrently with the
// other exported methods.
func (spq *SafeEventQueue) Len() int {
	spq.lock.RLock()
	defer spq.lock.RUnlock()

	total := spq.queueHigh.Len()
	total += spq.queueMiddle.Len()
	total += spq.queueLow.Len()
	return total
}
// len returns the combined length of the three lists without taking the lock.
// The caller must already hold spq.lock; do not call it outside this file.
func (spq *SafeEventQueue) len() int {
	total := spq.queueHigh.Len()
	total += spq.queueMiddle.Len()
	total += spq.queueLow.Len()
	return total
}
// Push appends event to the list that matches its Severity (High, Middle or
// Low) and reports whether the event was accepted.
//
// It returns false and leaves the queue untouched when event is nil, when the
// queue already holds maxSize events, or when Severity is none of the three
// known levels.
func (spq *SafeEventQueue) Push(event *models.AlertCurEvent) bool {
	if event == nil {
		return false
	}

	spq.lock.Lock()
	defer spq.lock.Unlock()

	// Reject at the limit rather than one past it, so that Len() can never
	// exceed maxSize.
	if spq.len() >= spq.maxSize {
		return false
	}

	switch event.Severity {
	case High:
		spq.queueHigh.PushBack(event)
	case Middle:
		spq.queueMiddle.PushBack(event)
	case Low:
		spq.queueLow.PushBack(event)
	default:
		// Unknown severity: there is no list to file the event under.
		return false
	}
	return true
}
// pop removes and returns the oldest event of the highest non-empty severity
// list, or nil when every list is empty. It does not take the lock: the caller
// must already hold spq.lock; do not call it outside this file.
func (spq *SafeEventQueue) pop() *models.AlertCurEvent {
	var src *list.List
	switch {
	case spq.queueHigh.Len() > 0:
		src = spq.queueHigh
	case spq.queueMiddle.Len() > 0:
		src = spq.queueMiddle
	case spq.queueLow.Len() > 0:
		src = spq.queueLow
	default:
		return nil
	}

	// A failed assertion leaves event as the nil pointer, which is what
	// callers get for a value of an unexpected type.
	event, _ := src.Remove(src.Front()).(*models.AlertCurEvent)
	return event
}
// Pop removes and returns the next event in priority order while holding the
// write lock. It returns nil when the queue is empty.
func (spq *SafeEventQueue) Pop() *models.AlertCurEvent {
	spq.lock.Lock()
	defer spq.lock.Unlock()

	next := spq.pop()
	return next
}
// PopN removes and returns up to n events in priority order under a single
// lock acquisition. The result is never nil; it is empty when n <= 0 or the
// queue holds no events.
func (spq *SafeEventQueue) PopN(n int) []*models.AlertCurEvent {
	spq.lock.Lock()
	defer spq.lock.Unlock()

	// Only as many events as are queued can be popped, so size the result by
	// min(n, len). This also keeps a negative n away from make, which would
	// panic, and avoids reserving a huge slice for a large batch size.
	want := spq.len()
	if n < want {
		want = n
	}
	if want <= 0 {
		return []*models.AlertCurEvent{}
	}

	events := make([]*models.AlertCurEvent, 0, want)
	for i := 0; i < want; i++ {
		if event := spq.pop(); event != nil {
			events = append(events, event)
		}
	}
	return events
}

View File

@@ -0,0 +1,157 @@
package sender
import (
"sync"
"testing"
"time"
"github.com/ccfos/nightingale/v6/models"
"github.com/stretchr/testify/assert"
)
// TestSafePriorityQueue_ConcurrentPushPop fills the queue from many goroutines,
// checks the resulting length, then drains it from many goroutines and checks
// that nothing is left.
func TestSafePriorityQueue_ConcurrentPushPop(t *testing.T) {
	const (
		workers   = 100
		perWorker = 1000
	)
	spq := NewSafeEventQueue(100000)
	var wg sync.WaitGroup

	// Concurrent Push: each worker pushes perWorker events of one severity.
	for w := 0; w < workers; w++ {
		wg.Add(1)
		go func(goroutineID int) {
			defer wg.Done()
			for pushed := 0; pushed < perWorker; pushed++ {
				spq.Push(&models.AlertCurEvent{
					Severity:    goroutineID%3 + 1,
					TriggerTime: time.Now().UnixNano(),
				})
			}
		}(w)
	}
	wg.Wait()

	// Every push must have landed in the queue.
	assert.Equal(t, workers*perWorker, spq.Len(), "Queue length mismatch after concurrent pushes")

	// Concurrent Pop: each worker pops until the queue reports empty.
	for w := 0; w < workers; w++ {
		wg.Add(1)
		go func() {
			defer wg.Done()
			for spq.Pop() != nil {
			}
		}()
	}
	wg.Wait()

	// The queue must end up empty.
	assert.Equal(t, 0, spq.Len(), "Queue should be empty after concurrent pops")
}
// TestSafePriorityQueue_ConcurrentPopMax seeds the queue, runs PopN from
// several goroutines at once, and checks that no batch exceeds its limit and
// that the remaining length matches what was taken.
func TestSafePriorityQueue_ConcurrentPopMax(t *testing.T) {
	const seeded = 1000
	spq := NewSafeEventQueue(100000)

	// Seed the queue with events cycling through the three severities.
	for idx := 0; idx < seeded; idx++ {
		event := &models.AlertCurEvent{
			Severity:    idx%3 + 1,
			TriggerTime: time.Now().UnixNano(),
		}
		spq.Push(event)
	}

	numGoroutines, popMax := 10, 100
	var wg sync.WaitGroup

	// Concurrent PopN.
	for g := 0; g < numGoroutines; g++ {
		wg.Add(1)
		go func() {
			defer wg.Done()
			batch := spq.PopN(popMax)
			assert.LessOrEqual(t, len(batch), popMax, "PopN exceeded maximum")
		}()
	}
	wg.Wait()

	// What is left is the seed count minus everything the workers could take,
	// floored at zero.
	expectedRemaining := seeded - numGoroutines*popMax
	if expectedRemaining < 0 {
		expectedRemaining = 0
	}
	assert.Equal(t, expectedRemaining, spq.Len(), "Queue length mismatch after concurrent PopN")
}
// TestSafePriorityQueue_ConcurrentPushPopWithDifferentSeverities pushes events
// of mixed severities concurrently and then verifies that a sequential drain
// yields them in non-decreasing Severity order.
func TestSafePriorityQueue_ConcurrentPushPopWithDifferentSeverities(t *testing.T) {
	const (
		workers   = 50
		perWorker = 500
	)
	spq := NewSafeEventQueue(100000)
	var wg sync.WaitGroup

	// Concurrent Push of events with different priorities.
	for w := 0; w < workers; w++ {
		wg.Add(1)
		go func(goroutineID int) {
			defer wg.Done()
			for pushed := 0; pushed < perWorker; pushed++ {
				spq.Push(&models.AlertCurEvent{
					Severity:    goroutineID%3 + 1, // one severity per worker
					TriggerTime: time.Now().UnixNano(),
				})
			}
		}(w)
	}
	wg.Wait()

	// Every push must have landed in the queue.
	assert.Equal(t, workers*perWorker, spq.Len(), "Queue length mismatch after concurrent pushes")

	// Drain sequentially and compare each event with the one before it.
	var prev *models.AlertCurEvent
	for remaining := spq.Len(); remaining > 0; remaining = spq.Len() {
		cur := spq.Pop()
		if prev != nil {
			assert.LessOrEqual(t, prev.Severity, cur.Severity, "Events are not in correct priority order")
		}
		prev = cur
	}
}
// TestSafePriorityQueue_ExceedMaxSize pushes more valid events than the queue
// may hold and verifies that the length stays within maxSize, that the length
// equals the number of pushes reported as accepted, and that the retained
// events come back in priority order.
func TestSafePriorityQueue_ExceedMaxSize(t *testing.T) {
	spq := NewSafeEventQueue(5)

	// Push more events than the capacity. Severities cycle through the three
	// valid levels (1..3) so that rejections are caused by the size limit and
	// not by an unknown severity.
	accepted := 0
	for i := 0; i < 10; i++ {
		ok := spq.Push(&models.AlertCurEvent{
			Severity:    i%3 + 1,
			TriggerTime: int64(i),
		})
		if ok {
			accepted++
		}
	}

	// The queue must never grow beyond maxSize.
	assert.LessOrEqual(t, spq.Len(), spq.maxSize)
	// Every accepted push, and only those, must be in the queue.
	assert.Equal(t, accepted, spq.Len())

	// The retained events must be served in non-decreasing Severity order.
	var prev *models.AlertCurEvent
	for remaining := spq.Len(); remaining > 0; remaining-- {
		event := spq.Pop()
		if event == nil {
			t.Fatalf("Pop returned nil with %d events still expected", remaining)
		}
		if prev != nil {
			assert.LessOrEqual(t, prev.Severity, event.Severity)
		}
		prev = event
	}

	// Nothing may be left once every retained event has been popped.
	assert.Equal(t, 0, spq.Len())
}

View File

@@ -37,9 +37,7 @@ func (ws *WecomSender) CallBack(ctx CallBackContext) {
},
}
doSendAndRecord(ctx.Ctx, ctx.CallBackURL, ctx.CallBackURL, body, "callback",
ctx.Stats, ctx.Events[0])
ctx.Stats.AlertNotifyTotal.WithLabelValues("rule_callback").Inc()
doSendAndRecord(ctx.Ctx, ctx.CallBackURL, ctx.CallBackURL, body, "callback", ctx.Stats, ctx.Events)
}
func (ws *WecomSender) Send(ctx MessageContext) {
@@ -55,7 +53,7 @@ func (ws *WecomSender) Send(ctx MessageContext) {
Content: message,
},
}
doSendAndRecord(ctx.Ctx, url, tokens[i], body, models.Wecom, ctx.Stats, ctx.Events[0])
doSendAndRecord(ctx.Ctx, url, tokens[i], body, models.Wecom, ctx.Stats, ctx.Events)
}
}

View File

@@ -6,6 +6,7 @@ import (
"github.com/ccfos/nightingale/v6/alert"
"github.com/ccfos/nightingale/v6/alert/astats"
"github.com/ccfos/nightingale/v6/alert/dispatch"
"github.com/ccfos/nightingale/v6/alert/process"
alertrt "github.com/ccfos/nightingale/v6/alert/router"
"github.com/ccfos/nightingale/v6/center/cconf"
@@ -48,6 +49,10 @@ func Initialize(configDir string, cryptoKey string) (func(), error) {
cconf.MergeOperationConf()
if config.Alert.Heartbeat.EngineName == "" {
config.Alert.Heartbeat.EngineName = "default"
}
logxClean, err := logx.Init(config.Log)
if err != nil {
return nil, err
@@ -63,7 +68,7 @@ func Initialize(configDir string, cryptoKey string) (func(), error) {
}
ctx := ctx.NewContext(context.Background(), db, true)
migrate.Migrate(db)
models.InitRoot(ctx)
isRootInit := models.InitRoot(ctx)
config.HTTP.JWTAuth.SigningKey = models.InitJWTSigningKey(ctx)
@@ -95,9 +100,13 @@ func Initialize(configDir string, cryptoKey string) (func(), error) {
userCache := memsto.NewUserCache(ctx, syncStats)
userGroupCache := memsto.NewUserGroupCache(ctx, syncStats)
taskTplCache := memsto.NewTaskTplCache(ctx)
configCvalCache := memsto.NewCvalCache(ctx, syncStats)
sso := sso.Init(config.Center, ctx, configCache)
promClients := prom.NewPromClient(ctx)
dispatch.InitRegisterQueryFunc(promClients)
tdengineClients := tdengine.NewTdengineClient(ctx, config.Alert.Heartbeat)
externalProcessors := process.NewExternalProcessors()
@@ -115,9 +124,13 @@ func Initialize(configDir string, cryptoKey string) (func(), error) {
redis, sso, ctx, metas, idents, targetCache, userCache, userGroupCache)
pushgwRouter := pushgwrt.New(config.HTTP, config.Pushgw, config.Alert, targetCache, busiGroupCache, idents, metas, writers, ctx)
go models.MigrateBg(ctx, pushgwRouter.Pushgw.BusiGroupLabelKey)
go func() {
if config.Center.MigrateBusiGroupLabel || models.CanMigrateBg(ctx) {
models.MigrateBg(ctx, pushgwRouter.Pushgw.BusiGroupLabelKey)
}
}()
r := httpx.GinEngine(config.Global.RunMode, config.HTTP)
r := httpx.GinEngine(config.Global.RunMode, config.HTTP, configCvalCache.PrintBodyPaths, configCvalCache.PrintAccessLog)
centerRouter.Config(r)
alertrtRouter.Config(r)
@@ -131,6 +144,11 @@ func Initialize(configDir string, cryptoKey string) (func(), error) {
httpClean := httpx.Init(config.HTTP, r)
fmt.Printf("please view n9e at http://%v:%v\n", config.Alert.Heartbeat.IP, config.HTTP.Port)
if isRootInit {
fmt.Println("username/password: root/root.2020")
}
return func() {
logxClean()
httpClean()

View File

@@ -113,6 +113,12 @@ func Init(ctx *ctx.Context, builtinIntegrationsDir string) {
logger.Warning("delete builtin metrics fail ", err)
}
// 删除 uuid%1000 不为 0 uuid > 1000000000000000000 且 type 为 dashboard 的记录
err = models.DB(ctx).Exec("delete from builtin_payloads where uuid%1000 != 0 and uuid > 1000000000000000000 and type = 'dashboard' and updated_by = 'system'").Error
if err != nil {
logger.Warning("delete builtin payloads fail ", err)
}
// alerts
files, err = file.FilesUnder(componentDir + "/alerts")
if err == nil && len(files) > 0 {
@@ -218,7 +224,8 @@ func Init(ctx *ctx.Context, builtinIntegrationsDir string) {
}
if dashboard.UUID == 0 {
dashboard.UUID = time.Now().UnixNano()
time.Sleep(time.Microsecond)
dashboard.UUID = time.Now().UnixMicro()
// 补全文件中的 uuid
bs, err = json.MarshalIndent(dashboard, "", " ")
if err != nil {

View File

@@ -184,6 +184,7 @@ func (rt *Router) Config(r *gin.Engine) {
pages.POST("/query-range-batch", rt.promBatchQueryRange)
pages.POST("/query-instant-batch", rt.promBatchQueryInstant)
pages.GET("/datasource/brief", rt.datasourceBriefs)
pages.POST("/datasource/query", rt.datasourceQuery)
pages.POST("/ds-query", rt.QueryData)
pages.POST("/logs-query", rt.QueryLog)
@@ -197,6 +198,7 @@ func (rt *Router) Config(r *gin.Engine) {
pages.POST("/query-range-batch", rt.auth(), rt.promBatchQueryRange)
pages.POST("/query-instant-batch", rt.auth(), rt.promBatchQueryInstant)
pages.GET("/datasource/brief", rt.auth(), rt.user(), rt.datasourceBriefs)
pages.POST("/datasource/query", rt.auth(), rt.user(), rt.datasourceQuery)
pages.POST("/ds-query", rt.auth(), rt.QueryData)
pages.POST("/logs-query", rt.auth(), rt.QueryLog)
@@ -278,6 +280,7 @@ func (rt *Router) Config(r *gin.Engine) {
pages.DELETE("/busi-group/:id/members", rt.auth(), rt.user(), rt.perm("/busi-groups/put"), rt.bgrw(), rt.busiGroupMemberDel)
pages.DELETE("/busi-group/:id", rt.auth(), rt.user(), rt.perm("/busi-groups/del"), rt.bgrw(), rt.busiGroupDel)
pages.GET("/busi-group/:id/perm/:perm", rt.auth(), rt.user(), rt.checkBusiGroupPerm)
pages.GET("/busi-groups/tags", rt.auth(), rt.user(), rt.busiGroupsGetTags)
pages.GET("/targets", rt.auth(), rt.user(), rt.targetGets)
pages.GET("/target/extra-meta", rt.auth(), rt.user(), rt.targetExtendInfoByIdent)
@@ -552,6 +555,7 @@ func (rt *Router) Config(r *gin.Engine) {
service.GET("/config/:id", rt.configGet)
service.GET("/configs", rt.configsGet)
service.GET("/config", rt.configGetByKey)
service.GET("/all-configs", rt.configGetAll)
service.PUT("/configs", rt.configsPut)
service.POST("/configs", rt.configsPost)
service.DELETE("/configs", rt.configsDel)

View File

@@ -77,6 +77,11 @@ func (rt *Router) alertRuleGetsByGids(c *gin.Context) {
for i := 0; i < len(ars); i++ {
ars[i].FillNotifyGroups(rt.Ctx, cache)
ars[i].FillSeverities()
if len(ars[i].DatasourceQueries) != 0 {
ars[i].DatasourceIdsJson = rt.DatasourceCache.GetIDsByDsCateAndQueries(ars[i].Cate, ars[i].DatasourceQueries)
}
rids = append(rids, ars[i].Id)
names = append(names, ars[i].UpdateBy)
}
@@ -123,6 +128,10 @@ func (rt *Router) alertRulesGetByService(c *gin.Context) {
cache := make(map[int64]*models.UserGroup)
for i := 0; i < len(ars); i++ {
ars[i].FillNotifyGroups(rt.Ctx, cache)
if len(ars[i].DatasourceQueries) != 0 {
ars[i].DatasourceIdsJson = rt.DatasourceCache.GetIDsByDsCateAndQueries(ars[i].Cate, ars[i].DatasourceQueries)
}
}
}
ginx.NewRender(c).Data(ars, err)
@@ -157,6 +166,14 @@ func (rt *Router) alertRuleAddByImport(c *gin.Context) {
ginx.Bomb(http.StatusBadRequest, "input json is empty")
}
for i := range lst {
if len(lst[i].DatasourceQueries) == 0 {
lst[i].DatasourceQueries = []models.DatasourceQuery{
models.DataSourceQueryAll,
}
}
}
bgid := ginx.UrlParamInt64(c, "id")
reterr := rt.alertRuleAdd(lst, username, bgid, c.GetHeader("X-Language"))
@@ -164,9 +181,9 @@ func (rt *Router) alertRuleAddByImport(c *gin.Context) {
}
type promRuleForm struct {
Payload string `json:"payload" binding:"required"`
DatasourceIds []int64 `json:"datasource_ids" binding:"required"`
Disabled int `json:"disabled" binding:"gte=0,lte=1"`
Payload string `json:"payload" binding:"required"`
DatasourceQueries []models.DatasourceQuery `json:"datasource_queries" binding:"required"`
Disabled int `json:"disabled" binding:"gte=0,lte=1"`
}
func (rt *Router) alertRuleAddByImportPromRule(c *gin.Context) {
@@ -185,7 +202,7 @@ func (rt *Router) alertRuleAddByImportPromRule(c *gin.Context) {
ginx.Bomb(http.StatusBadRequest, "input yaml is empty")
}
lst := models.DealPromGroup(pr.Groups, f.DatasourceIds, f.Disabled)
lst := models.DealPromGroup(pr.Groups, f.DatasourceQueries, f.Disabled)
username := c.MustGet("username").(string)
bgid := ginx.UrlParamInt64(c, "id")
ginx.NewRender(c).Data(rt.alertRuleAdd(lst, username, bgid, c.GetHeader("X-Language")), nil)
@@ -398,6 +415,16 @@ func (rt *Router) alertRulePutFields(c *gin.Context) {
}
}
if f.Action == "datasource_change" {
// 修改数据源
if datasourceQueries, has := f.Fields["datasource_queries"]; has {
bytes, err := json.Marshal(datasourceQueries)
ginx.Dangerous(err)
ginx.Dangerous(ar.UpdateFieldsMap(rt.Ctx, map[string]interface{}{"datasource_queries": bytes}))
continue
}
}
for k, v := range f.Fields {
ginx.Dangerous(ar.UpdateColumn(rt.Ctx, k, v))
}
@@ -417,6 +444,10 @@ func (rt *Router) alertRuleGet(c *gin.Context) {
return
}
if len(ar.DatasourceQueries) != 0 {
ar.DatasourceIdsJson = rt.DatasourceCache.GetIDsByDsCateAndQueries(ar.Cate, ar.DatasourceQueries)
}
err = ar.FillNotifyGroups(rt.Ctx, make(map[int64]*models.UserGroup))
ginx.Dangerous(err)
@@ -623,7 +654,7 @@ func (rt *Router) cloneToMachine(c *gin.Context) {
newRule.CreateAt = now
newRule.RuleConfig = alertRules[i].RuleConfig
exist, err := models.AlertRuleExists(rt.Ctx, 0, newRule.GroupId, newRule.DatasourceIdsJson, newRule.Name)
exist, err := models.AlertRuleExists(rt.Ctx, 0, newRule.GroupId, newRule.Name)
if err != nil {
errMsg[f.IdentList[j]] = err.Error()
continue

View File

@@ -43,7 +43,7 @@ func (rt *Router) builtinPayloadsAdd(c *gin.Context) {
for _, rule := range alertRules {
if rule.UUID == 0 {
rule.UUID = time.Now().UnixNano()
rule.UUID = time.Now().UnixMicro()
}
contentBytes, err := json.Marshal(rule)
@@ -78,7 +78,13 @@ func (rt *Router) builtinPayloadsAdd(c *gin.Context) {
}
if alertRule.UUID == 0 {
alertRule.UUID = time.Now().UnixNano()
alertRule.UUID = time.Now().UnixMicro()
}
contentBytes, err := json.Marshal(alertRule)
if err != nil {
reterr[alertRule.Name] = err.Error()
continue
}
bp := models.BuiltinPayload{
@@ -88,7 +94,7 @@ func (rt *Router) builtinPayloadsAdd(c *gin.Context) {
Name: alertRule.Name,
Tags: alertRule.AppendTags,
UUID: alertRule.UUID,
Content: lst[i].Content,
Content: string(contentBytes),
CreatedBy: username,
UpdatedBy: username,
}
@@ -106,7 +112,7 @@ func (rt *Router) builtinPayloadsAdd(c *gin.Context) {
for _, dashboard := range dashboards {
if dashboard.UUID == 0 {
dashboard.UUID = time.Now().UnixNano()
dashboard.UUID = time.Now().UnixMicro()
}
contentBytes, err := json.Marshal(dashboard)
@@ -141,7 +147,13 @@ func (rt *Router) builtinPayloadsAdd(c *gin.Context) {
}
if dashboard.UUID == 0 {
dashboard.UUID = time.Now().UnixNano()
dashboard.UUID = time.Now().UnixMicro()
}
contentBytes, err := json.Marshal(dashboard)
if err != nil {
reterr[dashboard.Name] = err.Error()
continue
}
bp := models.BuiltinPayload{
@@ -151,7 +163,7 @@ func (rt *Router) builtinPayloadsAdd(c *gin.Context) {
Name: dashboard.Name,
Tags: dashboard.Tags,
UUID: dashboard.UUID,
Content: lst[i].Content,
Content: string(contentBytes),
CreatedBy: username,
UpdatedBy: username,
}

View File

@@ -140,3 +140,12 @@ func (rt *Router) busiGroupGet(c *gin.Context) {
ginx.Dangerous(bg.FillUserGroups(rt.Ctx))
ginx.NewRender(c).Data(bg, nil)
}
// busiGroupsGetTags parses the comma-separated "gids" query parameter into
// int64 IDs, resolves them to target idents through
// models.TargetIndentsGetByBgids, and renders the result of
// models.TargetGetTags for those idents. Either lookup error is handed to
// ginx.Dangerous before the response is rendered.
func (rt *Router) busiGroupsGetTags(c *gin.Context) {
	rawGids := ginx.QueryStr(c, "gids", "")
	groupIDs := str.IdsInt64(rawGids, ",")

	idents, err := models.TargetIndentsGetByBgids(rt.Ctx, groupIDs)
	ginx.Dangerous(err)

	tagList, err := models.TargetGetTags(rt.Ctx, idents, true, "busigroup")
	ginx.Dangerous(err)

	ginx.NewRender(c).Data(tagList, nil)
}

View File

@@ -24,6 +24,11 @@ func (rt *Router) configGet(c *gin.Context) {
ginx.NewRender(c).Data(configs, err)
}
// configGetAll renders the value and error returned by models.ConfigsGetAll.
func (rt *Router) configGetAll(c *gin.Context) {
	all, err := models.ConfigsGetAll(rt.Ctx)
	render := ginx.NewRender(c)
	render.Data(all, err)
}
func (rt *Router) configGetByKey(c *gin.Context) {
config, err := models.ConfigsGet(rt.Ctx, ginx.QueryStr(c, "key"))
ginx.NewRender(c).Data(config, err)

View File

@@ -122,12 +122,14 @@ func (rt *Router) datasourceUpsert(c *gin.Context) {
}
func DatasourceCheck(ds models.Datasource) error {
if ds.HTTPJson.Url == "" {
return fmt.Errorf("url is empty")
}
if ds.PluginType != models.ELASTICSEARCH {
if ds.HTTPJson.Url == "" {
return fmt.Errorf("url is empty")
}
if !strings.HasPrefix(ds.HTTPJson.Url, "http") {
return fmt.Errorf("url must start with http or https")
if !strings.HasPrefix(ds.HTTPJson.Url, "http") {
return fmt.Errorf("url must start with http or https")
}
}
client := &http.Client{
@@ -138,11 +140,11 @@ func DatasourceCheck(ds models.Datasource) error {
},
}
fullURL := ds.HTTPJson.Url
req, err := http.NewRequest("GET", fullURL, nil)
var fullURL string
req, err := ds.HTTPJson.NewReq(&fullURL)
if err != nil {
logger.Errorf("Error creating request: %v", err)
return fmt.Errorf("request url:%s failed", fullURL)
return fmt.Errorf("request urls:%v failed", ds.HTTPJson.GetUrls())
}
if ds.PluginType == models.PROMETHEUS {
@@ -249,3 +251,37 @@ func (rt *Router) getDatasourceIds(c *gin.Context) {
ginx.NewRender(c).Data(datasourceIds, err)
}
// datasourceQueryForm is the JSON request body that datasourceQuery binds.
type datasourceQueryForm struct {
// Cate is read from "datasource_cate"; datasourceQuery passes it as the only
// type to models.GetDatasourcesGetsByTypes.
Cate string `json:"datasource_cate"`
// DatasourceQueries is read from "datasource_queries" and forwarded to
// models.GetDatasourceIDsByDatasourceQueries to select datasource IDs.
DatasourceQueries []models.DatasourceQuery `json:"datasource_queries"`
}
// datasourceQueryResp is one element of the list rendered by datasourceQuery:
// a datasource ID together with its name.
type datasourceQueryResp struct {
// ID is serialized as "id".
ID int64 `json:"id"`
// Name is serialized as "name".
Name string `json:"name"`
}
// datasourceQuery binds a datasourceQueryForm from the JSON body, loads the
// datasources whose type equals the form's Cate, and renders the ID and name
// of every datasource selected by models.GetDatasourceIDsByDatasourceQueries.
func (rt *Router) datasourceQuery(c *gin.Context) {
	var form datasourceQueryForm
	ginx.BindJSON(c, &form)

	dsList, err := models.GetDatasourcesGetsByTypes(rt.Ctx, []string{form.Cate})
	ginx.Dangerous(err)

	// Index the loaded datasources in both directions; the resolver below
	// takes both lookup tables.
	idByName := make(map[string]int64)
	nameByID := make(map[int64]string)
	for _, item := range dsList {
		idByName[item.Name] = item.Id
		nameByID[item.Id] = item.Name
	}

	matched := models.GetDatasourceIDsByDatasourceQueries(form.DatasourceQueries, nameByID, idByName)

	// result stays nil when no ID is matched.
	var result []datasourceQueryResp
	for _, id := range matched {
		entry := datasourceQueryResp{ID: id, Name: nameByID[id]}
		result = append(result, entry)
	}

	ginx.NewRender(c).Data(result, err)
}

View File

@@ -45,6 +45,10 @@ func (rt *Router) statistic(c *gin.Context) {
statistics, err = models.ConfigsUserVariableStatistics(rt.Ctx)
ginx.NewRender(c).Data(statistics, err)
return
case "cval":
statistics, err = models.ConfigCvalStatistics(rt.Ctx)
ginx.NewRender(c).Data(statistics, err)
return
default:
ginx.Bomb(http.StatusBadRequest, "invalid name")
}

View File

@@ -100,7 +100,7 @@ func HandleHeartbeat(c *gin.Context, ctx *ctx.Context, engineName string, metaSe
groupIds = append(groupIds, groupId)
}
err := models.TargetOverrideBgids(ctx, []string{target.Ident}, groupIds)
err := models.TargetOverrideBgids(ctx, []string{target.Ident}, groupIds, nil)
if err != nil {
logger.Warningf("update target:%s group ids failed, err: %v", target.Ident, err)
}
@@ -113,7 +113,7 @@ func HandleHeartbeat(c *gin.Context, ctx *ctx.Context, engineName string, metaSe
}
if !target.MatchGroupId(groupId) {
err := models.TargetBindBgids(ctx, []string{target.Ident}, []int64{groupId})
err := models.TargetBindBgids(ctx, []string{target.Ident}, []int64{groupId}, nil)
if err != nil {
logger.Warningf("update target:%s group ids failed, err: %v", target.Ident, err)
}

View File

@@ -50,7 +50,8 @@ func (rt *Router) alertMuteGets(c *gin.Context) {
prods := strings.Fields(ginx.QueryStr(c, "prods", ""))
bgid := ginx.QueryInt64(c, "bgid", -1)
query := ginx.QueryStr(c, "query", "")
lst, err := models.AlertMuteGets(rt.Ctx, prods, bgid, query)
disabled := ginx.QueryInt(c, "disabled", -1)
lst, err := models.AlertMuteGets(rt.Ctx, prods, bgid, disabled, query)
ginx.NewRender(c).Data(lst, err)
}

View File

@@ -35,11 +35,18 @@ type Record struct {
// notificationRecordAdd
func (rt *Router) notificationRecordAdd(c *gin.Context) {
var req models.NotificaitonRecord
var req []*models.NotificaitonRecord
ginx.BindJSON(c, &req)
err := req.Add(rt.Ctx)
err := models.DB(rt.Ctx).CreateInBatches(req, 100).Error
var ids []int64
if err == nil {
ids = make([]int64, len(req))
for i, noti := range req {
ids[i] = noti.Id
}
}
ginx.NewRender(c).Data(req.Id, err)
ginx.NewRender(c).Data(ids, err)
}
func (rt *Router) notificationRecordList(c *gin.Context) {

View File

@@ -161,7 +161,11 @@ func (rt *Router) notifyTplPreview(c *gin.Context) {
func (rt *Router) notifyTplAdd(c *gin.Context) {
var f models.NotifyTpl
ginx.BindJSON(c, &f)
f.Channel = strings.TrimSpace(f.Channel)
user := c.MustGet("user").(*models.User)
f.CreateBy = user.Username
f.Channel = strings.TrimSpace(f.Channel)
ginx.Dangerous(templateValidate(f))
count, err := models.Count(models.DB(rt.Ctx).Model(&models.NotifyTpl{}).Where("channel = ? or name = ?", f.Channel, f.Name))
@@ -169,6 +173,8 @@ func (rt *Router) notifyTplAdd(c *gin.Context) {
if count != 0 {
ginx.Bomb(200, "Refuse to create duplicate channel(unique)")
}
f.CreateAt = time.Now().Unix()
ginx.NewRender(c).Message(f.Create(rt.Ctx))
}

View File

@@ -7,7 +7,6 @@ import (
"net"
"net/http"
"net/http/httputil"
"net/url"
"strings"
"sync"
"time"
@@ -112,9 +111,9 @@ func (rt *Router) dsProxy(c *gin.Context) {
return
}
target, err := url.Parse(ds.HTTPJson.Url)
target, err := ds.HTTPJson.ParseUrl()
if err != nil {
c.String(http.StatusInternalServerError, "invalid url: %s", ds.HTTPJson.Url)
c.String(http.StatusInternalServerError, "invalid urls: %s", ds.HTTPJson.GetUrls())
return
}

View File

@@ -3,8 +3,6 @@ package router
import (
"encoding/json"
"net/http"
"strconv"
"strings"
"time"
"github.com/ccfos/nightingale/v6/models"
@@ -74,6 +72,14 @@ func (rt *Router) recordingRuleAddByFE(c *gin.Context) {
ginx.Bomb(http.StatusBadRequest, "input json is empty")
}
for i := range lst {
if len(lst[i].DatasourceQueries) == 0 {
lst[i].DatasourceQueries = []models.DatasourceQuery{
models.DataSourceQueryAll,
}
}
}
bgid := ginx.UrlParamInt64(c, "id")
reterr := make(map[string]string)
for i := 0; i < count; i++ {
@@ -137,23 +143,10 @@ func (rt *Router) recordingRulePutFields(c *gin.Context) {
f.Fields["update_by"] = c.MustGet("username").(string)
f.Fields["update_at"] = time.Now().Unix()
if _, ok := f.Fields["datasource_ids"]; ok {
// datasource_ids = "1 2 3"
idsStr := strings.Fields(f.Fields["datasource_ids"].(string))
ids := make([]int64, 0)
for _, idStr := range idsStr {
id, err := strconv.ParseInt(idStr, 10, 64)
if err != nil {
ginx.Bomb(http.StatusBadRequest, "datasource_ids error")
}
ids = append(ids, id)
}
bs, err := json.Marshal(ids)
if err != nil {
ginx.Bomb(http.StatusBadRequest, "datasource_ids error")
}
f.Fields["datasource_ids"] = string(bs)
if datasourceQueries, ok := f.Fields["datasource_queries"]; ok {
bytes, err := json.Marshal(datasourceQueries)
ginx.Dangerous(err)
f.Fields["datasource_queries"] = string(bytes)
}
for i := 0; i < len(f.Ids); i++ {

View File

@@ -169,7 +169,7 @@ func (rt *Router) targetGetTags(c *gin.Context) {
idents := ginx.QueryStr(c, "idents", "")
idents = strings.ReplaceAll(idents, ",", " ")
ignoreHostTag := ginx.QueryBool(c, "ignore_host_tag", false)
lst, err := models.TargetGetTags(rt.Ctx, strings.Fields(idents), ignoreHostTag)
lst, err := models.TargetGetTags(rt.Ctx, strings.Fields(idents), ignoreHostTag, "")
ginx.NewRender(c).Data(lst, err)
}
@@ -397,6 +397,7 @@ type targetBgidsForm struct {
Idents []string `json:"idents" binding:"required_without=HostIps"`
HostIps []string `json:"host_ips" binding:"required_without=Idents"`
Bgids []int64 `json:"bgids"`
Tags []string `json:"tags"`
Action string `json:"action"` // add del reset
}
@@ -452,11 +453,11 @@ func (rt *Router) targetBindBgids(c *gin.Context) {
switch f.Action {
case "add":
ginx.NewRender(c).Data(failedResults, models.TargetBindBgids(rt.Ctx, f.Idents, f.Bgids))
ginx.NewRender(c).Data(failedResults, models.TargetBindBgids(rt.Ctx, f.Idents, f.Bgids, f.Tags))
case "del":
ginx.NewRender(c).Data(failedResults, models.TargetUnbindBgids(rt.Ctx, f.Idents, f.Bgids))
case "reset":
ginx.NewRender(c).Data(failedResults, models.TargetOverrideBgids(rt.Ctx, f.Idents, f.Bgids))
ginx.NewRender(c).Data(failedResults, models.TargetOverrideBgids(rt.Ctx, f.Idents, f.Bgids, f.Tags))
default:
ginx.Bomb(http.StatusBadRequest, "invalid action")
}
@@ -478,7 +479,7 @@ func (rt *Router) targetUpdateBgidByService(c *gin.Context) {
ginx.Bomb(http.StatusBadRequest, err.Error())
}
ginx.NewRender(c).Data(failedResults, models.TargetOverrideBgids(rt.Ctx, f.Idents, []int64{f.Bgid}))
ginx.NewRender(c).Data(failedResults, models.TargetOverrideBgids(rt.Ctx, f.Idents, []int64{f.Bgid}, nil))
}
type identsForm struct {

View File

@@ -33,7 +33,7 @@ type ClusterOptions struct {
MaxIdleConnsPerHost int
}
func Parse(fpath string, configPtr interface{}) error {
func Parse(fpath string, configPtr *Config) error {
var (
tBuf []byte
)

View File

@@ -7,6 +7,7 @@ import (
"github.com/ccfos/nightingale/v6/alert"
"github.com/ccfos/nightingale/v6/alert/astats"
"github.com/ccfos/nightingale/v6/alert/dispatch"
"github.com/ccfos/nightingale/v6/alert/process"
alertrt "github.com/ccfos/nightingale/v6/alert/router"
"github.com/ccfos/nightingale/v6/center/metas"
@@ -52,11 +53,13 @@ func Initialize(configDir string, cryptoKey string) (func(), error) {
targetCache := memsto.NewTargetCache(ctx, syncStats, redis)
busiGroupCache := memsto.NewBusiGroupCache(ctx, syncStats)
configCvalCache := memsto.NewCvalCache(ctx, syncStats)
idents := idents.New(ctx, redis)
metas := metas.New(redis)
writers := writer.NewWriters(config.Pushgw)
pushgwRouter := pushgwrt.New(config.HTTP, config.Pushgw, config.Alert, targetCache, busiGroupCache, idents, metas, writers, ctx)
r := httpx.GinEngine(config.Global.RunMode, config.HTTP)
r := httpx.GinEngine(config.Global.RunMode, config.HTTP, configCvalCache.PrintBodyPaths, configCvalCache.PrintAccessLog)
pushgwRouter.Config(r)
if !config.Alert.Disable {
@@ -71,6 +74,9 @@ func Initialize(configDir string, cryptoKey string) (func(), error) {
taskTplsCache := memsto.NewTaskTplCache(ctx)
promClients := prom.NewPromClient(ctx)
dispatch.InitRegisterQueryFunc(promClients)
tdengineClients := tdengine.NewTdengineClient(ctx, config.Alert.Heartbeat)
externalProcessors := process.NewExternalProcessors()

View File

@@ -1,5 +1,3 @@
version: "3.7"
networks:
nightingale:
driver: bridge

View File

@@ -19,8 +19,8 @@ precision = "ms"
# global collect interval
interval = 15
[global.labels]
source="categraf"
# [global.labels]
# source="categraf"
# region = "shanghai"
# env = "localhost"

View File

@@ -0,0 +1,42 @@
[[instances]]
address = "mysql:3306"
username = "root"
password = "1234"
# # set tls=custom to enable tls
# parameters = "tls=false"
# extra_status_metrics = true
# extra_innodb_metrics = false
# gather_processlist_processes_by_state = false
# gather_processlist_processes_by_user = false
# gather_schema_size = true
# gather_table_size = false
# gather_system_table_size = false
# gather_slave_status = true
# # timeout
# timeout_seconds = 3
# # interval = global.interval * interval_times
# interval_times = 1
# important! use global unique string to specify instance
labels = { instance="docker-compose-mysql" }
## Optional TLS Config
# use_tls = false
# tls_min_version = "1.2"
# tls_ca = "/etc/categraf/ca.pem"
# tls_cert = "/etc/categraf/cert.pem"
# tls_key = "/etc/categraf/key.pem"
## Use TLS but skip chain & host verification
# insecure_skip_verify = true
#[[instances.queries]]
# mesurement = "lock_wait"
# metric_fields = [ "total" ]
# timeout = "3s"
# request = '''
#SELECT count(*) as total FROM information_schema.innodb_trx WHERE trx_state='LOCK WAIT'
#'''

View File

@@ -0,0 +1,37 @@
[[instances]]
address = "redis:6379"
username = ""
password = ""
# pool_size = 2
## 是否开启slowlog 收集
# gather_slowlog = true
## 最多收集多少条slowlog
# slowlog_max_len = 100
## 收集距离现在多少秒以内的slowlog
## 注意插件的采集周期,该参数不要小于采集周期否则会有slowlog查不到
# slowlog_time_window=30
# 指标
# redis_slow_log{ident=dev-01 client_addr=127.0.0.1:56364 client_name= cmd="info ALL" log_id=983} 74 (单位微秒)
# # Optional. Specify redis commands to retrieve values
# commands = [
# {command = ["get", "sample-key1"], metric = "custom_metric_name1"},
# {command = ["get", "sample-key2"], metric = "custom_metric_name2"}
# ]
# # interval = global.interval * interval_times
# interval_times = 1
# important! use global unique string to specify instance
labels = { instance="docker-compose-redis" }
## Optional TLS Config
# use_tls = false
# tls_min_version = "1.2"
# tls_ca = "/etc/categraf/ca.pem"
# tls_cert = "/etc/categraf/cert.pem"
# tls_key = "/etc/categraf/key.pem"
## Use TLS but skip chain & host verification
# insecure_skip_verify = true

View File

@@ -25,7 +25,7 @@ services:
network_mode: host
prometheus:
image: prom/prometheus
image: prom/prometheus:v2.55.1
container_name: prometheus
hostname: prometheus
restart: always

File diff suppressed because it is too large Load Diff

View File

@@ -1,3 +1,10 @@
GRANT ALL ON *.* TO 'root'@'127.0.0.1' IDENTIFIED BY '1234';
GRANT ALL ON *.* TO 'root'@'localhost' IDENTIFIED BY '1234';
GRANT ALL ON *.* TO 'root'@'%' IDENTIFIED BY '1234';
CREATE USER IF NOT EXISTS 'root'@'127.0.0.1' IDENTIFIED BY '1234';
GRANT ALL PRIVILEGES ON *.* TO 'root'@'127.0.0.1' WITH GRANT OPTION;
CREATE USER IF NOT EXISTS 'root'@'localhost' IDENTIFIED BY '1234';
GRANT ALL PRIVILEGES ON *.* TO 'root'@'localhost' WITH GRANT OPTION;
CREATE USER IF NOT EXISTS 'root'@'%' IDENTIFIED BY '1234';
GRANT ALL PRIVILEGES ON *.* TO 'root'@'%' WITH GRANT OPTION;
FLUSH PRIVILEGES;

View File

@@ -116,4 +116,8 @@ CREATE TABLE `target_busi_group` (
`update_at` bigint NOT NULL,
PRIMARY KEY (`id`),
UNIQUE KEY `idx_target_group` (`target_ident`,`group_id`)
) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4;
) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4;
/* v7.7.2 2024-12-02 */
ALTER TABLE alert_subscribe MODIFY COLUMN rule_ids varchar(1024);
ALTER TABLE alert_subscribe MODIFY COLUMN busi_groups varchar(4096);

View File

@@ -73,14 +73,14 @@ DefaultRoles = ["Standard"]
OpenRSA = false
[DB]
# mysql postgres sqlite
DBType = "sqlite"
# postgres: host=%s port=%s user=%s dbname=%s password=%s sslmode=%s
# postgres: DSN="host=127.0.0.1 port=5432 user=root dbname=n9e_v6 password=1234 sslmode=disable"
# sqlite: DSN="/path/to/filename.db"
DSN = "root:1234@tcp(127.0.0.1:3306)/n9e_v6?charset=utf8mb4&parseTime=True&loc=Local&allowNativePasswords=true"
DSN = "n9e.db"
# enable debug mode or not
Debug = false
# mysql postgres sqlite
DBType = "mysql"
# unit: s
MaxLifetime = 7200
# max open connections
@@ -98,8 +98,8 @@ Address = "127.0.0.1:6379"
# DB = 0
# UseTLS = false
# TLSMinVersion = "1.2"
# standalone cluster sentinel
RedisType = "standalone"
# standalone cluster sentinel miniredis
RedisType = "miniredis"
# Mastername for sentinel type
# MasterName = "mymaster"
# SentinelUsername = ""
@@ -138,6 +138,9 @@ ForceUseServerTS = true
# [Pushgw.WriterOpt]
# QueueMaxSize = 1000000
# QueuePopSize = 1000
# AllQueueMaxSize = 1000000
# fresh time, unit ms
# AllQueueMaxSizeInterval = 200
[[Pushgw.Writers]]
# Url = "http://127.0.0.1:8480/insert/0/prometheus/api/v1/write"

16
go.mod
View File

@@ -1,6 +1,6 @@
module github.com/ccfos/nightingale/v6
go 1.18
go 1.22
require (
github.com/BurntSushi/toml v0.3.1
@@ -32,8 +32,9 @@ require (
github.com/rakyll/statik v0.1.7
github.com/redis/go-redis/v9 v9.0.2
github.com/spaolacci/murmur3 v1.1.0
github.com/stretchr/testify v1.8.4
github.com/tidwall/gjson v1.14.0
github.com/toolkits/pkg v1.3.6
github.com/toolkits/pkg v1.3.8
golang.org/x/exp v0.0.0-20230713183714-613f0c0eb8a1
golang.org/x/oauth2 v0.10.0
gopkg.in/gomail.v2 v2.0.0-20160411212932-81ebce5c23df
@@ -44,8 +45,15 @@ require (
gorm.io/gorm v1.25.7-0.20240204074919-46816ad31dde
)
require (
github.com/alicebob/gopher-json v0.0.0-20200520072559-a9ecdc9d1d3a // indirect
github.com/pmezard/go-difflib v1.0.1-0.20181226105442-5d4384ee4fb2 // indirect
github.com/yuin/gopher-lua v1.1.1 // indirect
)
require (
github.com/Azure/go-ntlmssp v0.0.0-20220621081337-cb9428e4ac1e // indirect
github.com/alicebob/miniredis/v2 v2.33.0
github.com/beorn7/perks v1.0.1 // indirect
github.com/bytedance/sonic v1.9.1 // indirect
github.com/cespare/xxhash/v2 v2.2.0 // indirect
@@ -87,7 +95,7 @@ require (
github.com/prometheus/client_model v0.4.0 // indirect
github.com/prometheus/procfs v0.11.0 // indirect
github.com/robfig/cron/v3 v3.0.1
github.com/tidwall/match v1.1.1 // indirect
github.com/tidwall/match v1.1.1
github.com/tidwall/pretty v1.2.0 // indirect
github.com/twitchyliquid64/golang-asm v0.15.1 // indirect
github.com/ugorji/go/codec v1.2.11 // indirect
@@ -97,7 +105,7 @@ require (
golang.org/x/crypto v0.21.0 // indirect
golang.org/x/image v0.18.0 // indirect
golang.org/x/net v0.23.0 // indirect
golang.org/x/sys v0.18.0 // indirect
golang.org/x/sys v0.21.0 // indirect
golang.org/x/text v0.16.0 // indirect
google.golang.org/appengine v1.6.7 // indirect
google.golang.org/protobuf v1.33.0 // indirect

39
go.sum
View File

@@ -1,20 +1,32 @@
github.com/Azure/azure-sdk-for-go v65.0.0+incompatible h1:HzKLt3kIwMm4KeJYTdx9EbjRYTySD/t8i1Ee/W5EGXw=
github.com/Azure/azure-sdk-for-go/sdk/azcore v1.7.0 h1:8q4SaHjFsClSvuVne0ID/5Ka8u3fcIHyqkLjcFpNRHQ=
github.com/Azure/azure-sdk-for-go/sdk/azcore v1.7.0/go.mod h1:bjGvMhVMb+EEm3VRNQawDMUyMMjo+S5ewNjflkep/0Q=
github.com/Azure/azure-sdk-for-go/sdk/azidentity v1.3.0 h1:vcYCAze6p19qBW7MhZybIsqD8sMV8js0NyQM8JDnVtg=
github.com/Azure/azure-sdk-for-go/sdk/azidentity v1.3.0/go.mod h1:OQeznEEkTZ9OrhHJoDD8ZDq51FHgXjqtP9z6bEwBq9U=
github.com/Azure/azure-sdk-for-go/sdk/internal v1.3.0 h1:sXr+ck84g/ZlZUOZiNELInmMgOsuGwdjjVkEIde0OtY=
github.com/Azure/azure-sdk-for-go/sdk/internal v1.3.0/go.mod h1:okt5dMMTOFjX/aovMlrjvvXoPMBVSPzk9185BT0+eZM=
github.com/Azure/go-ntlmssp v0.0.0-20220621081337-cb9428e4ac1e h1:NeAW1fUYUEWhft7pkxDf6WoUvEZJ/uOKsvtpjLnn8MU=
github.com/Azure/go-ntlmssp v0.0.0-20220621081337-cb9428e4ac1e/go.mod h1:chxPXzSsl7ZWRAuOIE23GDNzjWuZquvFlgA8xmpunjU=
github.com/AzureAD/microsoft-authentication-library-for-go v1.0.0 h1:OBhqkivkhkMqLPymWEppkm7vgPQY2XsHoEkaMQ0AdZY=
github.com/AzureAD/microsoft-authentication-library-for-go v1.0.0/go.mod h1:kgDmCTgBzIEPFElEF+FK0SdjAor06dRq2Go927dnQ6o=
github.com/BurntSushi/toml v0.3.1 h1:WXkYYl6Yr3qBf1K79EBnL4mak0OimBfB0XUf9Vl28OQ=
github.com/BurntSushi/toml v0.3.1/go.mod h1:xHWCNGjB5oqiDr8zfno3MHue2Ht5sIBksp03qcyfWMU=
github.com/Masterminds/semver/v3 v3.1.1 h1:hLg3sBzpNErnxhQtUy/mmLR2I9foDujNK030IGemrRc=
github.com/Masterminds/semver/v3 v3.1.1/go.mod h1:VPu/7SZ7ePZ3QOrcuXROw5FAcLl4a0cBrbBpGY/8hQs=
github.com/alecthomas/units v0.0.0-20211218093645-b94a6e3cc137 h1:s6gZFSlWYmbqAuRjVTiNNhvNRfY2Wxp9nhfyel4rklc=
github.com/alecthomas/units v0.0.0-20211218093645-b94a6e3cc137/go.mod h1:OMCwj8VM1Kc9e19TLln2VL61YJF0x1XFtfdL4JdbSyE=
github.com/alicebob/gopher-json v0.0.0-20200520072559-a9ecdc9d1d3a h1:HbKu58rmZpUGpz5+4FfNmIU+FmZg2P3Xaj2v2bfNWmk=
github.com/alicebob/gopher-json v0.0.0-20200520072559-a9ecdc9d1d3a/go.mod h1:SGnFV6hVsYE877CKEZ6tDNTjaSXYUk6QqoIK6PrAtcc=
github.com/alicebob/miniredis/v2 v2.33.0 h1:uvTF0EDeu9RLnUEG27Db5I68ESoIxTiXbNUiji6lZrA=
github.com/alicebob/miniredis/v2 v2.33.0/go.mod h1:MhP4a3EU7aENRi9aO+tHfTBZicLqQevyi/DJpoj6mi0=
github.com/aws/aws-sdk-go v1.44.302 h1:ST3ko6GrJKn3Xi+nAvxjG3uk/V1pW8KC52WLeIxqqNk=
github.com/aws/aws-sdk-go v1.44.302/go.mod h1:aVsgQcEevwlmQ7qHE9I3h+dtQgpqhFB+i8Phjh7fkwI=
github.com/beorn7/perks v1.0.1 h1:VlbKKnNfV8bJzeqoa4cOKqO6bYr3WgKZxO8Z16+hsOM=
github.com/beorn7/perks v1.0.1/go.mod h1:G2ZrVWU2WbWT9wwq4/hrbKbnv/1ERSJQ0ibhJ6rlkpw=
github.com/bsm/ginkgo/v2 v2.5.0 h1:aOAnND1T40wEdAtkGSkvSICWeQ8L3UASX7YVCqQx+eQ=
github.com/bsm/ginkgo/v2 v2.5.0/go.mod h1:AiKlXPm7ItEHNc/2+OkrNG4E0ITzojb9/xWzvQ9XZ9w=
github.com/bsm/gomega v1.20.0 h1:JhAwLmtRzXFTx2AkALSLa8ijZafntmhSoU63Ok18Uq8=
github.com/bsm/gomega v1.20.0/go.mod h1:JifAceMQ4crZIWYUKrlGcmbN3bqHogVTADMD2ATsbwk=
github.com/bytedance/sonic v1.5.0/go.mod h1:ED5hyg4y6t3/9Ku1R6dU/4KyJ48DZ4jPhfY1O2AihPM=
github.com/bytedance/sonic v1.9.1 h1:6iJ6NqdoxCDr6mbY8h18oSO+cShGSMRGCEo7F2h0x8s=
github.com/bytedance/sonic v1.9.1/go.mod h1:i736AoUSYt75HyZLoJW9ERYxcy6eaN6h4BZXU064P/U=
@@ -94,6 +106,7 @@ github.com/gogo/protobuf v1.3.2/go.mod h1:P1XiOD3dCwIKUDQYPy72D8LYyHL2YPYrpS2s69
github.com/golang-jwt/jwt v3.2.2+incompatible h1:IfV12K8xAKAnZqdXVzCZ+TOjboZ2keLg81eXfW3O+oY=
github.com/golang-jwt/jwt v3.2.2+incompatible/go.mod h1:8pz2t5EyA70fFQQSrl6XZXzqecmYZeUEB8OUGHkxJ+I=
github.com/golang-jwt/jwt/v4 v4.5.0 h1:7cYmW1XlMY7h7ii7UhUyChSgS5wUJEnm9uZVTGqOWzg=
github.com/golang-jwt/jwt/v4 v4.5.0/go.mod h1:m21LjoU+eqJr34lmDMbreY2eSTRJ1cv77w39/MY0Ch0=
github.com/golang/freetype v0.0.0-20170609003504-e2365dfdc4a0 h1:DACJavvAHhabrF08vX0COfcOBJRhZ8lUbR+ZWIs0Y5g=
github.com/golang/freetype v0.0.0-20170609003504-e2365dfdc4a0/go.mod h1:E/TSTwGwJL78qG/PmXZO1EjYhfJinVAhrmmHX6Z8B9k=
github.com/golang/protobuf v1.2.0/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5yJMmIC1U=
@@ -105,6 +118,7 @@ github.com/golang/snappy v0.0.4 h1:yAGX7huGHXlcLOEtBnF4w7FQwA26wojNCwOYAEhLjQM=
github.com/golang/snappy v0.0.4/go.mod h1:/XxbfmMg8lxefKM7IXC3fBNl/7bRcc72aCRzEWrmP2Q=
github.com/google/go-cmp v0.5.5/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE=
github.com/google/go-cmp v0.5.9 h1:O2Tfq5qg4qc4AmwVlvv0oLiVAGB7enBSJ2x2DqQFi38=
github.com/google/go-cmp v0.5.9/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY=
github.com/google/gofuzz v1.0.0/go.mod h1:dBl0BpW6vV/+mYPU4Po3pmUjxk6FQPldtuIdl/M65Eg=
github.com/google/renameio v0.1.0/go.mod h1:KWCgfxg9yswjAJkECMjeO8J8rahYeXnNhOm40UhjYkI=
github.com/google/uuid v1.3.0 h1:t6JiXgmwXMjEs8VusXIJk2BXHsn+wx8BZdTaoZ5fu7I=
@@ -168,14 +182,17 @@ github.com/jinzhu/now v1.1.4/go.mod h1:d3SSVoowX0Lcu0IBviAWJpolVfI5UJVZZ7cO71lE/
github.com/jinzhu/now v1.1.5 h1:/o9tlHleP7gOFmsnYNz3RGnqzefHA47wQpKrrdTIwXQ=
github.com/jinzhu/now v1.1.5/go.mod h1:d3SSVoowX0Lcu0IBviAWJpolVfI5UJVZZ7cO71lE/z8=
github.com/jmespath/go-jmespath v0.4.0 h1:BEgLn5cpjn8UN1mAw4NjwDrS35OdebyEtFe+9YPoQUg=
github.com/jmespath/go-jmespath v0.4.0/go.mod h1:T8mJZnbsbmF+m6zOOFylbeCJqk5+pHWvzYPziyZiYoo=
github.com/josharian/intern v1.0.0 h1:vlS4z54oSdjm0bgjRigI+G1HpF+tI+9rE5LLzOg8HmY=
github.com/josharian/intern v1.0.0/go.mod h1:5DoeVV0s6jJacbCEi61lwdGj/aVlrQvzHFFd8Hwg//Y=
github.com/jpillora/backoff v1.0.0 h1:uvFg412JmmHBHw7iwprIxkPMI+sGQ4kzOWsMeHnm2EA=
github.com/jpillora/backoff v1.0.0/go.mod h1:J/6gKK9jxlEcS3zixgDgUAsiuZ7yrSoa/FX5e0EB2j4=
github.com/json-iterator/go v1.1.12 h1:PV8peI4a0ysnczrg+LtxykD8LfKY9ML6u2jnxaEnrnM=
github.com/json-iterator/go v1.1.12/go.mod h1:e30LSqwooZae/UwlEbR2852Gd8hjQvJoHmT4TnhNGBo=
github.com/kisielk/errcheck v1.5.0/go.mod h1:pFxgyoBC7bSaBwPgfKdkLd5X25qrDl4LWUI2bnpBCr8=
github.com/kisielk/gotool v1.0.0/go.mod h1:XhKaO+MFFWcvkIS/tQcRk01m1F5IRFswLeQ+oQHNcck=
github.com/klauspost/compress v1.16.7 h1:2mk3MPGNzKyxErAw8YaohYh69+pa4sIQSC0fPGCFR9I=
github.com/klauspost/compress v1.16.7/go.mod h1:ntbaceVETuRiXiv4DpjP66DpAtAGkEQskQzEyD//IeE=
github.com/klauspost/cpuid/v2 v2.0.9/go.mod h1:FInQzS24/EEf25PyTYn52gqo7WaD8xa0213Md/qVLRg=
github.com/klauspost/cpuid/v2 v2.2.4 h1:acbojRNwl3o09bUq+yDCtZFc1aiwaAAxtcn8YkZXnvk=
github.com/klauspost/cpuid/v2 v2.2.4/go.mod h1:RVVoqg1df56z8g3pUjL/3lE5UfnlrJX8tyFgg4nqhuY=
@@ -187,12 +204,14 @@ github.com/kr/pretty v0.1.0/go.mod h1:dAy3ld7l9f0ibDNOQOHHMYYIIbhfbHSm3C4ZsoJORN
github.com/kr/pretty v0.2.1/go.mod h1:ipq/a2n7PKx3OHsz4KJII5eveXtPO4qwEXGdVfWzfnI=
github.com/kr/pretty v0.3.0/go.mod h1:640gp4NfQd8pI5XOwp5fnNeVWj67G7CFk/SaSQn7NBk=
github.com/kr/pretty v0.3.1 h1:flRD4NNwYAUpkphVc1HcthR4KEIFJ65n8Mw5qdRn3LE=
github.com/kr/pretty v0.3.1/go.mod h1:hoEshYVHaxMs3cyo3Yncou5ZscifuDolrwPKZanG3xk=
github.com/kr/pty v1.1.1/go.mod h1:pFQYn66WHrOpPYNljwOMqo10TkYh1fy3cYio2l3bCsQ=
github.com/kr/pty v1.1.8/go.mod h1:O1sed60cT9XZ5uDucP5qwvh+TE3NnUj51EiZO/lmSfw=
github.com/kr/text v0.1.0/go.mod h1:4Jbv+DJW3UT/LiOwJeYQe1efqtUx/iVham/4vfdArNI=
github.com/kr/text v0.2.0 h1:5Nx0Ya0ZqY2ygV366QzturHI13Jq95ApcVaJBhpS+AY=
github.com/kr/text v0.2.0/go.mod h1:eLer722TekiGuMkidMxC/pM04lWEeraHUUmBw8l2grE=
github.com/kylelemons/godebug v1.1.0 h1:RPNrshWIDI6G2gRW9EHilWtl7Z6Sb1BR0xunSBf0SNc=
github.com/kylelemons/godebug v1.1.0/go.mod h1:9/0rRGxNHcop5bhtWyNeEfOS8JIWk580+fNqagV/RAw=
github.com/leodido/go-urn v1.2.1/go.mod h1:zt4jvISO2HfUBqxjfIshjdMTYS56ZS/qv49ictyFfxY=
github.com/leodido/go-urn v1.2.4 h1:XlAE/cm/ms7TE/VMVoduSpNBoyc2dOxHs5MZSwAN63Q=
github.com/leodido/go-urn v1.2.4/go.mod h1:7ZrI8mTSeBSHl/UaRyKQW1qZeMgak41ANeCNaVckg+4=
@@ -223,20 +242,25 @@ github.com/modern-go/reflect2 v1.0.2/go.mod h1:yWuevngMOJpCy52FWWMvUC8ws7m/LJsjY
github.com/mojocn/base64Captcha v1.3.6 h1:gZEKu1nsKpttuIAQgWHO+4Mhhls8cAKyiV2Ew03H+Tw=
github.com/mojocn/base64Captcha v1.3.6/go.mod h1:i5CtHvm+oMbj1UzEPXaA8IH/xHFZ3DGY3Wh3dBpZ28E=
github.com/mwitkow/go-conntrack v0.0.0-20190716064945-2f068394615f h1:KUppIJq7/+SVif2QVs3tOP0zanoHgBEVAwHxUSIzRqU=
github.com/mwitkow/go-conntrack v0.0.0-20190716064945-2f068394615f/go.mod h1:qRWi+5nqEBWmkhHvq77mSJWrCKwh8bxhgT7d/eI7P4U=
github.com/oklog/ulid v1.3.1 h1:EGfNDEx6MqHz8B3uNV6QAib1UR2Lm97sHi3ocA6ESJ4=
github.com/oklog/ulid v1.3.1/go.mod h1:CirwcVhetQ6Lv90oh/F+FBtV6XMibvdAFo93nm5qn4U=
github.com/pelletier/go-toml/v2 v2.0.1/go.mod h1:r9LEWfGN8R5k0VXJ+0BkIe7MYkRdwZOjgMj2KwnJFUo=
github.com/pelletier/go-toml/v2 v2.0.8 h1:0ctb6s9mE31h0/lhu+J6OPmVeDxJn+kYnJc2jZR9tGQ=
github.com/pelletier/go-toml/v2 v2.0.8/go.mod h1:vuYfssBdrU2XDZ9bYydBu6t+6a6PYNcZljzZR9VXg+4=
github.com/pkg/browser v0.0.0-20210911075715-681adbf594b8 h1:KoWmjvw+nsYOo29YJK9vDA65RGE3NrOnUtO7a+RF9HU=
github.com/pkg/browser v0.0.0-20210911075715-681adbf594b8/go.mod h1:HKlIX3XHQyzLZPlr7++PzdhaXEj94dEiJgZDTsxEqUI=
github.com/pkg/diff v0.0.0-20210226163009-20ebb0f2a09e/go.mod h1:pJLUxLENpZxwdsKMEsNbx1VGcRFpLqf3715MtcvvzbA=
github.com/pkg/errors v0.8.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0=
github.com/pkg/errors v0.9.1 h1:FEBLx1zS214owpjy7qsBeixbURkuhQAwrK5UwLGTwt4=
github.com/pkg/errors v0.9.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0=
github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
github.com/pmezard/go-difflib v1.0.1-0.20181226105442-5d4384ee4fb2 h1:Jamvg5psRIccs7FGNTlIRMkT8wgtp5eCXdBlqhYGL6U=
github.com/pmezard/go-difflib v1.0.1-0.20181226105442-5d4384ee4fb2/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
github.com/pquerna/cachecontrol v0.1.0 h1:yJMy84ti9h/+OEWa752kBTKv4XC30OtVVHYv/8cTqKc=
github.com/pquerna/cachecontrol v0.1.0/go.mod h1:NrUG3Z7Rdu85UNR3vm7SOsl1nFIeSiQnrHV5K9mBcUI=
github.com/prashantv/gostub v1.1.0 h1:BTyx3RfQjRHnUWaGF9oQos79AlQ5k8WNktv7VGvVH4g=
github.com/prashantv/gostub v1.1.0/go.mod h1:A5zLQHz7ieHGG7is6LLXLz7I8+3LZzsrV0P1IAHhP5U=
github.com/prometheus/client_golang v1.16.0 h1:yk/hx9hDbrGHovbci4BY+pRMfSuuat626eFsHb7tmT8=
github.com/prometheus/client_golang v1.16.0/go.mod h1:Zsulrv/L9oM40tJ7T815tM89lFEugiJ9HzIqaAx4LKc=
github.com/prometheus/client_model v0.4.0 h1:5lQXD3cAg1OXBf4Wq03gTrXHeaV0TQvGfUooCfx1yqY=
@@ -244,6 +268,7 @@ github.com/prometheus/client_model v0.4.0/go.mod h1:oMQmHW1/JoDwqLtg57MGgP/Fb1CJ
github.com/prometheus/common v0.44.0 h1:+5BrQJwiBB9xsMygAB3TNvpQKOwlkc25LbISbrdOOfY=
github.com/prometheus/common v0.44.0/go.mod h1:ofAIvZbQ1e/nugmZGz4/qCb9Ap1VoSTIO7x0VV9VvuY=
github.com/prometheus/common/sigv4 v0.1.0 h1:qoVebwtwwEhS85Czm2dSROY5fTo2PAPEVdDeppTwGX4=
github.com/prometheus/common/sigv4 v0.1.0/go.mod h1:2Jkxxk9yYvCkE5G1sQT7GuEXm57JrvHu9k5YwTjsNtI=
github.com/prometheus/procfs v0.11.0 h1:5EAgkfkMl659uZPbe9AS2N68a7Cc1TJbPEuGzFuRbyk=
github.com/prometheus/procfs v0.11.0/go.mod h1:nwNm2aOCAYw8uTR/9bWRREkZFxAUcWzPHWJq+XBB/FM=
github.com/prometheus/prometheus v0.47.1 h1:bd2LiZyxzHn9Oo2Ei4eK2D86vz/L/OiqR1qYo0XmMBo=
@@ -259,6 +284,7 @@ github.com/rogpeppe/go-internal v1.3.0/go.mod h1:M8bDsm7K2OlrFYOpmOWEs/qY81heoFR
github.com/rogpeppe/go-internal v1.6.1/go.mod h1:xXDCJY+GAPziupqXw64V24skbSoqbTEfhy4qGm1nDQc=
github.com/rogpeppe/go-internal v1.8.0/go.mod h1:WmiCO8CzOY8rg0OYDC4/i/2WRWAB6poM+XZ2dLUbcbE=
github.com/rogpeppe/go-internal v1.10.0 h1:TMyTOH3F/DB16zRVcYyreMH6GnZZrwQVAoYjRBZyWFQ=
github.com/rogpeppe/go-internal v1.10.0/go.mod h1:UQnix2H7Ngw/k4C5ijL5+65zddjncjaFoBhdsK/akog=
github.com/rs/xid v1.2.1/go.mod h1:+uKXf+4Djp6Md1KODXJxgGQPKngRmWyn10oCKFzNHOQ=
github.com/rs/zerolog v1.13.0/go.mod h1:YbFCdg8HfsridGWAh22vktObvhZbQsZXe4/zB0OKkWU=
github.com/rs/zerolog v1.15.0/go.mod h1:xYTKnLHcpfU2225ny5qZjxnj9NvkumZYjJHlAThCjNc=
@@ -288,14 +314,15 @@ github.com/stretchr/testify v1.8.1/go.mod h1:w2LPCIKwWwSfY2zedu0+kehJoqGctiVI29o
github.com/stretchr/testify v1.8.2/go.mod h1:w2LPCIKwWwSfY2zedu0+kehJoqGctiVI29o6fzry7u4=
github.com/stretchr/testify v1.8.3/go.mod h1:sz/lmYIOXD/1dqDmKjjqLyZ2RngseejIcXlSw2iwfAo=
github.com/stretchr/testify v1.8.4 h1:CcVxjf3Q8PM0mHUKJCdn+eZZtm5yQwehR5yeSVQQcUk=
github.com/stretchr/testify v1.8.4/go.mod h1:sz/lmYIOXD/1dqDmKjjqLyZ2RngseejIcXlSw2iwfAo=
github.com/tidwall/gjson v1.14.0 h1:6aeJ0bzojgWLa82gDQHcx3S0Lr/O51I9bJ5nv6JFx5w=
github.com/tidwall/gjson v1.14.0/go.mod h1:/wbyibRr2FHMks5tjHJ5F8dMZh3AcwJEMf5vlfC0lxk=
github.com/tidwall/match v1.1.1 h1:+Ho715JplO36QYgwN9PGYNhgZvoUSc9X2c80KVTi+GA=
github.com/tidwall/match v1.1.1/go.mod h1:eRSPERbgtNPcGhD8UCthc6PmLEQXEWd3PRB5JTxsfmM=
github.com/tidwall/pretty v1.2.0 h1:RWIZEg2iJ8/g6fDDYzMpobmaoGh5OLl4AXtGUGPcqCs=
github.com/tidwall/pretty v1.2.0/go.mod h1:ITEVvHYasfjBbM0u2Pg8T2nJnzm8xPwvNhhsoaGGjNU=
github.com/toolkits/pkg v1.3.6 h1:47e1amsY6mJmcnF3Y2lIpkJXfoYY2RmgI09PtwdAEMU=
github.com/toolkits/pkg v1.3.6/go.mod h1:M9ecwFGW1vxCTUFM9sr2ZjXSKb04N+1sTQ6SA3RNAIU=
github.com/toolkits/pkg v1.3.8 h1:2yamC20c5mHRtbcGiLY99Lm/2mVitFn6onE8KKvMT1o=
github.com/toolkits/pkg v1.3.8/go.mod h1:M9ecwFGW1vxCTUFM9sr2ZjXSKb04N+1sTQ6SA3RNAIU=
github.com/twitchyliquid64/golang-asm v0.15.1 h1:SU5vSMR7hnwNxj24w34ZyCi/FmDZTkS4MhqMhdFk5YI=
github.com/twitchyliquid64/golang-asm v0.15.1/go.mod h1:a1lVb/DtPvCB8fslRZhAngC2+aY1QWCk3Cedj/Gdt08=
github.com/ugorji/go v1.2.7/go.mod h1:nF9osbDWLy6bDVv/Rtoh6QgnvNDpmCalQV5urGCCS6M=
@@ -305,6 +332,8 @@ github.com/ugorji/go/codec v1.2.11/go.mod h1:UNopzCgEMSXjBc6AOMqYvWC1ktqTAfzJZUZ
github.com/yuin/goldmark v1.1.27/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74=
github.com/yuin/goldmark v1.2.1/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74=
github.com/yuin/goldmark v1.4.13/go.mod h1:6yULJ656Px+3vBD8DxQVa3kxgyrAnzto9xy5taEt/CY=
github.com/yuin/gopher-lua v1.1.1 h1:kYKnWBjvbNP4XLT3+bPEwAXJx262OhaHDWDVOPjL46M=
github.com/yuin/gopher-lua v1.1.1/go.mod h1:GBR0iDaNXjAgGg9zfCvksxSRnQx76gclCIb7kdAd1Pw=
github.com/zenazn/goji v0.9.0/go.mod h1:7S9M489iMyHBNxwZnk9/EHS098H4/F6TATF2mIxtB1Q=
go.uber.org/atomic v1.3.2/go.mod h1:gD2HeocX3+yG+ygLZcrzQJaqmWj9AIm7n08wl/qW/PE=
go.uber.org/atomic v1.4.0/go.mod h1:gD2HeocX3+yG+ygLZcrzQJaqmWj9AIm7n08wl/qW/PE=
@@ -316,6 +345,7 @@ go.uber.org/automaxprocs v1.4.0/go.mod h1:/mTEdr7LvHhs0v7mjdxDreTz1OG5zdZGqgOnhW
go.uber.org/automaxprocs v1.5.2 h1:2LxUOGiR3O6tw8ui5sZa2LAaHnsviZdVOUZw4fvbnME=
go.uber.org/automaxprocs v1.5.2/go.mod h1:eRbA25aqJrxAbsLO0xy5jVwPt7FQnRgjW+efnwa1WM0=
go.uber.org/goleak v1.2.1 h1:NBol2c7O1ZokfZ0LEU9K6Whx/KnwvepVetCUhtKja4A=
go.uber.org/goleak v1.2.1/go.mod h1:qlT2yGI9QafXHhZZLxlSuNsMw3FFLxBr+tBRlmO1xH4=
go.uber.org/multierr v1.1.0/go.mod h1:wR5kodmAFQ0UK8QlbwjlSNy0Z68gJhDJUG5sjR94q/0=
go.uber.org/multierr v1.3.0/go.mod h1:VgVr7evmIr6uPjLBxg28wmKNXyqE9akIJ5XnfpiKl+4=
go.uber.org/multierr v1.5.0/go.mod h1:FeouvMocqHpRaaGuG9EjoKcStLC43Zu/fmqdUMPcKYU=
@@ -378,6 +408,7 @@ golang.org/x/sync v0.0.0-20201020160332-67f06af15bc9/go.mod h1:RxMgew5VJxzue5/jJ
golang.org/x/sync v0.0.0-20220722155255-886fb9371eb4/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
golang.org/x/sync v0.1.0/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
golang.org/x/sync v0.7.0 h1:YsImfSBoP9QPYL0xyKJPq0gcaJdG3rInoqxTWbfQu9M=
golang.org/x/sync v0.7.0/go.mod h1:Czt+wKu1gCyEFDUtn0jG5QVvpJ6rzVqr5aXyt9drQfk=
golang.org/x/sys v0.0.0-20180905080454-ebe1bf3edb33/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
golang.org/x/sys v0.0.0-20190222072716-a9d3bda3a223/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
@@ -400,8 +431,8 @@ golang.org/x/sys v0.0.0-20220722155257-8c9f86f7a55f/go.mod h1:oPkhp1MJrh7nUepCBc
golang.org/x/sys v0.5.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.6.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.8.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.18.0 h1:DBdB3niSjOA/O0blCZBqDefyWNYveAYMNF1Wum0DYQ4=
golang.org/x/sys v0.18.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA=
golang.org/x/sys v0.21.0 h1:rF+pYz3DAGSQAxAu1CbC7catZg4ebC4UIeIhKxBZvws=
golang.org/x/sys v0.21.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA=
golang.org/x/term v0.0.0-20201117132131-f5c789dd3221/go.mod h1:Nr5EML6q2oocZ2LXRh80K7BxOlk5/8JxuGnuhpl+muw=
golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo=
golang.org/x/term v0.0.0-20210927222741-03fcf44c2211/go.mod h1:jbD1KX2456YbFQfuXm/mYQcufACuNUgVhRMnK/tPxf8=

View File

Before

Width:  |  Height:  |  Size: 2.8 KiB

After

Width:  |  Height:  |  Size: 2.8 KiB

View File

@@ -1,6 +1,6 @@
## Appdynamics
## AppDynamics
Appdynamics 采集插件, 采集 Appdynamics 数据
AppDynamics 采集插件, 采集 AppDynamics 数据
## Configuration

View File

@@ -584,7 +584,7 @@
"links": [
{
"title": "下钻",
"url": "/dashboards/automq-group-metrics?TSDB=${DS_PROMETHEUS}\u0026cluster_id=${cluster_id}\u0026group_id=${__field.labels.consumer_group}\u0026partition=all\u0026topic=${__field.labels.topic}"
"url": "/built-in-components/dashboard/detail?__uuid__=1717556327172992000&TSDB=${DS_PROMETHEUS}\u0026cluster_id=${cluster_id}\u0026group_id=${__field.labels.consumer_group}\u0026partition=all\u0026topic=${__field.labels.topic}"
}
],
"showHeader": true
@@ -669,7 +669,7 @@
"links": [
{
"title": "下钻",
"url": "/dashboards/automq-topic-metrics?TSDB=${DS_PROMETHEUS}\u0026cluster_id=${cluster_id}\u0026topic=${__field.labels.topic}"
"url": "/built-in-components/dashboard/detail?__uuid__=1717556327174664000&TSDB=${DS_PROMETHEUS}\u0026cluster_id=${cluster_id}\u0026topic=${__field.labels.topic}"
}
],
"showHeader": true
@@ -781,7 +781,7 @@
"links": [
{
"title": "下钻",
"url": "/dashboards/automq-broker-metrics?DS_PROMETHEUS=${DS_PROMETHEUS}\u0026cluster_id=${cluster_id}\u0026node_id=${__field.labels.instance}"
"url": "/built-in-components/dashboard/detail?__uuid__=1717556327159415000&DS_PROMETHEUS=${DS_PROMETHEUS}\u0026cluster_id=${cluster_id}\u0026node_id=${__field.labels.instance}"
}
],
"showHeader": true

View File

@@ -0,0 +1,8 @@
[[instances]]
urls = [
# "http://localhost:8053/xml/v3",
]
gather_memory_contexts = true
gather_views = true
timeout = "5s"
# labels={app="bind"}

Binary file not shown.

After

Width:  |  Height:  |  Size: 6.9 KiB

View File

@@ -0,0 +1,13 @@
forked from [telegraf/bind](https://github.com/influxdata/telegraf/tree/master/plugins/inputs/bind)
配置示例
```
[[instances]]
urls = [
#"http://localhost:8053/xml/v3",
]
timeout = "5s"
gather_memory_contexts = true
gather_views = true
```

View File

@@ -0,0 +1,3 @@
## canal
canal 默认提供了 prometheus 格式指标的接口 [Prometheus-QuickStart](https://github.com/alibaba/canal/wiki/Prometheus-QuickStart),所以可以直接通过 [prometheus 插件](https://flashcat.cloud/docs/content/flashcat-monitor/categraf/plugin/prometheus) 采集。

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,22 @@
# doris_fe
[[instances]]
# 配置 fe metrics 服务地址
urls = [
"http://127.0.0.1:8030/metrics"
]
url_label_key = "instance"
url_label_value = "{{.Host}}"
# 指定 fe 服务 group 和 job 标签,这里是仪表盘变量调用,可根据实际需求修改。
labels = { group = "fe",job = "doris_cluster01"}
# doris_be
[[instances]]
# 配置 be metrics 服务地址
urls = [
"http://127.0.0.1:8040/metrics"
]
url_label_key = "instance"
url_label_value = "{{.Host}}"
# 指定 be 服务 group 和 job 标签,这里是仪表盘变量调用,可根据实际需求修改。
labels = { group = "be",job = "doris_cluster01"}

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,26 @@
<?xml version="1.0" encoding="utf-8"?>
<!-- Generator: Adobe Illustrator 27.6.1, SVG Export Plug-In . SVG Version: 6.00 Build 0) -->
<svg version="1.1" id="图层_1" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" x="0px" y="0px"
viewBox="0 0 30 30" style="enable-background:new 0 0 30 30;" xml:space="preserve">
<style type="text/css">
.st0{fill:#00A5CA;}
.st1{fill:#3ACA9B;}
.st2{fill:#405AAD;}
</style>
<g>
<g>
<g>
<path class="st0" d="M17.4,4.6l-3.3-3.3c-0.4-0.4-0.9-0.8-1.5-1C12.1,0.1,11.5,0,11,0C9.9,0,8.8,0.4,8,1.2C7.6,1.6,7.3,2,7.1,2.6
c-0.3,0.5-0.4,1-0.4,1.6S6.8,5.3,7,5.9c0.2,0.5,0.5,1,0.9,1.4l5.9,5.9c0.1,0.1,0.3,0.2,0.5,0.2s0.3-0.1,0.5-0.2l2.6-2.6
C17.6,10.5,20.2,7.4,17.4,4.6z"/>
<path class="st1" d="M22.8,9.8c-0.6-0.6-1.3-1.2-1.9-1.9l0,0c0,0.1,0,0.1,0,0.2c-0.2,1.4-0.9,2.7-2,3.7
c-3.4,3.4-6.9,6.9-10.3,10.3l-0.5,0.5c-0.7,0.6-1.2,1.5-1.3,2.4c-0.1,0.7,0,1.3,0.2,2c0.2,0.6,0.5,1.2,1,1.7
c0.4,0.4,0.9,0.8,1.4,1c0.5,0.2,1.1,0.3,1.7,0.3c1.3,0,2-0.2,3-1.1c3.9-3.8,7.8-7.7,10.8-10.6c1.4-1.4,1.7-3.7,0.7-5.2
C24.8,11.8,23.8,10.8,22.8,9.8z"/>
<path class="st2" d="M3.8,7.8v14.5c0,0.2,0,0.3,0.1,0.4C4,22.8,4.1,22.9,4.3,23c0.1,0,0.3,0,0.5,0c0.2,0,0.3-0.1,0.4-0.2l7.3-7.3
c0.1-0.1,0.2-0.3,0.2-0.5s-0.1-0.4-0.2-0.5L5.2,7.2C5.1,7.1,5,7.1,4.9,7C4.8,7,4.7,7,4.6,7C4.4,7,4.2,7.1,4,7.2
C3.9,7.4,3.8,7.6,3.8,7.8z"/>
</g>
</g>
</g>
</svg>

After

Width:  |  Height:  |  Size: 1.4 KiB

View File

@@ -0,0 +1,39 @@
# Doris
Doris 的进程都会暴露 `/metrics` 接口,通过这个接口暴露 Prometheus 协议的监控数据。
## 采集配置
采集配置文件为 categraf 的 `conf/input.prometheus/prometheus.toml`。因为 Doris 暴露的是 Prometheus 协议的监控数据,所以使用 categraf 的 prometheus 插件即可采集。
```toml
# doris_fe
[[instances]]
urls = [
"http://127.0.0.1:8030/metrics"
]
url_label_key = "instance"
url_label_value = "{{.Host}}"
labels = { group = "fe",job = "doris_cluster01"}
# doris_be
[[instances]]
urls = [
"http://127.0.0.1:8040/metrics"
]
url_label_key = "instance"
url_label_value = "{{.Host}}"
labels = { group = "be",job = "doris_cluster01"}
```
## 告警规则
夜莺内置了 Doris 的告警规则,克隆到自己的业务组下即可使用。
## 仪表盘
夜莺内置了 Doris 的仪表盘,克隆到自己的业务组下即可使用。

View File

@@ -459,5 +459,5 @@
}
]
},
"uuid": 1727587308068775200
"uuid": 1727587308068775000
}

View File

@@ -1702,5 +1702,5 @@
],
"version": "3.0.0"
},
"uuid": 1727335102129685800
"uuid": 1727335102129685000
}

View File

@@ -0,0 +1,37 @@
# # collect interval
# interval = 15
[[instances]]
# # append some labels for series
# labels = { region="cloud", product="n9e" }
# # interval = global.interval * interval_times
# interval_times = 1
## Server to monitor
## The scheme determines the mode to use for connection with
## ldap://... -- unencrypted (non-TLS) connection
## ldaps://... -- TLS connection
## starttls://... -- StartTLS connection
## If no port is given, the default ports, 389 for ldap and starttls and
## 636 for ldaps, are used.
#server = "ldap://localhost"
## Server dialect, can be "openldap" or "389ds"
# dialect = "openldap"
## DN and password to bind with
## If bind_dn is empty an anonymous bind is performed.
bind_dn = ""
bind_password = ""
## Reverse the field names constructed from the monitoring DN
# reverse_field_names = false
## Optional TLS Config
# use_tls = false
# tls_ca = "/etc/categraf/ca.pem"
# tls_cert = "/etc/categraf/cert.pem"
# tls_key = "/etc/categraf/key.pem"
## Use TLS but skip chain & host verification
# insecure_skip_verify = false

Binary file not shown.

After

Width:  |  Height:  |  Size: 7.1 KiB

View File

@@ -0,0 +1,113 @@
# LDAP Input Plugin
This plugin gathers metrics from LDAP servers' monitoring (`cn=Monitor`)
backend. Currently this plugin supports [OpenLDAP](https://www.openldap.org/)
and [389ds](https://www.port389.org/) servers.
To use this plugin you must enable the monitoring backend/plugin of your LDAP
server. See
[OpenLDAP](https://www.openldap.org/devel/admin/monitoringslapd.html) or 389ds
documentation for details.
## Metrics
Depending on the server dialect, different metrics are produced. The metrics
are usually named according to the selected dialect.
### Tags
- server -- Server name or IP
- port -- Port used for connecting
## Example Output
Using the `openldap` dialect
```text
openldap_modify_operations_completed agent_hostname=zy-fat port=389 server=localhost 0
openldap_referrals_statistics agent_hostname=zy-fat port=389 server=localhost 0
openldap_unbind_operations_initiated agent_hostname=zy-fat port=389 server=localhost 0
openldap_delete_operations_completed agent_hostname=zy-fat port=389 server=localhost 0
openldap_extended_operations_completed agent_hostname=zy-fat port=389 server=localhost 0
openldap_pdu_statistics agent_hostname=zy-fat port=389 server=localhost 42
openldap_starting_threads agent_hostname=zy-fat port=389 server=localhost 0
openldap_active_threads agent_hostname=zy-fat port=389 server=localhost 1
openldap_uptime_time agent_hostname=zy-fat port=389 server=localhost 102
openldap_bytes_statistics agent_hostname=zy-fat port=389 server=localhost 3176
openldap_compare_operations_completed agent_hostname=zy-fat port=389 server=localhost 0
openldap_bind_operations_completed agent_hostname=zy-fat port=389 server=localhost 1
openldap_total_connections agent_hostname=zy-fat port=389 server=localhost 1002
openldap_search_operations_completed agent_hostname=zy-fat port=389 server=localhost 1
openldap_abandon_operations_initiated agent_hostname=zy-fat port=389 server=localhost 0
openldap_add_operations_initiated agent_hostname=zy-fat port=389 server=localhost 0
openldap_open_threads agent_hostname=zy-fat port=389 server=localhost 1
openldap_add_operations_completed agent_hostname=zy-fat port=389 server=localhost 0
openldap_operations_initiated agent_hostname=zy-fat port=389 server=localhost 3
openldap_write_waiters agent_hostname=zy-fat port=389 server=localhost 0
openldap_entries_statistics agent_hostname=zy-fat port=389 server=localhost 41
openldap_modrdn_operations_completed agent_hostname=zy-fat port=389 server=localhost 0
openldap_pending_threads agent_hostname=zy-fat port=389 server=localhost 0
openldap_max_pending_threads agent_hostname=zy-fat port=389 server=localhost 0
openldap_bind_operations_initiated agent_hostname=zy-fat port=389 server=localhost 1
openldap_max_file_descriptors_connections agent_hostname=zy-fat port=389 server=localhost 1024
openldap_compare_operations_initiated agent_hostname=zy-fat port=389 server=localhost 0
openldap_search_operations_initiated agent_hostname=zy-fat port=389 server=localhost 2
openldap_modrdn_operations_initiated agent_hostname=zy-fat port=389 server=localhost 0
openldap_read_waiters agent_hostname=zy-fat port=389 server=localhost 1
openldap_backload_threads agent_hostname=zy-fat port=389 server=localhost 1
openldap_current_connections agent_hostname=zy-fat port=389 server=localhost 1
openldap_unbind_operations_completed agent_hostname=zy-fat port=389 server=localhost 0
openldap_delete_operations_initiated agent_hostname=zy-fat port=389 server=localhost 0
openldap_extended_operations_initiated agent_hostname=zy-fat port=389 server=localhost 0
openldap_modify_operations_initiated agent_hostname=zy-fat port=389 server=localhost 0
openldap_max_threads agent_hostname=zy-fat port=389 server=localhost 16
openldap_abandon_operations_completed agent_hostname=zy-fat port=389 server=localhost 0
openldap_operations_completed agent_hostname=zy-fat port=389 server=localhost 2
openldap_database_2_databases agent_hostname=zy-fat port=389 server=localhost 0
```
Using the `389ds` dialect
```text
389ds_current_connections_at_max_threads agent_hostname=zy-fat port=389 server=localhost 0
389ds_connections_max_threads agent_hostname=zy-fat port=389 server=localhost 0
389ds_add_operations agent_hostname=zy-fat port=389 server=localhost 0
389ds_dtablesize agent_hostname=zy-fat port=389 server=localhost 63936
389ds_strongauth_binds agent_hostname=zy-fat port=389 server=localhost 13
389ds_modrdn_operations agent_hostname=zy-fat port=389 server=localhost 0
389ds_maxthreads_per_conn_hits agent_hostname=zy-fat port=389 server=localhost 0
389ds_current_connections agent_hostname=zy-fat port=389 server=localhost 2
389ds_security_errors agent_hostname=zy-fat port=389 server=localhost 0
389ds_entries_sent agent_hostname=zy-fat port=389 server=localhost 13
389ds_cache_entries agent_hostname=zy-fat port=389 server=localhost 0
389ds_backends agent_hostname=zy-fat port=389 server=localhost 0
389ds_threads agent_hostname=zy-fat port=389 server=localhost 17
389ds_connections agent_hostname=zy-fat port=389 server=localhost 2
389ds_read_operations agent_hostname=zy-fat port=389 server=localhost 0
389ds_entries_returned agent_hostname=zy-fat port=389 server=localhost 13
389ds_unauth_binds agent_hostname=zy-fat port=389 server=localhost 0
389ds_search_operations agent_hostname=zy-fat port=389 server=localhost 14
389ds_simpleauth_binds agent_hostname=zy-fat port=389 server=localhost 0
389ds_operations_completed agent_hostname=zy-fat port=389 server=localhost 51
389ds_connections_in_max_threads agent_hostname=zy-fat port=389 server=localhost 0
389ds_modify_operations agent_hostname=zy-fat port=389 server=localhost 0
389ds_wholesubtree_search_operations agent_hostname=zy-fat port=389 server=localhost 1
389ds_read_waiters agent_hostname=zy-fat port=389 server=localhost 0
389ds_compare_operations agent_hostname=zy-fat port=389 server=localhost 0
389ds_errors agent_hostname=zy-fat port=389 server=localhost 13
389ds_in_operations agent_hostname=zy-fat port=389 server=localhost 52
389ds_total_connections agent_hostname=zy-fat port=389 server=localhost 15
389ds_cache_hits agent_hostname=zy-fat port=389 server=localhost 0
389ds_list_operations agent_hostname=zy-fat port=389 server=localhost 0
389ds_referrals_returned agent_hostname=zy-fat port=389 server=localhost 0
389ds_copy_entries agent_hostname=zy-fat port=389 server=localhost 0
389ds_operations_initiated agent_hostname=zy-fat port=389 server=localhost 52
389ds_chainings agent_hostname=zy-fat port=389 server=localhost 0
389ds_bind_security_errors agent_hostname=zy-fat port=389 server=localhost 0
389ds_onelevel_search_operations agent_hostname=zy-fat port=389 server=localhost 0
389ds_bytes_sent agent_hostname=zy-fat port=389 server=localhost 1702
389ds_bytes_received agent_hostname=zy-fat port=389 server=localhost 0
389ds_referrals agent_hostname=zy-fat port=389 server=localhost 0
389ds_delete_operations agent_hostname=zy-fat port=389 server=localhost 0
389ds_anonymous_binds agent_hostname=zy-fat port=389 server=localhost 0
```

View File

@@ -1,13 +1,6 @@
{
"id": 0,
"group_id": 0,
"name": "机器台账表格视图",
"ident": "",
"tags": "",
"create_at": 0,
"create_by": "",
"update_at": 0,
"update_by": "",
"configs": {
"links": [
{
@@ -28,7 +21,7 @@
"colorRange": [
"thresholds"
],
"detailUrl": "/dashboards-built-in/detail?__built-in-cate=Linux\u0026__built-in-name=Linux%20Host%20by%20Categraf%20v2\u0026ident=${__field.labels.ident}",
"detailUrl": "/built-in-components/dashboard/detail?__uuid__=1717556327744505000&ident=${__field.labels.ident}",
"textMode": "valueAndName",
"valueField": "Value"
},
@@ -98,7 +91,7 @@
"colorRange": [
"thresholds"
],
"detailUrl": "/dashboards-built-in/detail?__built-in-cate=Linux\u0026__built-in-name=Linux%20Host%20by%20Categraf%20v2\u0026ident=${__field.labels.ident}",
"detailUrl": "/built-in-components/dashboard/detail?__uuid__=1717556327744505000&ident=${__field.labels.ident}",
"textMode": "valueAndName",
"valueField": "Value"
},
@@ -171,13 +164,16 @@
"linkMode": "appendLinkColumn",
"links": [
{
"targetBlank": true,
"title": "详情",
"url": "/dashboards-built-in/detail?__built-in-cate=Linux\u0026__built-in-name=Linux%20Host%20by%20Categraf%20v2\u0026ident=${__field.labels.ident}"
"url": "/built-in-components/dashboard/detail?__uuid__=1717556327744505000&ident=${__field.labels.ident}"
}
],
"nowrap": false,
"showHeader": true,
"sortColumn": "ident",
"sortOrder": "ascend"
"sortOrder": "ascend",
"tableLayout": "fixed"
},
"datasourceCate": "prometheus",
"datasourceValue": "${prom}",
@@ -385,10 +381,5 @@
],
"version": "3.0.0"
},
"public": 0,
"public_cate": 0,
"bgids": null,
"built_in": 0,
"hide": 0,
"uuid": 1717556327742611000
}

View File

@@ -1,13 +1,7 @@
{
"id": 0,
"group_id": 0,
"name": "MongoDB Overview by exporter",
"ident": "",
"tags": "Prometheus MongoDB",
"create_at": 0,
"create_by": "",
"update_at": 0,
"update_by": "",
"ident": "",
"configs": {
"panels": [
{
@@ -150,21 +144,7 @@
"version": "2.0.0"
},
{
"custom": {
"drawStyle": "lines",
"fillOpacity": 0.3,
"gradientMode": "opacity",
"lineInterpolation": "smooth",
"lineWidth": 2,
"scaleDistribution": {
"type": "linear"
},
"spanNulls": false,
"stack": "off"
},
"datasourceCate": "prometheus",
"datasourceValue": "${prom}",
"description": "Memory usage (MiB)",
"type": "timeseries",
"id": "8446dded-9e11-4ee9-bdad-769b193ddf3e",
"layout": {
"h": 7,
@@ -174,10 +154,37 @@
"x": 12,
"y": 1
},
"version": "3.0.0",
"datasourceCate": "prometheus",
"datasourceValue": "${prom}",
"targets": [
{
"expr": "mongodb_ss_mem_resident{instance='$instance'} * 1024 * 1024",
"legend": "{{type}}",
"refId": "A",
"maxDataPoints": 240
}
],
"transformations": [
{
"id": "organize",
"options": {}
}
],
"name": "Memory",
"description": "Memory usage (MiB)",
"maxPerRow": 4,
"options": {
"tooltip": {
"mode": "all",
"sort": "desc"
},
"legend": {
"displayMode": "hidden"
"displayMode": "hidden",
"heightInPercentage": 30,
"placement": "bottom",
"behaviour": "showItem",
"selectMode": "single"
},
"standardOptions": {
"util": "bytesIEC"
@@ -186,25 +193,36 @@
"steps": [
{
"color": "#634CD9",
"type": "base",
"value": null
"value": null,
"type": "base"
}
]
},
"tooltip": {
"mode": "all",
"sort": "desc"
}
},
"targets": [
"custom": {
"drawStyle": "lines",
"lineInterpolation": "smooth",
"spanNulls": false,
"lineWidth": 2,
"fillOpacity": 0.3,
"gradientMode": "opacity",
"stack": "off",
"scaleDistribution": {
"type": "linear"
},
"showPoints": "none",
"pointSize": 5
},
"overrides": [
{
"expr": "mongodb_ss_mem_resident{cluster='$cluster'} * 1024 * 1024",
"legend": "{{type}}",
"refId": "A"
"matcher": {
"id": "byFrameRefID"
},
"properties": {
"rightYAxisDisplay": "off"
}
}
],
"type": "timeseries",
"version": "2.0.0"
]
},
{
"custom": {
@@ -571,21 +589,7 @@
"version": "2.0.0"
},
{
"custom": {
"drawStyle": "lines",
"fillOpacity": 0.3,
"gradientMode": "opacity",
"lineInterpolation": "smooth",
"lineWidth": 2,
"scaleDistribution": {
"type": "linear"
},
"spanNulls": false,
"stack": "off"
},
"datasourceCate": "prometheus",
"datasourceValue": "${prom}",
"description": "Number of document operations When used in combination with 'Command Operations', this graph can help identify write amplification. For example, when one insert or update command actually inserts or updates hundreds, thousands, or even millions of documents.",
"type": "timeseries",
"id": "7030d97a-d69f-4916-a415-ec57503ab1ed",
"layout": {
"h": 7,
@@ -595,52 +599,76 @@
"x": 12,
"y": 16
},
"version": "3.0.0",
"datasourceCate": "prometheus",
"datasourceValue": "${prom}",
"targets": [
{
"expr": "rate(mongodb_ss_metrics_document{instance=\"$instance\"}[5m])",
"legend": "",
"refId": "A",
"maxDataPoints": 240
}
],
"transformations": [
{
"id": "organize",
"options": {}
}
],
"name": "Document Operations",
"description": "Number of document operations. When used in combination with 'Command Operations', this graph can help identify write amplification. For example, when one insert or update command actually inserts or updates hundreds, thousands, or even millions of documents.",
"maxPerRow": 4,
"options": {
"tooltip": {
"mode": "all",
"sort": "none"
},
"legend": {
"displayMode": "hidden"
"displayMode": "hidden",
"heightInPercentage": 30,
"placement": "bottom",
"behaviour": "showItem",
"selectMode": "single"
},
"standardOptions": {},
"thresholds": {
"steps": [
{
"color": "#634CD9",
"type": "base",
"value": null
"value": null,
"type": "base"
}
]
},
"tooltip": {
"mode": "all",
"sort": "none"
}
},
"targets": [
{
"expr": "rate(mongodb_ss_metric_document{instance=\"$instance\"}[5m])",
"legend": "",
"refId": "A"
}
],
"type": "timeseries",
"version": "2.0.0"
},
{
"custom": {
"drawStyle": "lines",
"lineInterpolation": "smooth",
"spanNulls": false,
"lineWidth": 2,
"fillOpacity": 0.3,
"gradientMode": "opacity",
"lineInterpolation": "smooth",
"lineWidth": 2,
"stack": "off",
"scaleDistribution": {
"type": "linear"
},
"spanNulls": false,
"stack": "off"
"showPoints": "none",
"pointSize": 5
},
"datasourceCate": "prometheus",
"datasourceValue": "${prom}",
"description": "Operation detail processing time (milliseconds)",
"overrides": [
{
"matcher": {
"id": "byFrameRefID"
},
"properties": {
"rightYAxisDisplay": "off"
}
}
]
},
{
"type": "timeseries",
"id": "1c3b73d5-c25c-449f-995d-26acc9c621e1",
"layout": {
"h": 7,
@@ -650,10 +678,37 @@
"x": 0,
"y": 23
},
"version": "3.0.0",
"datasourceCate": "prometheus",
"datasourceValue": "${prom}",
"targets": [
{
"expr": "rate(mongodb_ss_opLatencies_latency{instance='$instance'}[5m]) / rate(mongodb_ss_opLatencies_latency{instance='$instance'}[5m]) / 1000",
"legend": "{{op_type}}",
"refId": "A",
"maxDataPoints": 240
}
],
"transformations": [
{
"id": "organize",
"options": {}
}
],
"name": "Response Time",
"description": "Operation detail processing time (milliseconds)",
"maxPerRow": 4,
"options": {
"tooltip": {
"mode": "all",
"sort": "none"
},
"legend": {
"displayMode": "hidden"
"displayMode": "hidden",
"heightInPercentage": 30,
"placement": "bottom",
"behaviour": "showItem",
"selectMode": "single"
},
"standardOptions": {
"util": "milliseconds"
@@ -662,25 +717,36 @@
"steps": [
{
"color": "#634CD9",
"type": "base",
"value": null
"value": null,
"type": "base"
}
]
},
"tooltip": {
"mode": "all",
"sort": "none"
}
},
"targets": [
"custom": {
"drawStyle": "lines",
"lineInterpolation": "smooth",
"spanNulls": false,
"lineWidth": 2,
"fillOpacity": 0.3,
"gradientMode": "opacity",
"stack": "off",
"scaleDistribution": {
"type": "linear"
},
"showPoints": "none",
"pointSize": 5
},
"overrides": [
{
"expr": "rate(mongodb_ss_opLatencies_latency{cluster='$cluster'}[5m]) / rate(mongodb_ss_opLatencies_latency{cluster='$cluster'}[5m]) / 1000",
"legend": "{{op_type}}",
"refId": "A"
"matcher": {
"id": "byFrameRefID"
},
"properties": {
"rightYAxisDisplay": "off"
}
}
],
"type": "timeseries",
"version": "2.0.0"
]
},
{
"custom": {
@@ -815,21 +881,7 @@
"type": "row"
},
{
"custom": {
"drawStyle": "lines",
"fillOpacity": 0.3,
"gradientMode": "opacity",
"lineInterpolation": "smooth",
"lineWidth": 2,
"scaleDistribution": {
"type": "linear"
},
"spanNulls": false,
"stack": "off"
},
"datasourceCate": "prometheus",
"datasourceValue": "${prom}",
"description": "cache size (byte)",
"type": "timeseries",
"id": "bb0ae571-43a1-430b-8f63-256f6f1ebee6",
"layout": {
"h": 7,
@@ -839,10 +891,55 @@
"x": 0,
"y": 31
},
"version": "3.0.0",
"datasourceCate": "prometheus",
"datasourceValue": "${prom}",
"targets": [
{
"expr": "mongodb_ss_wt_cache_bytes_currently_in_the_cache{instance='$instance'}",
"legend": "total",
"refId": "A",
"maxDataPoints": 240
},
{
"expr": "mongodb_ss_wt_cache_tracked_dirty_bytes_in_the_cache{cluster='$cluster'}",
"legend": "dirty",
"refId": "B",
"maxDataPoints": 240
},
{
"expr": "mongodb_ss_wt_cache_tracked_bytes_belonging_to_internal_pages_in_the_cache{cluster='$cluster'}",
"legend": "internal_pages",
"refId": "C",
"maxDataPoints": 240
},
{
"expr": "mongodb_ss_wt_cache_tracked_bytes_belonging_to_leaf_pages_in_the_cache{cluster='$cluster'}",
"legend": "leaf_pages",
"refId": "D",
"maxDataPoints": 240
}
],
"transformations": [
{
"id": "organize",
"options": {}
}
],
"name": "Cache Size",
"description": "cache size (byte)",
"maxPerRow": 4,
"options": {
"tooltip": {
"mode": "all",
"sort": "none"
},
"legend": {
"displayMode": "hidden"
"displayMode": "hidden",
"heightInPercentage": 30,
"placement": "bottom",
"behaviour": "showItem",
"selectMode": "single"
},
"standardOptions": {
"util": "bytesIEC"
@@ -851,57 +948,39 @@
"steps": [
{
"color": "#634CD9",
"type": "base",
"value": null
"value": null,
"type": "base"
}
]
},
"tooltip": {
"mode": "all",
"sort": "none"
}
},
"targets": [
{
"expr": "mongodb_ss_wt_cache_bytes_currently_in_the_cache{cluster='$cluster'}",
"legend": "total",
"refId": "A"
},
{
"expr": "mongodb_ss_wt_cache_tracked_dirty_bytes_in_the_cache{cluster='$cluster'}",
"legend": "dirty",
"refId": "B"
},
{
"expr": "mongodb_ss_wt_cache_tracked_bytes_belonging_to_internal_pages_in_the_cache{cluster='$cluster'}",
"legend": "internal_pages",
"refId": "C"
},
{
"expr": "mongodb_ss_wt_cache_tracked_bytes_belonging_to_leaf_pages_in_the_cache{cluster='$cluster'}",
"legend": "leaf_pages",
"refId": "D"
}
],
"type": "timeseries",
"version": "2.0.0"
},
{
"custom": {
"drawStyle": "lines",
"lineInterpolation": "smooth",
"spanNulls": false,
"lineWidth": 2,
"fillOpacity": 0.3,
"gradientMode": "opacity",
"lineInterpolation": "smooth",
"lineWidth": 2,
"stack": "off",
"scaleDistribution": {
"type": "linear"
},
"spanNulls": false,
"stack": "off"
"showPoints": "none",
"pointSize": 5
},
"datasourceCate": "prometheus",
"datasourceValue": "${prom}",
"description": "size of cached data written or read (in bytes)",
"overrides": [
{
"matcher": {
"id": "byFrameRefID"
},
"properties": {
"rightYAxisDisplay": "off"
}
}
]
},
{
"type": "timeseries",
"id": "f1ffd169-2a1a-42bc-9647-0e6621be0fef",
"layout": {
"h": 7,
@@ -911,10 +990,43 @@
"x": 6,
"y": 31
},
"version": "3.0.0",
"datasourceCate": "prometheus",
"datasourceValue": "${prom}",
"targets": [
{
"expr": "rate(mongodb_ss_wt_cache_bytes_read_into_cache{instance='$instance'}[5m])",
"legend": "read",
"refId": "A",
"maxDataPoints": 240
},
{
"expr": "rate(mongodb_ss_wt_cache_bytes_written_from_cache{cluster='$cluster'}[5m])",
"legend": "written",
"refId": "B",
"maxDataPoints": 240
}
],
"transformations": [
{
"id": "organize",
"options": {}
}
],
"name": "Cache I/O",
"description": "size of cached data written or read (in bytes)",
"maxPerRow": 4,
"options": {
"tooltip": {
"mode": "all",
"sort": "none"
},
"legend": {
"displayMode": "hidden"
"displayMode": "hidden",
"heightInPercentage": 30,
"placement": "bottom",
"behaviour": "showItem",
"selectMode": "single"
},
"standardOptions": {
"util": "bytesSI"
@@ -923,47 +1035,39 @@
"steps": [
{
"color": "#634CD9",
"type": "base",
"value": null
"value": null,
"type": "base"
}
]
},
"tooltip": {
"mode": "all",
"sort": "none"
}
},
"targets": [
{
"expr": "rate(mongodb_ss_wt_cache_bytes_read_into_cache{cluster='$cluster'}[5m])",
"legend": "read",
"refId": "A"
},
{
"expr": "rate(mongodb_ss_wt_cache_bytes_written_from_cache{cluster='$cluster'}[5m])",
"legend": "written",
"refId": "B"
}
],
"type": "timeseries",
"version": "2.0.0"
},
{
"custom": {
"drawStyle": "lines",
"lineInterpolation": "smooth",
"spanNulls": false,
"lineWidth": 2,
"fillOpacity": 0.3,
"gradientMode": "opacity",
"lineInterpolation": "smooth",
"lineWidth": 2,
"stack": "off",
"scaleDistribution": {
"type": "linear"
},
"spanNulls": false,
"stack": "off"
"showPoints": "none",
"pointSize": 5
},
"datasourceCate": "prometheus",
"datasourceValue": "${prom}",
"description": "",
"overrides": [
{
"matcher": {
"id": "byFrameRefID"
},
"properties": {
"rightYAxisDisplay": "off"
}
}
]
},
{
"type": "timeseries",
"id": "43ee140d-ae6d-474a-9892-fa4743d7f97e",
"layout": {
"h": 7,
@@ -973,10 +1077,37 @@
"x": 12,
"y": 31
},
"version": "3.0.0",
"datasourceCate": "prometheus",
"datasourceValue": "${prom}",
"targets": [
{
"expr": "100 * sum(mongodb_ss_wt_cache_tracked_dirty_pages_in_the_cache{instance='$instance'}) / sum(mongodb_ss_wt_cache_pages_currently_held_in_the_cache{instance='$instance'})",
"legend": "dirty rate",
"refId": "A",
"maxDataPoints": 240
}
],
"transformations": [
{
"id": "organize",
"options": {}
}
],
"name": "Cache Dirty Pages Rate",
"description": "",
"maxPerRow": 4,
"options": {
"tooltip": {
"mode": "all",
"sort": "none"
},
"legend": {
"displayMode": "hidden"
"displayMode": "hidden",
"heightInPercentage": 30,
"placement": "bottom",
"behaviour": "showItem",
"selectMode": "single"
},
"standardOptions": {
"util": "percent"
@@ -985,42 +1116,39 @@
"steps": [
{
"color": "#634CD9",
"type": "base",
"value": null
"value": null,
"type": "base"
}
]
},
"tooltip": {
"mode": "all",
"sort": "none"
}
},
"targets": [
{
"expr": "100 * sum(mongodb_ss_wt_cache_tracked_dirty_pages_in_the_cache{cluster='$cluster'}) / sum(mongodb_ss_wt_cache_pages_currently_held_in_the_cache{cluster='$cluster'})",
"legend": "dirty rate",
"refId": "A"
}
],
"type": "timeseries",
"version": "2.0.0"
},
{
"custom": {
"drawStyle": "lines",
"lineInterpolation": "smooth",
"spanNulls": false,
"lineWidth": 2,
"fillOpacity": 0.3,
"gradientMode": "opacity",
"lineInterpolation": "smooth",
"lineWidth": 2,
"stack": "off",
"scaleDistribution": {
"type": "linear"
},
"spanNulls": false,
"stack": "off"
"showPoints": "none",
"pointSize": 5
},
"datasourceCate": "prometheus",
"datasourceValue": "${prom}",
"description": "",
"overrides": [
{
"matcher": {
"id": "byFrameRefID"
},
"properties": {
"rightYAxisDisplay": "off"
}
}
]
},
{
"type": "timeseries",
"id": "1a22c31a-859a-400c-af2a-ae83c308d0f2",
"layout": {
"h": 7,
@@ -1030,35 +1158,73 @@
"x": 18,
"y": 31
},
"version": "3.0.0",
"datasourceCate": "prometheus",
"datasourceValue": "${prom}",
"targets": [
{
"expr": "rate(mongodb_mongod_wiredtiger_cache_evicted_total{instance='$instance'}[5m])",
"legend": "evicted pages",
"refId": "A",
"maxDataPoints": 240
}
],
"transformations": [
{
"id": "organize",
"options": {}
}
],
"name": "Cache Evicted Pages",
"description": "",
"maxPerRow": 4,
"options": {
"tooltip": {
"mode": "all",
"sort": "none"
},
"legend": {
"displayMode": "hidden"
"displayMode": "hidden",
"heightInPercentage": 30,
"placement": "bottom",
"behaviour": "showItem",
"selectMode": "single"
},
"standardOptions": {},
"thresholds": {
"steps": [
{
"color": "#634CD9",
"type": "base",
"value": null
"value": null,
"type": "base"
}
]
},
"tooltip": {
"mode": "all",
"sort": "none"
}
},
"targets": [
"custom": {
"drawStyle": "lines",
"lineInterpolation": "smooth",
"spanNulls": false,
"lineWidth": 2,
"fillOpacity": 0.3,
"gradientMode": "opacity",
"stack": "off",
"scaleDistribution": {
"type": "linear"
},
"showPoints": "none",
"pointSize": 5
},
"overrides": [
{
"expr": "rate(mongodb_mongod_wiredtiger_cache_evicted_total{cluster='$cluster'}[5m])",
"legend": "evicted pages",
"refId": "A"
"matcher": {
"id": "byFrameRefID"
},
"properties": {
"rightYAxisDisplay": "off"
}
}
],
"type": "timeseries",
"version": "2.0.0"
]
},
{
"collapsed": true,
@@ -1204,10 +1370,5 @@
],
"version": "3.0.0"
},
"public": 0,
"public_cate": 0,
"bgids": null,
"built_in": 0,
"hide": 0,
"uuid": 1717556328065329000
}
}

File diff suppressed because it is too large Load Diff

View File

@@ -1,5 +1,5 @@
{
"name": "MySQL 仪表盘(远端",
"name": "MySQL 仪表盘(使用 instance 筛选,需要采集时自行打上 instance 标签",
"tags": "",
"ident": "",
"configs": {
@@ -1802,5 +1802,5 @@
],
"version": "3.0.0"
},
"uuid": 1717556328087994322
"uuid": 1717556328087995000
}

View File

@@ -1,5 +1,5 @@
{
"name": "MySQL 仪表盘",
"name": "MySQL 仪表盘,适用于 Categraf 采集本机 MySQL 的场景",
"tags": "",
"ident": "",
"configs": {
@@ -1798,5 +1798,5 @@
],
"version": "3.0.0"
},
"uuid": 1717556328087994321
"uuid": 1717556328087994000
}

View File

@@ -139,3 +139,11 @@ timeout = "3s"
request = '''
select METRIC_NAME,VALUE from v$sysmetric where group_id=2
'''
[[metrics]]
mesurement = "applylag"
metric_fields = [ "value" ]
timeout = "3s"
request = '''
SELECT TO_NUMBER(EXTRACT(SECOND FROM TO_DSINTERVAL (value))) as value FROM v$dataguard_stats WHERE name = 'apply lag'
'''

View File

@@ -0,0 +1,925 @@
{
"name": "Redis by address",
"tags": "Redis Categraf",
"configs": {
"panels": [
{
"collapsed": true,
"id": "2ecb82c6-4d1a-41b5-8cdc-0284db16bd54",
"layout": {
"h": 1,
"i": "2ecb82c6-4d1a-41b5-8cdc-0284db16bd54",
"isResizable": false,
"w": 24,
"x": 0,
"y": 0
},
"name": "Basic Info",
"type": "row"
},
{
"custom": {
"alignItems": "center",
"bgColor": "rgba(0, 0, 0, 0)",
"content": "<img src=\"https://download.flashcat.cloud/ulric/redis.png\" width=128 />",
"justifyContent": "center",
"textColor": "#000000",
"textDarkColor": "#FFFFFF",
"textSize": 12
},
"id": "b5acc352-a2bd-4afc-b6cd-d6db0905f807",
"layout": {
"h": 3,
"i": "b5acc352-a2bd-4afc-b6cd-d6db0905f807",
"isResizable": true,
"w": 4,
"x": 0,
"y": 1
},
"maxPerRow": 4,
"name": "",
"type": "text",
"version": "3.0.0"
},
{
"custom": {
"calc": "lastNotNull",
"colSpan": 0,
"colorMode": "background",
"graphMode": "none",
"orientation": "vertical",
"textMode": "valueAndName",
"textSize": {},
"valueField": "Value"
},
"datasourceCate": "prometheus",
"datasourceValue": "${prom}",
"id": "5eb6fbcf-4260-40d0-ad6a-540e54a1f922",
"layout": {
"h": 3,
"i": "2a02e1d4-2ed3-4bd2-9fa0-69bb10f13888",
"isResizable": true,
"w": 5,
"x": 4,
"y": 1
},
"maxPerRow": 4,
"name": "Redis Uptime",
"options": {
"standardOptions": {
"decimals": 2,
"util": "seconds"
},
"thresholds": {
"steps": [
{
"color": "rgba(63, 196, 83, 1)",
"type": "base",
"value": null
}
]
},
"valueMappings": [
{
"match": {
"to": 600
},
"result": {
"color": "rgba(255, 101, 107, 1)"
},
"type": "range"
},
{
"match": {
"from": 600
},
"result": {
"color": "rgba(63, 196, 83, 1)"
},
"type": "range"
}
]
},
"targets": [
{
"expr": "redis_uptime_in_seconds{address=~\"$address\"}",
"legend": "{{address}}",
"maxDataPoints": 240
}
],
"transformations": [
{
"id": "organize",
"options": {}
}
],
"type": "stat",
"version": "3.0.0"
},
{
"custom": {
"calc": "lastNotNull",
"colSpan": 0,
"colorMode": "background",
"graphMode": "none",
"orientation": "vertical",
"textMode": "valueAndName",
"textSize": {},
"valueField": "Value"
},
"datasourceCate": "prometheus",
"datasourceValue": "${prom}",
"id": "8ccada5e-02f3-4efc-9b36-2a367612e4cb",
"layout": {
"h": 3,
"i": "8ccada5e-02f3-4efc-9b36-2a367612e4cb",
"isResizable": true,
"w": 5,
"x": 9,
"y": 1
},
"maxPerRow": 4,
"name": "Connected Clients",
"options": {
"standardOptions": {},
"thresholds": {
"steps": [
{
"color": "#6C53B1",
"type": "base",
"value": null
}
]
},
"valueMappings": [
{
"match": {
"to": 500
},
"result": {
"color": "rgba(63, 196, 83, 1)"
},
"type": "range"
},
{
"match": {
"from": 500
},
"result": {
"color": "rgba(255, 101, 107, 1)"
},
"type": "range"
}
]
},
"targets": [
{
"expr": "redis_connected_clients{address=~\"$address\"}",
"legend": "{{address}}",
"maxDataPoints": 240
}
],
"transformations": [
{
"id": "organize",
"options": {}
}
],
"type": "stat",
"version": "3.0.0"
},
{
"custom": {
"calc": "lastNotNull",
"colSpan": 0,
"colorMode": "background",
"graphMode": "none",
"orientation": "vertical",
"textMode": "valueAndName",
"textSize": {},
"valueField": "Value"
},
"datasourceCate": "prometheus",
"datasourceValue": "${prom}",
"id": "716dc7e7-c9ec-4195-93f6-db1c572ae8b0",
"layout": {
"h": 3,
"i": "716dc7e7-c9ec-4195-93f6-db1c572ae8b0",
"isResizable": true,
"w": 5,
"x": 14,
"y": 1
},
"maxPerRow": 4,
"name": "Memory Used",
"options": {
"standardOptions": {
"decimals": 1,
"util": "bytesIEC"
},
"thresholds": {
"steps": [
{
"color": "#6C53B1",
"type": "base",
"value": null
}
]
},
"valueMappings": [
{
"match": {
"to": 128000000
},
"result": {
"color": "#079e05"
},
"type": "range"
},
{
"match": {
"from": 128000000
},
"result": {
"color": "#f10909"
},
"type": "range"
}
]
},
"targets": [
{
"expr": "redis_used_memory{address=~\"$address\"}",
"legend": "{{address}}",
"maxDataPoints": 240
}
],
"transformations": [
{
"id": "organize",
"options": {}
}
],
"type": "stat",
"version": "3.0.0"
},
{
"custom": {
"calc": "lastNotNull",
"colSpan": 0,
"colorMode": "background",
"graphMode": "none",
"orientation": "vertical",
"textMode": "valueAndName",
"textSize": {},
"valueField": "Value"
},
"datasourceCate": "prometheus",
"datasourceValue": "${prom}",
"id": "c6948161-db07-42df-beb1-765ee9c071a9",
"layout": {
"h": 3,
"i": "c6948161-db07-42df-beb1-765ee9c071a9",
"isResizable": true,
"w": 5,
"x": 19,
"y": 1
},
"maxPerRow": 4,
"name": "Max Memory Limit",
"options": {
"standardOptions": {
"decimals": 1,
"util": "bytesIEC"
},
"thresholds": {
"steps": [
{
"color": "rgba(63, 196, 83, 1)",
"type": "base",
"value": null
}
]
}
},
"targets": [
{
"expr": "redis_maxmemory{address=~\"$address\"}",
"legend": "{{address}}",
"maxDataPoints": 240
}
],
"transformations": [
{
"id": "organize",
"options": {}
}
],
"type": "stat",
"version": "3.0.0"
},
{
"collapsed": true,
"id": "bd54cf4f-1abb-4945-8aab-f89aec16daef",
"layout": {
"h": 1,
"i": "bd54cf4f-1abb-4945-8aab-f89aec16daef",
"isResizable": false,
"w": 24,
"x": 0,
"y": 4
},
"name": "Commands",
"type": "row"
},
{
"custom": {
"drawStyle": "lines",
"fillOpacity": 0.3,
"gradientMode": "opacity",
"lineInterpolation": "smooth",
"lineWidth": 2,
"scaleDistribution": {
"type": "linear"
},
"spanNulls": false,
"stack": "off"
},
"datasourceCate": "prometheus",
"datasourceValue": "${prom}",
"id": "3d5f8c4e-0ddf-4d68-9f6d-2cc57d864a8e",
"layout": {
"h": 5,
"i": "3d5f8c4e-0ddf-4d68-9f6d-2cc57d864a8e",
"isResizable": true,
"w": 8,
"x": 0,
"y": 5
},
"maxPerRow": 4,
"name": "Commands Executed / sec",
"options": {
"legend": {
"behaviour": "showItem",
"displayMode": "hidden"
},
"standardOptions": {
"decimals": 2
},
"thresholds": {
"steps": [
{
"color": "#6C53B1",
"type": "base",
"value": null
}
]
},
"tooltip": {
"mode": "all",
"sort": "none"
}
},
"overrides": [
{
"matcher": {
"id": "byFrameRefID"
},
"properties": {
"rightYAxisDisplay": "off"
}
}
],
"targets": [
{
"expr": "rate(redis_total_commands_processed{address=~\"$address\"}[5m])",
"legend": "{{address}}",
"maxDataPoints": 240
}
],
"transformations": [
{
"id": "organize",
"options": {}
}
],
"type": "timeseries",
"version": "3.0.0"
},
{
"custom": {
"drawStyle": "lines",
"fillOpacity": 0.3,
"gradientMode": "opacity",
"lineInterpolation": "smooth",
"lineWidth": 2,
"scaleDistribution": {
"type": "linear"
},
"spanNulls": false,
"stack": "noraml"
},
"datasourceCate": "prometheus",
"datasourceValue": "${prom}",
"id": "344a874d-c34d-4d2d-9bb4-46e0912cd9f5",
"layout": {
"h": 5,
"i": "344a874d-c34d-4d2d-9bb4-46e0912cd9f5",
"isResizable": true,
"w": 8,
"x": 8,
"y": 5
},
"maxPerRow": 4,
"name": "Hits / Misses per Sec",
"options": {
"legend": {
"behaviour": "showItem",
"displayMode": "hidden"
},
"standardOptions": {
"decimals": 2
},
"thresholds": {
"steps": [
{
"color": "#6C53B1",
"type": "base",
"value": null
}
]
},
"tooltip": {
"mode": "all",
"sort": "none"
}
},
"overrides": [
{
"matcher": {
"id": "byFrameRefID"
},
"properties": {
"rightYAxisDisplay": "off"
}
}
],
"targets": [
{
"expr": "irate(redis_keyspace_hits{address=~\"$address\"}[5m])",
"legend": "{{address}} hits",
"maxDataPoints": 240
},
{
"expr": "irate(redis_keyspace_misses{address=~\"$address\"}[5m])",
"legend": "{{address}} misses",
"maxDataPoints": 240
}
],
"transformations": [
{
"id": "organize",
"options": {}
}
],
"type": "timeseries",
"version": "3.0.0"
},
{
"custom": {
"drawStyle": "lines",
"fillOpacity": 0.3,
"gradientMode": "opacity",
"lineInterpolation": "smooth",
"lineWidth": 2,
"scaleDistribution": {
"type": "linear"
},
"spanNulls": false,
"stack": "off"
},
"datasourceCate": "prometheus",
"datasourceValue": "${prom}",
"id": "3c83cd35-585c-4070-a210-1f17345f13f4",
"layout": {
"h": 5,
"i": "3c83cd35-585c-4070-a210-1f17345f13f4",
"isResizable": true,
"w": 8,
"x": 16,
"y": 5
},
"maxPerRow": 4,
"name": "Top Commands",
"options": {
"legend": {
"behaviour": "showItem",
"displayMode": "hidden"
},
"standardOptions": {
"decimals": 2
},
"thresholds": {
"steps": [
{
"color": "#6C53B1",
"type": "base",
"value": null
}
]
},
"tooltip": {
"mode": "all",
"sort": "desc"
}
},
"overrides": [
{
"matcher": {
"id": "byFrameRefID"
},
"properties": {
"rightYAxisDisplay": "off"
}
}
],
"targets": [
{
"expr": "topk(5, irate(redis_cmdstat_calls{address=~\"$address\"}[1m]))",
"legend": "{{address}} {{command}}",
"maxDataPoints": 240
}
],
"transformations": [
{
"id": "organize",
"options": {}
}
],
"type": "timeseries",
"version": "3.0.0"
},
{
"collapsed": true,
"id": "1ea61073-a46d-4d7c-b072-fcdcbc5ac084",
"layout": {
"h": 1,
"i": "1ea61073-a46d-4d7c-b072-fcdcbc5ac084",
"isResizable": false,
"w": 24,
"x": 0,
"y": 10
},
"name": "Keys",
"type": "row"
},
{
"custom": {
"drawStyle": "lines",
"fillOpacity": 0.3,
"gradientMode": "opacity",
"lineInterpolation": "smooth",
"lineWidth": 2,
"scaleDistribution": {
"type": "linear"
},
"spanNulls": false,
"stack": "off"
},
"datasourceCate": "prometheus",
"datasourceValue": "${prom}",
"id": "b2b4451c-4f8a-438a-8c48-69c95c68361e",
"layout": {
"h": 5,
"i": "b2b4451c-4f8a-438a-8c48-69c95c68361e",
"isResizable": true,
"w": 8,
"x": 0,
"y": 11
},
"maxPerRow": 4,
"name": "Total Items per DB",
"options": {
"legend": {
"behaviour": "showItem",
"displayMode": "hidden"
},
"standardOptions": {
"decimals": 2
},
"thresholds": {
"steps": [
{
"color": "#6C53B1",
"type": "base",
"value": null
}
]
},
"tooltip": {
"mode": "all",
"sort": "desc"
}
},
"overrides": [
{
"matcher": {
"id": "byFrameRefID"
},
"properties": {
"rightYAxisDisplay": "off"
}
}
],
"targets": [
{
"expr": "sum(redis_keyspace_keys{address=~\"$address\"}) by (address, db)",
"legend": "{{address}} {{db}}",
"maxDataPoints": 240
}
],
"transformations": [
{
"id": "organize",
"options": {}
}
],
"type": "timeseries",
"version": "3.0.0"
},
{
"custom": {
"drawStyle": "lines",
"fillOpacity": 0.3,
"gradientMode": "opacity",
"lineInterpolation": "smooth",
"lineWidth": 2,
"scaleDistribution": {
"type": "linear"
},
"spanNulls": false,
"stack": "off"
},
"datasourceCate": "prometheus",
"datasourceValue": "${prom}",
"id": "894b9beb-e764-441c-ae04-13e5dbbb901d",
"layout": {
"h": 5,
"i": "894b9beb-e764-441c-ae04-13e5dbbb901d",
"isResizable": true,
"w": 8,
"x": 8,
"y": 11
},
"maxPerRow": 4,
"name": "Expired / Evicted",
"options": {
"legend": {
"behaviour": "showItem",
"displayMode": "hidden"
},
"standardOptions": {
"decimals": 2
},
"thresholds": {
"steps": [
{
"color": "#6C53B1",
"type": "base",
"value": null
}
]
},
"tooltip": {
"mode": "all",
"sort": "desc"
}
},
"overrides": [
{
"matcher": {
"id": "byFrameRefID"
},
"properties": {
"rightYAxisDisplay": "off"
}
}
],
"targets": [
{
"expr": "sum(rate(redis_expired_keys{address=~\"$address\"}[5m])) by (address)",
"legend": "{{address}} expired",
"maxDataPoints": 240
},
{
"expr": "sum(rate(redis_evicted_keys{address=~\"$address\"}[5m])) by (address)",
"legend": "{{address}} evicted",
"maxDataPoints": 240
}
],
"transformations": [
{
"id": "organize",
"options": {}
}
],
"type": "timeseries",
"version": "3.0.0"
},
{
"custom": {
"drawStyle": "lines",
"fillOpacity": 0.3,
"gradientMode": "opacity",
"lineInterpolation": "smooth",
"lineWidth": 2,
"scaleDistribution": {
"type": "linear"
},
"spanNulls": false,
"stack": "noraml"
},
"datasourceCate": "prometheus",
"datasourceValue": "${prom}",
"id": "f721a641-28c7-4e82-a37c-ec17704a0c57",
"layout": {
"h": 5,
"i": "f721a641-28c7-4e82-a37c-ec17704a0c57",
"isResizable": true,
"w": 8,
"x": 16,
"y": 11
},
"maxPerRow": 4,
"name": "Expiring vs Not-Expiring Keys",
"options": {
"legend": {
"behaviour": "showItem",
"displayMode": "hidden"
},
"standardOptions": {
"decimals": 2
},
"thresholds": {
"steps": [
{
"color": "#6C53B1",
"type": "base",
"value": null
}
]
},
"tooltip": {
"mode": "all",
"sort": "none"
}
},
"overrides": [
{
"matcher": {
"id": "byFrameRefID"
},
"properties": {
"rightYAxisDisplay": "off"
}
}
],
"targets": [
{
"expr": "sum(redis_keyspace_keys{address=~\"$address\"}) - sum(redis_keyspace_expires{address=~\"$address\"}) ",
"legend": "{{address}} not expiring",
"maxDataPoints": 240
},
{
"expr": "sum(redis_keyspace_expires{address=~\"$address\"}) ",
"legend": "{{address}} expiring",
"maxDataPoints": 240
}
],
"transformations": [
{
"id": "organize",
"options": {}
}
],
"type": "timeseries",
"version": "3.0.0"
},
{
"collapsed": true,
"id": "60ff41ed-9d41-40ee-a13b-c968f3ca49d0",
"layout": {
"h": 1,
"i": "60ff41ed-9d41-40ee-a13b-c968f3ca49d0",
"isResizable": false,
"w": 24,
"x": 0,
"y": 16
},
"name": "Network",
"type": "row"
},
{
"custom": {
"drawStyle": "lines",
"fillOpacity": 0.3,
"gradientMode": "opacity",
"lineInterpolation": "smooth",
"lineWidth": 2,
"scaleDistribution": {
"type": "linear"
},
"spanNulls": false,
"stack": "off"
},
"datasourceCate": "prometheus",
"datasourceValue": "${prom}",
"id": "1841950c-e867-4a62-b846-78754dc0e34d",
"layout": {
"h": 7,
"i": "1841950c-e867-4a62-b846-78754dc0e34d",
"isResizable": true,
"w": 24,
"x": 0,
"y": 17
},
"maxPerRow": 4,
"name": "Network I/O",
"options": {
"legend": {
"behaviour": "showItem",
"displayMode": "hidden"
},
"standardOptions": {
"decimals": 2,
"util": "bytesIEC"
},
"thresholds": {
"steps": [
{
"color": "#6C53B1",
"type": "base",
"value": null
}
]
},
"tooltip": {
"mode": "all",
"sort": "desc"
}
},
"overrides": [
{
"matcher": {
"id": "byFrameRefID"
},
"properties": {
"rightYAxisDisplay": "off"
}
}
],
"targets": [
{
"expr": "sum(rate(redis_total_net_input_bytes{address=~\"$address\"}[5m]))",
"legend": "input",
"maxDataPoints": 240
},
{
"expr": "sum(rate(redis_total_net_output_bytes{address=~\"$address\"}[5m]))",
"legend": "output",
"maxDataPoints": 240
}
],
"transformations": [
{
"id": "organize",
"options": {}
}
],
"type": "timeseries",
"version": "3.0.0"
}
],
"var": [
{
"definition": "prometheus",
"name": "prom",
"type": "datasource"
},
{
"allOption": true,
"datasource": {
"cate": "prometheus",
"value": "${prom}"
},
"definition": "label_values(redis_uptime_in_seconds,address)",
"hide": false,
"multi": true,
"name": "address",
"type": "query"
}
],
"version": "3.0.0"
},
"uuid": 1732008163114399
}

View File

@@ -1,6 +1,6 @@
{
"name": "Redis Overview - categraf",
"tags": "Redis Prometheus",
"name": "Redis by instance",
"tags": "Redis Categraf",
"ident": "",
"configs": {
"panels": [

View File

@@ -60,6 +60,18 @@ func (c *BusiGroupCacheType) GetByBusiGroupId(id int64) *models.BusiGroup {
return c.ugs[id]
}
// GetNamesByBusiGroupIds returns the names of the cached business groups whose
// ids appear in ids, in the order the ids were given. Ids that have no entry in
// the cache are skipped, so the result can be shorter than ids.
func (c *BusiGroupCacheType) GetNamesByBusiGroupIds(ids []int64) []string {
	result := make([]string, 0, len(ids))

	c.RLock()
	defer c.RUnlock()

	for i := range ids {
		group, found := c.ugs[ids[i]]
		if !found {
			continue
		}
		result = append(result, group.Name)
	}
	return result
}
func (c *BusiGroupCacheType) SyncBusiGroups() {
err := c.syncBusiGroups()
if err != nil {
@@ -112,3 +124,14 @@ func (c *BusiGroupCacheType) syncBusiGroups() error {
return nil
}
// GetNameByBusiGroupId returns the name of the cached business group stored
// under id, or the empty string when the cache holds no group for that id.
func (c *BusiGroupCacheType) GetNameByBusiGroupId(id int64) string {
	c.RLock()
	defer c.RUnlock()

	if group := c.ugs[id]; group != nil {
		return group.Name
	}
	return ""
}

150
memsto/config_cval_cache.go Normal file
View File

@@ -0,0 +1,150 @@
package memsto
import (
"encoding/json"
"log"
"sync"
"time"
"github.com/ccfos/nightingale/v6/dumper"
"github.com/ccfos/nightingale/v6/models"
"github.com/ccfos/nightingale/v6/pkg/ctx"
"github.com/pkg/errors"
"github.com/toolkits/pkg/logger"
)
// CvalCache is an in-memory copy of configuration values keyed by their ckey.
// It is filled by syncConfigs (through Set) and read through Get.
type CvalCache struct {
// statTotal and statLastUpdated are the statistics recorded by the most recent
// Set; NewCvalCache initialises both to -1.
statTotal int64
statLastUpdated int64
// ctx is passed to the models queries issued by syncConfigs.
ctx *ctx.Context
// stats receives the "sync_cvals" gauges updated by syncConfigs.
stats *Stats
// mu is taken by Set (write) and by Get / GetLastUpdateTime (read).
mu sync.RWMutex
// cvals maps a config key (Ckey) to its value (Cval).
cvals map[string]string
}
// NewCvalCache creates a CvalCache bound to ctx and stats, with both stat
// fields set to -1 and an empty value map, then runs initSyncConfigs before
// handing the cache back. initSyncConfigs aborts the process if the first
// sync fails and otherwise leaves a background refresh goroutine running.
func NewCvalCache(ctx *ctx.Context, stats *Stats) *CvalCache {
	cache := new(CvalCache)
	cache.statTotal = -1
	cache.statLastUpdated = -1
	cache.ctx = ctx
	cache.stats = stats
	cache.cvals = make(map[string]string)

	cache.initSyncConfigs()
	return cache
}
// initSyncConfigs performs one blocking sync and then starts the periodic
// refresh in its own goroutine. If the first sync fails the process is
// terminated through log.Fatalln.
func (c *CvalCache) initSyncConfigs() {
	if err := c.syncConfigs(); err != nil {
		log.Fatalln("failed to sync configs:", err)
	}

	go c.loopSyncConfigs()
}
// loopSyncConfigs re-runs syncConfigs forever, sleeping 9000ms before each
// attempt. A failed attempt is logged as a warning and the loop carries on;
// the function never returns.
func (c *CvalCache) loopSyncConfigs() {
	const interval = 9000 * time.Millisecond

	for {
		time.Sleep(interval)

		err := c.syncConfigs()
		if err == nil {
			continue
		}
		logger.Warning("failed to sync configs:", err)
	}
}
// syncConfigs reloads the cache when the statistics reported by
// models.ConfigCvalStatistics differ from the ones recorded by the last Set.
//
// Every outcome is reported through dumper.PutSyncRecord under "cvals". When
// nothing changed, the "sync_cvals" gauges are zeroed and nil is returned.
// After a reload, the gauges carry the elapsed milliseconds and the number of
// cached entries. Query failures are returned wrapped with a short message.
func (c *CvalCache) syncConfigs() error {
	const (
		recordKey = "cvals"
		gaugeKey  = "sync_cvals"
	)

	begin := time.Now()

	stat, err := models.ConfigCvalStatistics(c.ctx)
	if err != nil {
		dumper.PutSyncRecord(recordKey, begin.Unix(), -1, -1, "failed to query statistics: "+err.Error())
		return errors.WithMessage(err, "failed to call ConfigCvalStatistics")
	}

	// Same total and last-updated stamp as the cached ones: skip the reload.
	if !c.statChanged(stat.Total, stat.LastUpdated) {
		c.stats.GaugeCronDuration.WithLabelValues(gaugeKey).Set(0)
		c.stats.GaugeSyncNumber.WithLabelValues(gaugeKey).Set(0)
		dumper.PutSyncRecord(recordKey, begin.Unix(), -1, -1, "not changed")
		return nil
	}

	records, err := models.ConfigsGetAll(c.ctx)
	if err != nil {
		dumper.PutSyncRecord(recordKey, begin.Unix(), -1, -1, "failed to query records: "+err.Error())
		return errors.WithMessage(err, "failed to call ConfigsGet")
	}

	c.Set(records, stat.Total, stat.LastUpdated)

	elapsedMs := time.Since(begin).Milliseconds()
	cached := len(c.cvals)
	c.stats.GaugeCronDuration.WithLabelValues(gaugeKey).Set(float64(elapsedMs))
	c.stats.GaugeSyncNumber.WithLabelValues(gaugeKey).Set(float64(cached))

	logger.Infof("timer: sync cvals done, cost: %dms", elapsedMs)
	dumper.PutSyncRecord(recordKey, begin.Unix(), elapsedMs, cached, "success")
	return nil
}
// statChanged reports whether total or updated differs from the statistics
// recorded by the last Set.
func (c *CvalCache) statChanged(total int64, updated int64) bool {
	return c.statTotal != total || c.statLastUpdated != updated
}
// Set replaces the cached values with the snapshot in cvals and records total
// and updated as the statistics that snapshot corresponds to.
//
// The map is rebuilt from scratch rather than merged into the existing one:
// merging would keep serving keys that have since been deleted from the
// backing store, even though the change in statistics triggered this reload.
// Nil entries in cvals are ignored.
func (c *CvalCache) Set(cvals []*models.Configs, total int64, updated int64) {
	// Build the replacement outside the lock so readers are blocked only for
	// the pointer swap below.
	fresh := make(map[string]string, len(cvals))
	for _, cfg := range cvals {
		if cfg == nil {
			continue
		}
		fresh[cfg.Ckey] = cfg.Cval
	}

	c.mu.Lock()
	defer c.mu.Unlock()

	c.statTotal = total
	c.statLastUpdated = updated
	c.cvals = fresh
}
// Get returns the cached value stored under ckey, or the empty string when
// the key is not cached. The lookup is done under the read lock.
func (c *CvalCache) Get(ckey string) string {
	c.mu.RLock()
	value := c.cvals[ckey]
	c.mu.RUnlock()

	return value
}
// GetLastUpdateTime returns the last-updated statistic recorded by the most
// recent Set (-1 until the first Set). The read is done under the read lock.
func (c *CvalCache) GetLastUpdateTime() int64 {
	c.mu.RLock()
	lastUpdated := c.statLastUpdated
	c.mu.RUnlock()

	return lastUpdated
}
// SiteInfo holds the fields that GetSiteInfo decodes from the JSON stored
// under the "site_info" config key.
type SiteInfo struct {
// PrintBodyPaths is decoded from "print_body_paths"; PrintBodyPaths() exposes
// it as a set.
PrintBodyPaths []string `json:"print_body_paths"`
// PrintAccessLog is decoded from "print_access_log".
PrintAccessLog bool `json:"print_access_log"`
}
// GetSiteInfo decodes the JSON stored under the "site_info" config key into a
// SiteInfo. It always returns a non-nil pointer: a zero SiteInfo when the key
// is missing or empty, and whatever was decoded before the failure (after
// logging the error) when the stored value is not valid JSON.
//
// The cache lock is deliberately not taken here. Get acquires the read lock
// itself, and sync.RWMutex does not allow recursive read locking: holding
// RLock while calling Get deadlocks as soon as Set requests the write lock
// between the two acquisitions.
func (c *CvalCache) GetSiteInfo() *SiteInfo {
	si := SiteInfo{}

	siteInfoStr := c.Get("site_info")
	if siteInfoStr == "" {
		return &si
	}

	if err := json.Unmarshal([]byte(siteInfoStr), &si); err != nil {
		logger.Errorf("Failed to unmarshal site info: %v", err)
	}
	return &si
}
// PrintBodyPaths returns the PrintBodyPaths list of the current site info as a
// set, with one key per listed path. The map is freshly allocated on every
// call and is empty (not nil) when no paths are configured.
func (c *CvalCache) PrintBodyPaths() map[string]struct{} {
	paths := c.GetSiteInfo().PrintBodyPaths

	set := make(map[string]struct{}, len(paths))
	for i := range paths {
		set[paths[i]] = struct{}{}
	}
	return set
}
// PrintAccessLog returns the PrintAccessLog flag of the current site info;
// it is false when no site info is configured.
func (c *CvalCache) PrintAccessLog() bool {
	info := c.GetSiteInfo()
	return info.PrintAccessLog
}

View File

@@ -23,7 +23,9 @@ type DatasourceCacheType struct {
DatasourceFilter func([]*models.Datasource, *models.User) []*models.Datasource
sync.RWMutex
ds map[int64]*models.Datasource // key: id
ds map[int64]*models.Datasource // key: id value: datasource
CateToIDs map[string]map[int64]*models.Datasource // key1: cate key2: id value: datasource
CateToNames map[string]map[string]int64 // key1: cate key2: name value: id
}
func NewDatasourceCache(ctx *ctx.Context, stats *Stats) *DatasourceCacheType {
@@ -33,6 +35,8 @@ func NewDatasourceCache(ctx *ctx.Context, stats *Stats) *DatasourceCacheType {
ctx: ctx,
stats: stats,
ds: make(map[int64]*models.Datasource),
CateToIDs: make(map[string]map[int64]*models.Datasource),
CateToNames: make(map[string]map[string]int64),
DatasourceCheckHook: func(ctx *gin.Context) bool { return false },
DatasourceFilter: func(ds []*models.Datasource, user *models.User) []*models.Datasource { return ds },
}
@@ -40,6 +44,12 @@ func NewDatasourceCache(ctx *ctx.Context, stats *Stats) *DatasourceCacheType {
return ds
}
// GetIDsByDsCateAndQueries passes datasourceQueries, together with the cached
// id-indexed and name-indexed datasource maps for cate, to
// models.GetDatasourceIDsByDatasourceQueries and returns its result. The
// cache lock is held for the whole call.
//
// NOTE(review): the exclusive lock is taken although nothing in this method
// writes to the cache; if the models helper only reads the maps it is given,
// RLock would be enough — confirm before changing.
func (d *DatasourceCacheType) GetIDsByDsCateAndQueries(cate string, datasourceQueries []models.DatasourceQuery) []int64 {
	d.Lock()
	defer d.Unlock()

	byID := d.CateToIDs[cate]
	byName := d.CateToNames[cate]
	return models.GetDatasourceIDsByDatasourceQueries(datasourceQueries, byID, byName)
}
func (d *DatasourceCacheType) StatChanged(total, lastUpdated int64) bool {
if d.statTotal == total && d.statLastUpdated == lastUpdated {
return false
@@ -49,8 +59,22 @@ func (d *DatasourceCacheType) StatChanged(total, lastUpdated int64) bool {
}
func (d *DatasourceCacheType) Set(ds map[int64]*models.Datasource, total, lastUpdated int64) {
cateToDs := make(map[string]map[int64]*models.Datasource)
cateToNames := make(map[string]map[string]int64)
for _, datasource := range ds {
if _, exists := cateToDs[datasource.PluginType]; !exists {
cateToDs[datasource.PluginType] = make(map[int64]*models.Datasource)
}
cateToDs[datasource.PluginType][datasource.Id] = datasource
if _, exists := cateToNames[datasource.PluginType]; !exists {
cateToNames[datasource.PluginType] = make(map[string]int64)
}
cateToNames[datasource.PluginType][datasource.Name] = datasource.Id
}
d.Lock()
d.CateToIDs = cateToDs
d.ds = ds
d.CateToNames = cateToNames
d.Unlock()
// only one goroutine used, so no need lock
@@ -99,20 +123,20 @@ func (d *DatasourceCacheType) syncDatasources() error {
return nil
}
m, err := models.DatasourceGetMap(d.ctx)
ds, err := models.DatasourceGetMap(d.ctx)
if err != nil {
dumper.PutSyncRecord("datasources", start.Unix(), -1, -1, "failed to query records: "+err.Error())
return errors.WithMessage(err, "failed to call DatasourceGetMap")
}
d.Set(m, stat.Total, stat.LastUpdated)
d.Set(ds, stat.Total, stat.LastUpdated)
ms := time.Since(start).Milliseconds()
d.stats.GaugeCronDuration.WithLabelValues("sync_datasources").Set(float64(ms))
d.stats.GaugeSyncNumber.WithLabelValues("sync_datasources").Set(float64(len(m)))
d.stats.GaugeSyncNumber.WithLabelValues("sync_datasources").Set(float64(len(ds)))
logger.Infof("timer: sync datasources done, cost: %dms, number: %d", ms, len(m))
dumper.PutSyncRecord("datasources", start.Unix(), ms, len(m), "success")
logger.Infof("timer: sync datasources done, cost: %dms, number: %d", ms, len(ds))
dumper.PutSyncRecord("datasources", start.Unix(), ms, len(ds), "success")
return nil
}

View File

@@ -1,7 +1,9 @@
package memsto
import (
"crypto/tls"
"encoding/json"
"net/http"
"strings"
"sync"
"time"
@@ -19,7 +21,7 @@ import (
type NotifyConfigCacheType struct {
ctx *ctx.Context
ConfigCache *ConfigCache
webhooks []*models.Webhook
webhooks map[string]*models.Webhook
smtp aconf.SMTPConfig
script models.NotifyScript
@@ -47,6 +49,7 @@ func NewNotifyConfigCache(ctx *ctx.Context, configCache *ConfigCache) *NotifyCon
w := &NotifyConfigCacheType{
ctx: ctx,
ConfigCache: configCache,
webhooks: make(map[string]*models.Webhook),
}
w.SyncNotifyConfigs()
return w
@@ -85,11 +88,55 @@ func (w *NotifyConfigCacheType) syncNotifyConfigs() error {
}
if strings.TrimSpace(cval) != "" {
err = json.Unmarshal([]byte(cval), &w.webhooks)
var webhooks []*models.Webhook
err = json.Unmarshal([]byte(cval), &webhooks)
if err != nil {
dumper.PutSyncRecord("webhooks", start.Unix(), -1, -1, "failed to unmarshal configs.webhook: "+err.Error())
logger.Errorf("failed to unmarshal webhooks:%s error:%v", cval, err)
}
newWebhooks := make(map[string]*models.Webhook, len(webhooks))
for i := 0; i < len(webhooks); i++ {
if webhooks[i].Batch == 0 {
webhooks[i].Batch = 1000
}
if webhooks[i].Timeout == 0 {
webhooks[i].Timeout = 10
}
if webhooks[i].RetryCount == 0 {
webhooks[i].RetryCount = 10
}
if webhooks[i].RetryInterval == 0 {
webhooks[i].RetryInterval = 10
}
if webhooks[i].Client == nil {
webhooks[i].Client = &http.Client{
Timeout: time.Second * time.Duration(webhooks[i].Timeout),
Transport: &http.Transport{
TLSClientConfig: &tls.Config{InsecureSkipVerify: webhooks[i].SkipVerify},
},
}
}
newWebhooks[webhooks[i].Url] = webhooks[i]
}
for url, wh := range newWebhooks {
if oldWh, has := w.webhooks[url]; has && oldWh.Hash() != wh.Hash() {
w.webhooks[url] = wh
} else {
w.webhooks[url] = wh
}
}
for url := range w.webhooks {
if _, has := newWebhooks[url]; !has {
delete(w.webhooks, url)
}
}
}
dumper.PutSyncRecord("webhooks", start.Unix(), time.Since(start).Milliseconds(), len(w.webhooks), "success, webhooks:\n"+cval)
@@ -133,7 +180,7 @@ func (w *NotifyConfigCacheType) syncNotifyConfigs() error {
return nil
}
func (w *NotifyConfigCacheType) GetWebhooks() []*models.Webhook {
func (w *NotifyConfigCacheType) GetWebhooks() map[string]*models.Webhook {
w.RWMutex.RLock()
defer w.RWMutex.RUnlock()
return w.webhooks

View File

@@ -12,62 +12,71 @@ import (
"github.com/ccfos/nightingale/v6/pkg/ctx"
"github.com/ccfos/nightingale/v6/pkg/poster"
"github.com/ccfos/nightingale/v6/pkg/tplx"
"github.com/ccfos/nightingale/v6/pkg/unit"
"github.com/toolkits/pkg/logger"
)
type AlertCurEvent struct {
Id int64 `json:"id" gorm:"primaryKey"`
Cate string `json:"cate"`
Cluster string `json:"cluster"`
DatasourceId int64 `json:"datasource_id"`
GroupId int64 `json:"group_id"` // busi group id
GroupName string `json:"group_name"` // busi group name
Hash string `json:"hash"` // rule_id + vector_key
RuleId int64 `json:"rule_id"`
RuleName string `json:"rule_name"`
RuleNote string `json:"rule_note"`
RuleProd string `json:"rule_prod"`
RuleAlgo string `json:"rule_algo"`
Severity int `json:"severity"`
PromForDuration int `json:"prom_for_duration"`
PromQl string `json:"prom_ql"`
RuleConfig string `json:"-" gorm:"rule_config"` // rule config
RuleConfigJson interface{} `json:"rule_config" gorm:"-"` // rule config for fe
PromEvalInterval int `json:"prom_eval_interval"`
Callbacks string `json:"-"` // for db
CallbacksJSON []string `json:"callbacks" gorm:"-"` // for fe
RunbookUrl string `json:"runbook_url"`
NotifyRecovered int `json:"notify_recovered"`
NotifyChannels string `json:"-"` // for db
NotifyChannelsJSON []string `json:"notify_channels" gorm:"-"` // for fe
NotifyGroups string `json:"-"` // for db
NotifyGroupsJSON []string `json:"notify_groups" gorm:"-"` // for fe
NotifyGroupsObj []*UserGroup `json:"notify_groups_obj" gorm:"-"` // for fe
TargetIdent string `json:"target_ident"`
TargetNote string `json:"target_note"`
TriggerTime int64 `json:"trigger_time"`
TriggerValue string `json:"trigger_value"`
TriggerValues string `json:"trigger_values" gorm:"-"`
Tags string `json:"-"` // for db
TagsJSON []string `json:"tags" gorm:"-"` // for fe
TagsMap map[string]string `json:"tags_map" gorm:"-"` // for internal usage
OriginalTags string `json:"-"` // for db
OriginalTagsJSON []string `json:"original_tags" gorm:"-"` // for fe
Annotations string `json:"-"` //
AnnotationsJSON map[string]string `json:"annotations" gorm:"-"` // for fe
IsRecovered bool `json:"is_recovered" gorm:"-"` // for notify.py
NotifyUsersObj []*User `json:"notify_users_obj" gorm:"-"` // for notify.py
LastEvalTime int64 `json:"last_eval_time" gorm:"-"` // for notify.py 上次计算的时间
LastSentTime int64 `json:"last_sent_time" gorm:"-"` // 上次发送时间
NotifyCurNumber int `json:"notify_cur_number"` // notify: current number
FirstTriggerTime int64 `json:"first_trigger_time"` // 连续告警的首次告警时间
ExtraConfig interface{} `json:"extra_config" gorm:"-"`
Status int `json:"status" gorm:"-"`
Claimant string `json:"claimant" gorm:"-"`
SubRuleId int64 `json:"sub_rule_id" gorm:"-"`
ExtraInfo []string `json:"extra_info" gorm:"-"`
Target *Target `json:"target" gorm:"-"`
Id int64 `json:"id" gorm:"primaryKey"`
Cate string `json:"cate"`
Cluster string `json:"cluster"`
DatasourceId int64 `json:"datasource_id"`
GroupId int64 `json:"group_id"` // busi group id
GroupName string `json:"group_name"` // busi group name
Hash string `json:"hash"` // rule_id + vector_key
RuleId int64 `json:"rule_id"`
RuleName string `json:"rule_name"`
RuleNote string `json:"rule_note"`
RuleProd string `json:"rule_prod"`
RuleAlgo string `json:"rule_algo"`
Severity int `json:"severity"`
PromForDuration int `json:"prom_for_duration"`
PromQl string `json:"prom_ql"`
RuleConfig string `json:"-" gorm:"rule_config"` // rule config
RuleConfigJson interface{} `json:"rule_config" gorm:"-"` // rule config for fe
PromEvalInterval int `json:"prom_eval_interval"`
Callbacks string `json:"-"` // for db
CallbacksJSON []string `json:"callbacks" gorm:"-"` // for fe
RunbookUrl string `json:"runbook_url"`
NotifyRecovered int `json:"notify_recovered"`
NotifyChannels string `json:"-"` // for db
NotifyChannelsJSON []string `json:"notify_channels" gorm:"-"` // for fe
NotifyGroups string `json:"-"` // for db
NotifyGroupsJSON []string `json:"notify_groups" gorm:"-"` // for fe
NotifyGroupsObj []*UserGroup `json:"notify_groups_obj" gorm:"-"` // for fe
TargetIdent string `json:"target_ident"`
TargetNote string `json:"target_note"`
TriggerTime int64 `json:"trigger_time"`
TriggerValue string `json:"trigger_value"`
TriggerValues string `json:"trigger_values" gorm:"-"`
TriggerValuesJson EventTriggerValues `json:"trigger_values_json" gorm:"-"`
Tags string `json:"-"` // for db
TagsJSON []string `json:"tags" gorm:"-"` // for fe
TagsMap map[string]string `json:"tags_map" gorm:"-"` // for internal usage
OriginalTags string `json:"-"` // for db
OriginalTagsJSON []string `json:"original_tags" gorm:"-"` // for fe
Annotations string `json:"-"` //
AnnotationsJSON map[string]string `json:"annotations" gorm:"-"` // for fe
IsRecovered bool `json:"is_recovered" gorm:"-"` // for notify.py
NotifyUsersObj []*User `json:"notify_users_obj" gorm:"-"` // for notify.py
LastEvalTime int64 `json:"last_eval_time" gorm:"-"` // for notify.py 上次计算的时间
LastSentTime int64 `json:"last_sent_time" gorm:"-"` // 上次发送时间
NotifyCurNumber int `json:"notify_cur_number"` // notify: current number
FirstTriggerTime int64 `json:"first_trigger_time"` // 连续告警的首次告警时间
ExtraConfig interface{} `json:"extra_config" gorm:"-"`
Status int `json:"status" gorm:"-"`
Claimant string `json:"claimant" gorm:"-"`
SubRuleId int64 `json:"sub_rule_id" gorm:"-"`
ExtraInfo []string `json:"extra_info" gorm:"-"`
Target *Target `json:"target" gorm:"-"`
RecoverConfig RecoverConfig `json:"recover_config" gorm:"-"`
RuleHash string `json:"rule_hash" gorm:"-"`
ExtraInfoMap []map[string]string `json:"extra_info_map" gorm:"-"`
}
// EventTriggerValues is the structured form of an event's trigger values; it is
// exposed on AlertCurEvent under the JSON key "trigger_values_json".
type EventTriggerValues struct {
// ValuesWithUnit maps a key to its unit-formatted value.
// NOTE(review): keys are presumably query/value names — confirm against the producer.
ValuesWithUnit map[string]unit.FormattedValue `json:"values_with_unit"`
}
func (e *AlertCurEvent) TableName() string {
@@ -106,8 +115,18 @@ func (e *AlertCurEvent) ParseRule(field string) error {
"{{$value := .TriggerValue}}",
}
templateFuncMapCopy := tplx.NewTemplateFuncMap()
templateFuncMapCopy["query"] = func(promql string, param ...int64) []AnomalyPoint {
datasourceId := e.DatasourceId
if len(param) > 0 {
datasourceId = param[0]
}
value := tplx.Query(datasourceId, promql)
return ConvertAnomalyPoints(value)
}
text := strings.Join(append(defs, f), "")
t, err := template.New(fmt.Sprint(e.RuleId)).Funcs(template.FuncMap(tplx.TemplateFuncMap)).Parse(text)
t, err := template.New(fmt.Sprint(e.RuleId)).Funcs(templateFuncMapCopy).Parse(text)
if err != nil {
e.AnnotationsJSON[k] = fmt.Sprintf("failed to parse annotations: %v", err)
continue
@@ -356,6 +375,15 @@ func (e *AlertCurEvent) DB2Mem() {
}
}
// OverrideGlobalWebhook reports whether the rule config stored on the event
// has override_global_webhook set. If RuleConfig cannot be decoded as JSON the
// failure is logged as a warning and false is returned.
func (e *AlertCurEvent) OverrideGlobalWebhook() bool {
	var cfg RuleConfig
	err := json.Unmarshal([]byte(e.RuleConfig), &cfg)
	if err == nil {
		return cfg.OverrideGlobalWebhook
	}

	logger.Warningf("failed to unmarshal rule config: %v", err)
	return false
}
func FillRuleConfigTplName(ctx *ctx.Context, ruleConfig string) (interface{}, bool) {
var config RuleConfig
err := json.Unmarshal([]byte(ruleConfig), &config)

View File

@@ -108,7 +108,7 @@ func AlertMuteGet(ctx *ctx.Context, where string, args ...interface{}) (*AlertMu
return lst[0], err
}
func AlertMuteGets(ctx *ctx.Context, prods []string, bgid int64, query string) (lst []AlertMute, err error) {
func AlertMuteGets(ctx *ctx.Context, prods []string, bgid int64, disabled int, query string) (lst []AlertMute, err error) {
session := DB(ctx)
if bgid != -1 {
@@ -119,6 +119,14 @@ func AlertMuteGets(ctx *ctx.Context, prods []string, bgid int64, query string) (
session = session.Where("prod in (?)", prods)
}
if disabled != -1 {
if disabled == 0 {
session = session.Where("disabled = 0")
} else {
session = session.Where("disabled = 1")
}
}
if query != "" {
arr := strings.Fields(query)
for i := 0; i < len(arr); i++ {
@@ -287,16 +295,9 @@ func AlertMuteStatistics(ctx *ctx.Context) (*Statistics, error) {
return s, err
}
// clean expired first
buf := int64(30)
err := DB(ctx).Where("etime < ? and mute_time_type = 0", time.Now().Unix()-buf).Delete(new(AlertMute)).Error
if err != nil {
return nil, err
}
session := DB(ctx).Model(&AlertMute{}).Select("count(*) as total", "max(update_at) as last_updated")
err = session.Find(&stats).Error
err := session.Find(&stats).Error
if err != nil {
return nil, err
}
@@ -308,7 +309,7 @@ func AlertMuteGetsAll(ctx *ctx.Context) ([]*AlertMute, error) {
// get my cluster's mutes
var lst []*AlertMute
if !ctx.IsCenter {
lst, err := poster.GetByUrls[[]*AlertMute](ctx, "/v1/n9e/alert-mutes")
lst, err := poster.GetByUrls[[]*AlertMute](ctx, "/v1/n9e/alert-mutes?disabled=0")
if err != nil {
return nil, err
}
@@ -318,7 +319,7 @@ func AlertMuteGetsAll(ctx *ctx.Context) ([]*AlertMute, error) {
return lst, err
}
session := DB(ctx).Model(&AlertMute{})
session := DB(ctx).Model(&AlertMute{}).Where("disabled = 0")
err := session.Find(&lst).Error
if err != nil {

View File

@@ -13,6 +13,7 @@ import (
"github.com/jinzhu/copier"
"github.com/pkg/errors"
"github.com/tidwall/match"
"github.com/toolkits/pkg/logger"
"github.com/toolkits/pkg/str"
)
@@ -23,8 +24,9 @@ const (
HOST = "host"
LOKI = "loki"
PROMETHEUS = "prometheus"
TDENGINE = "tdengine"
PROMETHEUS = "prometheus"
TDENGINE = "tdengine"
ELASTICSEARCH = "elasticsearch"
)
const (
@@ -44,55 +46,56 @@ const (
type AlertRule struct {
Id int64 `json:"id" gorm:"primaryKey"`
GroupId int64 `json:"group_id"` // busi group id
Cate string `json:"cate"` // alert rule cate (prometheus|elasticsearch)
DatasourceIds string `json:"-" gorm:"datasource_ids"` // datasource ids
DatasourceIdsJson []int64 `json:"datasource_ids" gorm:"-"` // for fe
Cluster string `json:"cluster"` // take effect by clusters, seperated by space
Name string `json:"name"` // rule name
Note string `json:"note"` // will sent in notify
Prod string `json:"prod"` // product empty means n9e
Algorithm string `json:"algorithm"` // algorithm (''|holtwinters), empty means threshold
AlgoParams string `json:"-" gorm:"algo_params"` // params algorithm need
AlgoParamsJson interface{} `json:"algo_params" gorm:"-"` // for fe
Delay int `json:"delay"` // Time (in seconds) to delay evaluation
Severity int `json:"severity"` // 1: Emergency 2: Warning 3: Notice
Severities []int `json:"severities" gorm:"-"` // 1: Emergency 2: Warning 3: Notice
Disabled int `json:"disabled"` // 0: enabled, 1: disabled
PromForDuration int `json:"prom_for_duration"` // prometheus for, unit:s
PromQl string `json:"prom_ql"` // just one ql
RuleConfig string `json:"-" gorm:"rule_config"` // rule config
RuleConfigJson interface{} `json:"rule_config" gorm:"-"` // rule config for fe
EventRelabelConfig []*pconf.RelabelConfig `json:"event_relabel_config" gorm:"-"` // event relabel config
PromEvalInterval int `json:"prom_eval_interval"` // unit:s
EnableStime string `json:"-"` // split by space: "00:00 10:00 12:00"
EnableStimeJSON string `json:"enable_stime" gorm:"-"` // for fe
EnableStimesJSON []string `json:"enable_stimes" gorm:"-"` // for fe
EnableEtime string `json:"-"` // split by space: "00:00 10:00 12:00"
EnableEtimeJSON string `json:"enable_etime" gorm:"-"` // for fe
EnableEtimesJSON []string `json:"enable_etimes" gorm:"-"` // for fe
EnableDaysOfWeek string `json:"-"` // eg: "0 1 2 3 4 5 6 ; 0 1 2"
EnableDaysOfWeekJSON []string `json:"enable_days_of_week" gorm:"-"` // for fe
EnableDaysOfWeeksJSON [][]string `json:"enable_days_of_weeks" gorm:"-"` // for fe
EnableInBG int `json:"enable_in_bg"` // 0: global 1: enable one busi-group
NotifyRecovered int `json:"notify_recovered"` // whether notify when recovery
NotifyChannels string `json:"-"` // split by space: sms voice email dingtalk wecom
NotifyChannelsJSON []string `json:"notify_channels" gorm:"-"` // for fe
NotifyGroups string `json:"-"` // split by space: 233 43
NotifyGroupsObj []UserGroup `json:"notify_groups_obj" gorm:"-"` // for fe
NotifyGroupsJSON []string `json:"notify_groups" gorm:"-"` // for fe
NotifyRepeatStep int `json:"notify_repeat_step"` // notify repeat interval, unit: min
NotifyMaxNumber int `json:"notify_max_number"` // notify: max number
RecoverDuration int64 `json:"recover_duration"` // unit: s
Callbacks string `json:"-"` // split by space: http://a.com/api/x http://a.com/api/y'
CallbacksJSON []string `json:"callbacks" gorm:"-"` // for fe
RunbookUrl string `json:"runbook_url"` // sop url
AppendTags string `json:"-"` // split by space: service=n9e mod=api
AppendTagsJSON []string `json:"append_tags" gorm:"-"` // for fe
Annotations string `json:"-"` //
AnnotationsJSON map[string]string `json:"annotations" gorm:"-"` // for fe
ExtraConfig string `json:"-" gorm:"extra_config"` // extra config
ExtraConfigJSON interface{} `json:"extra_config" gorm:"-"` // for fe
GroupId int64 `json:"group_id"` // busi group id
Cate string `json:"cate"` // alert rule cate (prometheus|elasticsearch)
DatasourceIds string `json:"-" gorm:"datasource_ids"`
DatasourceIdsJson []int64 `json:"datasource_ids,omitempty" gorm:"-"` // alert rule list page use this field
DatasourceQueries []DatasourceQuery `json:"datasource_queries" gorm:"datasource_queries;type:text;serializer:json"` // datasource queries
Cluster string `json:"cluster"` // take effect by clusters, seperated by space
Name string `json:"name"` // rule name
Note string `json:"note"` // will sent in notify
Prod string `json:"prod"` // product empty means n9e
Algorithm string `json:"algorithm"` // algorithm (''|holtwinters), empty means threshold
AlgoParams string `json:"-" gorm:"algo_params"` // params algorithm need
AlgoParamsJson interface{} `json:"algo_params" gorm:"-"` // for fe
Delay int `json:"delay"` // Time (in seconds) to delay evaluation
Severity int `json:"severity"` // 1: Emergency 2: Warning 3: Notice
Severities []int `json:"severities" gorm:"-"` // 1: Emergency 2: Warning 3: Notice
Disabled int `json:"disabled"` // 0: enabled, 1: disabled
PromForDuration int `json:"prom_for_duration"` // prometheus for, unit:s
PromQl string `json:"prom_ql"` // just one ql
RuleConfig string `json:"-" gorm:"rule_config"` // rule config
RuleConfigJson interface{} `json:"rule_config" gorm:"-"` // rule config for fe
EventRelabelConfig []*pconf.RelabelConfig `json:"event_relabel_config" gorm:"-"` // event relabel config
PromEvalInterval int `json:"prom_eval_interval"` // unit:s
EnableStime string `json:"-"` // split by space: "00:00 10:00 12:00"
EnableStimeJSON string `json:"enable_stime" gorm:"-"` // for fe
EnableStimesJSON []string `json:"enable_stimes" gorm:"-"` // for fe
EnableEtime string `json:"-"` // split by space: "00:00 10:00 12:00"
EnableEtimeJSON string `json:"enable_etime" gorm:"-"` // for fe
EnableEtimesJSON []string `json:"enable_etimes" gorm:"-"` // for fe
EnableDaysOfWeek string `json:"-"` // eg: "0 1 2 3 4 5 6 ; 0 1 2"
EnableDaysOfWeekJSON []string `json:"enable_days_of_week" gorm:"-"` // for fe
EnableDaysOfWeeksJSON [][]string `json:"enable_days_of_weeks" gorm:"-"` // for fe
EnableInBG int `json:"enable_in_bg"` // 0: global 1: enable one busi-group
NotifyRecovered int `json:"notify_recovered"` // whether notify when recovery
NotifyChannels string `json:"-"` // split by space: sms voice email dingtalk wecom
NotifyChannelsJSON []string `json:"notify_channels" gorm:"-"` // for fe
NotifyGroups string `json:"-"` // split by space: 233 43
NotifyGroupsObj []UserGroup `json:"notify_groups_obj" gorm:"-"` // for fe
NotifyGroupsJSON []string `json:"notify_groups" gorm:"-"` // for fe
NotifyRepeatStep int `json:"notify_repeat_step"` // notify repeat interval, unit: min
NotifyMaxNumber int `json:"notify_max_number"` // notify: max number
RecoverDuration int64 `json:"recover_duration"` // unit: s
Callbacks string `json:"-"` // split by space: http://a.com/api/x http://a.com/api/y'
CallbacksJSON []string `json:"callbacks" gorm:"-"` // for fe
RunbookUrl string `json:"runbook_url"` // sop url
AppendTags string `json:"-"` // split by space: service=n9e mod=api
AppendTagsJSON []string `json:"append_tags" gorm:"-"` // for fe
Annotations string `json:"-"` //
AnnotationsJSON map[string]string `json:"annotations" gorm:"-"` // for fe
ExtraConfig string `json:"-" gorm:"extra_config"` // extra config
ExtraConfigJSON interface{} `json:"extra_config" gorm:"-"` // for fe
CreateAt int64 `json:"create_at"`
CreateBy string `json:"create_by"`
UpdateAt int64 `json:"update_at"`
@@ -100,6 +103,29 @@ type AlertRule struct {
UUID int64 `json:"uuid" gorm:"-"` // tpl identifier
CurEventCount int64 `json:"cur_event_count" gorm:"-"`
UpdateByNickname string `json:"update_by_nickname" gorm:"-"` // for fe
CronPattern string `json:"cron_pattern"`
}
// ChildVarConfig is a nested level of variable configuration. It is recursive:
// ChildVarConfigs may point to a further nested level.
type ChildVarConfig struct {
// ParamVal is a list of maps from a key to its ParamQuery.
// NOTE(review): keys are presumably variable names — confirm against the caller.
ParamVal []map[string]ParamQuery `json:"param_val"`
// ChildVarConfigs is the next nested level.
// NOTE(review): presumably nil when there is no further level — confirm.
ChildVarConfigs *ChildVarConfig `json:"child_var_configs"`
}
// ParamQuery describes a single variable parameter: its kind and the query
// payload associated with it.
type ParamQuery struct {
ParamType string `json:"param_type"` // one of: host, device, enum, threshold
// Query is untyped here.
// NOTE(review): its shape presumably depends on ParamType — confirm against the consumer.
Query interface{} `json:"query"`
}
// VarConfig is the variable configuration carried by PromQuery ("var_config").
// Its first level uses ParamQueryForFirst entries; deeper levels are expressed
// through ChildVarConfigs.
type VarConfig struct {
ParamVal []ParamQueryForFirst `json:"param_val"`
ChildVarConfigs *ChildVarConfig `json:"child_var_configs"`
}
// ParamQueryForFirst is the same as ParamQuery but only appears at the first
// level of a VarConfig; unlike ParamQuery it carries its own Name.
type ParamQueryForFirst struct {
Name string `json:"name"`
ParamType string `json:"param_type"`
Query interface{} `json:"query"`
}
type Tpl struct {
@@ -109,15 +135,16 @@ type Tpl struct {
}
type RuleConfig struct {
Version string `json:"version,omitempty"`
EventRelabelConfig []*pconf.RelabelConfig `json:"event_relabel_config,omitempty"`
TaskTpls []*Tpl `json:"task_tpls,omitempty"`
Queries interface{} `json:"queries,omitempty"`
Triggers []Trigger `json:"triggers,omitempty"`
Inhibit bool `json:"inhibit,omitempty"`
PromQl string `json:"prom_ql,omitempty"`
Severity int `json:"severity,omitempty"`
AlgoParams interface{} `json:"algo_params,omitempty"`
Version string `json:"version,omitempty"`
EventRelabelConfig []*pconf.RelabelConfig `json:"event_relabel_config,omitempty"`
TaskTpls []*Tpl `json:"task_tpls,omitempty"`
Queries interface{} `json:"queries,omitempty"`
Triggers []Trigger `json:"triggers,omitempty"`
Inhibit bool `json:"inhibit,omitempty"`
PromQl string `json:"prom_ql,omitempty"`
Severity int `json:"severity,omitempty"`
AlgoParams interface{} `json:"algo_params,omitempty"`
OverrideGlobalWebhook bool `json:"override_global_webhook,omitempty"`
}
type PromRuleConfig struct {
@@ -128,6 +155,19 @@ type PromRuleConfig struct {
AlgoParams interface{} `json:"algo_params"`
}
// RecoverJudge enumerates the values of RecoverConfig.JudgeType.
type RecoverJudge int
// NOTE(review): the meanings below are inferred from the constant names only;
// confirm against the code that evaluates recovery.
const (
// Origin is the zero value. NOTE(review): presumably the original/default recovery behavior.
Origin RecoverJudge = 0
// RecoverWithoutData: NOTE(review): presumably recover when the query returns no data.
RecoverWithoutData RecoverJudge = 1
// RecoverOnCondition: NOTE(review): presumably recover when RecoverExp is satisfied.
RecoverOnCondition RecoverJudge = 2
)
// RecoverConfig holds the recovery settings attached to queries, triggers,
// anomaly points and events (JSON key "recover_config").
type RecoverConfig struct {
JudgeType RecoverJudge `json:"judge_type"`
// RecoverExp is an expression string.
// NOTE(review): presumably only used with RecoverOnCondition — confirm.
RecoverExp string `json:"recover_exp"`
}
type HostRuleConfig struct {
Queries []HostQuery `json:"queries"`
Triggers []HostTrigger `json:"triggers"`
@@ -135,8 +175,12 @@ type HostRuleConfig struct {
}
type PromQuery struct {
PromQl string `json:"prom_ql"`
Severity int `json:"severity"`
PromQl string `json:"prom_ql"`
Severity int `json:"severity"`
VarEnabled bool `json:"var_enabled"`
VarConfig VarConfig `json:"var_config"`
RecoverConfig RecoverConfig `json:"recover_config"`
Unit string `json:"unit"`
}
type HostTrigger struct {
@@ -159,17 +203,146 @@ type Trigger struct {
Exp string `json:"exp"`
Severity int `json:"severity"`
Type string `json:"type,omitempty"`
Duration int `json:"duration,omitempty"`
Percent int `json:"percent,omitempty"`
Joins []Join `json:"joins"`
Type string `json:"type,omitempty"`
Duration int `json:"duration,omitempty"`
Percent int `json:"percent,omitempty"`
Joins []Join `json:"joins"`
JoinRef string `json:"join_ref"`
RecoverConfig RecoverConfig `json:"recover_config"`
}
// Join is one entry of Trigger.Joins.
// NOTE(review): field semantics are not visible in this file section; presumably
// JoinType names the kind of join, Ref the query being joined and On the label
// names to join on — confirm against the evaluator.
type Join struct {
JoinType string `json:"join_type"`
Ref string `json:"ref"`
On []string `json:"on"`
}
// DataSourceQueryAll is a ready-made DatasourceQuery with MatchType 2, which
// GetDatasourceIDsByDatasourceQueries treats as "keep every datasource that is
// still a candidate". Its Values hold the single sentinel DatasourceIdAll.
var DataSourceQueryAll = DatasourceQuery{
MatchType: 2,
Op: "in",
Values: []interface{}{DatasourceIdAll},
}
// DatasourceQuery is one datasource selection condition of an alert rule.
// As interpreted by GetDatasourceIDsByDatasourceQueries:
//   - MatchType 0 matches by datasource ID (Values are numeric IDs),
//   - MatchType 1 matches datasource names against patterns (Values are strings),
//   - MatchType 2 selects all datasources.
// Op is "in" or "not in".
type DatasourceQuery struct {
MatchType int `json:"match_type"`
Op string `json:"op"`
Values []interface{} `json:"values"`
}
// GetDatasourceIDsByDatasourceQueries resolves datasourceQueries to a list of
// datasource IDs.
//
// Queries are applied in order and intersected: the candidate set starts as
// every key of idMap and each query narrows it. A query matches either by ID
// (MatchType 0), by name pattern (MatchType 1) or selects everything
// (MatchType 2), with Op "in" or "not in". A query whose MatchType or Op is not
// recognised selects nothing.
//
// idMap is the full set of datasources the queries may select from; nameMap
// maps every datasource name to its ID and is used for name pattern matching.
//
// It returns nil when datasourceQueries is empty, otherwise a (possibly empty)
// slice of IDs in unspecified order.
func GetDatasourceIDsByDatasourceQueries[T any](datasourceQueries []DatasourceQuery, idMap map[int64]T, nameMap map[string]int64) []int64 {
	if len(datasourceQueries) == 0 {
		return nil
	}

	// All queries are intersected, so start from the full set.
	remaining := make(map[int64]struct{})
	for id := range idMap {
		remaining[id] = struct{}{}
	}

	for _, q := range datasourceQueries {
		// Each query derives its selection from what is still remaining.
		selected := make(map[int64]struct{})

		switch q.MatchType {
		case 0:
			// Exact match: values are interpreted as numeric IDs; anything
			// that is not a supported numeric type is ignored.
			ids := make([]int64, 0, len(q.Values))
			for _, raw := range q.Values {
				var id int64
				switch n := raw.(type) {
				case int64:
					id = n
				case int:
					id = int64(n)
				case float64:
					id = int64(n)
				case float32:
					id = int64(n)
				case int8:
					id = int64(n)
				case int16:
					id = int64(n)
				case int32:
					id = int64(n)
				default:
					continue
				}
				ids = append(ids, id)
			}

			switch q.Op {
			case "in":
				if len(ids) == 1 && ids[0] == DatasourceIdAll {
					// The lone "all" sentinel keeps every remaining ID.
					for id := range remaining {
						selected[id] = struct{}{}
					}
				} else {
					for _, id := range ids {
						if _, ok := remaining[id]; ok {
							selected[id] = struct{}{}
						}
					}
				}
			case "not in":
				for _, id := range ids {
					delete(remaining, id)
				}
				selected = remaining
			}

		case 1:
			// Pattern match against datasource names; non-string values are ignored.
			switch q.Op {
			case "in":
				for name, id := range nameMap {
					if _, ok := remaining[id]; !ok {
						continue
					}
					for _, raw := range q.Values {
						pattern, ok := raw.(string)
						if !ok {
							continue
						}
						if match.Match(name, pattern) {
							selected[id] = struct{}{}
						}
					}
				}
			case "not in":
				for name, id := range nameMap {
					for _, raw := range q.Values {
						pattern, ok := raw.(string)
						if !ok {
							continue
						}
						if match.Match(name, pattern) {
							delete(remaining, id)
						}
					}
				}
				selected = remaining
			}

		case 2:
			// All datasources.
			for id := range remaining {
				selected[id] = struct{}{}
			}
		}

		remaining = selected
		if len(remaining) == 0 {
			// Nothing left to intersect with; later queries cannot add IDs back.
			break
		}
	}

	result := make([]int64, 0, len(remaining))
	for id := range remaining {
		result = append(result, id)
	}
	return result
}
func GetHostsQuery(queries []HostQuery) []map[string]interface{} {
var query []map[string]interface{}
for _, q := range queries {
@@ -199,12 +372,14 @@ func GetHostsQuery(queries []HostQuery) []map[string]interface{} {
blank += " "
}
} else {
blank := " "
var args []interface{}
var query []string
for _, tag := range lst {
m["tags not like ?"+blank] = "%" + tag + "%"
m["host_tags not like ?"+blank] = "%" + tag + "%"
blank += " "
query = append(query, "tags not like ?",
"(host_tags not like ? or host_tags is null)")
args = append(args, "%"+tag+"%", "%"+tag+"%")
}
m[strings.Join(query, " and ")] = args
}
case "hosts":
lst := []string{}
@@ -225,11 +400,13 @@ func GetHostsQuery(queries []HostQuery) []map[string]interface{} {
blank += " "
}
} else if q.Op == "!~" {
blank := " "
var args []interface{}
var query []string
for _, host := range lst {
m["ident not like ?"+blank] = strings.ReplaceAll(host, "*", "%")
blank += " "
query = append(query, "ident not like ?")
args = append(args, strings.ReplaceAll(host, "*", "%"))
}
m[strings.Join(query, " and ")] = args
}
}
query = append(query, m)
@@ -268,9 +445,9 @@ func (ar *AlertRule) Verify() error {
return fmt.Errorf("GroupId(%d) invalid", ar.GroupId)
}
if IsAllDatasource(ar.DatasourceIdsJson) {
ar.DatasourceIdsJson = []int64{0}
}
//if IsAllDatasource(ar.DatasourceIdsJson) {
// ar.DatasourceIdsJson = []int64{0}
//}
if str.Dangerous(ar.Name) {
return errors.New("Name has invalid characters")
@@ -324,7 +501,7 @@ func (ar *AlertRule) Add(ctx *ctx.Context) error {
return err
}
exists, err := AlertRuleExists(ctx, 0, ar.GroupId, ar.DatasourceIdsJson, ar.Name)
exists, err := AlertRuleExists(ctx, 0, ar.GroupId, ar.Name)
if err != nil {
return err
}
@@ -342,7 +519,7 @@ func (ar *AlertRule) Add(ctx *ctx.Context) error {
func (ar *AlertRule) Update(ctx *ctx.Context, arf AlertRule) error {
if ar.Name != arf.Name {
exists, err := AlertRuleExists(ctx, ar.Id, ar.GroupId, ar.DatasourceIdsJson, arf.Name)
exists, err := AlertRuleExists(ctx, ar.Id, ar.GroupId, arf.Name)
if err != nil {
return err
}
@@ -491,11 +668,30 @@ func (ar *AlertRule) UpdateFieldsMap(ctx *ctx.Context, fields map[string]interfa
return DB(ctx).Model(ar).Updates(fields).Error
}
// for v5 rule
func (ar *AlertRule) FillDatasourceIds() error {
if ar.DatasourceIds != "" {
json.Unmarshal([]byte(ar.DatasourceIds), &ar.DatasourceIdsJson)
return nil
// FillDatasourceQueries keeps rules stored in the legacy format working: when
// DatasourceQueries is empty but DatasourceIds is set, it converts the legacy
// ID list into a single DatasourceQuery.
//
// The legacy list becomes an exact-match "in" query over those IDs. If an ID 0
// (meaning "all datasources") is encountered, the query is switched to
// MatchType 2 and conversion stops. It always returns nil; a DatasourceIds
// value that fails to decode results in a query with no values.
func (ar *AlertRule) FillDatasourceQueries() error {
	if len(ar.DatasourceQueries) != 0 || len(ar.DatasourceIds) == 0 {
		return nil
	}

	var legacyIDs []int
	// Decoding is best-effort: on failure legacyIDs stays empty.
	_ = json.Unmarshal([]byte(ar.DatasourceIds), &legacyIDs)

	converted := DatasourceQuery{
		MatchType: 0,
		Op:        "in",
		Values:    make([]interface{}, 0),
	}
	for _, id := range legacyIDs {
		if id == 0 {
			// 0 stands for all datasources.
			converted.MatchType = 2
			break
		}
		converted.Values = append(converted.Values, id)
	}

	ar.DatasourceQueries = []DatasourceQuery{converted}
	return nil
}
@@ -614,14 +810,6 @@ func (ar *AlertRule) FE2DB() error {
}
ar.AlgoParams = string(algoParamsByte)
if len(ar.DatasourceIdsJson) > 0 {
idsByte, err := json.Marshal(ar.DatasourceIdsJson)
if err != nil {
return fmt.Errorf("marshal datasource_ids err:%v", err)
}
ar.DatasourceIds = string(idsByte)
}
if ar.RuleConfigJson == nil {
query := PromQuery{
PromQl: ar.PromQl,
@@ -693,8 +881,17 @@ func (ar *AlertRule) DB2FE() error {
json.Unmarshal([]byte(ar.RuleConfig), &ruleConfig)
ar.EventRelabelConfig = ruleConfig.EventRelabelConfig
err := ar.FillDatasourceIds()
return err
// 兼容旧逻辑填充 cron_pattern
if ar.CronPattern == "" && ar.PromEvalInterval != 0 {
ar.CronPattern = fmt.Sprintf("@every %ds", ar.PromEvalInterval)
}
err := ar.FillDatasourceQueries()
if err != nil {
return err
}
return nil
}
func AlertRuleDels(ctx *ctx.Context, ids []int64, bgid ...int64) error {
@@ -708,7 +905,7 @@ func AlertRuleDels(ctx *ctx.Context, ids []int64, bgid ...int64) error {
return ret.Error
}
// 说明确实删掉了,把相关的活跃告警也删了,这些告警永远都不会恢复了,而且策略都没了,说明没关心了
// The rule really was deleted, so delete its active alerts too: they can never recover, and with the rule gone nobody cares about them anymore
if ret.RowsAffected > 0 {
DB(ctx).Where("rule_id = ?", ids[i]).Delete(new(AlertCurEvent))
}
@@ -717,7 +914,7 @@ func AlertRuleDels(ctx *ctx.Context, ids []int64, bgid ...int64) error {
return nil
}
func AlertRuleExists(ctx *ctx.Context, id, groupId int64, datasourceIds []int64, name string) (bool, error) {
func AlertRuleExists(ctx *ctx.Context, id, groupId int64, name string) (bool, error) {
session := DB(ctx).Where("id <> ? and group_id = ? and name = ?", id, groupId, name)
var lst []AlertRule
@@ -729,15 +926,6 @@ func AlertRuleExists(ctx *ctx.Context, id, groupId int64, datasourceIds []int64,
return false, nil
}
// match cluster
for _, r := range lst {
r.FillDatasourceIds()
for _, id := range r.DatasourceIdsJson {
if MatchDatasource(datasourceIds, id) {
return true, nil
}
}
}
return false, nil
}
@@ -957,7 +1145,6 @@ func (ar *AlertRule) UpdateEvent(event *AlertCurEvent) {
event.PromForDuration = ar.PromForDuration
event.RuleConfig = ar.RuleConfig
event.RuleConfigJson = ar.RuleConfigJson
event.PromEvalInterval = ar.PromEvalInterval
event.Callbacks = ar.Callbacks
event.CallbacksJSON = ar.CallbacksJSON
event.RunbookUrl = ar.RunbookUrl
@@ -1109,3 +1296,7 @@ func InsertAlertRule(ctx *ctx.Context, ars []*AlertRule) error {
}
return DB(ctx).Create(ars).Error
}
// Hash returns the MD5 digest of the rule's Id, DatasourceIds and RuleConfig
// joined with underscores.
func (ar *AlertRule) Hash() string {
	fingerprint := fmt.Sprintf("%d_%s_%s", ar.Id, ar.DatasourceIds, ar.RuleConfig)
	return str.MD5(fingerprint)
}

View File

@@ -114,6 +114,11 @@ func (s *AlertSubscribe) Verify() error {
return errors.New("severities is required")
}
if s.UserGroupIds != "" && s.NewChannels == "" {
// 如果指定了用户组,那么新告警的通知渠道必须指定,否则容易出现告警规则中没有指定通知渠道,导致订阅通知时,没有通知渠道
return errors.New("new_channels is required")
}
ugids := strings.Fields(s.UserGroupIds)
for i := 0; i < len(ugids); i++ {
if _, err := strconv.ParseInt(ugids[i], 10, 64); err != nil {

View File

@@ -1,22 +1,25 @@
package common
package models
import (
"fmt"
"math"
"strings"
"github.com/ccfos/nightingale/v6/pkg/unit"
"github.com/prometheus/common/model"
)
type AnomalyPoint struct {
Key string `json:"key"`
Labels model.Metric `json:"labels"`
Timestamp int64 `json:"timestamp"`
Value float64 `json:"value"`
Severity int `json:"severity"`
Triggered bool `json:"triggered"`
Query string `json:"query"`
Values string `json:"values"`
Key string `json:"key"`
Labels model.Metric `json:"labels"`
Timestamp int64 `json:"timestamp"`
Value float64 `json:"value"`
Severity int `json:"severity"`
Triggered bool `json:"triggered"`
Query string `json:"query"`
Values string `json:"values"`
ValuesUnit map[string]unit.FormattedValue `json:"values_unit"`
RecoverConfig RecoverConfig `json:"recover_config"`
}
func NewAnomalyPoint(key string, labels map[string]string, ts int64, value float64, severity int) AnomalyPoint {

View File

@@ -10,9 +10,20 @@ import (
// BuiltinComponent represents a builtin component along with its metadata.
type BuiltinComponent struct {
ID uint64 `json:"id" gorm:"primaryKey;type:bigint;autoIncrement;comment:'unique identifier'"`
Ident string `json:"ident" gorm:"type:varchar(191);not null;uniqueIndex:idx_ident,sort:asc"`
Logo string `json:"logo" gorm:"type:mediumtext;comment:'logo of component'"`
Readme string `json:"readme" gorm:"type:text;not null;comment:'readme of component'"`
CreatedAt int64 `json:"created_at" gorm:"type:bigint;not null;default:0;comment:'create time'"`
CreatedBy string `json:"created_by" gorm:"type:varchar(191);not null;default:'';comment:'creator'"`
UpdatedAt int64 `json:"updated_at" gorm:"type:bigint;not null;default:0;comment:'update time'"`
UpdatedBy string `json:"updated_by" gorm:"type:varchar(191);not null;default:'';comment:'updater'"`
}
type PostgresBuiltinComponent struct {
ID uint64 `json:"id" gorm:"primaryKey;type:bigint;autoIncrement;comment:'unique identifier'"`
Ident string `json:"ident" gorm:"type:varchar(191);not null;uniqueIndex:idx_ident,sort:asc;comment:'identifier of component'"`
Logo string `json:"logo" gorm:"type:varchar(191);not null;comment:'logo of component'"`
Logo string `json:"logo" gorm:"type:text;comment:'logo of component'"`
Readme string `json:"readme" gorm:"type:text;not null;comment:'readme of component'"`
CreatedAt int64 `json:"created_at" gorm:"type:bigint;not null;default:0;comment:'create time'"`
CreatedBy string `json:"created_by" gorm:"type:varchar(191);not null;default:'';comment:'creator'"`

View File

@@ -14,12 +14,12 @@ import (
type BuiltinMetric struct {
ID int64 `json:"id" gorm:"primaryKey;type:bigint;autoIncrement;comment:'unique identifier'"`
UUID int64 `json:"uuid" gorm:"type:bigint;not null;default:0;comment:'uuid'"`
Collector string `json:"collector" gorm:"type:varchar(191);not null;index:idx_collector,sort:asc;comment:'type of collector'"` // Type of collector (e.g., 'categraf', 'telegraf')
Typ string `json:"typ" gorm:"type:varchar(191);not null;index:idx_typ,sort:asc;comment:'type of metric'"` // Type of metric (e.g., 'host', 'mysql', 'redis')
Name string `json:"name" gorm:"type:varchar(191);not null;index:idx_builtinmetric_name,sort:asc;comment:'name of metric'"`
Collector string `json:"collector" gorm:"uniqueIndex:idx_collector_typ_name;type:varchar(191);not null;index:idx_collector,sort:asc;comment:'type of collector'"`
Typ string `json:"typ" gorm:"uniqueIndex:idx_collector_typ_name;type:varchar(191);not null;index:idx_typ,sort:asc;comment:'type of metric'"`
Name string `json:"name" gorm:"uniqueIndex:idx_collector_typ_name;type:varchar(191);not null;index:idx_builtinmetric_name,sort:asc;comment:'name of metric'"`
Unit string `json:"unit" gorm:"type:varchar(191);not null;comment:'unit of metric'"`
Note string `json:"note" gorm:"type:varchar(4096);not null;comment:'description of metric'"`
Lang string `json:"lang" gorm:"type:varchar(191);not null;default:'zh';index:idx_lang,sort:asc;comment:'language'"`
Lang string `json:"lang" gorm:"uniqueIndex:idx_collector_typ_name;type:varchar(191);not null;default:'zh';index:idx_lang,sort:asc;comment:'language'"`
Expression string `json:"expression" gorm:"type:varchar(4096);not null;comment:'expression of metric'"`
CreatedAt int64 `json:"created_at" gorm:"type:bigint;not null;default:0;comment:'create time'"`
CreatedBy string `json:"created_by" gorm:"type:varchar(191);not null;default:'';comment:'creator'"`

View File

@@ -115,68 +115,54 @@ func BusiGroupExists(ctx *ctx.Context, where string, args ...interface{}) (bool,
return num > 0, err
}
// entries is the table of models that are checked with a "group_id=?" filter
// before a BusiGroup is deleted. Each row pairs the model to query with the
// error message returned when rows of that model still reference the group.
// The order of the rows is the order in which the checks are reported.
var entries = []struct {
	entry        interface{}
	errorMessage string
}{
	{entry: &AlertRule{}, errorMessage: "Some alert rules still in the BusiGroup"},
	{entry: &AlertMute{}, errorMessage: "Some alert mutes still in the BusiGroup"},
	{entry: &AlertSubscribe{}, errorMessage: "Some alert subscribes still in the BusiGroup"},
	{entry: &Target{}, errorMessage: "Some targets still in the BusiGroup"},
	{entry: &RecordingRule{}, errorMessage: "Some recording rules still in the BusiGroup"},
	{entry: &TaskTpl{}, errorMessage: "Some recovery scripts still in the BusiGroup"},
	{entry: &TaskRecord{}, errorMessage: "Some Task Record records still in the BusiGroup"},
	{entry: &TargetBusiGroup{}, errorMessage: "Some target busigroups still in the BusiGroup"},
}
func (bg *BusiGroup) Del(ctx *ctx.Context) error {
has, err := Exists(DB(ctx).Model(&AlertMute{}).Where("group_id=?", bg.Id))
if err != nil {
return err
}
for _, e := range entries {
has, err := Exists(DB(ctx).Model(e.entry).Where("group_id=?", bg.Id))
if err != nil {
return err
}
if has {
return errors.New("Some alert mutes still in the BusiGroup")
}
has, err = Exists(DB(ctx).Model(&AlertSubscribe{}).Where("group_id=?", bg.Id))
if err != nil {
return err
}
if has {
return errors.New("Some alert subscribes still in the BusiGroup")
}
has, err = Exists(DB(ctx).Model(&TargetBusiGroup{}).Where("group_id=?", bg.Id))
if err != nil {
return err
}
if has {
return errors.New("Some targets still in the BusiGroup")
}
has, err = Exists(DB(ctx).Model(&Board{}).Where("group_id=?", bg.Id))
if err != nil {
return err
}
if has {
return errors.New("Some dashboards still in the BusiGroup")
}
has, err = Exists(DB(ctx).Model(&TaskTpl{}).Where("group_id=?", bg.Id))
if err != nil {
return err
}
if has {
return errors.New("Some recovery scripts still in the BusiGroup")
}
// hasCR, err := Exists(DB(ctx).Table("collect_rule").Where("group_id=?", bg.Id))
// if err != nil {
// return err
// }
// if hasCR {
// return errors.New("Some collect rules still in the BusiGroup")
// }
has, err = Exists(DB(ctx).Model(&AlertRule{}).Where("group_id=?", bg.Id))
if err != nil {
return err
}
if has {
return errors.New("Some alert rules still in the BusiGroup")
if has {
return errors.New(e.errorMessage)
}
}
return DB(ctx).Transaction(func(tx *gorm.DB) error {

View File

@@ -106,10 +106,8 @@ func InitRSAPassWord(ctx *ctx.Context) (string, error) {
func ConfigsGet(ctx *ctx.Context, ckey string) (string, error) { //select built-in type configs
if !ctx.IsCenter {
if !ctx.IsCenter {
s, err := poster.GetByUrls[string](ctx, "/v1/n9e/config?key="+ckey)
return s, err
}
s, err := poster.GetByUrls[string](ctx, "/v1/n9e/config?key="+ckey)
return s, err
}
var lst []string
@@ -125,6 +123,22 @@ func ConfigsGet(ctx *ctx.Context, ckey string) (string, error) { //select built-
return "", nil
}
// ConfigsGetAll returns the ckey/cval pairs of every config row whose ckey is
// non-empty and whose external flag is 0.
//
// On a non-center node the list is fetched from the "/v1/n9e/all-configs"
// endpoint through poster instead of being read from the local database.
func ConfigsGetAll(ctx *ctx.Context) ([]*Configs, error) {
	if !ctx.IsCenter {
		return poster.GetByUrls[[]*Configs](ctx, "/v1/n9e/all-configs")
	}

	var configs []*Configs
	query := DB(ctx).Model(&Configs{}).Select("ckey, cval").Where("ckey!='' and external=? ", 0)
	if err := query.Find(&configs).Error; err != nil {
		return nil, errors.WithMessage(err, "failed to query configs")
	}
	return configs, nil
}
// ConfigsSet stores cval under ckey by delegating to ConfigsSetWithUname with
// the fixed name "default".
// NOTE(review): "default" is presumably the user name recorded for the
// change — confirm against ConfigsSetWithUname.
func ConfigsSet(ctx *ctx.Context, ckey, cval string) error {
return ConfigsSetWithUname(ctx, ckey, cval, "default")
}
@@ -355,3 +369,19 @@ func ConfigUserVariableGetDecryptMap(context *ctx.Context, privateKey []byte, pa
return ret, nil
}
// ConfigCvalStatistics returns the row count and the latest update_at value of
// the config rows whose ckey is non-empty and whose external flag is 0.
//
// On a non-center node the statistics are fetched from the
// "/v1/n9e/statistic?name=cval" endpoint through poster.
func ConfigCvalStatistics(context *ctx.Context) (*Statistics, error) {
	if !context.IsCenter {
		return poster.GetByUrls[*Statistics](context, "/v1/n9e/statistic?name=cval")
	}

	// Restrict the aggregate to built-in config rows.
	query := DB(context).Model(&Configs{}).
		Select("count(*) as total", "max(update_at) as last_updated").
		Where("ckey!='' and external=? ", 0)

	var rows []*Statistics
	if err := query.Find(&rows).Error; err != nil {
		return nil, err
	}
	return rows[0], nil
}

View File

@@ -2,7 +2,9 @@ package models
import (
"encoding/json"
"math/rand"
"net/http"
"net/url"
"strings"
"time"
@@ -50,6 +52,7 @@ type HTTP struct {
TLS TLS `json:"tls"`
MaxIdleConnsPerHost int `json:"max_idle_conns_per_host"`
Url string `json:"url"`
Urls []string `json:"urls"`
Headers map[string]string `json:"headers"`
}
@@ -68,6 +71,49 @@ func (h HTTP) IsLoki() bool {
return false
}
// GetUrls returns the candidate addresses of the datasource in random order.
//
// When Urls is empty, the single Url field is returned as the only candidate.
// Otherwise a copy of Urls is shuffled and returned; the slice stored in
// h.Urls is never reordered.
func (h HTTP) GetUrls() []string {
	var urls []string
	if len(h.Urls) == 0 {
		urls = []string{h.Url}
	} else {
		// Copy first: h is passed by value, but its Urls slice still shares
		// the backing array with the caller's struct.
		urls = make([]string, len(h.Urls))
		copy(urls, h.Urls)
	}

	// Randomize the order of the candidates.
	rand.Shuffle(len(urls), func(i, j int) {
		urls[i], urls[j] = urls[j], urls[i]
	})

	return urls
}
// NewReq builds a GET request for the first candidate address that
// http.NewRequest accepts.
//
// The candidates come from h.GetUrls and are tried in the order returned.
// On success the chosen address is written to *reqUrl (when reqUrl is not nil)
// and the request is returned. When every candidate is rejected, the error of
// the last attempt is returned; when there are no candidates at all, an error
// is returned instead of a nil request with a nil error.
func (h HTTP) NewReq(reqUrl *string) (req *http.Request, err error) {
	urls := h.GetUrls()
	if len(urls) == 0 {
		return nil, errors.New("no urls")
	}

	for _, u := range urls {
		req, err = http.NewRequest(http.MethodGet, u, nil)
		if err != nil {
			continue
		}
		// Guard the out-parameter so a nil pointer does not panic.
		if reqUrl != nil {
			*reqUrl = u
		}
		return req, nil
	}

	return nil, err
}
// ParseUrl parses the first address returned by h.GetUrls into a *url.URL.
//
// It returns a "no urls" error when GetUrls yields nothing, and the error from
// url.Parse when the chosen address is malformed.
func (h HTTP) ParseUrl() (target *url.URL, err error) {
	candidates := h.GetUrls()
	if len(candidates) == 0 {
		return nil, errors.New("no urls")
	}

	parsed, parseErr := url.Parse(candidates[0])
	if parseErr != nil {
		return nil, parseErr
	}
	return parsed, nil
}
// TLS holds the TLS options embedded in the HTTP datasource settings
// (serialized under the "tls" key of HTTP).
type TLS struct {
// SkipTlsVerify is serialized as "skip_tls_verify".
// NOTE(review): presumably disables server certificate verification when
// true — confirm where the HTTP client is built.
SkipTlsVerify bool `json:"skip_tls_verify"`
}
@@ -300,6 +346,10 @@ func (ds *Datasource) DB2FE() error {
ds.HTTPJson.MaxIdleConnsPerHost = 100
}
if ds.PluginType == ELASTICSEARCH && len(ds.HTTPJson.Urls) == 0 {
ds.HTTPJson.Urls = []string{ds.HTTPJson.Url}
}
if ds.Auth != "" {
err := json.Unmarshal([]byte(ds.Auth), &ds.AuthJson)
if err != nil {
@@ -336,12 +386,12 @@ func DatasourceGetMap(ctx *ctx.Context) (map[int64]*Datasource, error) {
}
}
ret := make(map[int64]*Datasource)
ds := make(map[int64]*Datasource)
for i := 0; i < len(lst); i++ {
ret[lst[i].Id] = lst[i]
ds[lst[i].Id] = lst[i]
}
return ret, nil
return ds, nil
}
func DatasourceStatistics(ctx *ctx.Context) (*Statistics, error) {

View File

@@ -28,7 +28,7 @@ func MigrateIbexTables(db *gorm.DB) {
db = db.Set("gorm:table_options", tableOptions)
}
dts := []interface{}{&imodels.TaskMeta{}, &imodels.TaskScheduler{}, &imodels.TaskSchedulerHealth{}, &imodels.TaskHostDoing{}, &imodels.TaskAction{}}
dts := []interface{}{&imodels.TaskMeta{}, &imodels.TaskScheduler{}, &TaskHostDoing{}, &imodels.TaskAction{}}
for _, dt := range dts {
err := db.AutoMigrate(dt)
if err != nil {
@@ -38,13 +38,22 @@ func MigrateIbexTables(db *gorm.DB) {
for i := 0; i < 100; i++ {
tableName := fmt.Sprintf("task_host_%d", i)
err := db.Table(tableName).AutoMigrate(&imodels.TaskHost{})
if err != nil {
logger.Errorf("failed to migrate table:%s %v", tableName, err)
exists := db.Migrator().HasTable(tableName)
if exists {
continue
} else {
err := db.Table(tableName).AutoMigrate(&imodels.TaskHost{})
if err != nil {
logger.Errorf("failed to migrate table:%s %v", tableName, err)
}
}
}
}
// isPostgres reports whether the dialector behind db identifies itself with
// the name "postgres".
func isPostgres(db *gorm.DB) bool {
	return db.Dialector.Name() == "postgres"
}
func MigrateTables(db *gorm.DB) error {
var tableOptions string
switch db.Dialector.(type) {
@@ -54,13 +63,22 @@ func MigrateTables(db *gorm.DB) error {
if tableOptions != "" {
db = db.Set("gorm:table_options", tableOptions)
}
dts := []interface{}{&RecordingRule{}, &AlertRule{}, &AlertSubscribe{}, &AlertMute{},
&TaskRecord{}, &ChartShare{}, &Target{}, &Configs{}, &Datasource{}, &NotifyTpl{},
&Board{}, &BoardBusigroup{}, &Users{}, &SsoConfig{}, &models.BuiltinMetric{},
&models.MetricFilter{}, &models.BuiltinComponent{}, &models.NotificaitonRecord{},
&models.MetricFilter{}, &models.NotificaitonRecord{},
&models.TargetBusiGroup{}}
if isPostgres(db) {
dts = append(dts, &models.PostgresBuiltinComponent{})
} else {
dts = append(dts, &models.BuiltinComponent{})
}
if !db.Migrator().HasColumn(&imodels.TaskSchedulerHealth{}, "scheduler") {
dts = append(dts, &imodels.TaskSchedulerHealth{})
}
if !columnHasIndex(db, &AlertHisEvent{}, "original_tags") ||
!columnHasIndex(db, &AlertCurEvent{}, "original_tags") {
asyncDts := []interface{}{&AlertHisEvent{}, &AlertCurEvent{}}
@@ -74,7 +92,7 @@ func MigrateTables(db *gorm.DB) error {
for _, dt := range asyncDts {
if err := db.AutoMigrate(dt); err != nil {
logger.Errorf("failed to migrate table: %v", err)
logger.Errorf("failed to migrate table %+v err:%v", dt, err)
}
}
}()
@@ -170,14 +188,20 @@ func InsertPermPoints(db *gorm.DB) {
})
for _, op := range ops {
exists, err := models.Exists(db.Model(&models.RoleOperation{}).Where("operation = ? and role_name = ?", op.Operation, op.RoleName))
var count int64
err := db.Raw("SELECT COUNT(*) FROM role_operation WHERE operation = ? AND role_name = ?",
op.Operation, op.RoleName).Scan(&count).Error
if err != nil {
logger.Errorf("check role operation exists failed, %v", err)
continue
}
if exists {
if count > 0 {
continue
}
err = db.Create(&op).Error
if err != nil {
logger.Errorf("insert role operation failed, %v", err)
@@ -186,15 +210,17 @@ func InsertPermPoints(db *gorm.DB) {
}
type AlertRule struct {
ExtraConfig string `gorm:"type:text;column:extra_config"` // extra config
ExtraConfig string `gorm:"type:text;column:extra_config"`
CronPattern string `gorm:"type:varchar(64);column:cron_pattern"`
DatasourceQueries []models.DatasourceQuery `gorm:"datasource_queries;type:text;serializer:json"` // datasource queries
}
type AlertSubscribe struct {
ExtraConfig string `gorm:"type:text;column:extra_config"` // extra config
Severities string `gorm:"column:severities;type:varchar(32);not null;default:''"`
BusiGroups ormx.JSONArr `gorm:"column:busi_groups;type:varchar(4096);not null;default:'[]'"`
BusiGroups ormx.JSONArr `gorm:"column:busi_groups;type:varchar(4096)"`
Note string `gorm:"column:note;type:varchar(1024);default:'';comment:note"`
RuleIds []int64 `gorm:"column:rule_ids;type:varchar(1024);default:'';comment:rule_ids"`
RuleIds []int64 `gorm:"column:rule_ids;type:varchar(1024)"`
}
type AlertMute struct {
@@ -203,9 +229,10 @@ type AlertMute struct {
}
type RecordingRule struct {
QueryConfigs string `gorm:"type:text;not null;column:query_configs"` // query_configs
DatasourceIds string `gorm:"column:datasource_ids;type:varchar(255);default:'';comment:datasource ids"`
CronPattern string `gorm:"column:cron_pattern;type:varchar(255);default:'';comment:cron pattern"`
QueryConfigs string `gorm:"type:text;not null;column:query_configs"` // query_configs
DatasourceIds string `gorm:"column:datasource_ids;type:varchar(255);default:'';comment:datasource ids"`
CronPattern string `gorm:"column:cron_pattern;type:varchar(255);default:'';comment:cron pattern"`
DatasourceQueries []models.DatasourceQuery `json:"datasource_queries" gorm:"datasource_queries;type:text;serializer:json"` // datasource queries
}
type AlertingEngines struct {
@@ -280,3 +307,15 @@ type BuiltinPayloads struct {
UUID int64 `json:"uuid" gorm:"type:bigint;not null;index:idx_uuid;comment:'uuid of payload'"`
ComponentID int64 `json:"component_id" gorm:"type:bigint;index:idx_component,sort:asc;not null;default:0;comment:'component_id of payload'"`
}
// TaskHostDoing is the migration-side model of the "task_host_doing" table
// (see its TableName method). It is declared locally in this package and
// passed to AutoMigrate by MigrateIbexTables.
type TaskHostDoing struct {
// Id is an indexed column that is explicitly not the primary key.
Id int64 `gorm:"column:id;index;primaryKey:false"`
// Host is an indexed, non-null column limited to 128 characters.
Host string `gorm:"column:host;size:128;not null;index"`
// Clock is a non-null integer column defaulting to 0.
// NOTE(review): presumably a unix timestamp — confirm against the writers.
Clock int64 `gorm:"column:clock;not null;default:0"`
// Action is a non-null column limited to 16 characters.
Action string `gorm:"column:action;size:16;not null"`
// AlertTriggered carries the gorm:"-" tag, so it is not mapped to a column.
AlertTriggered bool `gorm:"-"`
}
// TableName returns the fixed table name "task_host_doing" for TaskHostDoing.
func (TaskHostDoing) TableName() string {
return "task_host_doing"
}

View File

@@ -0,0 +1,69 @@
package migrate
import (
"fmt"
"testing"
"github.com/ccfos/nightingale/v6/models"
"gorm.io/driver/mysql"
"gorm.io/gorm"
"gorm.io/gorm/schema"
)
// TestInsertPermPoints exercises the "insert a role operation only when it is
// not present yet" logic against a live MySQL instance.
//
// The test needs a database reachable at the DSN below. When the connection
// cannot be opened the test is skipped rather than continuing with an unusable
// handle. Query and insert failures are reported through t.Errorf so that they
// actually fail the test instead of only being printed.
func TestInsertPermPoints(t *testing.T) {
	dsn := "root:1234@tcp(127.0.0.1:3306)/n9e_v6?charset=utf8mb4&parseTime=True&loc=Local&allowNativePasswords=true"
	db, err := gorm.Open(mysql.Open(dsn), &gorm.Config{
		NamingStrategy: schema.NamingStrategy{SingularTable: true},
	})
	if err != nil {
		t.Skipf("failed to connect database: %v", err)
	}

	ops := []models.RoleOperation{
		{RoleName: "Standard", Operation: "/alert-mutes/put"},
		{RoleName: "Standard", Operation: "/log/index-patterns"},
		{RoleName: "Standard", Operation: "/help/variable-configs"},
		{RoleName: "Admin", Operation: "/permissions"},
		{RoleName: "Standard", Operation: "/ibex-settings"},
	}

	// Debug mode makes gorm log the generated SQL statements.
	db = db.Debug()

	for i := range ops {
		op := &ops[i]
		label := fmt.Sprintf("role=%v operation=%v", op.RoleName, op.Operation)

		var count int64
		err := db.Raw("SELECT COUNT(*) FROM role_operation WHERE operation = ? AND role_name = ?",
			op.Operation, op.RoleName).Scan(&count).Error
		if err != nil {
			t.Errorf("%s: check role operation exists failed, %v", label, err)
			continue
		}
		t.Logf("%s: count: %d", label, count)

		// Already present: nothing to insert.
		if count > 0 {
			continue
		}

		if err := db.Create(op).Error; err != nil {
			t.Errorf("%s: insert role operation failed, %v", label, err)
		}
	}
}

Some files were not shown because too many files have changed in this diff Show More