Compare commits

..

15 Commits

Author SHA1 Message Date
ning
15c2eadda6 refactor: optimize webhook send 2024-12-11 16:25:35 +08:00
ning
0aea38e564 refactor: write queue limit 2024-12-10 21:03:55 +08:00
CRISPpp
45e9253b2a feat: add global metric write rate control (#2347) 2024-12-10 20:43:02 +08:00
CRISPpp
9385ca9931 feat: add pre check for deleting busi_group (#2346) 2024-12-09 20:32:46 +08:00
ning
fdd3d14871 docs: change default db type to sqlite 2024-12-06 21:04:10 +08:00
Yening Qin
e890034c19 feat: auto init db (#2345)
Co-authored-by: CRISPpp <78430796+CRISPpp@users.noreply.github.com>
2024-12-06 20:32:17 +08:00
Yening Qin
3aaab9e6ad fix: event prom eval interval (#2343) 2024-12-06 20:24:49 +08:00
CRISPpp
7f7d707cfc fix: role_operation abnormal count (#2338) 2024-12-06 16:31:47 +08:00
Xu Bin
98402e9f8a fix: quotation mark for alert rule var (#2339) 2024-12-06 16:07:47 +08:00
Xu Bin
017094fd78 fix: var support for aggregate function (#2334) 2024-12-06 11:57:51 +08:00
Yening Qin
8b6b896362 feat: redis support miniredis type (#2337)
Co-authored-by: CRISPpp <78430796+CRISPpp@users.noreply.github.com>
2024-12-06 10:46:05 +08:00
ning
acaa00cfb6 refactor: migrate add more log 2024-12-05 17:55:27 +08:00
flashbo
87f3d8595d fix: targets filter logic (#2333) 2024-12-05 14:31:57 +08:00
flashbo
42791a374d feat: targets support sorting by time (#2331) 2024-12-05 14:20:30 +08:00
kongfei605
3855c25805 chore: update dashboards for mongodb (#2332) 2024-12-04 16:19:21 +08:00
22 changed files with 849 additions and 373 deletions

1
.gitignore vendored
View File

@@ -9,6 +9,7 @@
*.o
*.a
*.so
*.db
*.sw[po]
*.tar.gz
*.[568vq]

View File

@@ -293,9 +293,9 @@ func (e *Dispatch) Send(rule *models.AlertRule, event *models.AlertCurEvent, not
// handle global webhooks
if !event.OverrideGlobalWebhook() {
if e.alerting.WebhookBatchSend {
sender.BatchSendWebhooks(e.ctx, notifyTarget.ToWebhookList(), event, e.Astats)
sender.BatchSendWebhooks(e.ctx, notifyTarget.ToWebhookMap(), event, e.Astats)
} else {
sender.SingleSendWebhooks(e.ctx, notifyTarget.ToWebhookList(), event, e.Astats)
sender.SingleSendWebhooks(e.ctx, notifyTarget.ToWebhookMap(), event, e.Astats)
}
}

View File

@@ -76,52 +76,8 @@ func (s *NotifyTarget) ToCallbackList() []string {
return callbacks
}
func (s *NotifyTarget) ToWebhookList() []*models.Webhook {
webhooks := make([]*models.Webhook, 0, len(s.webhooks))
for _, wh := range s.webhooks {
if wh.Batch == 0 {
wh.Batch = 1000
}
if wh.Timeout == 0 {
wh.Timeout = 10
}
if wh.RetryCount == 0 {
wh.RetryCount = 10
}
if wh.RetryInterval == 0 {
wh.RetryInterval = 10
}
webhooks = append(webhooks, wh)
}
return webhooks
}
func (s *NotifyTarget) ToWebhookMap() map[string]*models.Webhook {
webhookMap := make(map[string]*models.Webhook, len(s.webhooks))
for _, wh := range s.webhooks {
if wh.Batch == 0 {
wh.Batch = 1000
}
if wh.Timeout == 0 {
wh.Timeout = 10
}
if wh.RetryCount == 0 {
wh.RetryCount = 10
}
if wh.RetryInterval == 0 {
wh.RetryInterval = 10
}
webhookMap[wh.Url] = wh
}
return webhookMap
return s.webhooks
}
func (s *NotifyTarget) ToUidList() []int64 {

View File

@@ -104,9 +104,17 @@ func NewAlertRuleWorker(rule *models.AlertRule, datasourceId int64, Processor *p
Processor.ScheduleEntry = arw.Scheduler.Entry(entryID)
Processor.PromEvalInterval = getPromEvalInterval(Processor.ScheduleEntry.Schedule)
return arw
}
// getPromEvalInterval derives a rule's evaluation interval (in seconds)
// from its cron schedule by measuring the gap between the next two
// scheduled fire times after now.
func getPromEvalInterval(schedule cron.Schedule) int {
	first := schedule.Next(time.Now())
	second := schedule.Next(first)
	return int(second.Sub(first).Seconds())
}
// Key returns the unique identifier for this worker, combining the
// datasource id and the rule id (see common.RuleKey for the exact format).
func (arw *AlertRuleWorker) Key() string {
	return common.RuleKey(arw.DatasourceId, arw.Rule.Id)
}
@@ -130,7 +138,10 @@ func (arw *AlertRuleWorker) Start() {
}
func (arw *AlertRuleWorker) Eval() {
arw.Processor.EvalStart = time.Now().Unix()
if arw.Processor.PromEvalInterval == 0 {
arw.Processor.PromEvalInterval = getPromEvalInterval(arw.Processor.ScheduleEntry.Schedule)
}
cachedRule := arw.Rule
if cachedRule == nil {
// logger.Errorf("rule_eval:%s Rule not found", arw.Key())
@@ -240,10 +251,15 @@ func (arw *AlertRuleWorker) GetPromAnomalyPoint(ruleConfig string) ([]models.Ano
readerClient := arw.PromClients.GetCli(arw.DatasourceId)
if query.VarEnabled {
anomalyPoints := arw.VarFilling(query, readerClient)
for _, v := range anomalyPoints {
lst = append(lst, v)
var anomalyPoints []models.AnomalyPoint
if hasLabelLossAggregator(query) {
// 若有聚合函数则需要先填充变量然后查询,这个方式效率较低
anomalyPoints = arw.VarFillingBeforeQuery(query, readerClient)
} else {
// 先查询再过滤变量,效率较高,但无法处理有聚合函数的情况
anomalyPoints = arw.VarFillingAfterQuery(query, readerClient)
}
lst = append(lst, anomalyPoints...)
} else {
// 无变量
promql := strings.TrimSpace(query.PromQl)
@@ -297,17 +313,18 @@ type sample struct {
Timestamp model.Time
}
// VarFilling 填充变量
// VarFillingAfterQuery 填充变量,先查询再填充变量
// 公式: mem_used_percent{host="$host"} > $val 其中 $host 为参数变量,$val 为值变量
// 实现步骤:
// 广度优先遍历,保证同一参数变量的子筛选可以覆盖上一层筛选
// 每个节点先查询无参数的 query, 即 mem_used_percent > curVal, 得到满足值变量的所有结果
// 依次遍历参数配置节点,保证同一参数变量的子筛选可以覆盖上一层筛选
// 每个节点先查询无参数的 query, 即 mem_used_percent{} > curVal, 得到满足值变量的所有结果
// 结果中有满足本节点参数变量的值,加入异常点列表
// 参数变量的值不满足的组合,需要覆盖上层筛选中产生的异常点
func (arw *AlertRuleWorker) VarFilling(query models.PromQuery, readerClient promsdk.API) map[string]models.AnomalyPoint {
func (arw *AlertRuleWorker) VarFillingAfterQuery(query models.PromQuery, readerClient promsdk.API) []models.AnomalyPoint {
varToLabel := ExtractVarMapping(query.PromQl)
fullQuery := removeVal(query.PromQl)
// 存储所有的异常点key 为参数变量的组合,可以实现子筛选对上一层筛选的覆盖
anomalyPoints := make(map[string]models.AnomalyPoint)
anomalyPointsMap := make(map[string]models.AnomalyPoint)
// 统一变量配置格式
VarConfigForCalc := &models.ChildVarConfig{
ParamVal: make([]map[string]models.ParamQuery, 1),
@@ -369,12 +386,13 @@ func (arw *AlertRuleWorker) VarFilling(query models.PromQuery, readerClient prom
curRealQuery := realQuery
var cur []string
for _, paramKey := range ParamKeys {
val := string(seqVals[i].Metric[model.LabelName(paramKey)])
val := string(seqVals[i].Metric[model.LabelName(varToLabel[paramKey])])
cur = append(cur, val)
curRealQuery = strings.Replace(curRealQuery, fmt.Sprintf("$%s", paramKey), val, -1)
curRealQuery = fillVar(curRealQuery, paramKey, val)
}
if _, ok := paramPermutation[strings.Join(cur, "-")]; ok {
anomalyPoints[strings.Join(cur, "-")] = models.AnomalyPoint{
anomalyPointsMap[strings.Join(cur, "-")] = models.AnomalyPoint{
Key: seqVals[i].Metric.String(),
Timestamp: seqVals[i].Timestamp.Unix(),
Value: float64(seqVals[i].Value),
@@ -389,12 +407,16 @@ func (arw *AlertRuleWorker) VarFilling(query models.PromQuery, readerClient prom
// 剩余的参数组合为本层筛选不产生异常点的组合,需要覆盖上层筛选中产生的异常点
for k, _ := range paramPermutation {
delete(anomalyPoints, k)
delete(anomalyPointsMap, k)
}
}
curNode = curNode.ChildVarConfigs
}
anomalyPoints := make([]models.AnomalyPoint, 0)
for _, point := range anomalyPointsMap {
anomalyPoints = append(anomalyPoints, point)
}
return anomalyPoints
}
@@ -1198,3 +1220,188 @@ func GetQueryRefAndUnit(query interface{}) (string, string, error) {
json.Unmarshal(queryBytes, &queryMap)
return queryMap.Ref, queryMap.Unit, nil
}
// VarFillingBeforeQuery fills variables BEFORE querying; used when the promql
// contains aggregation functions that drop labels.
//
// Example: avg(mem_used_percent{host="$host"}) > $val, where $host is a
// parameter variable and $val is a threshold (value) variable.
//
// Steps:
//   - Walk the parameter-config list node by node, so that a child node's
//     filter can override the result produced by its parent node.
//   - For each node, substitute all variables first to obtain a complete
//     promql, e.g. avg(mem_used_percent{host="127.0.0.1"}) > 5, then run the
//     query; every returned sample is an anomaly point.
//   - Parameter combinations that yield no result must override (delete)
//     anomaly points produced by an earlier node for the same combination.
func (arw *AlertRuleWorker) VarFillingBeforeQuery(query models.PromQuery, readerClient promsdk.API) []models.AnomalyPoint {
	// Map of anomaly points keyed by the joined parameter-variable combination,
	// which lets a child filter overwrite its parent's entry for the same key.
	// sync.Map because entries are stored/deleted from concurrent goroutines.
	anomalyPointsMap := sync.Map{}
	// Normalize the variable configuration into a uniform linked-list shape:
	// the top-level ParamVal becomes the first node.
	VarConfigForCalc := &models.ChildVarConfig{
		ParamVal:        make([]map[string]models.ParamQuery, 1),
		ChildVarConfigs: query.VarConfig.ChildVarConfigs,
	}
	VarConfigForCalc.ParamVal[0] = make(map[string]models.ParamQuery)
	for _, p := range query.VarConfig.ParamVal {
		VarConfigForCalc.ParamVal[0][p.Name] = models.ParamQuery{
			ParamType: p.ParamType,
			Query:     p.Query,
		}
	}
	// Build one stable ordering of parameter-variable names (thresholds
	// excluded), so permutation keys and promql substitution stay aligned.
	var ParamKeys []string
	for val, valQuery := range VarConfigForCalc.ParamVal[0] {
		if valQuery.ParamType == "threshold" {
			continue
		}
		ParamKeys = append(ParamKeys, val)
	}
	sort.Slice(ParamKeys, func(i, j int) bool {
		return ParamKeys[i] < ParamKeys[j]
	})
	// Traverse the variable-config linked list.
	curNode := VarConfigForCalc
	for curNode != nil {
		for _, param := range curNode.ParamVal {
			curPromql := query.PromQl
			// Collect the threshold (value) variables of this node.
			valMap := make(map[string]string)
			for val, valQuery := range param {
				if valQuery.ParamType == "threshold" {
					valMap[val] = getString(valQuery.Query)
				}
			}
			// Substitute threshold variables into the promql.
			for key, val := range valMap {
				curPromql = strings.Replace(curPromql, fmt.Sprintf("$%s", key), val, -1)
			}
			// Expand all combinations of the parameter variables.
			paramPermutation, err := arw.getParamPermutation(param, ParamKeys)
			if err != nil {
				logger.Errorf("rule_eval:%s, paramPermutation error:%v", arw.Key(), err)
				continue
			}
			// For every combination, build the fully substituted promql.
			// NOTE(review): the combination key is split on "-"; parameter
			// values containing "-" would break the alignment — assumed not
			// to occur upstream, confirm against getParamPermutation.
			keyToPromql := make(map[string]string)
			for paramPermutationKeys, _ := range paramPermutation {
				realPromql := curPromql
				split := strings.Split(paramPermutationKeys, "-")
				for j := range ParamKeys {
					realPromql = fillVar(realPromql, ParamKeys[j], split[j])
				}
				keyToPromql[paramPermutationKeys] = realPromql
			}
			// Run the queries concurrently, bounded to 200 in flight.
			wg := sync.WaitGroup{}
			semaphore := make(chan struct{}, 200)
			for key, promql := range keyToPromql {
				wg.Add(1)
				semaphore <- struct{}{}
				go func(key, promql string) {
					defer func() {
						<-semaphore
						wg.Done()
					}()
					value, _, err := readerClient.Query(context.Background(), promql, time.Now())
					if err != nil {
						logger.Errorf("rule_eval:%s, promql:%s, error:%v", arw.Key(), promql, err)
						return
					}
					points := models.ConvertAnomalyPoints(value)
					if len(points) == 0 {
						// No result for this combination: remove the anomaly
						// points a previous (parent) node stored for the same
						// key, implementing the override semantics.
						anomalyPointsMap.Delete(key)
						return
					}
					// Decorate each point with severity, the executed query
					// and the formatted value.
					for i := 0; i < len(points); i++ {
						points[i].Severity = query.Severity
						points[i].Query = promql
						points[i].ValuesUnit = map[string]unit.FormattedValue{
							"v": unit.ValueFormatter(query.Unit, 2, points[i].Value),
						}
					}
					anomalyPointsMap.Store(key, points)
				}(key, promql)
			}
			// Wait for this node's queries before moving to the child node,
			// so child overrides observe the parent's stored results.
			wg.Wait()
		}
		curNode = curNode.ChildVarConfigs
	}
	// Flatten the map into the slice the caller expects.
	anomalyPoints := make([]models.AnomalyPoint, 0)
	anomalyPointsMap.Range(func(key, value any) bool {
		if points, ok := value.([]models.AnomalyPoint); ok {
			anomalyPoints = append(anomalyPoints, points...)
		}
		return true
	})
	return anomalyPoints
}
// hasLabelLossAggregator reports whether the query's promql contains an
// aggregation operator that collapses series and therefore loses labels
// (e.g. sum, avg). The check is a case-insensitive substring match for
// "name(" anywhere in the expression.
func hasLabelLossAggregator(query models.PromQuery) bool {
	aggregators := []string{
		"sum", "min", "max", "avg",
		"stddev", "stdvar",
		"count", "quantile",
		"group",
	}
	lowered := strings.ToLower(query.PromQl)
	for _, name := range aggregators {
		// Require the opening parenthesis right after the name so that plain
		// metric names containing the word do not match.
		if strings.Contains(lowered, name+"(") {
			return true
		}
	}
	return false
}
// ExtractVarMapping extracts the variable-to-label mapping from a promql
// expression, so that after a query the labels can be written back into the
// promql correctly.
//
// Input:  sum(rate(mem_used_percent{host="$my_host"})) by (instance) + avg(node_load1{region="$region"}) > $val
// Output: map[string]string{"my_host": "host", "region": "region"}
//
// Only label values of the form $name (bare, single- or double-quoted) are
// recorded. An unmatched "{" terminates the scan, returning whatever was
// collected so far.
func ExtractVarMapping(promql string) map[string]string {
	varMapping := make(map[string]string)
	// Walk every {...} pair from left to right.
	for {
		start := strings.Index(promql, "{")
		if start == -1 {
			break
		}
		// Look for the closing brace only AFTER the opening one. Searching
		// the whole string could find a "}" that precedes "{", making
		// start+1 > end and panicking on the slice below.
		rest := promql[start+1:]
		end := strings.Index(rest, "}")
		if end == -1 {
			break
		}
		// Extract the label key/value pairs inside the braces.
		labels := rest[:end]
		pairs := strings.Split(labels, ",")
		for _, pair := range pairs {
			// Split one pair; anything that is not exactly key=value
			// (empty selector, malformed matcher) is skipped.
			kv := strings.Split(pair, "=")
			if len(kv) != 2 {
				continue
			}
			key := strings.TrimSpace(kv[0])
			value := strings.Trim(strings.TrimSpace(kv[1]), "\"")
			value = strings.Trim(value, "'")
			// Record only values that reference a variable ($name).
			if strings.HasPrefix(value, "$") {
				varName := value[1:] // drop the $ prefix
				varMapping[varName] = key
			}
		}
		// Continue scanning after the closing brace.
		promql = rest[end+1:]
	}
	return varMapping
}
// fillVar substitutes every quoted occurrence of the parameter variable
// $paramKey in the query with val, preserving the surrounding quote style
// (both '...' and "..." forms are handled).
func fillVar(curRealQuery string, paramKey string, val string) string {
	singleQuoted := "'$" + paramKey + "'"
	doubleQuoted := `"$` + paramKey + `"`
	curRealQuery = strings.ReplaceAll(curRealQuery, singleQuoted, "'"+val+"'")
	curRealQuery = strings.ReplaceAll(curRealQuery, doubleQuoted, `"`+val+`"`)
	return curRealQuery
}

View File

@@ -340,7 +340,7 @@ func Test_removeVal(t *testing.T) {
{
name: "removeVal7",
args: args{
promql: "mem{test1=\"test1\",test2=\"test2\",test3=\"$test3\"} > $val",
promql: "mem{test1=\"test1\",test2=\"test2\",test3='$test3'} > $val",
},
want: "mem{test1=\"test1\",test2=\"test2\"} > $val",
},
@@ -361,16 +361,16 @@ func Test_removeVal(t *testing.T) {
{
name: "removeVal10",
args: args{
promql: "mem{test1=\"test1\",test2=\"$test2\"} > $val1 and mem{test3=\"test3\",test4=\"test4\"} > $val2",
promql: "mem{test1=\"test1\",test2='$test2'} > $val1 and mem{test3=\"test3\",test4=\"test4\"} > $val2",
},
want: "mem{test1=\"test1\"} > $val1 and mem{test3=\"test3\",test4=\"test4\"} > $val2",
},
{
name: "removeVal11",
args: args{
promql: "mem{test1=\"test1\",test2=\"test2\"} > $val1 and mem{test3=\"$test3\",test4=\"test4\"} > $val2",
promql: "mem{test1='test1',test2=\"test2\"} > $val1 and mem{test3=\"$test3\",test4=\"test4\"} > $val2",
},
want: "mem{test1=\"test1\",test2=\"test2\"} > $val1 and mem{test4=\"test4\"} > $val2",
want: "mem{test1='test1',test2=\"test2\"} > $val1 and mem{test4=\"test4\"} > $val2",
},
{
name: "removeVal12",
@@ -388,3 +388,71 @@ func Test_removeVal(t *testing.T) {
})
}
}
// TestExtractVarMapping is a table-driven test covering variable extraction
// from promql selectors: quoted and unquoted variables, repeated variables,
// multiple brace pairs, empty/unclosed braces, and expressions without
// variables. (Case names are kept in Chinese as they are runtime strings.)
func TestExtractVarMapping(t *testing.T) {
	tests := []struct {
		name   string
		promql string
		want   map[string]string
	}{
		{
			// single brace pair, single variable
			name:   "单个花括号单个变量",
			promql: `mem_used_percent{host="$my_host"} > $val`,
			want:   map[string]string{"my_host": "host"},
		},
		{
			// single brace pair, multiple variables; literal labels ignored
			name:   "单个花括号多个变量",
			promql: `mem_used_percent{host="$my_host",region="$region",env="prod"} > $val`,
			want:   map[string]string{"my_host": "host", "region": "region"},
		},
		{
			// multiple brace pairs, multiple variables
			name:   "多个花括号多个变量",
			promql: `sum(rate(mem_used_percent{host="$my_host"})) by (instance) + avg(node_load1{region="$region"}) > $val`,
			want:   map[string]string{"my_host": "host", "region": "region"},
		},
		{
			// same variable appearing more than once collapses to one entry
			name:   "相同变量出现多次",
			promql: `sum(rate(mem_used_percent{host="$my_host"})) + avg(node_load1{host="$my_host"}) > $val`,
			want:   map[string]string{"my_host": "host"},
		},
		{
			// no variables at all
			name:   "没有变量",
			promql: `mem_used_percent{host="localhost",region="cn"} > 80`,
			want:   map[string]string{},
		},
		{
			// no braces at all
			name:   "没有花括号",
			promql: `80 > $val`,
			want:   map[string]string{},
		},
		{
			// unquoted / loosely spaced labels still parse
			name:   "格式不规范的标签",
			promql: `mem_used_percent{host=$my_host,region = $region} > $val`,
			want:   map[string]string{"my_host": "host", "region": "region"},
		},
		{
			// empty brace pair yields nothing
			name:   "空花括号",
			promql: `mem_used_percent{} > $val`,
			want:   map[string]string{},
		},
		{
			// unclosed brace terminates the scan with an empty result
			name:   "不完整的花括号",
			promql: `mem_used_percent{host="$my_host"`,
			want:   map[string]string{},
		},
		{
			// realistic multi-selector expression
			name:   "复杂表达式",
			promql: `sum(rate(http_requests_total{handler="$handler",code="$code"}[5m])) by (handler) / sum(rate(http_requests_total{handler="$handler"}[5m])) by (handler) * 100 > $threshold`,
			want:   map[string]string{"handler": "handler", "code": "code"},
		},
	}
	for _, tt := range tests {
		t.Run(tt.name, func(t *testing.T) {
			got := ExtractVarMapping(tt.promql)
			if !reflect.DeepEqual(got, tt.want) {
				t.Errorf("ExtractVarMapping() = %v, want %v", got, tt.want)
			}
		})
	}
}

View File

@@ -80,8 +80,8 @@ type Processor struct {
HandleRecoverEventHook HandleEventFunc
EventMuteHook EventMuteHookFunc
ScheduleEntry cron.Entry
EvalStart int64
ScheduleEntry cron.Entry
PromEvalInterval int
}
func (p *Processor) Key() string {
@@ -424,6 +424,7 @@ func (p *Processor) handleEvent(events []*models.AlertCurEvent) {
p.pendingsUseByRecover.Set(event.Hash, event)
}
event.PromEvalInterval = p.PromEvalInterval
if p.rule.PromForDuration == 0 {
fireEvents = append(fireEvents, event)
if severity > event.Severity {
@@ -442,7 +443,6 @@ func (p *Processor) handleEvent(events []*models.AlertCurEvent) {
preTriggerTime = event.TriggerTime
}
event.PromEvalInterval = int(p.ScheduleEntry.Schedule.Next(time.Unix(p.EvalStart, 0)).Unix() - p.EvalStart)
if event.LastEvalTime-preTriggerTime+int64(event.PromEvalInterval) >= int64(p.rule.PromForDuration) {
fireEvents = append(fireEvents, event)
if severity > event.Severity {

View File

@@ -59,17 +59,21 @@ func sendWebhook(webhook *models.Webhook, event interface{}, stats *astats.Stats
if webhook != nil {
insecureSkipVerify = webhook.SkipVerify
}
client := http.Client{
Timeout: time.Duration(conf.Timeout) * time.Second,
Transport: &http.Transport{
TLSClientConfig: &tls.Config{InsecureSkipVerify: insecureSkipVerify},
},
if conf.Client == nil {
logger.Warningf("event_%s, event:%s, url: [%s], error: [%s]", channel, string(bs), conf.Url, "client is nil")
conf.Client = &http.Client{
Timeout: time.Duration(conf.Timeout) * time.Second,
Transport: &http.Transport{
TLSClientConfig: &tls.Config{InsecureSkipVerify: insecureSkipVerify},
},
}
}
stats.AlertNotifyTotal.WithLabelValues(channel).Inc()
var resp *http.Response
var body []byte
resp, err = client.Do(req)
resp, err = conf.Client.Do(req)
if err != nil {
stats.AlertNotifyErrorTotal.WithLabelValues(channel).Inc()
@@ -91,7 +95,7 @@ func sendWebhook(webhook *models.Webhook, event interface{}, stats *astats.Stats
return false, string(body), nil
}
func SingleSendWebhooks(ctx *ctx.Context, webhooks []*models.Webhook, event *models.AlertCurEvent, stats *astats.Stats) {
func SingleSendWebhooks(ctx *ctx.Context, webhooks map[string]*models.Webhook, event *models.AlertCurEvent, stats *astats.Stats) {
for _, conf := range webhooks {
retryCount := 0
for retryCount < 3 {
@@ -106,7 +110,7 @@ func SingleSendWebhooks(ctx *ctx.Context, webhooks []*models.Webhook, event *mod
}
}
func BatchSendWebhooks(ctx *ctx.Context, webhooks []*models.Webhook, event *models.AlertCurEvent, stats *astats.Stats) {
func BatchSendWebhooks(ctx *ctx.Context, webhooks map[string]*models.Webhook, event *models.AlertCurEvent, stats *astats.Stats) {
for _, conf := range webhooks {
logger.Infof("push event:%+v to queue:%v", event, conf)
PushEvent(ctx, conf, event, stats)

View File

@@ -73,14 +73,14 @@ DefaultRoles = ["Standard"]
OpenRSA = false
[DB]
# mysql postgres sqlite
DBType = "sqlite"
# postgres: host=%s port=%s user=%s dbname=%s password=%s sslmode=%s
# postgres: DSN="host=127.0.0.1 port=5432 user=root dbname=n9e_v6 password=1234 sslmode=disable"
# sqlite: DSN="/path/to/filename.db"
DSN = "root:1234@tcp(127.0.0.1:3306)/n9e_v6?charset=utf8mb4&parseTime=True&loc=Local&allowNativePasswords=true"
DSN = "n9e.db"
# enable debug mode or not
Debug = false
# mysql postgres sqlite
DBType = "mysql"
# unit: s
MaxLifetime = 7200
# max open connections
@@ -98,8 +98,8 @@ Address = "127.0.0.1:6379"
# DB = 0
# UseTLS = false
# TLSMinVersion = "1.2"
# standalone cluster sentinel
RedisType = "standalone"
# standalone cluster sentinel miniredis
RedisType = "miniredis"
# Mastername for sentinel type
# MasterName = "mymaster"
# SentinelUsername = ""
@@ -138,6 +138,9 @@ ForceUseServerTS = true
# [Pushgw.WriterOpt]
# QueueMaxSize = 1000000
# QueuePopSize = 1000
# AllQueueMaxSize = 1000000
# fresh time, unit ms
# AllQueueMaxSizeInterval = 200
[[Pushgw.Writers]]
# Url = "http://127.0.0.1:8480/insert/0/prometheus/api/v1/write"

7
go.mod
View File

@@ -45,10 +45,15 @@ require (
gorm.io/gorm v1.25.7-0.20240204074919-46816ad31dde
)
require github.com/pmezard/go-difflib v1.0.1-0.20181226105442-5d4384ee4fb2 // indirect
require (
github.com/alicebob/gopher-json v0.0.0-20200520072559-a9ecdc9d1d3a // indirect
github.com/pmezard/go-difflib v1.0.1-0.20181226105442-5d4384ee4fb2 // indirect
github.com/yuin/gopher-lua v1.1.1 // indirect
)
require (
github.com/Azure/go-ntlmssp v0.0.0-20220621081337-cb9428e4ac1e // indirect
github.com/alicebob/miniredis/v2 v2.33.0
github.com/beorn7/perks v1.0.1 // indirect
github.com/bytedance/sonic v1.9.1 // indirect
github.com/cespare/xxhash/v2 v2.2.0 // indirect

6
go.sum
View File

@@ -15,6 +15,10 @@ github.com/Masterminds/semver/v3 v3.1.1 h1:hLg3sBzpNErnxhQtUy/mmLR2I9foDujNK030I
github.com/Masterminds/semver/v3 v3.1.1/go.mod h1:VPu/7SZ7ePZ3QOrcuXROw5FAcLl4a0cBrbBpGY/8hQs=
github.com/alecthomas/units v0.0.0-20211218093645-b94a6e3cc137 h1:s6gZFSlWYmbqAuRjVTiNNhvNRfY2Wxp9nhfyel4rklc=
github.com/alecthomas/units v0.0.0-20211218093645-b94a6e3cc137/go.mod h1:OMCwj8VM1Kc9e19TLln2VL61YJF0x1XFtfdL4JdbSyE=
github.com/alicebob/gopher-json v0.0.0-20200520072559-a9ecdc9d1d3a h1:HbKu58rmZpUGpz5+4FfNmIU+FmZg2P3Xaj2v2bfNWmk=
github.com/alicebob/gopher-json v0.0.0-20200520072559-a9ecdc9d1d3a/go.mod h1:SGnFV6hVsYE877CKEZ6tDNTjaSXYUk6QqoIK6PrAtcc=
github.com/alicebob/miniredis/v2 v2.33.0 h1:uvTF0EDeu9RLnUEG27Db5I68ESoIxTiXbNUiji6lZrA=
github.com/alicebob/miniredis/v2 v2.33.0/go.mod h1:MhP4a3EU7aENRi9aO+tHfTBZicLqQevyi/DJpoj6mi0=
github.com/aws/aws-sdk-go v1.44.302 h1:ST3ko6GrJKn3Xi+nAvxjG3uk/V1pW8KC52WLeIxqqNk=
github.com/aws/aws-sdk-go v1.44.302/go.mod h1:aVsgQcEevwlmQ7qHE9I3h+dtQgpqhFB+i8Phjh7fkwI=
github.com/beorn7/perks v1.0.1 h1:VlbKKnNfV8bJzeqoa4cOKqO6bYr3WgKZxO8Z16+hsOM=
@@ -328,6 +332,8 @@ github.com/ugorji/go/codec v1.2.11/go.mod h1:UNopzCgEMSXjBc6AOMqYvWC1ktqTAfzJZUZ
github.com/yuin/goldmark v1.1.27/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74=
github.com/yuin/goldmark v1.2.1/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74=
github.com/yuin/goldmark v1.4.13/go.mod h1:6yULJ656Px+3vBD8DxQVa3kxgyrAnzto9xy5taEt/CY=
github.com/yuin/gopher-lua v1.1.1 h1:kYKnWBjvbNP4XLT3+bPEwAXJx262OhaHDWDVOPjL46M=
github.com/yuin/gopher-lua v1.1.1/go.mod h1:GBR0iDaNXjAgGg9zfCvksxSRnQx76gclCIb7kdAd1Pw=
github.com/zenazn/goji v0.9.0/go.mod h1:7S9M489iMyHBNxwZnk9/EHS098H4/F6TATF2mIxtB1Q=
go.uber.org/atomic v1.3.2/go.mod h1:gD2HeocX3+yG+ygLZcrzQJaqmWj9AIm7n08wl/qW/PE=
go.uber.org/atomic v1.4.0/go.mod h1:gD2HeocX3+yG+ygLZcrzQJaqmWj9AIm7n08wl/qW/PE=

View File

@@ -1,5 +1,5 @@
{
"name": "MongoDB by instance",
"name": "MongoDB Overview by exporter",
"tags": "Prometheus MongoDB",
"ident": "",
"configs": {
@@ -9,11 +9,11 @@
"id": "939298f2-b21f-4e2f-9142-c10946cc4032",
"layout": {
"h": 1,
"i": "939298f2-b21f-4e2f-9142-c10946cc4032",
"isResizable": false,
"w": 24,
"x": 0,
"y": 0,
"i": "939298f2-b21f-4e2f-9142-c10946cc4032",
"isResizable": false
"y": 0
},
"name": "Basic Info",
"type": "row"
@@ -32,12 +32,12 @@
"description": "instance count",
"id": "91970d24-3f04-4424-a1ed-73e7d28f5706",
"layout": {
"h": 4,
"h": 7,
"i": "91970d24-3f04-4424-a1ed-73e7d28f5706",
"isResizable": true,
"w": 6,
"x": 0,
"y": 1,
"i": "91970d24-3f04-4424-a1ed-73e7d28f5706",
"isResizable": true
"y": 1
},
"name": "Up",
"options": {
@@ -77,54 +77,39 @@
"version": "2.0.0"
},
{
"type": "stat",
"id": "c7b52e8e-b417-4c61-a15e-e2f186fccd67",
"layout": {
"h": 4,
"w": 6,
"x": 6,
"y": 1,
"i": "c7b52e8e-b417-4c61-a15e-e2f186fccd67",
"isResizable": true
},
"version": "3.0.0",
"datasourceCate": "prometheus",
"datasourceValue": "${prom}",
"targets": [
{
"expr": "mongodb_ss_uptime{instance=\"$instance\"}",
"refId": "A",
"maxDataPoints": 240
}
],
"transformations": [
{
"id": "organize",
"options": {}
}
],
"name": "Uptime",
"description": "Uptime",
"maxPerRow": 4,
"custom": {
"textMode": "value",
"graphMode": "none",
"colorMode": "value",
"calc": "lastNotNull",
"valueField": "Value",
"colSpan": 1,
"colorMode": "value",
"textMode": "value",
"textSize": {
"title": null
},
"orientation": "auto"
"valueField": "Value"
},
"datasourceCate": "prometheus",
"datasourceValue": "${prom}",
"description": "Uptime",
"id": "c7b52e8e-b417-4c61-a15e-e2f186fccd67",
"layout": {
"h": 7,
"i": "c7b52e8e-b417-4c61-a15e-e2f186fccd67",
"isResizable": true,
"w": 6,
"x": 6,
"y": 1
},
"name": "Uptime",
"options": {
"standardOptions": {
"util": "humantimeSeconds"
},
"thresholds": {
"steps": [
{
"color": "#634CD9",
"value": null,
"type": "base"
"type": "base",
"value": null
}
]
},
@@ -147,51 +132,34 @@
},
"type": "range"
}
],
"standardOptions": {
"util": "seconds",
"decimals": 2
}
]
},
"overrides": [
"targets": [
{
"matcher": {
"id": "byFrameRefID"
},
"properties": {
"thresholds": {
"steps": [
{
"color": "#6C53B1",
"value": null,
"type": "base"
}
]
},
"standardOptions": {
"decimals": 0
}
}
"expr": "mongodb_ss_uptime{instance=\"$instance\"}",
"refId": "A"
}
]
],
"type": "stat",
"version": "2.0.0"
},
{
"type": "timeseries",
"id": "8446dded-9e11-4ee9-bdad-769b193ddf3e",
"layout": {
"h": 4,
"h": 7,
"i": "8446dded-9e11-4ee9-bdad-769b193ddf3e",
"isResizable": true,
"w": 6,
"x": 12,
"y": 1,
"i": "8446dded-9e11-4ee9-bdad-769b193ddf3e",
"isResizable": true
"y": 1
},
"version": "3.0.0",
"datasourceCate": "prometheus",
"datasourceValue": "${prom}",
"targets": [
{
"expr": "mongodb_ss_mem_resident * 1024 * 1024",
"expr": "mongodb_ss_mem_resident{instance='$instance'} * 1024 * 1024",
"legend": "{{type}}",
"refId": "A",
"maxDataPoints": 240
@@ -219,8 +187,7 @@
"selectMode": "single"
},
"standardOptions": {
"util": "bytesIEC",
"decimals": 2
"util": "bytesIEC"
},
"thresholds": {
"steps": [
@@ -275,12 +242,12 @@
"description": "Page faults indicate that requests are processed from disk either because an index is missing or there is not enough memory for the data set. Consider increasing memory or sharding out.",
"id": "3eda28e7-2480-4ddc-b346-89ced1c33034",
"layout": {
"h": 4,
"h": 7,
"i": "3eda28e7-2480-4ddc-b346-89ced1c33034",
"isResizable": true,
"w": 6,
"x": 18,
"y": 1,
"i": "3eda28e7-2480-4ddc-b346-89ced1c33034",
"isResizable": true
"y": 1
},
"name": "Page Faults",
"options": {
@@ -333,12 +300,12 @@
"description": "Network traffic (bytes)",
"id": "528d0485-f947-470d-95f3-59eae157ebb6",
"layout": {
"h": 4,
"h": 7,
"i": "528d0485-f947-470d-95f3-59eae157ebb6",
"isResizable": true,
"w": 6,
"x": 0,
"y": 5,
"i": "528d0485-f947-470d-95f3-59eae157ebb6",
"isResizable": true
"y": 8
},
"name": "Network I/O",
"options": {
@@ -395,12 +362,12 @@
"description": "Number of connections Keep in mind the hard limit on the maximum number of connections set by your distribution.",
"id": "067e97c3-4e57-447f-a9dc-a49627b6ce18",
"layout": {
"h": 4,
"h": 7,
"i": "067e97c3-4e57-447f-a9dc-a49627b6ce18",
"isResizable": true,
"w": 6,
"x": 6,
"y": 5,
"i": "067e97c3-4e57-447f-a9dc-a49627b6ce18",
"isResizable": true
"y": 8
},
"name": "Connections",
"options": {
@@ -450,12 +417,12 @@
"description": "Number of assertion errors, Asserts are not important by themselves, but you can correlate spikes with other graphs.",
"id": "9e9b7356-cf0e-4e5f-95f5-00258c576bf4",
"layout": {
"h": 4,
"h": 7,
"i": "9e9b7356-cf0e-4e5f-95f5-00258c576bf4",
"isResizable": true,
"w": 6,
"x": 12,
"y": 5,
"i": "9e9b7356-cf0e-4e5f-95f5-00258c576bf4",
"isResizable": true
"y": 8
},
"name": "Assert Events",
"options": {
@@ -505,12 +472,12 @@
"description": "Number of operations waiting to acquire locks, Any number of queued operations for long periods of time is an indication of possible issues. Find the cause and fix it before requests get stuck in the queue.",
"id": "2698f0f8-a76a-499b-99cf-30504f0f4db6",
"layout": {
"h": 4,
"h": 7,
"i": "2698f0f8-a76a-499b-99cf-30504f0f4db6",
"isResizable": true,
"w": 6,
"x": 18,
"y": 5,
"i": "2698f0f8-a76a-499b-99cf-30504f0f4db6",
"isResizable": true
"y": 8
},
"name": "Lock Queue",
"options": {
@@ -547,11 +514,11 @@
"id": "2bdb8cc9-92f4-449e-8f70-a4c470a21604",
"layout": {
"h": 1,
"i": "2bdb8cc9-92f4-449e-8f70-a4c470a21604",
"isResizable": false,
"w": 24,
"x": 0,
"y": 9,
"i": "2bdb8cc9-92f4-449e-8f70-a4c470a21604",
"isResizable": false
"y": 15
},
"name": "Operation Info",
"type": "row"
@@ -574,12 +541,12 @@
"description": "Number of requests received Shows how many times a command is executed per second on average during the selected interval.",
"id": "c2819508-95e7-4c63-aeae-ce19f92469cd",
"layout": {
"h": 5,
"h": 7,
"i": "c2819508-95e7-4c63-aeae-ce19f92469cd",
"isResizable": true,
"w": 12,
"x": 0,
"y": 10,
"i": "c2819508-95e7-4c63-aeae-ce19f92469cd",
"isResizable": true
"y": 16
},
"name": "Command Operations",
"options": {
@@ -625,12 +592,12 @@
"type": "timeseries",
"id": "7030d97a-d69f-4916-a415-ec57503ab1ed",
"layout": {
"h": 5,
"h": 7,
"i": "7030d97a-d69f-4916-a415-ec57503ab1ed",
"isResizable": true,
"w": 12,
"x": 12,
"y": 10,
"i": "7030d97a-d69f-4916-a415-ec57503ab1ed",
"isResizable": true
"y": 16
},
"version": "3.0.0",
"datasourceCate": "prometheus",
@@ -704,19 +671,19 @@
"type": "timeseries",
"id": "1c3b73d5-c25c-449f-995d-26acc9c621e1",
"layout": {
"h": 5,
"h": 7,
"i": "1c3b73d5-c25c-449f-995d-26acc9c621e1",
"isResizable": true,
"w": 8,
"x": 0,
"y": 15,
"i": "1c3b73d5-c25c-449f-995d-26acc9c621e1",
"isResizable": true
"y": 23
},
"version": "3.0.0",
"datasourceCate": "prometheus",
"datasourceValue": "${prom}",
"targets": [
{
"expr": "rate(mongodb_ss_opLatencies_latency{}[5m]) / rate(mongodb_ss_opLatencies_latency{}[5m]) / 1000",
"expr": "rate(mongodb_ss_opLatencies_latency{instance='$instance'}[5m]) / rate(mongodb_ss_opLatencies_latency{instance='$instance'}[5m]) / 1000",
"legend": "{{op_type}}",
"refId": "A",
"maxDataPoints": 240
@@ -799,12 +766,12 @@
"description": "",
"id": "e642183c-8ba2-4f60-abc6-c65de49e7577",
"layout": {
"h": 5,
"h": 7,
"i": "e642183c-8ba2-4f60-abc6-c65de49e7577",
"isResizable": true,
"w": 8,
"x": 8,
"y": 15,
"i": "e642183c-8ba2-4f60-abc6-c65de49e7577",
"isResizable": true
"y": 23
},
"name": "Query Efficiency",
"options": {
@@ -861,12 +828,12 @@
"description": "number of cursors Helps identify why connections are increasing. Shows active cursors compared to cursors being automatically killed after 10 minutes due to an application not closing the connection.",
"id": "8b5a4f44-3291-4822-ab73-f56be6c62674",
"layout": {
"h": 5,
"h": 7,
"i": "8b5a4f44-3291-4822-ab73-f56be6c62674",
"isResizable": true,
"w": 8,
"x": 16,
"y": 15,
"i": "8b5a4f44-3291-4822-ab73-f56be6c62674",
"isResizable": true
"y": 23
},
"name": "Cursors",
"options": {
@@ -903,11 +870,11 @@
"id": "06946b19-94b4-4f72-bd87-70f87989257d",
"layout": {
"h": 1,
"i": "06946b19-94b4-4f72-bd87-70f87989257d",
"isResizable": false,
"w": 24,
"x": 0,
"y": 20,
"i": "06946b19-94b4-4f72-bd87-70f87989257d",
"isResizable": false
"y": 30
},
"name": "Cache Info",
"panels": [],
@@ -917,19 +884,19 @@
"type": "timeseries",
"id": "bb0ae571-43a1-430b-8f63-256f6f1ebee6",
"layout": {
"h": 5,
"h": 7,
"i": "bb0ae571-43a1-430b-8f63-256f6f1ebee6",
"isResizable": true,
"w": 6,
"x": 0,
"y": 21,
"i": "bb0ae571-43a1-430b-8f63-256f6f1ebee6",
"isResizable": true
"y": 31
},
"version": "3.0.0",
"datasourceCate": "prometheus",
"datasourceValue": "${prom}",
"targets": [
{
"expr": "mongodb_ss_wt_cache_bytes_currently_in_the_cache{}",
"expr": "mongodb_ss_wt_cache_bytes_currently_in_the_cache{instance='$instance'}",
"legend": "total",
"refId": "A",
"maxDataPoints": 240
@@ -975,8 +942,7 @@
"selectMode": "single"
},
"standardOptions": {
"util": "bytesIEC",
"decimals": 2
"util": "bytesIEC"
},
"thresholds": {
"steps": [
@@ -1017,19 +983,19 @@
"type": "timeseries",
"id": "f1ffd169-2a1a-42bc-9647-0e6621be0fef",
"layout": {
"h": 5,
"h": 7,
"i": "f1ffd169-2a1a-42bc-9647-0e6621be0fef",
"isResizable": true,
"w": 6,
"x": 6,
"y": 21,
"i": "f1ffd169-2a1a-42bc-9647-0e6621be0fef",
"isResizable": true
"y": 31
},
"version": "3.0.0",
"datasourceCate": "prometheus",
"datasourceValue": "${prom}",
"targets": [
{
"expr": "rate(mongodb_ss_wt_cache_bytes_read_into_cache{}[5m])",
"expr": "rate(mongodb_ss_wt_cache_bytes_read_into_cache{instance='$instance'}[5m])",
"legend": "read",
"refId": "A",
"maxDataPoints": 240
@@ -1104,19 +1070,19 @@
"type": "timeseries",
"id": "43ee140d-ae6d-474a-9892-fa4743d7f97e",
"layout": {
"h": 5,
"h": 7,
"i": "43ee140d-ae6d-474a-9892-fa4743d7f97e",
"isResizable": true,
"w": 6,
"x": 12,
"y": 21,
"i": "43ee140d-ae6d-474a-9892-fa4743d7f97e",
"isResizable": true
"y": 31
},
"version": "3.0.0",
"datasourceCate": "prometheus",
"datasourceValue": "${prom}",
"targets": [
{
"expr": "100 * sum(mongodb_ss_wt_cache_tracked_dirty_pages_in_the_cache{}) / sum(mongodb_ss_wt_cache_pages_currently_held_in_the_cache{})",
"expr": "100 * sum(mongodb_ss_wt_cache_tracked_dirty_pages_in_the_cache{instance='$instance'}) / sum(mongodb_ss_wt_cache_pages_currently_held_in_the_cache{instance='$instance'})",
"legend": "dirty rate",
"refId": "A",
"maxDataPoints": 240
@@ -1185,19 +1151,19 @@
"type": "timeseries",
"id": "1a22c31a-859a-400c-af2a-ae83c308d0f2",
"layout": {
"h": 5,
"h": 7,
"i": "1a22c31a-859a-400c-af2a-ae83c308d0f2",
"isResizable": true,
"w": 6,
"x": 18,
"y": 21,
"i": "1a22c31a-859a-400c-af2a-ae83c308d0f2",
"isResizable": true
"y": 31
},
"version": "3.0.0",
"datasourceCate": "prometheus",
"datasourceValue": "${prom}",
"targets": [
{
"expr": "rate(mongodb_mongod_wiredtiger_cache_evicted_total{}[5m])",
"expr": "rate(mongodb_mongod_wiredtiger_cache_evicted_total{instance='$instance'}[5m])",
"legend": "evicted pages",
"refId": "A",
"maxDataPoints": 240
@@ -1265,95 +1231,125 @@
"id": "b0016f4a-c565-4276-a08d-bacdf94b6b5a",
"layout": {
"h": 1,
"i": "b0016f4a-c565-4276-a08d-bacdf94b6b5a",
"isResizable": false,
"w": 24,
"x": 0,
"y": 26,
"i": "b0016f4a-c565-4276-a08d-bacdf94b6b5a",
"isResizable": false
"y": 45
},
"name": "ReplSet Info",
"type": "row"
},
{
"type": "timeseries",
"id": "f73fd0cd-ecbe-41f0-a2dc-4e02f7eaef1c",
"layout": {
"h": 5,
"w": 12,
"x": 0,
"y": 27,
"i": "f73fd0cd-ecbe-41f0-a2dc-4e02f7eaef1c",
"isResizable": true
"custom": {
"calc": "lastNotNull",
"colSpan": 1,
"colorMode": "value",
"textMode": "value",
"textSize": {},
"valueField": "Value"
},
"version": "3.0.0",
"datasourceCate": "prometheus",
"datasourceValue": "${prom}",
"targets": [
{
"expr": "mongodb_mongod_replset_member_replication_lag{instance=\"$instance\"}",
"legend": "",
"refId": "A",
"maxDataPoints": 240
}
],
"transformations": [
{
"id": "organize",
"options": {}
}
],
"name": "Replset Lag Seconds",
"description": "replica set member master-slave synchronization delay",
"maxPerRow": 4,
"description": "",
"id": "6187ceee-7c25-43f2-be1b-c44ad612ab52",
"layout": {
"h": 7,
"i": "6187ceee-7c25-43f2-be1b-c44ad612ab52",
"isResizable": true,
"w": 12,
"x": 0,
"y": 46
},
"name": "Replset Election",
"options": {
"tooltip": {
"mode": "all",
"sort": "none"
},
"legend": {
"displayMode": "hidden",
"heightInPercentage": 30,
"placement": "bottom",
"behaviour": "showItem",
"selectMode": "single"
},
"standardOptions": {
"decimals": 1,
"util": "seconds"
},
"thresholds": {
"steps": [
{
"color": "#6C53B1",
"value": null,
"type": "base"
"color": "#634CD9",
"type": "base",
"value": null
}
]
}
},
"valueMappings": [
{
"match": {
"to": 1800
},
"result": {
"color": "#f24526"
},
"type": "range"
},
{
"match": {
"from": 1800
},
"result": {
"color": "#53b503"
},
"type": "range"
}
]
},
"targets": [
{
"expr": "time() - mongodb_mongod_replset_member_election_date",
"refId": "A"
}
],
"type": "stat",
"version": "2.0.0"
},
{
"custom": {
"drawStyle": "lines",
"lineInterpolation": "smooth",
"spanNulls": false,
"lineWidth": 2,
"fillOpacity": 0.3,
"gradientMode": "opacity",
"stack": "off",
"scaleDistribution": {
"type": "linear"
},
"showPoints": "none",
"pointSize": 5
"lineInterpolation": "smooth",
"lineWidth": 2,
"stack": "off"
},
"overrides": [
{
"matcher": {
"id": "byFrameRefID"
},
"properties": {
"rightYAxisDisplay": "off"
}
"datasourceCate": "prometheus",
"datasourceValue": "${prom}",
"description": "replica set member master-slave synchronization delay",
"id": "f73fd0cd-ecbe-41f0-a2dc-4e02f7eaef1c",
"layout": {
"h": 7,
"i": "f73fd0cd-ecbe-41f0-a2dc-4e02f7eaef1c",
"isResizable": true,
"w": 12,
"x": 12,
"y": 46
},
"name": "Replset Lag Seconds",
"options": {
"legend": {
"displayMode": "hidden"
},
"standardOptions": {
"util": "seconds"
},
"thresholds": {},
"tooltip": {
"mode": "all",
"sort": "none"
}
]
},
"targets": [
{
"expr": "mongodb_mongod_replset_member_replication_lag{instance=\"$instance\"}",
"legend": "lag",
"refId": "A"
}
],
"type": "timeseries",
"version": "2.0.0"
}
],
"var": [
@@ -1375,4 +1371,4 @@
"version": "3.0.0"
},
"uuid": 1717556328065329000
}
}

View File

@@ -1,7 +1,9 @@
package memsto
import (
"crypto/tls"
"encoding/json"
"net/http"
"strings"
"sync"
"time"
@@ -19,7 +21,7 @@ import (
type NotifyConfigCacheType struct {
ctx *ctx.Context
ConfigCache *ConfigCache
webhooks []*models.Webhook
webhooks map[string]*models.Webhook
smtp aconf.SMTPConfig
script models.NotifyScript
@@ -47,6 +49,7 @@ func NewNotifyConfigCache(ctx *ctx.Context, configCache *ConfigCache) *NotifyCon
w := &NotifyConfigCacheType{
ctx: ctx,
ConfigCache: configCache,
webhooks: make(map[string]*models.Webhook),
}
w.SyncNotifyConfigs()
return w
@@ -85,11 +88,55 @@ func (w *NotifyConfigCacheType) syncNotifyConfigs() error {
}
if strings.TrimSpace(cval) != "" {
err = json.Unmarshal([]byte(cval), &w.webhooks)
var webhooks []*models.Webhook
err = json.Unmarshal([]byte(cval), &webhooks)
if err != nil {
dumper.PutSyncRecord("webhooks", start.Unix(), -1, -1, "failed to unmarshal configs.webhook: "+err.Error())
logger.Errorf("failed to unmarshal webhooks:%s error:%v", cval, err)
}
newWebhooks := make(map[string]*models.Webhook, len(webhooks))
for i := 0; i < len(webhooks); i++ {
if webhooks[i].Batch == 0 {
webhooks[i].Batch = 1000
}
if webhooks[i].Timeout == 0 {
webhooks[i].Timeout = 10
}
if webhooks[i].RetryCount == 0 {
webhooks[i].RetryCount = 10
}
if webhooks[i].RetryInterval == 0 {
webhooks[i].RetryInterval = 10
}
if webhooks[i].Client == nil {
webhooks[i].Client = &http.Client{
Timeout: time.Second * time.Duration(webhooks[i].Timeout),
Transport: &http.Transport{
TLSClientConfig: &tls.Config{InsecureSkipVerify: webhooks[i].SkipVerify},
},
}
}
newWebhooks[webhooks[i].Url] = webhooks[i]
}
for url, wh := range newWebhooks {
if oldWh, has := w.webhooks[url]; has && oldWh.Hash() != wh.Hash() {
w.webhooks[url] = wh
} else {
w.webhooks[url] = wh
}
}
for url := range w.webhooks {
if _, has := newWebhooks[url]; !has {
delete(w.webhooks, url)
}
}
}
dumper.PutSyncRecord("webhooks", start.Unix(), time.Since(start).Milliseconds(), len(w.webhooks), "success, webhooks:\n"+cval)
@@ -133,7 +180,7 @@ func (w *NotifyConfigCacheType) syncNotifyConfigs() error {
return nil
}
func (w *NotifyConfigCacheType) GetWebhooks() []*models.Webhook {
func (w *NotifyConfigCacheType) GetWebhooks() map[string]*models.Webhook {
w.RWMutex.RLock()
defer w.RWMutex.RUnlock()
return w.webhooks

View File

@@ -372,12 +372,14 @@ func GetHostsQuery(queries []HostQuery) []map[string]interface{} {
blank += " "
}
} else {
blank := " "
var args []interface{}
var query []string
for _, tag := range lst {
m["tags not like ?"+blank] = "%" + tag + "%"
m["host_tags not like ?"+blank] = "%" + tag + "%"
blank += " "
query = append(query, "tags not like ?",
"(host_tags not like ? or host_tags is null)")
args = append(args, "%"+tag+"%", "%"+tag+"%")
}
m[strings.Join(query, " and ")] = args
}
case "hosts":
lst := []string{}
@@ -398,11 +400,13 @@ func GetHostsQuery(queries []HostQuery) []map[string]interface{} {
blank += " "
}
} else if q.Op == "!~" {
blank := " "
var args []interface{}
var query []string
for _, host := range lst {
m["ident not like ?"+blank] = strings.ReplaceAll(host, "*", "%")
blank += " "
query = append(query, "ident not like ?")
args = append(args, strings.ReplaceAll(host, "*", "%"))
}
m[strings.Join(query, " and ")] = args
}
}
query = append(query, m)

View File

@@ -115,68 +115,54 @@ func BusiGroupExists(ctx *ctx.Context, where string, args ...interface{}) (bool,
return num > 0, err
}
var entries = []struct {
entry interface{}
errorMessage string
}{
{
entry: &AlertRule{},
errorMessage: "Some alert rules still in the BusiGroup",
},
{
entry: &AlertMute{},
errorMessage: "Some alert mutes still in the BusiGroup",
},
{
entry: &AlertSubscribe{},
errorMessage: "Some alert subscribes still in the BusiGroup",
},
{
entry: &Target{},
errorMessage: "Some targets still in the BusiGroup",
},
{
entry: &RecordingRule{},
errorMessage: "Some recording rules still in the BusiGroup",
},
{
entry: &TaskTpl{},
errorMessage: "Some recovery scripts still in the BusiGroup",
},
{
entry: &TaskRecord{},
errorMessage: "Some Task Record records still in the BusiGroup",
},
{
entry: &TargetBusiGroup{},
errorMessage: "Some target busigroups still in the BusiGroup",
},
}
func (bg *BusiGroup) Del(ctx *ctx.Context) error {
has, err := Exists(DB(ctx).Model(&AlertMute{}).Where("group_id=?", bg.Id))
if err != nil {
return err
}
for _, e := range entries {
has, err := Exists(DB(ctx).Model(e.entry).Where("group_id=?", bg.Id))
if err != nil {
return err
}
if has {
return errors.New("Some alert mutes still in the BusiGroup")
}
has, err = Exists(DB(ctx).Model(&AlertSubscribe{}).Where("group_id=?", bg.Id))
if err != nil {
return err
}
if has {
return errors.New("Some alert subscribes still in the BusiGroup")
}
has, err = Exists(DB(ctx).Model(&TargetBusiGroup{}).Where("group_id=?", bg.Id))
if err != nil {
return err
}
if has {
return errors.New("Some targets still in the BusiGroup")
}
has, err = Exists(DB(ctx).Model(&Board{}).Where("group_id=?", bg.Id))
if err != nil {
return err
}
if has {
return errors.New("Some dashboards still in the BusiGroup")
}
has, err = Exists(DB(ctx).Model(&TaskTpl{}).Where("group_id=?", bg.Id))
if err != nil {
return err
}
if has {
return errors.New("Some recovery scripts still in the BusiGroup")
}
// hasCR, err := Exists(DB(ctx).Table("collect_rule").Where("group_id=?", bg.Id))
// if err != nil {
// return err
// }
// if hasCR {
// return errors.New("Some collect rules still in the BusiGroup")
// }
has, err = Exists(DB(ctx).Model(&AlertRule{}).Where("group_id=?", bg.Id))
if err != nil {
return err
}
if has {
return errors.New("Some alert rules still in the BusiGroup")
if has {
return errors.New(e.errorMessage)
}
}
return DB(ctx).Transaction(func(tx *gorm.DB) error {

View File

@@ -92,7 +92,7 @@ func MigrateTables(db *gorm.DB) error {
for _, dt := range asyncDts {
if err := db.AutoMigrate(dt); err != nil {
logger.Errorf("failed to migrate table: %v", err)
logger.Errorf("failed to migrate table %+v err:%v", dt, err)
}
}
}()
@@ -188,14 +188,20 @@ func InsertPermPoints(db *gorm.DB) {
})
for _, op := range ops {
exists, err := models.Exists(db.Model(&models.RoleOperation{}).Where("operation = ? and role_name = ?", op.Operation, op.RoleName))
var count int64
err := db.Raw("SELECT COUNT(*) FROM role_operation WHERE operation = ? AND role_name = ?",
op.Operation, op.RoleName).Scan(&count).Error
if err != nil {
logger.Errorf("check role operation exists failed, %v", err)
continue
}
if exists {
if count > 0 {
continue
}
err = db.Create(&op).Error
if err != nil {
logger.Errorf("insert role operation failed, %v", err)

View File

@@ -0,0 +1,69 @@
package migrate
import (
"fmt"
"testing"
"github.com/ccfos/nightingale/v6/models"
"gorm.io/driver/mysql"
"gorm.io/gorm"
"gorm.io/gorm/schema"
)
// TestInsertPermPoints verifies that the role_operation seeding logic is
// idempotent: rows that already exist are skipped and only missing ones are
// inserted. It requires a local MySQL with the n9e_v6 schema; when the
// database is unreachable the test is skipped instead of continuing with a
// nil *gorm.DB (which would panic on the first use).
func TestInsertPermPoints(t *testing.T) {
	db, err := gorm.Open(mysql.Open("root:1234@tcp(127.0.0.1:3306)/n9e_v6?charset=utf8mb4&parseTime=True&loc=Local&allowNativePasswords=true"), &gorm.Config{NamingStrategy: schema.NamingStrategy{
		SingularTable: true,
	}})
	if err != nil {
		// Skip rather than fall through: the original code kept going and
		// dereferenced the nil db handle at db.Debug(), crashing the run.
		t.Skipf("failed to connect database: %v", err)
	}

	// Seed data mirroring the permission points added by InsertPermPoints.
	var ops []models.RoleOperation
	ops = append(ops, models.RoleOperation{
		RoleName:  "Standard",
		Operation: "/alert-mutes/put",
	})
	ops = append(ops, models.RoleOperation{
		RoleName:  "Standard",
		Operation: "/log/index-patterns",
	})
	ops = append(ops, models.RoleOperation{
		RoleName:  "Standard",
		Operation: "/help/variable-configs",
	})
	ops = append(ops, models.RoleOperation{
		RoleName:  "Admin",
		Operation: "/permissions",
	})
	ops = append(ops, models.RoleOperation{
		RoleName:  "Standard",
		Operation: "/ibex-settings",
	})

	db = db.Debug()

	for _, op := range ops {
		// Existence check via raw COUNT(*) — same query shape the migration uses.
		var count int64
		err := db.Raw("SELECT COUNT(*) FROM role_operation WHERE operation = ? AND role_name = ?",
			op.Operation, op.RoleName).Scan(&count).Error
		fmt.Printf("count: %d\n", count)
		if err != nil {
			// Report through the test framework so failures are visible in `go test`.
			t.Errorf("check role operation exists failed, %v", err)
			continue
		}
		if count > 0 {
			// Already seeded; inserting again would duplicate the row.
			continue
		}
		err = db.Create(&op).Error
		if err != nil {
			t.Errorf("insert role operation failed, %v", err)
		}
	}
}

View File

@@ -1,5 +1,12 @@
package models
import (
"fmt"
"net/http"
"github.com/toolkits/pkg/str"
)
const WEBHOOKKEY = "webhook"
const NOTIFYSCRIPT = "notify_script"
const NOTIFYCHANNEL = "notify_channel"
@@ -24,6 +31,11 @@ type Webhook struct {
RetryCount int `json:"retry_count"`
RetryInterval int `json:"retry_interval"`
Batch int `json:"batch"`
Client *http.Client `json:"-"`
}
// Hash returns an MD5 fingerprint over the webhook's user-configurable
// fields (type, enable flag, url, basic-auth credentials, timeout, header
// map, TLS skip-verify, note, retry and batch settings). It lets the config
// cache detect whether a webhook entry changed between syncs; the
// runtime-only Client field is deliberately left out of the fingerprint.
func (w *Webhook) Hash() string {
	return str.MD5(fmt.Sprintf("%d_%t_%s_%s_%s_%d_%v_%t_%s_%d_%d_%d", w.Type, w.Enable, w.Url, w.BasicAuthUser, w.BasicAuthPass, w.Timeout, w.HeaderMap, w.SkipVerify, w.Note, w.RetryCount, w.RetryInterval, w.Batch))
}
type NotifyScript struct {

View File

@@ -185,8 +185,16 @@ func BuildTargetWhereWithQuery(query string) BuildTargetWhereOption {
if query != "" {
arr := strings.Fields(query)
for i := 0; i < len(arr); i++ {
q := "%" + arr[i] + "%"
session = session.Where("ident like ? or host_ip like ? or note like ? or tags like ? or host_tags like ? or os like ?", q, q, q, q, q, q)
if strings.HasPrefix(arr[i], "-") {
q := "%" + arr[i][1:] + "%"
session = session.Where("ident not like ? and host_ip not like ? and "+
"note not like ? and tags not like ? and (host_tags not like ? or "+
"host_tags is null) and os not like ?", q, q, q, q, q, q)
} else {
q := "%" + arr[i] + "%"
session = session.Where("ident like ? or host_ip like ? or note like ? or "+
"tags like ? or host_tags like ? or os like ?", q, q, q, q, q, q)
}
}
}
return session
@@ -197,6 +205,8 @@ func BuildTargetWhereWithDowntime(downtime int64) BuildTargetWhereOption {
return func(session *gorm.DB) *gorm.DB {
if downtime > 0 {
session = session.Where("target.update_at < ?", time.Now().Unix()-downtime)
} else if downtime < 0 {
session = session.Where("target.update_at > ?", time.Now().Unix()+downtime)
}
return session
}
@@ -270,7 +280,11 @@ func TargetFilterQueryBuild(ctx *ctx.Context, query []map[string]interface{}, li
for _, q := range query {
tx := DB(ctx).Model(&Target{})
for k, v := range q {
tx = tx.Or(k, v)
if strings.Count(k, "?") > 1 {
tx = tx.Or(k, v.([]interface{})...)
} else {
tx = tx.Or(k, v)
}
}
sub = sub.Where(tx)
}

View File

@@ -24,8 +24,10 @@ type Pushgw struct {
}
type WriterGlobalOpt struct {
QueueMaxSize int
QueuePopSize int
QueueMaxSize int
QueuePopSize int
AllQueueMaxSize int
AllQueueMaxSizeInterval int
}
type WriterOptions struct {
@@ -77,6 +79,14 @@ func (p *Pushgw) PreCheck() {
p.WriterOpt.QueuePopSize = 1000
}
if p.WriterOpt.AllQueueMaxSize <= 0 {
p.WriterOpt.AllQueueMaxSize = 10000000
}
if p.WriterOpt.AllQueueMaxSizeInterval <= 0 {
p.WriterOpt.AllQueueMaxSizeInterval = 200
}
if p.WriteConcurrency <= 0 {
p.WriteConcurrency = 5000
}

View File

@@ -8,6 +8,7 @@ import (
"net/http"
"strings"
"sync"
"sync/atomic"
"time"
"github.com/ccfos/nightingale/v6/pkg/fasttime"
@@ -138,9 +139,10 @@ func (w WriterType) Post(req []byte, headers ...map[string]string) error {
}
type WritersType struct {
pushgw pconf.Pushgw
backends map[string]WriterType
queues map[string]*IdentQueue
pushgw pconf.Pushgw
backends map[string]WriterType
queues map[string]*IdentQueue
allQueueLen atomic.Value
sync.RWMutex
}
@@ -160,14 +162,30 @@ func (ws *WritersType) ReportQueueStats(ident string, identQueue *IdentQueue) (i
}
}
// SetAllQueueLen periodically sums the lengths of all ident queues and
// publishes the total via ws.allQueueLen, so PushSample can enforce the
// global AllQueueMaxSize limit with a single atomic load instead of
// walking every queue on each push. The refresh interval is
// WriterOpt.AllQueueMaxSizeInterval in milliseconds (defaults to 200).
//
// NOTE(review): this loop never returns and has no stop channel or
// context; it is started once from NewWriters and is expected to live for
// the whole process — confirm nothing creates short-lived WritersType
// values, or each would leak this goroutine.
func (ws *WritersType) SetAllQueueLen() {
	for {
		curMetricLen := 0
		// Read-lock only while iterating the queues map; Store happens outside.
		ws.RLock()
		for _, q := range ws.queues {
			curMetricLen += q.list.Len()
		}
		ws.RUnlock()

		ws.allQueueLen.Store(curMetricLen)
		time.Sleep(time.Duration(ws.pushgw.WriterOpt.AllQueueMaxSizeInterval) * time.Millisecond)
	}
}
func NewWriters(pushgwConfig pconf.Pushgw) *WritersType {
writers := &WritersType{
backends: make(map[string]WriterType),
queues: make(map[string]*IdentQueue),
pushgw: pushgwConfig,
backends: make(map[string]WriterType),
queues: make(map[string]*IdentQueue),
pushgw: pushgwConfig,
allQueueLen: atomic.Value{},
}
writers.Init()
go writers.SetAllQueueLen()
go writers.CleanExpQueue()
return writers
}
@@ -217,6 +235,13 @@ func (ws *WritersType) PushSample(ident string, v interface{}) {
}
identQueue.ts = time.Now().Unix()
curLen := ws.allQueueLen.Load().(int)
if curLen > ws.pushgw.WriterOpt.AllQueueMaxSize {
logger.Warningf("Write %+v full, metric count over limit: %d", v, curLen)
CounterPushQueueErrorTotal.WithLabelValues(ident).Inc()
return
}
succ := identQueue.list.PushFront(v)
if !succ {
logger.Warningf("Write channel(%s) full, current channel size: %d", ident, identQueue.list.Len())
@@ -245,6 +270,7 @@ func (ws *WritersType) StartConsumer(identQueue *IdentQueue) {
func (ws *WritersType) Init() error {
opts := ws.pushgw.Writers
ws.allQueueLen.Store(0)
for i := 0; i < len(opts); i++ {
tlsConf, err := opts[i].ClientConfig.TLSConfig()

View File

@@ -7,6 +7,7 @@ import (
"os"
"strings"
"github.com/alicebob/miniredis/v2"
"github.com/ccfos/nightingale/v6/pkg/tlsx"
"github.com/redis/go-redis/v9"
"github.com/toolkits/pkg/logger"
@@ -28,6 +29,7 @@ type Redis redis.Cmdable
func NewRedis(cfg RedisConfig) (Redis, error) {
var redisClient Redis
switch cfg.RedisType {
case "standalone", "":
redisOptions := &redis.Options{
@@ -88,6 +90,16 @@ func NewRedis(cfg RedisConfig) (Redis, error) {
redisClient = redis.NewFailoverClient(redisOptions)
case "miniredis":
s, err := miniredis.Run()
if err != nil {
fmt.Println("failed to init miniredis:", err)
os.Exit(1)
}
redisClient = redis.NewClient(&redis.Options{
Addr: s.Addr(),
})
default:
fmt.Println("failed to init redis , redis type is illegal:", cfg.RedisType)
os.Exit(1)

44
storage/redis_test.go Normal file
View File

@@ -0,0 +1,44 @@
package storage
import (
"context"
"testing"
"github.com/alicebob/miniredis/v2"
"github.com/redis/go-redis/v9"
"github.com/stretchr/testify/assert"
)
// TestMiniRedisMGet exercises the storage package's MSet/MGet helpers
// against an in-process miniredis server, so no external Redis is needed.
func TestMiniRedisMGet(t *testing.T) {
	// Start an embedded redis-compatible server for the duration of the test.
	s, err := miniredis.Run()
	if err != nil {
		t.Fatalf("failed to start miniredis: %v", err)
	}
	defer s.Close()

	rdb := redis.NewClient(&redis.Options{
		Addr: s.Addr(),
	})

	// Sanity-check connectivity before exercising the helpers.
	err = rdb.Ping(context.Background()).Err()
	if err != nil {
		t.Fatalf("failed to ping miniredis: %v", err)
	}

	mp := make(map[string]interface{})
	mp["key1"] = "value1"
	mp["key2"] = "value2"
	mp["key3"] = "value3"
	err = MSet(context.Background(), rdb, mp)
	if err != nil {
		t.Fatalf("failed to set miniredis value: %v", err)
	}

	ctx := context.Background()
	// "key4" is intentionally absent; the expected slice below has only
	// three entries, so MGet presumably filters out missing keys rather
	// than returning nil placeholders — TODO confirm against the MGet
	// implementation in this package.
	keys := []string{"key1", "key2", "key3", "key4"}
	vals := MGet(ctx, rdb, keys)

	expected := [][]byte{[]byte("value1"), []byte("value2"), []byte("value3")}
	assert.Equal(t, expected, vals)
}