mirror of
https://github.com/ccfos/nightingale.git
synced 2026-03-04 23:18:57 +00:00
Compare commits
1 Commits
optimize-c
...
import-pro
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
cb4ef9f85f |
@@ -315,6 +315,8 @@ func (rt *Router) Config(r *gin.Engine) {
|
||||
pages.GET("/busi-group/:id/alert-rules", rt.auth(), rt.user(), rt.perm("/alert-rules"), rt.alertRuleGets)
|
||||
pages.POST("/busi-group/:id/alert-rules", rt.auth(), rt.user(), rt.perm("/alert-rules/add"), rt.bgrw(), rt.alertRuleAddByFE)
|
||||
pages.POST("/busi-group/:id/alert-rules/import", rt.auth(), rt.user(), rt.perm("/alert-rules/add"), rt.bgrw(), rt.alertRuleAddByImport)
|
||||
pages.POST("/busi-group/:id/alert-rules/import-prom-rule", rt.auth(),
|
||||
rt.user(), rt.perm("/alert-rules/add"), rt.bgrw(), rt.alertRuleAddByImportPromRule)
|
||||
pages.DELETE("/busi-group/:id/alert-rules", rt.auth(), rt.user(), rt.perm("/alert-rules/del"), rt.bgrw(), rt.alertRuleDel)
|
||||
pages.PUT("/busi-group/:id/alert-rules/fields", rt.auth(), rt.user(), rt.perm("/alert-rules/put"), rt.bgrw(), rt.alertRulePutFields)
|
||||
pages.PUT("/busi-group/:id/alert-rule/:arid", rt.auth(), rt.user(), rt.perm("/alert-rules/put"), rt.alertRulePutByFE)
|
||||
|
||||
@@ -126,6 +126,25 @@ func (rt *Router) alertRuleAddByImport(c *gin.Context) {
|
||||
ginx.NewRender(c).Data(reterr, nil)
|
||||
}
|
||||
|
||||
func (rt *Router) alertRuleAddByImportPromRule(c *gin.Context) {
|
||||
username := c.MustGet("username").(string)
|
||||
|
||||
type PromRule struct {
|
||||
Groups []models.PromRuleGroup `yaml:"groups"`
|
||||
}
|
||||
var pr PromRule
|
||||
ginx.Dangerous(c.BindYAML(&pr))
|
||||
if len(pr.Groups) == 0 {
|
||||
ginx.Bomb(http.StatusBadRequest, "input yaml is empty")
|
||||
}
|
||||
|
||||
lst := models.DealPromGroup(pr.Groups)
|
||||
bgid := ginx.UrlParamInt64(c, "id")
|
||||
err := rt.alertRuleAdd(lst, username, bgid, c.GetHeader("X-Language"))
|
||||
|
||||
ginx.NewRender(c).Data(err, nil)
|
||||
}
|
||||
|
||||
func (rt *Router) alertRuleAddByService(c *gin.Context) {
|
||||
var lst []models.AlertRule
|
||||
ginx.BindJSON(c, &lst)
|
||||
|
||||
@@ -26,6 +26,21 @@ const (
|
||||
TDENGINE = "tdengine"
|
||||
)
|
||||
|
||||
// Default field values used when building an AlertRule from an imported
// Prometheus rule (see ConvertAlert).
const (
	// Values for AlertRule.Disabled.
	AlertRuleEnabled  = 0
	AlertRuleDisabled = 1
)

const (
	// Values for AlertRule.EnableInBG.
	AlertRuleEnableInGlobalBG = 0
	AlertRuleEnableInOneBG    = 1
)

const (
	// Values for AlertRule.NotifyRecovered.
	AlertRuleNotNotifyRecovered = 0
	AlertRuleNotifyRecovered    = 1
)

const (
	// AlertRuleNotifyRepeatStep60Min is the default AlertRule.NotifyRepeatStep
	// (presumably expressed in minutes, as the name suggests).
	AlertRuleNotifyRepeatStep60Min = 60

	// AlertRuleRecoverDuration0Sec is the default AlertRule.RecoverDuration
	// (presumably expressed in seconds, as the name suggests).
	AlertRuleRecoverDuration0Sec = 0
)
|
||||
|
||||
type AlertRule struct {
|
||||
Id int64 `json:"id" gorm:"primaryKey"`
|
||||
GroupId int64 `json:"group_id"` // busi group id
|
||||
|
||||
93
models/prom_alert_rule.go
Normal file
93
models/prom_alert_rule.go
Normal file
@@ -0,0 +1,93 @@
|
||||
package models
|
||||
|
||||
import (
	"fmt"
	"sort"
	"time"

	"github.com/toolkits/pkg/logger"
)
|
||||
|
||||
// PromRule is one entry of a Prometheus rule group, decodable from YAML or
// JSON. Entries with a non-empty Alert are alerting rules; DealPromGroup
// skips entries whose Alert is empty.
type PromRule struct {
	// Alert is the name of the alerting rule.
	Alert string `yaml:"alert,omitempty" json:"alert,omitempty"`
	// Record is the name of the recording rule.
	Record string `yaml:"record,omitempty" json:"record,omitempty"`
	// Expr is the PromQL expression.
	Expr string `yaml:"expr,omitempty" json:"expr,omitempty"`
	// For is how long the condition must hold before the alert fires.
	For string `yaml:"for,omitempty" json:"for,omitempty"`
	// Annotations holds the rule's annotations.
	Annotations map[string]string `yaml:"annotations,omitempty" json:"annotations,omitempty"`
	// Labels holds the rule's labels.
	Labels map[string]string `yaml:"labels,omitempty" json:"labels,omitempty"`
}
|
||||
|
||||
type PromRuleGroup struct {
|
||||
Name string `yaml:"name"`
|
||||
Rules []PromRule `yaml:"rules"`
|
||||
Interval string `yaml:"interval,omitempty"`
|
||||
}
|
||||
|
||||
func convertInterval(interval string) int {
|
||||
duration, err := time.ParseDuration(interval)
|
||||
if err != nil {
|
||||
logger.Errorf("Error parsing interval `%s`,err: %v", interval, err)
|
||||
return 0
|
||||
}
|
||||
return int(duration.Seconds())
|
||||
}
|
||||
|
||||
func ConvertAlert(rule PromRule, interval string) AlertRule {
|
||||
annotations := rule.Annotations
|
||||
appendTags := []string{}
|
||||
severity := 2
|
||||
|
||||
if len(rule.Labels) > 0 {
|
||||
for k, v := range rule.Labels {
|
||||
if k != "severity" {
|
||||
appendTags = append(appendTags, fmt.Sprintf("%s=%s", k, v))
|
||||
} else {
|
||||
switch v {
|
||||
case "critical":
|
||||
severity = 1
|
||||
case "warning":
|
||||
severity = 2
|
||||
case "info":
|
||||
severity = 3
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return AlertRule{
|
||||
Name: rule.Alert,
|
||||
Severity: severity,
|
||||
Disabled: AlertRuleEnabled,
|
||||
PromForDuration: convertInterval(rule.For),
|
||||
PromQl: rule.Expr,
|
||||
PromEvalInterval: convertInterval(interval),
|
||||
EnableStimeJSON: "00:00",
|
||||
EnableEtimeJSON: "23:59",
|
||||
EnableDaysOfWeekJSON: []string{
|
||||
"1", "2", "3", "4", "5", "6", "0",
|
||||
},
|
||||
EnableInBG: AlertRuleEnableInGlobalBG,
|
||||
NotifyRecovered: AlertRuleNotifyRecovered,
|
||||
NotifyRepeatStep: AlertRuleNotifyRepeatStep60Min,
|
||||
RecoverDuration: AlertRuleRecoverDuration0Sec,
|
||||
AnnotationsJSON: annotations,
|
||||
AppendTagsJSON: appendTags,
|
||||
}
|
||||
}
|
||||
|
||||
func DealPromGroup(promRule []PromRuleGroup) []AlertRule {
|
||||
var alertRules []AlertRule
|
||||
|
||||
for _, group := range promRule {
|
||||
interval := group.Interval
|
||||
if interval == "" {
|
||||
interval = "15s"
|
||||
}
|
||||
for _, rule := range group.Rules {
|
||||
if rule.Alert != "" {
|
||||
alertRules = append(alertRules, ConvertAlert(rule, interval))
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return alertRules
|
||||
}
|
||||
84
models/prom_alert_rule_test.go
Normal file
84
models/prom_alert_rule_test.go
Normal file
@@ -0,0 +1,84 @@
|
||||
package models_test
|
||||
|
||||
import (
|
||||
"testing"
|
||||
|
||||
"github.com/ccfos/nightingale/v6/models"
|
||||
"gopkg.in/yaml.v2"
|
||||
)
|
||||
|
||||
func TestConvertAlert(t *testing.T) {
|
||||
jobMissing := []models.PromRule{}
|
||||
err := yaml.Unmarshal([]byte(` - alert: PrometheusJobMissing
|
||||
expr: absent(up{job="prometheus"})
|
||||
for: 1m
|
||||
labels:
|
||||
severity: warning
|
||||
annotations:
|
||||
summary: Prometheus job missing (instance {{ $labels.instance }})
|
||||
description: "A Prometheus job has disappeared\n VALUE = {{ $value }}\n LABELS = {{ $labels }}"`), &jobMissing)
|
||||
if err != nil {
|
||||
t.Errorf("Failed to Unmarshal, err: %s", err)
|
||||
}
|
||||
t.Logf("jobMissing: %+v", jobMissing[0])
|
||||
convJobMissing := models.ConvertAlert(jobMissing[0], "30s")
|
||||
if convJobMissing.PromEvalInterval != 30 {
|
||||
t.Errorf("PromEvalInterval is expected to be 30, but got %d",
|
||||
convJobMissing.PromEvalInterval)
|
||||
}
|
||||
if convJobMissing.PromForDuration != 60 {
|
||||
t.Errorf("PromForDuration is expected to be 60, but got %d",
|
||||
convJobMissing.PromForDuration)
|
||||
}
|
||||
if convJobMissing.Severity != 2 {
|
||||
t.Errorf("Severity is expected to be 2, but got %d", convJobMissing.Severity)
|
||||
}
|
||||
|
||||
ruleEvaluationSlow := []models.PromRule{}
|
||||
yaml.Unmarshal([]byte(` - alert: PrometheusRuleEvaluationSlow
|
||||
expr: prometheus_rule_group_last_duration_seconds > prometheus_rule_group_interval_seconds
|
||||
for: 180s
|
||||
labels:
|
||||
severity: info
|
||||
annotations:
|
||||
summary: Prometheus rule evaluation slow (instance {{ $labels.instance }})
|
||||
description: "Prometheus rule evaluation took more time than the scheduled interval. It indicates a slower storage backend access or too complex query.\n VALUE = {{ $value }}\n LABELS = {{ $labels }}"
|
||||
`), &ruleEvaluationSlow)
|
||||
t.Logf("ruleEvaluationSlow: %+v", ruleEvaluationSlow[0])
|
||||
convRuleEvaluationSlow := models.ConvertAlert(ruleEvaluationSlow[0], "1m")
|
||||
if convRuleEvaluationSlow.PromEvalInterval != 60 {
|
||||
t.Errorf("PromEvalInterval is expected to be 60, but got %d",
|
||||
convJobMissing.PromEvalInterval)
|
||||
}
|
||||
if convRuleEvaluationSlow.PromForDuration != 180 {
|
||||
t.Errorf("PromForDuration is expected to be 180, but got %d",
|
||||
convJobMissing.PromForDuration)
|
||||
}
|
||||
if convRuleEvaluationSlow.Severity != 3 {
|
||||
t.Errorf("Severity is expected to be 3, but got %d", convJobMissing.Severity)
|
||||
}
|
||||
|
||||
targetMissing := []models.PromRule{}
|
||||
yaml.Unmarshal([]byte(` - alert: PrometheusTargetMissing
|
||||
expr: up == 0
|
||||
for: 1.5m
|
||||
labels:
|
||||
severity: critical
|
||||
annotations:
|
||||
summary: Prometheus target missing (instance {{ $labels.instance }})
|
||||
description: "A Prometheus target has disappeared. An exporter might be crashed.\n VALUE = {{ $value }}\n LABELS = {{ $labels }}"
|
||||
`), &targetMissing)
|
||||
t.Logf("targetMissing: %+v", targetMissing[0])
|
||||
convTargetMissing := models.ConvertAlert(targetMissing[0], "1h")
|
||||
if convTargetMissing.PromEvalInterval != 3600 {
|
||||
t.Errorf("PromEvalInterval is expected to be 3600, but got %d",
|
||||
convTargetMissing.PromEvalInterval)
|
||||
}
|
||||
if convTargetMissing.PromForDuration != 90 {
|
||||
t.Errorf("PromForDuration is expected to be 90, but got %d",
|
||||
convTargetMissing.PromForDuration)
|
||||
}
|
||||
if convTargetMissing.Severity != 1 {
|
||||
t.Errorf("Severity is expected to be 1, but got %d", convTargetMissing.Severity)
|
||||
}
|
||||
}
|
||||
Reference in New Issue
Block a user