Compare commits

...

12 Commits

Author SHA1 Message Date
Ulric Qin
77e4499a32 refactor linux dashboard 2022-07-27 19:05:00 +08:00
ulricqin
7c351e09e5 add api: /board/:bid/pure (#1073) 2022-07-27 14:30:35 +08:00
xiaoziv
14ad3b1b0a fix proxy auth username error (#1072) 2022-07-27 14:13:48 +08:00
Ulric Qin
184867d07c feature: query busigroup by ident 2022-07-27 13:13:17 +08:00
Ulric Qin
3476b95b35 fix: query busigroup by ident 2022-07-26 18:23:14 +08:00
Ulric Qin
76e105c93a query busigroup by ident 2022-07-26 17:59:57 +08:00
Ulric Qin
39705787c9 Merge branch 'main' of github.com:ccfos/nightingale 2022-07-26 15:54:42 +08:00
Ulric Qin
293680a9cd use english comma 2022-07-26 15:54:25 +08:00
Yening Qin
05005357fb feat: push event api add mute (#1070) 2022-07-25 16:05:35 +08:00
ulricqin
ba7ff133e6 modify prometheus query batch response format (#1068) 2022-07-23 17:50:16 +08:00
ulricqin
0bd7ba9549 code refactor notify (#1066) 2022-07-22 18:12:42 +08:00
ulricqin
17c7361620 code refactor notify plugin (#1065) 2022-07-22 17:56:52 +08:00
15 changed files with 1721 additions and 1631 deletions

File diff suppressed because it is too large Load Diff

View File

@@ -7,13 +7,6 @@ import (
"github.com/tidwall/gjson"
)
// inter lists the methods a plugin caller has to implement so that the host
// process can invoke it for alert notification.
type inter interface {
// Descript returns a description string for the implementation.
Descript() string
// Notify is handed the raw notification payload as bytes.
Notify([]byte)
// NotifyMaintainer is handed the raw maintainer-notification payload as bytes.
NotifyMaintainer([]byte)
}
// N9E complete
type N9EPlugin struct {
Name string
@@ -41,13 +34,13 @@ func (n *N9EPlugin) Notify(bs []byte) {
// NotifyMaintainer is the demo implementation of the maintainer hook: it
// echoes the payload it receives to stdout between a begin and an end marker.
//
// bs is the raw payload handed over by the host; it is printed verbatim.
func (n *N9EPlugin) NotifyMaintainer(bs []byte) {
	fmt.Println("do something... begin")
	// The former fmt.Println("%T", result) was dropped: Println does not
	// interpret format verbs, so it only printed the literal "%T", and the
	// value is always a string anyway.
	result := string(bs)
	fmt.Println(result)
	fmt.Println("do something... end")
}
// N9eCaller is the symbol the host looks up in this plugin for alerting calls.
// The first letter must be capitalized so that the variable is exported.
//
// Name and Description are each listed once: repeating a field inside a
// composite literal is a compile error ("duplicate field name").
var N9eCaller = N9EPlugin{
	Name:        "N9EPlugin",
	Description: "Notify by lib",
	// Evaluated when the package is initialized, so this records the moment
	// the plugin was loaded rather than the moment it was compiled.
	BuildAt: time.Now().Local().Format("2006/01/02 15:04:05"),
}

View File

@@ -71,6 +71,20 @@ func (b *Board) Del() error {
})
}
// BoardGetByID fetches the board whose id column equals id.
//
// A missing row is not treated as an error: in that case both return values
// are nil. A query failure is returned as-is with a nil board.
func BoardGetByID(id int64) (*Board, error) {
	var boards []*Board

	if err := DB().Where("id = ?", id).Find(&boards).Error; err != nil {
		return nil, err
	}

	if len(boards) > 0 {
		return boards[0], nil
	}

	return nil, nil
}
// BoardGet for detail page
func BoardGet(where string, args ...interface{}) (*Board, error) {
var lst []*Board

View File

@@ -450,6 +450,21 @@ func (u *User) BusiGroups(limit int, query string, all ...bool) ([]BusiGroup, er
var lst []BusiGroup
if u.IsAdmin() || (len(all) > 0 && all[0]) {
err := session.Where("name like ?", "%"+query+"%").Find(&lst).Error
if err != nil {
return lst, err
}
if len(lst) == 0 && len(query) > 0 {
	// Undocumented convenience: the query may actually be a target ident.
	// The name search above found nothing, so retry by treating the query
	// as an ident and returning the busi group that owns that target.
	var t *Target
	t, err = TargetGet("ident=?", query)
	if err != nil {
		return lst, err
	}

	// Guard against a lookup that matched no target; dereferencing
	// t.GroupId on a nil pointer would panic.
	// NOTE(review): assumes TargetGet reports "not found" as (nil, nil),
	// like BoardGetByID in this package — confirm.
	if t == nil {
		return lst, nil
	}

	err = DB().Order("name").Limit(limit).Where("id=?", t.GroupId).Find(&lst).Error
}
return lst, err
}
@@ -468,6 +483,22 @@ func (u *User) BusiGroups(limit int, query string, all ...bool) ([]BusiGroup, er
}
err = session.Where("id in ?", busiGroupIds).Where("name like ?", "%"+query+"%").Find(&lst).Error
if err != nil {
return nil, err
}
if len(lst) == 0 && len(query) > 0 {
	// The name search found nothing: retry by treating the query as a
	// target ident, but only expose the owning busi group when it is one
	// of the groups this user is allowed to see.
	var t *Target
	t, err = TargetGet("ident=?", query)
	if err != nil {
		return lst, err
	}

	// t may be nil when no target carries this ident; skip the follow-up
	// query instead of dereferencing a nil pointer.
	// NOTE(review): assumes TargetGet reports "not found" as (nil, nil) — confirm.
	if t != nil && slice.ContainsInt64(busiGroupIds, t.GroupId) {
		err = DB().Order("name").Limit(limit).Where("id=?", t.GroupId).Find(&lst).Error
	}
}
return lst, err
}

9
src/notifier/notifier.go Normal file
View File

@@ -0,0 +1,9 @@
package notifier
// Notifier is the contract a notify plugin symbol must satisfy; the config
// loader type-asserts the looked-up plugin symbol to this interface.
type Notifier interface {
// Descript returns a description string for the implementation.
Descript() string
// Notify receives the alert notification payload as raw bytes.
Notify([]byte)
// NotifyMaintainer receives the maintainer notification payload as raw
// bytes (the engine passes a JSON-encoded MaintainMessage).
NotifyMaintainer([]byte)
}
// Instance is the package-level notifier used by the alerting code. The server
// config loader assigns it when the call-plugin feature is enabled; until
// something assigns it, it holds the nil interface value.
var Instance Notifier

View File

@@ -2,8 +2,11 @@ package config
import (
"fmt"
"log"
"net"
"os"
"plugin"
"runtime"
"strings"
"sync"
"time"
@@ -11,6 +14,7 @@ import (
"github.com/gin-gonic/gin"
"github.com/koding/multiconfig"
"github.com/didi/nightingale/v5/src/notifier"
"github.com/didi/nightingale/v5/src/pkg/httpx"
"github.com/didi/nightingale/v5/src/pkg/logx"
"github.com/didi/nightingale/v5/src/pkg/ormx"
@@ -100,6 +104,33 @@ func MustLoad(fpaths ...string) {
}
}
if C.Alerting.CallPlugin.Enable {
if runtime.GOOS == "windows" {
fmt.Println("notify plugin on unsupported os:", runtime.GOOS)
os.Exit(1)
}
p, err := plugin.Open(C.Alerting.CallPlugin.PluginPath)
if err != nil {
fmt.Println("failed to load plugin:", err)
os.Exit(1)
}
caller, err := p.Lookup(C.Alerting.CallPlugin.Caller)
if err != nil {
fmt.Println("failed to lookup plugin Caller:", err)
os.Exit(1)
}
ins, ok := caller.(notifier.Notifier)
if !ok {
log.Println("notifier interface not implemented")
os.Exit(1)
}
notifier.Instance = ins
}
if C.WriterOpt.QueueMaxSize <= 0 {
C.WriterOpt.QueueMaxSize = 100000
}

View File

@@ -6,7 +6,7 @@ import (
)
// If the optional clock argument is supplied, it is used as the time to check against; otherwise TriggerTime is taken from the event's own field.
func isMuted(event *models.AlertCurEvent, clock ...int64) bool {
func IsMuted(event *models.AlertCurEvent, clock ...int64) bool {
mutes, has := memsto.AlertMuteCache.Gets(event.GroupId)
if !has || len(mutes) == 0 {
return false

View File

@@ -9,8 +9,6 @@ import (
"net/http"
"os/exec"
"path"
"plugin"
"runtime"
"strings"
"time"
@@ -22,6 +20,7 @@ import (
"github.com/toolkits/pkg/slice"
"github.com/didi/nightingale/v5/src/models"
"github.com/didi/nightingale/v5/src/notifier"
"github.com/didi/nightingale/v5/src/pkg/sys"
"github.com/didi/nightingale/v5/src/pkg/tplx"
"github.com/didi/nightingale/v5/src/server/common/sender"
@@ -103,7 +102,6 @@ func alertingRedisPub(bs []byte) {
func handleNotice(notice Notice, bs []byte) {
alertingCallScript(bs)
alertingCallPlugin(bs)
if len(config.C.Alerting.NotifyBuiltinChannels) == 0 {
@@ -398,12 +396,6 @@ func alertingCallScript(stdinBytes []byte) {
logger.Infof("event_notify: exec %s output: %s", fpath, buf.String())
}
// Notifier is the interface the symbol looked up in the notify plugin is
// type-asserted to before it is invoked.
type Notifier interface {
// Descript returns a description string; it is logged after a plugin call.
Descript() string
// Notify receives the notification payload as raw bytes.
Notify([]byte)
// NotifyMaintainer receives the maintainer notification payload as raw bytes.
NotifyMaintainer([]byte)
}
// alertingCallPlugin calls the notify plugin, a shared object built with Go's plugin build mode,
// e.g. etc/script/notify/notify.so
func alertingCallPlugin(stdinBytes []byte) {
@@ -411,26 +403,8 @@ func alertingCallPlugin(stdinBytes []byte) {
return
}
if runtime.GOOS == "windows" {
logger.Errorf("call notify plugin on unsupported os: %s", runtime.GOOS)
return
}
p, err := plugin.Open(config.C.Alerting.CallPlugin.PluginPath)
if err != nil {
logger.Errorf("failed to open notify plugin: %v", err)
return
}
caller, err := p.Lookup(config.C.Alerting.CallPlugin.Caller)
if err != nil {
logger.Errorf("failed to load caller: %v", err)
return
}
notifier, ok := caller.(Notifier)
if !ok {
logger.Errorf("notifier interface not implemented): %v", err)
return
}
notifier.Notify(stdinBytes)
logger.Debugf("alertingCallPlugin done. %s", notifier.Descript())
logger.Debugf("alertingCallPlugin begin")
// Debugf takes a format string: without a verb the payload would be rendered
// as "payload:%!(EXTRA string=...)" instead of being printed normally.
logger.Debugf("payload: %s", stdinBytes)
notifier.Instance.Notify(stdinBytes)
logger.Debugf("alertingCallPlugin done")
}

View File

@@ -2,11 +2,10 @@ package engine
import (
"encoding/json"
"plugin"
"runtime"
"time"
"github.com/didi/nightingale/v5/src/models"
"github.com/didi/nightingale/v5/src/notifier"
"github.com/didi/nightingale/v5/src/server/common/sender"
"github.com/didi/nightingale/v5/src/server/config"
"github.com/didi/nightingale/v5/src/server/memsto"
@@ -14,72 +13,59 @@ import (
"github.com/toolkits/pkg/logger"
)
type NoticeMaintainer struct {
NotifyUsersObj []*models.User `json:"notify_user_obj" gorm:"-"`
Title string `json:"title"`
Content string `json:"content"`
type MaintainMessage struct {
Tos []*models.User `json:"tos"`
Title string `json:"title"`
Content string `json:"content"`
}
func noticeCallPlugin(stdinBytes []byte) {
func notifyMaintainerWithPlugin(e error, title, triggerTime string, users []*models.User) {
if !config.C.Alerting.CallPlugin.Enable {
return
}
if runtime.GOOS == "windows" {
logger.Errorf("call notify plugin on unsupported os: %s", runtime.GOOS)
stdinBytes, err := json.Marshal(MaintainMessage{
Tos: users,
Title: title,
Content: "Title: " + title + "\nContent: " + e.Error() + "\nTime: " + triggerTime,
})
if err != nil {
logger.Error("failed to marshal MaintainMessage:", err)
return
}
p, err := plugin.Open(config.C.Alerting.CallPlugin.PluginPath)
if err != nil {
logger.Errorf("failed to open notify plugin: %v", err)
return
}
caller, err := p.Lookup(config.C.Alerting.CallPlugin.Caller)
if err != nil {
logger.Errorf("failed to load caller: %v", err)
return
}
notifier, ok := caller.(Notifier)
if !ok {
logger.Errorf("notifier interface not implemented): %v", err)
return
}
notifier.NotifyMaintainer(stdinBytes)
logger.Debugf("noticeCallPlugin done. %s", notifier.Descript())
notifier.Instance.NotifyMaintainer(stdinBytes)
logger.Debugf("notify maintainer with plugin done")
}
// notify to maintainer to handle the error
func notifyToMaintainer(e error, title string) {
logger.Errorf("notifyToMaintainer, title:%s, error:%v", title, e)
logger.Errorf("notifyToMaintainertitle:%s, error:%v", title, e)
var noticeMaintainer NoticeMaintainer
maintainerUsers := memsto.UserCache.GetMaintainerUsers()
if len(maintainerUsers) == 0 {
users := memsto.UserCache.GetMaintainerUsers()
if len(users) == 0 {
return
}
triggerTime := time.Now().Format("2006/01/02 - 15:04:05")
noticeMaintainer.NotifyUsersObj = maintainerUsers
noticeMaintainer.Content = "【内部处理错误】当前标题: " + title + "\n【内部处理错误】当前异常: " + e.Error() + "\n【内部处理错误】发送时间: " + triggerTime
noticeMaintainer.Title = title
stdinBytes, err := json.Marshal(noticeMaintainer)
if err != nil {
logger.Errorf("notifyToMaintainer: failed to marshal noticeMaintainer: %v", err)
} else {
noticeCallPlugin(stdinBytes)
}
triggerTime := time.Now().Format("2006/01/02 - 15:04:05")
notifyMaintainerWithPlugin(e, title, triggerTime, users)
notifyMaintainerWithBuiltin(e, title, triggerTime, users)
}
func notifyMaintainerWithBuiltin(e error, title, triggerTime string, users []*models.User) {
if len(config.C.Alerting.NotifyBuiltinChannels) == 0 {
return
}
emailset := make(map[string]struct{})
phoneset := make(map[string]struct{})
wecomset := make(map[string]struct{})
dingtalkset := make(map[string]struct{})
feishuset := make(map[string]struct{})
for _, user := range maintainerUsers {
for _, user := range users {
if user.Email != "" {
emailset[user.Email] = struct{}{}
}
@@ -118,13 +104,13 @@ func notifyToMaintainer(e error, title string) {
if len(emailset) == 0 {
continue
}
content := "【内部处理错误】当前标题: " + title + "\n【内部处理错误】当前异常: " + e.Error() + "\n【内部处理错误】发送时间: " + triggerTime
content := "Title: " + title + "\nContent: " + e.Error() + "\nTime: " + triggerTime
sender.WriteEmail(title, content, StringSetKeys(emailset))
case "dingtalk":
if len(dingtalkset) == 0 {
continue
}
content := "**【内部处理错误】当前标题: **" + title + "\n**【内部处理错误】当前异常: **" + e.Error() + "\n**【内部处理错误】发送时间: **" + triggerTime
content := "**Title: **" + title + "\n**Content: **" + e.Error() + "\n**Time: **" + triggerTime
sender.SendDingtalk(sender.DingtalkMessage{
Title: title,
Text: content,
@@ -135,7 +121,7 @@ func notifyToMaintainer(e error, title string) {
if len(wecomset) == 0 {
continue
}
content := "**【内部处理错误】当前标题: **" + title + "\n**【内部处理错误】当前异常: **" + e.Error() + "\n**【内部处理错误】发送时间: **" + triggerTime
content := "**Title: **" + title + "\n**Content: **" + e.Error() + "\n**Time: **" + triggerTime
sender.SendWecom(sender.WecomMessage{
Text: content,
Tokens: StringSetKeys(wecomset),
@@ -145,7 +131,7 @@ func notifyToMaintainer(e error, title string) {
continue
}
content := "【内部处理错误】当前标题: " + title + "\n【内部处理错误】当前异常: " + e.Error() + "\n【内部处理错误】发送时间: " + triggerTime
content := "Title: " + title + "\nContent: " + e.Error() + "\nTime: " + triggerTime
sender.SendFeishu(sender.FeishuMessage{
Text: content,
AtMobiles: phones,

View File

@@ -87,7 +87,7 @@ func (r RuleEval) Start() {
return
default:
r.Work()
logger.Debugf("rule executedrule_id=%d", r.RuleID())
logger.Debugf("rule executed, rule_id=%d", r.RuleID())
interval := r.rule.PromEvalInterval
if interval <= 0 {
interval = 10
@@ -116,8 +116,7 @@ func (r RuleEval) Work() {
value, warnings, err = reader.Client.Query(context.Background(), promql, time.Now())
if err != nil {
logger.Errorf("rule_eval:%d promql:%s, error:%v", r.RuleID(), promql, err)
// 告警查询prometheus逻辑出错发告警信息给管理员
notifyToMaintainer(err, "查询prometheus出错")
notifyToMaintainer(err, "failed to query prometheus")
return
}
@@ -190,7 +189,6 @@ func (ws *WorkersType) Build(rids []int64) {
elst, err := models.AlertCurEventGetByRule(rules[hash].Id)
if err != nil {
logger.Errorf("worker_build: AlertCurEventGetByRule failed: %v", err)
notifyToMaintainer(err, "AlertCurEventGetByRule ErrorruleID="+fmt.Sprint(rules[hash].Id))
continue
}
@@ -326,7 +324,7 @@ func (r RuleEval) judge(vectors []conv.Vector) {
}
// IsMuted only needs TriggerTime, RuleName and TagsMap
if isMuted(event) {
if IsMuted(event) {
logger.Infof("event_muted: rule_id=%d %s", r.rule.Id, vectors[i].Key)
continue
}

View File

@@ -36,6 +36,13 @@ func pushEventToQueue(c *gin.Context) {
event.TagsMap[arr[0]] = arr[1]
}
// IsMuted only needs TriggerTime, RuleName and TagsMap
if engine.IsMuted(event) {
logger.Infof("event_muted: rule_id=%d %s", event.RuleId, event.Hash)
ginx.NewRender(c).Message(nil)
return
}
if err := event.ParseRuleNote(); err != nil {
event.RuleNote = fmt.Sprintf("failed to parse rule note: %v", err)
}

View File

@@ -101,11 +101,9 @@ func configRoute(r *gin.Engine, version string) {
if config.C.AnonymousAccess.PromQuerier {
pages.Any("/prometheus/*url", prometheusProxy)
pages.POST("/query-range-batch", promBatchQueryRange)
} else {
pages.Any("/prometheus/*url", auth(), prometheusProxy)
pages.POST("/query-range-batch", auth(), promBatchQueryRange)
}
@@ -179,6 +177,7 @@ func configRoute(r *gin.Engine, version string) {
pages.POST("/busi-group/:id/board/:bid/clone", auth(), user(), perm("/dashboards/add"), bgrw(), boardClone)
pages.GET("/board/:bid", auth(), user(), boardGet)
pages.GET("/board/:bid/pure", boardPureGet)
pages.PUT("/board/:bid", auth(), user(), perm("/dashboards/put"), boardPut)
pages.PUT("/board/:bid/configs", auth(), user(), perm("/dashboards/put"), boardPutConfigs)
pages.DELETE("/boards", auth(), user(), perm("/dashboards/del"), boardDel)

View File

@@ -51,6 +51,17 @@ func boardGet(c *gin.Context) {
ginx.NewRender(c).Data(board, nil)
}
// boardPureGet handles GET /board/:bid/pure: it loads the board named by the
// "bid" URL parameter and renders it as the response data.
//
// A lookup error is passed to ginx.Dangerous; a missing board is reported
// through ginx.Bomb with HTTP 404.
// NOTE(review): both helpers presumably abort the request, since the code
// after them relies on board being non-nil — confirm.
func boardPureGet(c *gin.Context) {
	bid := ginx.UrlParamInt64(c, "bid")

	board, err := models.BoardGetByID(bid)
	ginx.Dangerous(err)

	if board == nil {
		ginx.Bomb(http.StatusNotFound, "No such dashboard")
	}

	ginx.NewRender(c).Data(board, nil)
}
// bgrwCheck
func boardDel(c *gin.Context) {
var f idsForm

View File

@@ -59,7 +59,7 @@ func proxyAuth() gin.HandlerFunc {
return func(c *gin.Context) {
user := handleProxyUser(c)
c.Set("userid", user.Id)
c.Set("username", user)
c.Set("username", user.Username)
c.Next()
}
}

View File

@@ -32,21 +32,15 @@ type batchQueryForm struct {
func promBatchQueryRange(c *gin.Context) {
xcluster := c.GetHeader("X-Cluster")
if xcluster == "" {
c.String(500, "X-Cluster is blank")
return
ginx.Bomb(http.StatusBadRequest, "header(X-Cluster) is blank")
}
var f batchQueryForm
err := c.BindJSON(&f)
if err != nil {
c.String(500, err.Error())
return
}
ginx.Dangerous(c.BindJSON(&f))
cluster, exist := prom.Clusters.Get(xcluster)
if !exist {
c.String(http.StatusBadRequest, "cluster(%s) not found", xcluster)
return
ginx.Bomb(http.StatusBadRequest, "cluster(%s) not found", xcluster)
}
var lst []model.Value
@@ -59,15 +53,12 @@ func promBatchQueryRange(c *gin.Context) {
}
resp, _, err := cluster.PromClient.QueryRange(context.Background(), item.Query, r)
if err != nil {
c.String(500, err.Error())
return
}
ginx.Dangerous(err)
lst = append(lst, resp)
}
c.JSON(200, lst)
ginx.NewRender(c).Data(lst, nil)
}
func prometheusProxy(c *gin.Context) {