mirror of
https://github.com/ccfos/nightingale.git
synced 2026-03-03 14:38:55 +00:00
Compare commits
7 Commits
eval-check
...
webhook-ba
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
b057940134 | ||
|
|
4500c4aba8 | ||
|
|
726e994d58 | ||
|
|
71c4c24f00 | ||
|
|
d14a834149 | ||
|
|
5f895552a9 | ||
|
|
1e3f62b92f |
@@ -32,6 +32,7 @@ type Alerting struct {
|
||||
Timeout int64
|
||||
TemplatesDir string
|
||||
NotifyConcurrency int
|
||||
WebhookBatchSend bool
|
||||
}
|
||||
|
||||
type CallPlugin struct {
|
||||
|
||||
@@ -265,7 +265,11 @@ func (e *Dispatch) Send(rule *models.AlertRule, event *models.AlertCurEvent, not
|
||||
e.SendCallbacks(rule, notifyTarget, event)
|
||||
|
||||
// handle global webhooks
|
||||
sender.SendWebhooks(notifyTarget.ToWebhookList(), event, e.Astats)
|
||||
if e.alerting.WebhookBatchSend {
|
||||
sender.BatchSendWebhooks(notifyTarget.ToWebhookList(), event, e.Astats)
|
||||
} else {
|
||||
sender.SingleSendWebhooks(notifyTarget.ToWebhookList(), event, e.Astats)
|
||||
}
|
||||
|
||||
// handle plugin call
|
||||
go sender.MayPluginNotify(e.genNoticeBytes(event), e.notifyConfigCache.GetNotifyScript(), e.Astats)
|
||||
@@ -280,7 +284,7 @@ func (e *Dispatch) SendCallbacks(rule *models.AlertRule, notifyTarget *NotifyTar
|
||||
continue
|
||||
}
|
||||
|
||||
cbCtx := sender.BuildCallBackContext(e.ctx, urlStr, rule, []*models.AlertCurEvent{event}, uids, e.userCache, e.Astats)
|
||||
cbCtx := sender.BuildCallBackContext(e.ctx, urlStr, rule, []*models.AlertCurEvent{event}, uids, e.userCache, e.alerting.WebhookBatchSend, e.Astats)
|
||||
|
||||
if strings.HasPrefix(urlStr, "${ibex}") {
|
||||
e.CallBacks[models.IbexDomain].CallBack(cbCtx)
|
||||
|
||||
@@ -79,6 +79,22 @@ func (s *NotifyTarget) ToCallbackList() []string {
|
||||
func (s *NotifyTarget) ToWebhookList() []*models.Webhook {
|
||||
webhooks := make([]*models.Webhook, 0, len(s.webhooks))
|
||||
for _, wh := range s.webhooks {
|
||||
if wh.Batch == 0 {
|
||||
wh.Batch = 1000
|
||||
}
|
||||
|
||||
if wh.Timeout == 0 {
|
||||
wh.Timeout = 10
|
||||
}
|
||||
|
||||
if wh.RetryCount == 0 {
|
||||
wh.RetryCount = 10
|
||||
}
|
||||
|
||||
if wh.RetryInterval == 0 {
|
||||
wh.RetryInterval = 10
|
||||
}
|
||||
|
||||
webhooks = append(webhooks, wh)
|
||||
}
|
||||
return webhooks
|
||||
|
||||
@@ -29,13 +29,14 @@ type (
|
||||
Rule *models.AlertRule
|
||||
Events []*models.AlertCurEvent
|
||||
Stats *astats.Stats
|
||||
BatchSend bool
|
||||
}
|
||||
|
||||
DefaultCallBacker struct{}
|
||||
)
|
||||
|
||||
func BuildCallBackContext(ctx *ctx.Context, callBackURL string, rule *models.AlertRule, events []*models.AlertCurEvent,
|
||||
uids []int64, userCache *memsto.UserCacheType, stats *astats.Stats) CallBackContext {
|
||||
uids []int64, userCache *memsto.UserCacheType, batchSend bool, stats *astats.Stats) CallBackContext {
|
||||
users := userCache.GetByUserIds(uids)
|
||||
|
||||
newCallBackUrl, _ := events[0].ParseURL(callBackURL)
|
||||
@@ -45,6 +46,7 @@ func BuildCallBackContext(ctx *ctx.Context, callBackURL string, rule *models.Ale
|
||||
Rule: rule,
|
||||
Events: events,
|
||||
Users: users,
|
||||
BatchSend: batchSend,
|
||||
Stats: stats,
|
||||
}
|
||||
}
|
||||
@@ -112,6 +114,21 @@ func (c *DefaultCallBacker) CallBack(ctx CallBackContext) {
|
||||
|
||||
event := ctx.Events[0]
|
||||
|
||||
if ctx.BatchSend {
|
||||
webhookConf := &models.Webhook{
|
||||
Type: models.RuleCallback,
|
||||
Enable: true,
|
||||
Url: ctx.CallBackURL,
|
||||
Timeout: 5,
|
||||
RetryCount: 3,
|
||||
RetryInterval: 10,
|
||||
Batch: 1000,
|
||||
}
|
||||
|
||||
PushCallbackEvent(webhookConf, event, ctx.Stats)
|
||||
return
|
||||
}
|
||||
|
||||
ctx.Stats.AlertNotifyTotal.WithLabelValues("rule_callback").Inc()
|
||||
resp, code, err := poster.PostJSON(ctx.CallBackURL, 5*time.Second, event, 3)
|
||||
if err != nil {
|
||||
@@ -140,3 +157,27 @@ type TaskCreateReply struct {
|
||||
Err string `json:"err"`
|
||||
Dat int64 `json:"dat"` // task.id
|
||||
}
|
||||
|
||||
func PushCallbackEvent(webhook *models.Webhook, event *models.AlertCurEvent, stats *astats.Stats) {
|
||||
CallbackEventQueueLock.RLock()
|
||||
queue := CallbackEventQueue[webhook.Url]
|
||||
CallbackEventQueueLock.RUnlock()
|
||||
|
||||
if queue == nil {
|
||||
queue = &WebhookQueue{
|
||||
list: NewSafeListLimited(QueueMaxSize),
|
||||
closeCh: make(chan struct{}),
|
||||
}
|
||||
|
||||
CallbackEventQueueLock.Lock()
|
||||
CallbackEventQueue[webhook.Url] = queue
|
||||
CallbackEventQueueLock.Unlock()
|
||||
|
||||
StartConsumer(queue, webhook.Batch, webhook, stats)
|
||||
}
|
||||
|
||||
succ := queue.list.PushFront(event)
|
||||
if !succ {
|
||||
logger.Warningf("Write channel(%s) full, current channel size: %d event:%v", webhook.Url, queue.list.Len(), event)
|
||||
}
|
||||
}
|
||||
|
||||
@@ -6,6 +6,7 @@ import (
|
||||
"encoding/json"
|
||||
"io"
|
||||
"net/http"
|
||||
"sync"
|
||||
"time"
|
||||
|
||||
"github.com/ccfos/nightingale/v6/alert/astats"
|
||||
@@ -14,14 +15,19 @@ import (
|
||||
"github.com/toolkits/pkg/logger"
|
||||
)
|
||||
|
||||
func sendWebhook(webhook *models.Webhook, event *models.AlertCurEvent, stats *astats.Stats) bool {
|
||||
func sendWebhook(webhook *models.Webhook, event interface{}, stats *astats.Stats) bool {
|
||||
channel := "webhook"
|
||||
if webhook.Type == models.RuleCallback {
|
||||
channel = "callback"
|
||||
}
|
||||
|
||||
conf := webhook
|
||||
if conf.Url == "" || !conf.Enable {
|
||||
return false
|
||||
}
|
||||
bs, err := json.Marshal(event)
|
||||
if err != nil {
|
||||
logger.Errorf("alertingWebhook failed to marshal event:%+v err:%v", event, err)
|
||||
logger.Errorf("%s alertingWebhook failed to marshal event:%+v err:%v", channel, event, err)
|
||||
return false
|
||||
}
|
||||
|
||||
@@ -29,7 +35,7 @@ func sendWebhook(webhook *models.Webhook, event *models.AlertCurEvent, stats *as
|
||||
|
||||
req, err := http.NewRequest("POST", conf.Url, bf)
|
||||
if err != nil {
|
||||
logger.Warningf("alertingWebhook failed to new reques event:%+v err:%v", event, err)
|
||||
logger.Warningf("%s alertingWebhook failed to new reques event:%s err:%v", channel, string(bs), err)
|
||||
return true
|
||||
}
|
||||
|
||||
@@ -58,12 +64,12 @@ func sendWebhook(webhook *models.Webhook, event *models.AlertCurEvent, stats *as
|
||||
},
|
||||
}
|
||||
|
||||
stats.AlertNotifyTotal.WithLabelValues("webhook").Inc()
|
||||
stats.AlertNotifyTotal.WithLabelValues(channel).Inc()
|
||||
var resp *http.Response
|
||||
resp, err = client.Do(req)
|
||||
if err != nil {
|
||||
stats.AlertNotifyErrorTotal.WithLabelValues("webhook").Inc()
|
||||
logger.Errorf("event_webhook_fail, ruleId: [%d], eventId: [%d], event:%+v, url: [%s], error: [%s]", event.RuleId, event.Id, event, conf.Url, err)
|
||||
stats.AlertNotifyErrorTotal.WithLabelValues(channel).Inc()
|
||||
logger.Errorf("event_%s_fail, event:%s, url: [%s], error: [%s]", channel, string(bs), conf.Url, err)
|
||||
return true
|
||||
}
|
||||
|
||||
@@ -74,15 +80,15 @@ func sendWebhook(webhook *models.Webhook, event *models.AlertCurEvent, stats *as
|
||||
}
|
||||
|
||||
if resp.StatusCode == 429 {
|
||||
logger.Errorf("event_webhook_fail, url: %s, response code: %d, body: %s event:%+v", conf.Url, resp.StatusCode, string(body), event)
|
||||
logger.Errorf("event_%s_fail, url: %s, response code: %d, body: %s event:%s", channel, conf.Url, resp.StatusCode, string(body), string(bs))
|
||||
return true
|
||||
}
|
||||
|
||||
logger.Debugf("event_webhook_succ, url: %s, response code: %d, body: %s event:%+v", conf.Url, resp.StatusCode, string(body), event)
|
||||
logger.Debugf("event_%s_succ, url: %s, response code: %d, body: %s event:%s", channel, conf.Url, resp.StatusCode, string(body), string(bs))
|
||||
return false
|
||||
}
|
||||
|
||||
func SendWebhooks(webhooks []*models.Webhook, event *models.AlertCurEvent, stats *astats.Stats) {
|
||||
func SingleSendWebhooks(webhooks []*models.Webhook, event *models.AlertCurEvent, stats *astats.Stats) {
|
||||
for _, conf := range webhooks {
|
||||
retryCount := 0
|
||||
for retryCount < 3 {
|
||||
@@ -95,3 +101,73 @@ func SendWebhooks(webhooks []*models.Webhook, event *models.AlertCurEvent, stats
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func BatchSendWebhooks(webhooks []*models.Webhook, event *models.AlertCurEvent, stats *astats.Stats) {
|
||||
for _, conf := range webhooks {
|
||||
logger.Infof("push event:%+v to queue:%v", event, conf)
|
||||
PushEvent(conf, event, stats)
|
||||
}
|
||||
}
|
||||
|
||||
var EventQueue = make(map[string]*WebhookQueue)
|
||||
var CallbackEventQueue = make(map[string]*WebhookQueue)
|
||||
var CallbackEventQueueLock sync.RWMutex
|
||||
var EventQueueLock sync.RWMutex
|
||||
|
||||
const QueueMaxSize = 100000
|
||||
|
||||
type WebhookQueue struct {
|
||||
list *SafeListLimited
|
||||
closeCh chan struct{}
|
||||
}
|
||||
|
||||
func PushEvent(webhook *models.Webhook, event *models.AlertCurEvent, stats *astats.Stats) {
|
||||
EventQueueLock.RLock()
|
||||
queue := EventQueue[webhook.Url]
|
||||
EventQueueLock.RUnlock()
|
||||
|
||||
if queue == nil {
|
||||
queue = &WebhookQueue{
|
||||
list: NewSafeListLimited(QueueMaxSize),
|
||||
closeCh: make(chan struct{}),
|
||||
}
|
||||
|
||||
EventQueueLock.Lock()
|
||||
EventQueue[webhook.Url] = queue
|
||||
EventQueueLock.Unlock()
|
||||
|
||||
StartConsumer(queue, webhook.Batch, webhook, stats)
|
||||
}
|
||||
|
||||
succ := queue.list.PushFront(event)
|
||||
if !succ {
|
||||
stats.AlertNotifyErrorTotal.WithLabelValues("push_event_queue").Inc()
|
||||
logger.Warningf("Write channel(%s) full, current channel size: %d event:%v", webhook.Url, queue.list.Len(), event)
|
||||
}
|
||||
}
|
||||
|
||||
func StartConsumer(queue *WebhookQueue, popSize int, webhook *models.Webhook, stats *astats.Stats) {
|
||||
for {
|
||||
select {
|
||||
case <-queue.closeCh:
|
||||
logger.Infof("event queue:%v closed", queue)
|
||||
return
|
||||
default:
|
||||
events := queue.list.PopBack(popSize)
|
||||
if len(events) == 0 {
|
||||
time.Sleep(time.Millisecond * 400)
|
||||
continue
|
||||
}
|
||||
|
||||
retryCount := 0
|
||||
for retryCount < webhook.RetryCount {
|
||||
needRetry := sendWebhook(webhook, events, stats)
|
||||
if !needRetry {
|
||||
break
|
||||
}
|
||||
retryCount++
|
||||
time.Sleep(time.Second * time.Duration(webhook.RetryInterval) * time.Duration(retryCount))
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
111
alert/sender/webhook_queue.go
Normal file
111
alert/sender/webhook_queue.go
Normal file
@@ -0,0 +1,111 @@
|
||||
package sender
|
||||
|
||||
import (
|
||||
"container/list"
|
||||
"sync"
|
||||
|
||||
"github.com/ccfos/nightingale/v6/models"
|
||||
)
|
||||
|
||||
type SafeList struct {
|
||||
sync.RWMutex
|
||||
L *list.List
|
||||
}
|
||||
|
||||
func NewSafeList() *SafeList {
|
||||
return &SafeList{L: list.New()}
|
||||
}
|
||||
|
||||
func (sl *SafeList) PushFront(v interface{}) *list.Element {
|
||||
sl.Lock()
|
||||
e := sl.L.PushFront(v)
|
||||
sl.Unlock()
|
||||
return e
|
||||
}
|
||||
|
||||
func (sl *SafeList) PushFrontBatch(vs []interface{}) {
|
||||
sl.Lock()
|
||||
for _, item := range vs {
|
||||
sl.L.PushFront(item)
|
||||
}
|
||||
sl.Unlock()
|
||||
}
|
||||
|
||||
func (sl *SafeList) PopBack(max int) []*models.AlertCurEvent {
|
||||
sl.Lock()
|
||||
|
||||
count := sl.L.Len()
|
||||
if count == 0 {
|
||||
sl.Unlock()
|
||||
return []*models.AlertCurEvent{}
|
||||
}
|
||||
|
||||
if count > max {
|
||||
count = max
|
||||
}
|
||||
|
||||
items := make([]*models.AlertCurEvent, 0, count)
|
||||
for i := 0; i < count; i++ {
|
||||
item := sl.L.Remove(sl.L.Back())
|
||||
sample, ok := item.(*models.AlertCurEvent)
|
||||
if ok {
|
||||
items = append(items, sample)
|
||||
}
|
||||
}
|
||||
|
||||
sl.Unlock()
|
||||
return items
|
||||
}
|
||||
|
||||
func (sl *SafeList) RemoveAll() {
|
||||
sl.Lock()
|
||||
sl.L.Init()
|
||||
sl.Unlock()
|
||||
}
|
||||
|
||||
func (sl *SafeList) Len() int {
|
||||
sl.RLock()
|
||||
size := sl.L.Len()
|
||||
sl.RUnlock()
|
||||
return size
|
||||
}
|
||||
|
||||
// SafeList with Limited Size
|
||||
type SafeListLimited struct {
|
||||
maxSize int
|
||||
SL *SafeList
|
||||
}
|
||||
|
||||
func NewSafeListLimited(maxSize int) *SafeListLimited {
|
||||
return &SafeListLimited{SL: NewSafeList(), maxSize: maxSize}
|
||||
}
|
||||
|
||||
func (sll *SafeListLimited) PopBack(max int) []*models.AlertCurEvent {
|
||||
return sll.SL.PopBack(max)
|
||||
}
|
||||
|
||||
func (sll *SafeListLimited) PushFront(v interface{}) bool {
|
||||
if sll.SL.Len() >= sll.maxSize {
|
||||
return false
|
||||
}
|
||||
|
||||
sll.SL.PushFront(v)
|
||||
return true
|
||||
}
|
||||
|
||||
func (sll *SafeListLimited) PushFrontBatch(vs []interface{}) bool {
|
||||
if sll.SL.Len() >= sll.maxSize {
|
||||
return false
|
||||
}
|
||||
|
||||
sll.SL.PushFrontBatch(vs)
|
||||
return true
|
||||
}
|
||||
|
||||
func (sll *SafeListLimited) RemoveAll() {
|
||||
sll.SL.RemoveAll()
|
||||
}
|
||||
|
||||
func (sll *SafeListLimited) Len() int {
|
||||
return sll.SL.Len()
|
||||
}
|
||||
@@ -7,7 +7,11 @@ const NOTIFYCONTACT = "notify_contact"
|
||||
const SMTP = "smtp_config"
|
||||
const IBEX = "ibex_server"
|
||||
|
||||
var GlobalCallback = 0
|
||||
var RuleCallback = 1
|
||||
|
||||
type Webhook struct {
|
||||
Type int `json:"type"`
|
||||
Enable bool `json:"enable"`
|
||||
Url string `json:"url"`
|
||||
BasicAuthUser string `json:"basic_auth_user"`
|
||||
@@ -17,6 +21,9 @@ type Webhook struct {
|
||||
Headers []string `json:"headers_str"`
|
||||
SkipVerify bool `json:"skip_verify"`
|
||||
Note string `json:"note"`
|
||||
RetryCount int `json:"retry_count"`
|
||||
RetryInterval int `json:"retry_interval"`
|
||||
Batch int `json:"batch"`
|
||||
}
|
||||
|
||||
type NotifyScript struct {
|
||||
|
||||
Reference in New Issue
Block a user