Compare commits

...

7 Commits

Author SHA1 Message Date
ning
b057940134 add stats 2024-08-01 15:18:49 +08:00
ning
4500c4aba8 refactor callback 2024-08-01 15:12:03 +08:00
710leo
726e994d58 merge main 2024-07-30 15:58:31 +08:00
710leo
71c4c24f00 code refactor 2024-07-30 15:57:15 +08:00
710leo
d14a834149 webhook send batch 2024-07-30 15:47:25 +08:00
ning
5f895552a9 done 2024-07-26 12:54:28 +08:00
ning
1e3f62b92f refactor 2024-07-25 17:46:19 +08:00
7 changed files with 268 additions and 12 deletions

View File

@@ -32,6 +32,7 @@ type Alerting struct {
Timeout int64
TemplatesDir string
NotifyConcurrency int
WebhookBatchSend bool
}
type CallPlugin struct {

View File

@@ -265,7 +265,11 @@ func (e *Dispatch) Send(rule *models.AlertRule, event *models.AlertCurEvent, not
e.SendCallbacks(rule, notifyTarget, event)
// handle global webhooks
sender.SendWebhooks(notifyTarget.ToWebhookList(), event, e.Astats)
if e.alerting.WebhookBatchSend {
sender.BatchSendWebhooks(notifyTarget.ToWebhookList(), event, e.Astats)
} else {
sender.SingleSendWebhooks(notifyTarget.ToWebhookList(), event, e.Astats)
}
// handle plugin call
go sender.MayPluginNotify(e.genNoticeBytes(event), e.notifyConfigCache.GetNotifyScript(), e.Astats)
@@ -280,7 +284,7 @@ func (e *Dispatch) SendCallbacks(rule *models.AlertRule, notifyTarget *NotifyTar
continue
}
cbCtx := sender.BuildCallBackContext(e.ctx, urlStr, rule, []*models.AlertCurEvent{event}, uids, e.userCache, e.Astats)
cbCtx := sender.BuildCallBackContext(e.ctx, urlStr, rule, []*models.AlertCurEvent{event}, uids, e.userCache, e.alerting.WebhookBatchSend, e.Astats)
if strings.HasPrefix(urlStr, "${ibex}") {
e.CallBacks[models.IbexDomain].CallBack(cbCtx)

View File

@@ -79,6 +79,22 @@ func (s *NotifyTarget) ToCallbackList() []string {
func (s *NotifyTarget) ToWebhookList() []*models.Webhook {
webhooks := make([]*models.Webhook, 0, len(s.webhooks))
for _, wh := range s.webhooks {
if wh.Batch == 0 {
wh.Batch = 1000
}
if wh.Timeout == 0 {
wh.Timeout = 10
}
if wh.RetryCount == 0 {
wh.RetryCount = 10
}
if wh.RetryInterval == 0 {
wh.RetryInterval = 10
}
webhooks = append(webhooks, wh)
}
return webhooks

View File

@@ -29,13 +29,14 @@ type (
Rule *models.AlertRule
Events []*models.AlertCurEvent
Stats *astats.Stats
BatchSend bool
}
DefaultCallBacker struct{}
)
func BuildCallBackContext(ctx *ctx.Context, callBackURL string, rule *models.AlertRule, events []*models.AlertCurEvent,
uids []int64, userCache *memsto.UserCacheType, stats *astats.Stats) CallBackContext {
uids []int64, userCache *memsto.UserCacheType, batchSend bool, stats *astats.Stats) CallBackContext {
users := userCache.GetByUserIds(uids)
newCallBackUrl, _ := events[0].ParseURL(callBackURL)
@@ -45,6 +46,7 @@ func BuildCallBackContext(ctx *ctx.Context, callBackURL string, rule *models.Ale
Rule: rule,
Events: events,
Users: users,
BatchSend: batchSend,
Stats: stats,
}
}
@@ -112,6 +114,21 @@ func (c *DefaultCallBacker) CallBack(ctx CallBackContext) {
event := ctx.Events[0]
if ctx.BatchSend {
webhookConf := &models.Webhook{
Type: models.RuleCallback,
Enable: true,
Url: ctx.CallBackURL,
Timeout: 5,
RetryCount: 3,
RetryInterval: 10,
Batch: 1000,
}
PushCallbackEvent(webhookConf, event, ctx.Stats)
return
}
ctx.Stats.AlertNotifyTotal.WithLabelValues("rule_callback").Inc()
resp, code, err := poster.PostJSON(ctx.CallBackURL, 5*time.Second, event, 3)
if err != nil {
@@ -140,3 +157,27 @@ type TaskCreateReply struct {
Err string `json:"err"`
Dat int64 `json:"dat"` // task.id
}
func PushCallbackEvent(webhook *models.Webhook, event *models.AlertCurEvent, stats *astats.Stats) {
CallbackEventQueueLock.RLock()
queue := CallbackEventQueue[webhook.Url]
CallbackEventQueueLock.RUnlock()
if queue == nil {
queue = &WebhookQueue{
list: NewSafeListLimited(QueueMaxSize),
closeCh: make(chan struct{}),
}
CallbackEventQueueLock.Lock()
CallbackEventQueue[webhook.Url] = queue
CallbackEventQueueLock.Unlock()
StartConsumer(queue, webhook.Batch, webhook, stats)
}
succ := queue.list.PushFront(event)
if !succ {
logger.Warningf("Write channel(%s) full, current channel size: %d event:%v", webhook.Url, queue.list.Len(), event)
}
}

View File

@@ -6,6 +6,7 @@ import (
"encoding/json"
"io"
"net/http"
"sync"
"time"
"github.com/ccfos/nightingale/v6/alert/astats"
@@ -14,14 +15,19 @@ import (
"github.com/toolkits/pkg/logger"
)
func sendWebhook(webhook *models.Webhook, event *models.AlertCurEvent, stats *astats.Stats) bool {
func sendWebhook(webhook *models.Webhook, event interface{}, stats *astats.Stats) bool {
channel := "webhook"
if webhook.Type == models.RuleCallback {
channel = "callback"
}
conf := webhook
if conf.Url == "" || !conf.Enable {
return false
}
bs, err := json.Marshal(event)
if err != nil {
logger.Errorf("alertingWebhook failed to marshal event:%+v err:%v", event, err)
logger.Errorf("%s alertingWebhook failed to marshal event:%+v err:%v", channel, event, err)
return false
}
@@ -29,7 +35,7 @@ func sendWebhook(webhook *models.Webhook, event *models.AlertCurEvent, stats *as
req, err := http.NewRequest("POST", conf.Url, bf)
if err != nil {
logger.Warningf("alertingWebhook failed to new reques event:%+v err:%v", event, err)
logger.Warningf("%s alertingWebhook failed to new reques event:%s err:%v", channel, string(bs), err)
return true
}
@@ -58,12 +64,12 @@ func sendWebhook(webhook *models.Webhook, event *models.AlertCurEvent, stats *as
},
}
stats.AlertNotifyTotal.WithLabelValues("webhook").Inc()
stats.AlertNotifyTotal.WithLabelValues(channel).Inc()
var resp *http.Response
resp, err = client.Do(req)
if err != nil {
stats.AlertNotifyErrorTotal.WithLabelValues("webhook").Inc()
logger.Errorf("event_webhook_fail, ruleId: [%d], eventId: [%d], event:%+v, url: [%s], error: [%s]", event.RuleId, event.Id, event, conf.Url, err)
stats.AlertNotifyErrorTotal.WithLabelValues(channel).Inc()
logger.Errorf("event_%s_fail, event:%s, url: [%s], error: [%s]", channel, string(bs), conf.Url, err)
return true
}
@@ -74,15 +80,15 @@ func sendWebhook(webhook *models.Webhook, event *models.AlertCurEvent, stats *as
}
if resp.StatusCode == 429 {
logger.Errorf("event_webhook_fail, url: %s, response code: %d, body: %s event:%+v", conf.Url, resp.StatusCode, string(body), event)
logger.Errorf("event_%s_fail, url: %s, response code: %d, body: %s event:%s", channel, conf.Url, resp.StatusCode, string(body), string(bs))
return true
}
logger.Debugf("event_webhook_succ, url: %s, response code: %d, body: %s event:%+v", conf.Url, resp.StatusCode, string(body), event)
logger.Debugf("event_%s_succ, url: %s, response code: %d, body: %s event:%s", channel, conf.Url, resp.StatusCode, string(body), string(bs))
return false
}
func SendWebhooks(webhooks []*models.Webhook, event *models.AlertCurEvent, stats *astats.Stats) {
func SingleSendWebhooks(webhooks []*models.Webhook, event *models.AlertCurEvent, stats *astats.Stats) {
for _, conf := range webhooks {
retryCount := 0
for retryCount < 3 {
@@ -95,3 +101,73 @@ func SendWebhooks(webhooks []*models.Webhook, event *models.AlertCurEvent, stats
}
}
}
func BatchSendWebhooks(webhooks []*models.Webhook, event *models.AlertCurEvent, stats *astats.Stats) {
for _, conf := range webhooks {
logger.Infof("push event:%+v to queue:%v", event, conf)
PushEvent(conf, event, stats)
}
}
var EventQueue = make(map[string]*WebhookQueue)
var CallbackEventQueue = make(map[string]*WebhookQueue)
var CallbackEventQueueLock sync.RWMutex
var EventQueueLock sync.RWMutex
const QueueMaxSize = 100000
type WebhookQueue struct {
list *SafeListLimited
closeCh chan struct{}
}
func PushEvent(webhook *models.Webhook, event *models.AlertCurEvent, stats *astats.Stats) {
EventQueueLock.RLock()
queue := EventQueue[webhook.Url]
EventQueueLock.RUnlock()
if queue == nil {
queue = &WebhookQueue{
list: NewSafeListLimited(QueueMaxSize),
closeCh: make(chan struct{}),
}
EventQueueLock.Lock()
EventQueue[webhook.Url] = queue
EventQueueLock.Unlock()
StartConsumer(queue, webhook.Batch, webhook, stats)
}
succ := queue.list.PushFront(event)
if !succ {
stats.AlertNotifyErrorTotal.WithLabelValues("push_event_queue").Inc()
logger.Warningf("Write channel(%s) full, current channel size: %d event:%v", webhook.Url, queue.list.Len(), event)
}
}
func StartConsumer(queue *WebhookQueue, popSize int, webhook *models.Webhook, stats *astats.Stats) {
for {
select {
case <-queue.closeCh:
logger.Infof("event queue:%v closed", queue)
return
default:
events := queue.list.PopBack(popSize)
if len(events) == 0 {
time.Sleep(time.Millisecond * 400)
continue
}
retryCount := 0
for retryCount < webhook.RetryCount {
needRetry := sendWebhook(webhook, events, stats)
if !needRetry {
break
}
retryCount++
time.Sleep(time.Second * time.Duration(webhook.RetryInterval) * time.Duration(retryCount))
}
}
}
}

View File

@@ -0,0 +1,111 @@
package sender
import (
"container/list"
"sync"
"github.com/ccfos/nightingale/v6/models"
)
type SafeList struct {
sync.RWMutex
L *list.List
}
func NewSafeList() *SafeList {
return &SafeList{L: list.New()}
}
func (sl *SafeList) PushFront(v interface{}) *list.Element {
sl.Lock()
e := sl.L.PushFront(v)
sl.Unlock()
return e
}
func (sl *SafeList) PushFrontBatch(vs []interface{}) {
sl.Lock()
for _, item := range vs {
sl.L.PushFront(item)
}
sl.Unlock()
}
func (sl *SafeList) PopBack(max int) []*models.AlertCurEvent {
sl.Lock()
count := sl.L.Len()
if count == 0 {
sl.Unlock()
return []*models.AlertCurEvent{}
}
if count > max {
count = max
}
items := make([]*models.AlertCurEvent, 0, count)
for i := 0; i < count; i++ {
item := sl.L.Remove(sl.L.Back())
sample, ok := item.(*models.AlertCurEvent)
if ok {
items = append(items, sample)
}
}
sl.Unlock()
return items
}
func (sl *SafeList) RemoveAll() {
sl.Lock()
sl.L.Init()
sl.Unlock()
}
func (sl *SafeList) Len() int {
sl.RLock()
size := sl.L.Len()
sl.RUnlock()
return size
}
// SafeList with Limited Size
type SafeListLimited struct {
maxSize int
SL *SafeList
}
func NewSafeListLimited(maxSize int) *SafeListLimited {
return &SafeListLimited{SL: NewSafeList(), maxSize: maxSize}
}
func (sll *SafeListLimited) PopBack(max int) []*models.AlertCurEvent {
return sll.SL.PopBack(max)
}
func (sll *SafeListLimited) PushFront(v interface{}) bool {
if sll.SL.Len() >= sll.maxSize {
return false
}
sll.SL.PushFront(v)
return true
}
func (sll *SafeListLimited) PushFrontBatch(vs []interface{}) bool {
if sll.SL.Len() >= sll.maxSize {
return false
}
sll.SL.PushFrontBatch(vs)
return true
}
func (sll *SafeListLimited) RemoveAll() {
sll.SL.RemoveAll()
}
func (sll *SafeListLimited) Len() int {
return sll.SL.Len()
}

View File

@@ -7,7 +7,11 @@ const NOTIFYCONTACT = "notify_contact"
const SMTP = "smtp_config"
const IBEX = "ibex_server"
var GlobalCallback = 0
var RuleCallback = 1
type Webhook struct {
Type int `json:"type"`
Enable bool `json:"enable"`
Url string `json:"url"`
BasicAuthUser string `json:"basic_auth_user"`
@@ -17,6 +21,9 @@ type Webhook struct {
Headers []string `json:"headers_str"`
SkipVerify bool `json:"skip_verify"`
Note string `json:"note"`
RetryCount int `json:"retry_count"`
RetryInterval int `json:"retry_interval"`
Batch int `json:"batch"`
}
type NotifyScript struct {