Compare commits

..

1 Commits

Author SHA1 Message Date
ning
9867a9519c fix: alert with var 2025-11-29 23:40:16 +08:00
194 changed files with 1132 additions and 8834 deletions

View File

@@ -31,9 +31,7 @@
Nightingale is an open-source monitoring project that focuses on alerting. Similar to Grafana, Nightingale also connects with various existing data sources. However, while Grafana emphasizes visualization, Nightingale places greater emphasis on the alerting engine, as well as the processing and distribution of alarms.
> 💡 Nightingale has now officially launched the [MCP-Server](https://github.com/n9e/n9e-mcp-server/). This MCP Server enables AI assistants to interact with the Nightingale API using natural language, facilitating alert management, monitoring, and observability tasks.
>
> The Nightingale project was initially developed and open-sourced by DiDi.inc. On May 11, 2022, it was donated to the Open Source Development Committee of the China Computer Federation (CCF ODTC).
> The Nightingale project was initially developed and open-sourced by DiDi.inc. On May 11, 2022, it was donated to the Open Source Development Committee of the China Computer Federation (CCF ODC).
![](https://n9e.github.io/img/global/arch-bg.png)

View File

@@ -3,7 +3,7 @@
<img src="doc/img/Nightingale_L_V.png" alt="nightingale - cloud native monitoring" width="100" /></a>
</p>
<p align="center">
<b>开源监控告警管理专家</b>
<b>开源告警管理专家</b>
</p>
<p align="center">
@@ -33,8 +33,7 @@
夜莺侧重于监控告警,类似于 Grafana 的数据源集成方式,夜莺也是对接多种既有的数据源,不过 Grafana 侧重于可视化,夜莺则是侧重于告警引擎、告警事件的处理和分发。
> - 💡夜莺正式推出了 [MCP-Server](https://github.com/n9e/n9e-mcp-server/),此 MCP Server 允许 AI 助手通过自然语言与夜莺 API 交互,实现告警管理、监控和可观测性任务
> - 夜莺监控项目,最初由滴滴开发和开源,并于 2022 年 5 月 11 日捐赠予中国计算机学会开源发展技术委员会CCF ODTC为 CCF ODTC 成立后接受捐赠的第一个开源项目。
> 夜莺监控项目,最初由滴滴开发和开源,并于 2022 年 5 月 11 日捐赠予中国计算机学会开源发展技术委员会CCF ODTC为 CCF ODTC 成立后接受捐赠的第一个开源项目
![](https://n9e.github.io/img/global/arch-bg.png)

View File

@@ -79,7 +79,7 @@ func Initialize(configDir string, cryptoKey string) (func(), error) {
r := httpx.GinEngine(config.Global.RunMode, config.HTTP,
configCvalCache.PrintBodyPaths, configCvalCache.PrintAccessLog)
rt := router.New(config.HTTP, config.Alert, alertMuteCache, targetCache, busiGroupCache, alertStats, ctx, externalProcessors, config.Log.Dir)
rt := router.New(config.HTTP, config.Alert, alertMuteCache, targetCache, busiGroupCache, alertStats, ctx, externalProcessors)
if config.Ibex.Enable {
ibex.ServerStart(false, nil, redis, config.HTTP.APIForService.BasicAuth, config.Alert.Heartbeat, &config.CenterApi, r, nil, config.Ibex, config.HTTP.Port)

View File

@@ -8,6 +8,7 @@ import (
"time"
"github.com/ccfos/nightingale/v6/alert/aconf"
"github.com/ccfos/nightingale/v6/alert/common"
"github.com/ccfos/nightingale/v6/alert/queue"
"github.com/ccfos/nightingale/v6/memsto"
"github.com/ccfos/nightingale/v6/models"
@@ -98,12 +99,12 @@ func (e *Consumer) consumeOne(event *models.AlertCurEvent) {
e.dispatch.Astats.CounterAlertsTotal.WithLabelValues(event.Cluster, eventType, event.GroupName).Inc()
if err := event.ParseRule("rule_name"); err != nil {
logger.Warningf("alert_eval_%d datasource_%d failed to parse rule name: %v", event.RuleId, event.DatasourceId, err)
logger.Warningf("ruleid:%d failed to parse rule name: %v", event.RuleId, err)
event.RuleName = fmt.Sprintf("failed to parse rule name: %v", err)
}
if err := event.ParseRule("annotations"); err != nil {
logger.Warningf("alert_eval_%d datasource_%d failed to parse annotations: %v", event.RuleId, event.DatasourceId, err)
logger.Warningf("ruleid:%d failed to parse annotations: %v", event.RuleId, err)
event.Annotations = fmt.Sprintf("failed to parse annotations: %v", err)
event.AnnotationsJSON["error"] = event.Annotations
}
@@ -111,7 +112,7 @@ func (e *Consumer) consumeOne(event *models.AlertCurEvent) {
e.queryRecoveryVal(event)
if err := event.ParseRule("rule_note"); err != nil {
logger.Warningf("alert_eval_%d datasource_%d failed to parse rule note: %v", event.RuleId, event.DatasourceId, err)
logger.Warningf("ruleid:%d failed to parse rule note: %v", event.RuleId, err)
event.RuleNote = fmt.Sprintf("failed to parse rule note: %v", err)
}
@@ -130,7 +131,7 @@ func (e *Consumer) persist(event *models.AlertCurEvent) {
var err error
event.Id, err = poster.PostByUrlsWithResp[int64](e.ctx, "/v1/n9e/event-persist", event)
if err != nil {
logger.Errorf("event:%s persist err:%v", event.Hash, err)
logger.Errorf("event:%+v persist err:%v", event, err)
e.dispatch.Astats.CounterRuleEvalErrorTotal.WithLabelValues(fmt.Sprintf("%v", event.DatasourceId), "persist_event", event.GroupName, fmt.Sprintf("%v", event.RuleId)).Inc()
}
return
@@ -138,7 +139,7 @@ func (e *Consumer) persist(event *models.AlertCurEvent) {
err := models.EventPersist(e.ctx, event)
if err != nil {
logger.Errorf("event:%s persist err:%v", event.Hash, err)
logger.Errorf("event%+v persist err:%v", event, err)
e.dispatch.Astats.CounterRuleEvalErrorTotal.WithLabelValues(fmt.Sprintf("%v", event.DatasourceId), "persist_event", event.GroupName, fmt.Sprintf("%v", event.RuleId)).Inc()
}
}
@@ -156,12 +157,12 @@ func (e *Consumer) queryRecoveryVal(event *models.AlertCurEvent) {
promql = strings.TrimSpace(promql)
if promql == "" {
logger.Warningf("alert_eval_%d datasource_%d promql is blank", event.RuleId, event.DatasourceId)
logger.Warningf("rule_eval:%s promql is blank", getKey(event))
return
}
if e.promClients.IsNil(event.DatasourceId) {
logger.Warningf("alert_eval_%d datasource_%d error reader client is nil", event.RuleId, event.DatasourceId)
logger.Warningf("rule_eval:%s error reader client is nil", getKey(event))
return
}
@@ -170,7 +171,7 @@ func (e *Consumer) queryRecoveryVal(event *models.AlertCurEvent) {
var warnings promsdk.Warnings
value, warnings, err := readerClient.Query(e.ctx.Ctx, promql, time.Now())
if err != nil {
logger.Errorf("alert_eval_%d datasource_%d promql:%s, error:%v", event.RuleId, event.DatasourceId, promql, err)
logger.Errorf("rule_eval:%s promql:%s, error:%v", getKey(event), promql, err)
event.AnnotationsJSON["recovery_promql_error"] = fmt.Sprintf("promql:%s error:%v", promql, err)
b, err := json.Marshal(event.AnnotationsJSON)
@@ -184,12 +185,12 @@ func (e *Consumer) queryRecoveryVal(event *models.AlertCurEvent) {
}
if len(warnings) > 0 {
logger.Errorf("alert_eval_%d datasource_%d promql:%s, warnings:%v", event.RuleId, event.DatasourceId, promql, warnings)
logger.Errorf("rule_eval:%s promql:%s, warnings:%v", getKey(event), promql, warnings)
}
anomalyPoints := models.ConvertAnomalyPoints(value)
if len(anomalyPoints) == 0 {
logger.Warningf("alert_eval_%d datasource_%d promql:%s, result is empty", event.RuleId, event.DatasourceId, promql)
logger.Warningf("rule_eval:%s promql:%s, result is empty", getKey(event), promql)
event.AnnotationsJSON["recovery_promql_error"] = fmt.Sprintf("promql:%s error:%s", promql, "result is empty")
} else {
event.AnnotationsJSON["recovery_value"] = fmt.Sprintf("%v", anomalyPoints[0].Value)
@@ -204,3 +205,6 @@ func (e *Consumer) queryRecoveryVal(event *models.AlertCurEvent) {
}
}
func getKey(event *models.AlertCurEvent) string {
return common.RuleKey(event.DatasourceId, event.RuleId)
}

View File

@@ -16,7 +16,6 @@ import (
"github.com/ccfos/nightingale/v6/alert/astats"
"github.com/ccfos/nightingale/v6/alert/common"
"github.com/ccfos/nightingale/v6/alert/pipeline"
"github.com/ccfos/nightingale/v6/alert/pipeline/engine"
"github.com/ccfos/nightingale/v6/alert/sender"
"github.com/ccfos/nightingale/v6/memsto"
"github.com/ccfos/nightingale/v6/models"
@@ -171,7 +170,7 @@ func (e *Dispatch) HandleEventWithNotifyRule(eventOrigin *models.AlertCurEvent)
// 深拷贝新的 event避免并发修改 event 冲突
eventCopy := eventOrigin.DeepCopy()
logger.Infof("notify rule ids: %v, event: %s", notifyRuleId, eventCopy.Hash)
logger.Infof("notify rule ids: %v, event: %+v", notifyRuleId, eventCopy)
notifyRule := e.notifyRuleCache.Get(notifyRuleId)
if notifyRule == nil {
continue
@@ -185,7 +184,7 @@ func (e *Dispatch) HandleEventWithNotifyRule(eventOrigin *models.AlertCurEvent)
eventCopy = HandleEventPipeline(notifyRule.PipelineConfigs, eventOrigin, eventCopy, e.eventProcessorCache, e.ctx, notifyRuleId, "notify_rule")
if ShouldSkipNotify(e.ctx, eventCopy, notifyRuleId) {
logger.Infof("notify_id: %d, event:%s, should skip notify", notifyRuleId, eventCopy.Hash)
logger.Infof("notify_id: %d, event:%+v, should skip notify", notifyRuleId, eventCopy)
continue
}
@@ -193,7 +192,7 @@ func (e *Dispatch) HandleEventWithNotifyRule(eventOrigin *models.AlertCurEvent)
for i := range notifyRule.NotifyConfigs {
err := NotifyRuleMatchCheck(&notifyRule.NotifyConfigs[i], eventCopy)
if err != nil {
logger.Errorf("notify_id: %d, event:%s, channel_id:%d, template_id: %d, notify_config:%+v, err:%v", notifyRuleId, eventCopy.Hash, notifyRule.NotifyConfigs[i].ChannelID, notifyRule.NotifyConfigs[i].TemplateID, notifyRule.NotifyConfigs[i], err)
logger.Errorf("notify_id: %d, event:%+v, channel_id:%d, template_id: %d, notify_config:%+v, err:%v", notifyRuleId, eventCopy, notifyRule.NotifyConfigs[i].ChannelID, notifyRule.NotifyConfigs[i].TemplateID, notifyRule.NotifyConfigs[i], err)
continue
}
@@ -201,12 +200,12 @@ func (e *Dispatch) HandleEventWithNotifyRule(eventOrigin *models.AlertCurEvent)
messageTemplate := e.messageTemplateCache.Get(notifyRule.NotifyConfigs[i].TemplateID)
if notifyChannel == nil {
sender.NotifyRecord(e.ctx, []*models.AlertCurEvent{eventCopy}, notifyRuleId, fmt.Sprintf("notify_channel_id:%d", notifyRule.NotifyConfigs[i].ChannelID), "", "", errors.New("notify_channel not found"))
logger.Warningf("notify_id: %d, event:%s, channel_id:%d, template_id: %d, notify_channel not found", notifyRuleId, eventCopy.Hash, notifyRule.NotifyConfigs[i].ChannelID, notifyRule.NotifyConfigs[i].TemplateID)
logger.Warningf("notify_id: %d, event:%+v, channel_id:%d, template_id: %d, notify_channel not found", notifyRuleId, eventCopy, notifyRule.NotifyConfigs[i].ChannelID, notifyRule.NotifyConfigs[i].TemplateID)
continue
}
if notifyChannel.RequestType != "flashduty" && notifyChannel.RequestType != "pagerduty" && messageTemplate == nil {
logger.Warningf("notify_id: %d, channel_name: %v, event:%s, template_id: %d, message_template not found", notifyRuleId, notifyChannel.Ident, eventCopy.Hash, notifyRule.NotifyConfigs[i].TemplateID)
logger.Warningf("notify_id: %d, channel_name: %v, event:%+v, template_id: %d, message_template not found", notifyRuleId, notifyChannel.Ident, eventCopy, notifyRule.NotifyConfigs[i].TemplateID)
sender.NotifyRecord(e.ctx, []*models.AlertCurEvent{eventCopy}, notifyRuleId, notifyChannel.Name, "", "", errors.New("message_template not found"))
continue
@@ -232,8 +231,6 @@ func shouldSkipNotify(ctx *ctx.Context, event *models.AlertCurEvent, notifyRuleI
}
func HandleEventPipeline(pipelineConfigs []models.PipelineConfig, eventOrigin, event *models.AlertCurEvent, eventProcessorCache *memsto.EventProcessorCacheType, ctx *ctx.Context, id int64, from string) *models.AlertCurEvent {
workflowEngine := engine.NewWorkflowEngine(ctx)
for _, pipelineConfig := range pipelineConfigs {
if !pipelineConfig.Enable {
continue
@@ -241,37 +238,32 @@ func HandleEventPipeline(pipelineConfigs []models.PipelineConfig, eventOrigin, e
eventPipeline := eventProcessorCache.Get(pipelineConfig.PipelineId)
if eventPipeline == nil {
logger.Warningf("processor_by_%s_id:%d pipeline_id:%d, event pipeline not found, event: %s", from, id, pipelineConfig.PipelineId, event.Hash)
logger.Warningf("processor_by_%s_id:%d pipeline_id:%d, event pipeline not found, event: %+v", from, id, pipelineConfig.PipelineId, event)
continue
}
if !PipelineApplicable(eventPipeline, event) {
logger.Debugf("processor_by_%s_id:%d pipeline_id:%d, event pipeline not applicable, event: %s", from, id, pipelineConfig.PipelineId, event.Hash)
logger.Debugf("processor_by_%s_id:%d pipeline_id:%d, event pipeline not applicable, event: %+v", from, id, pipelineConfig.PipelineId, event)
continue
}
// 统一使用工作流引擎执行(兼容线性模式和工作流模式)
triggerCtx := &models.WorkflowTriggerContext{
Mode: models.TriggerModeEvent,
TriggerBy: from + "_" + strconv.FormatInt(id, 10),
}
processors := eventProcessorCache.GetProcessorsById(pipelineConfig.PipelineId)
for _, processor := range processors {
var res string
var err error
logger.Infof("processor_by_%s_id:%d pipeline_id:%d, before processor:%+v, event: %+v", from, id, pipelineConfig.PipelineId, processor, event)
event, res, err = processor.Process(ctx, event)
if event == nil {
logger.Infof("processor_by_%s_id:%d pipeline_id:%d, event dropped, after processor:%+v, event: %+v", from, id, pipelineConfig.PipelineId, processor, eventOrigin)
resultEvent, result, err := workflowEngine.Execute(eventPipeline, event, triggerCtx)
if err != nil {
logger.Errorf("processor_by_%s_id:%d pipeline_id:%d, pipeline execute error: %v", from, id, pipelineConfig.PipelineId, err)
continue
}
if resultEvent == nil {
logger.Infof("processor_by_%s_id:%d pipeline_id:%d, event dropped, event: %s", from, id, pipelineConfig.PipelineId, eventOrigin.Hash)
if from == "notify_rule" {
sender.NotifyRecord(ctx, []*models.AlertCurEvent{eventOrigin}, id, "", "", result.Message, fmt.Errorf("processor_by_%s_id:%d pipeline_id:%d, drop by pipeline", from, id, pipelineConfig.PipelineId))
if from == "notify_rule" {
// alert_rule 获取不到 eventId 记录没有意义
sender.NotifyRecord(ctx, []*models.AlertCurEvent{eventOrigin}, id, "", "", res, fmt.Errorf("processor_by_%s_id:%d pipeline_id:%d, drop by processor", from, id, pipelineConfig.PipelineId))
}
return nil
}
return nil
logger.Infof("processor_by_%s_id:%d pipeline_id:%d, after processor:%+v, event: %+v, res:%v, err:%v", from, id, pipelineConfig.PipelineId, processor, event, res, err)
}
event = resultEvent
logger.Infof("processor_by_%s_id:%d pipeline_id:%d, pipeline executed, status:%s, message:%s", from, id, pipelineConfig.PipelineId, result.Status, result.Message)
}
event.FE2DB()
@@ -290,18 +282,15 @@ func PipelineApplicable(pipeline *models.EventPipeline, event *models.AlertCurEv
tagMatch := true
if len(pipeline.LabelFilters) > 0 {
// Deep copy to avoid concurrent map writes on cached objects
labelFiltersCopy := make([]models.TagFilter, len(pipeline.LabelFilters))
copy(labelFiltersCopy, pipeline.LabelFilters)
for i := range labelFiltersCopy {
if labelFiltersCopy[i].Func == "" {
labelFiltersCopy[i].Func = labelFiltersCopy[i].Op
for i := range pipeline.LabelFilters {
if pipeline.LabelFilters[i].Func == "" {
pipeline.LabelFilters[i].Func = pipeline.LabelFilters[i].Op
}
}
tagFilters, err := models.ParseTagFilter(labelFiltersCopy)
tagFilters, err := models.ParseTagFilter(pipeline.LabelFilters)
if err != nil {
logger.Errorf("pipeline applicable failed to parse tag filter: %v event:%s pipeline:%+v", err, event.Hash, pipeline)
logger.Errorf("pipeline applicable failed to parse tag filter: %v event:%+v pipeline:%+v", err, event, pipeline)
return false
}
tagMatch = common.MatchTags(event.TagsMap, tagFilters)
@@ -309,13 +298,9 @@ func PipelineApplicable(pipeline *models.EventPipeline, event *models.AlertCurEv
attributesMatch := true
if len(pipeline.AttrFilters) > 0 {
// Deep copy to avoid concurrent map writes on cached objects
attrFiltersCopy := make([]models.TagFilter, len(pipeline.AttrFilters))
copy(attrFiltersCopy, pipeline.AttrFilters)
tagFilters, err := models.ParseTagFilter(attrFiltersCopy)
tagFilters, err := models.ParseTagFilter(pipeline.AttrFilters)
if err != nil {
logger.Errorf("pipeline applicable failed to parse tag filter: %v event:%s pipeline:%+v err:%v", tagFilters, event.Hash, pipeline, err)
logger.Errorf("pipeline applicable failed to parse tag filter: %v event:%+v pipeline:%+v err:%v", tagFilters, event, pipeline, err)
return false
}
@@ -394,18 +379,15 @@ func NotifyRuleMatchCheck(notifyConfig *models.NotifyConfig, event *models.Alert
tagMatch := true
if len(notifyConfig.LabelKeys) > 0 {
// Deep copy to avoid concurrent map writes on cached objects
labelKeysCopy := make([]models.TagFilter, len(notifyConfig.LabelKeys))
copy(labelKeysCopy, notifyConfig.LabelKeys)
for i := range labelKeysCopy {
if labelKeysCopy[i].Func == "" {
labelKeysCopy[i].Func = labelKeysCopy[i].Op
for i := range notifyConfig.LabelKeys {
if notifyConfig.LabelKeys[i].Func == "" {
notifyConfig.LabelKeys[i].Func = notifyConfig.LabelKeys[i].Op
}
}
tagFilters, err := models.ParseTagFilter(labelKeysCopy)
tagFilters, err := models.ParseTagFilter(notifyConfig.LabelKeys)
if err != nil {
logger.Errorf("notify send failed to parse tag filter: %v event:%s notify_config:%+v", err, event.Hash, notifyConfig)
logger.Errorf("notify send failed to parse tag filter: %v event:%+v notify_config:%+v", err, event, notifyConfig)
return fmt.Errorf("failed to parse tag filter: %v", err)
}
tagMatch = common.MatchTags(event.TagsMap, tagFilters)
@@ -417,13 +399,9 @@ func NotifyRuleMatchCheck(notifyConfig *models.NotifyConfig, event *models.Alert
attributesMatch := true
if len(notifyConfig.Attributes) > 0 {
// Deep copy to avoid concurrent map writes on cached objects
attributesCopy := make([]models.TagFilter, len(notifyConfig.Attributes))
copy(attributesCopy, notifyConfig.Attributes)
tagFilters, err := models.ParseTagFilter(attributesCopy)
tagFilters, err := models.ParseTagFilter(notifyConfig.Attributes)
if err != nil {
logger.Errorf("notify send failed to parse tag filter: %v event:%s notify_config:%+v err:%v", tagFilters, event.Hash, notifyConfig, err)
logger.Errorf("notify send failed to parse tag filter: %v event:%+v notify_config:%+v err:%v", tagFilters, event, notifyConfig, err)
return fmt.Errorf("failed to parse tag filter: %v", err)
}
@@ -434,7 +412,7 @@ func NotifyRuleMatchCheck(notifyConfig *models.NotifyConfig, event *models.Alert
return fmt.Errorf("event attributes not match attributes filter")
}
logger.Infof("notify send timeMatch:%v severityMatch:%v tagMatch:%v attributesMatch:%v event:%s notify_config:%+v", timeMatch, severityMatch, tagMatch, attributesMatch, event.Hash, notifyConfig)
logger.Infof("notify send timeMatch:%v severityMatch:%v tagMatch:%v attributesMatch:%v event:%+v notify_config:%+v", timeMatch, severityMatch, tagMatch, attributesMatch, event, notifyConfig)
return nil
}
@@ -546,8 +524,8 @@ func SendNotifyRuleMessage(ctx *ctx.Context, userCache *memsto.UserCacheType, us
for i := range flashDutyChannelIDs {
start := time.Now()
respBody, err := notifyChannel.SendFlashDuty(events, flashDutyChannelIDs[i], notifyChannelCache.GetHttpClient(notifyChannel.ID))
respBody = fmt.Sprintf("send_time: %s duration: %d ms %s", time.Now().Format("2006-01-02 15:04:05"), time.Since(start).Milliseconds(), respBody)
logger.Infof("duty_sender notify_id: %d, channel_name: %v, event:%s, IntegrationUrl: %v dutychannel_id: %v, respBody: %v, err: %v", notifyRuleId, notifyChannel.Name, events[0].Hash, notifyChannel.RequestConfig.FlashDutyRequestConfig.IntegrationUrl, flashDutyChannelIDs[i], respBody, err)
respBody = fmt.Sprintf("duration: %d ms %s", time.Since(start).Milliseconds(), respBody)
logger.Infof("duty_sender notify_id: %d, channel_name: %v, event:%+v, IntegrationUrl: %v dutychannel_id: %v, respBody: %v, err: %v", notifyRuleId, notifyChannel.Name, events[0], notifyChannel.RequestConfig.FlashDutyRequestConfig.IntegrationUrl, flashDutyChannelIDs[i], respBody, err)
sender.NotifyRecord(ctx, events, notifyRuleId, notifyChannel.Name, strconv.FormatInt(flashDutyChannelIDs[i], 10), respBody, err)
}
@@ -555,8 +533,8 @@ func SendNotifyRuleMessage(ctx *ctx.Context, userCache *memsto.UserCacheType, us
for _, routingKey := range pagerdutyRoutingKeys {
start := time.Now()
respBody, err := notifyChannel.SendPagerDuty(events, routingKey, siteInfo.SiteUrl, notifyChannelCache.GetHttpClient(notifyChannel.ID))
respBody = fmt.Sprintf("send_time: %s duration: %d ms %s", time.Now().Format("2006-01-02 15:04:05"), time.Since(start).Milliseconds(), respBody)
logger.Infof("pagerduty_sender notify_id: %d, channel_name: %v, event:%s, respBody: %v, err: %v", notifyRuleId, notifyChannel.Name, events[0].Hash, respBody, err)
respBody = fmt.Sprintf("duration: %d ms %s", time.Since(start).Milliseconds(), respBody)
logger.Infof("pagerduty_sender notify_id: %d, channel_name: %v, event:%+v, respBody: %v, err: %v", notifyRuleId, notifyChannel.Name, events[0], respBody, err)
sender.NotifyRecord(ctx, events, notifyRuleId, notifyChannel.Name, "", respBody, err)
}
@@ -586,11 +564,11 @@ func SendNotifyRuleMessage(ctx *ctx.Context, userCache *memsto.UserCacheType, us
case "script":
start := time.Now()
target, res, err := notifyChannel.SendScript(events, tplContent, customParams, sendtos)
res = fmt.Sprintf("send_time: %s duration: %d ms %s", time.Now().Format("2006-01-02 15:04:05"), time.Since(start).Milliseconds(), res)
logger.Infof("script_sender notify_id: %d, channel_name: %v, event:%s, tplContent:%s, customParams:%v, target:%s, res:%s, err:%v", notifyRuleId, notifyChannel.Name, events[0].Hash, tplContent, customParams, target, res, err)
res = fmt.Sprintf("duration: %d ms %s", time.Since(start).Milliseconds(), res)
logger.Infof("script_sender notify_id: %d, channel_name: %v, event:%+v, tplContent:%s, customParams:%v, target:%s, res:%s, err:%v", notifyRuleId, notifyChannel.Name, events[0], tplContent, customParams, target, res, err)
sender.NotifyRecord(ctx, events, notifyRuleId, notifyChannel.Name, target, res, err)
default:
logger.Warningf("notify_id: %d, channel_name: %v, event:%s send type not found", notifyRuleId, notifyChannel.Name, events[0].Hash)
logger.Warningf("notify_id: %d, channel_name: %v, event:%+v send type not found", notifyRuleId, notifyChannel.Name, events[0])
}
}
@@ -734,7 +712,7 @@ func (e *Dispatch) Send(rule *models.AlertRule, event *models.AlertCurEvent, not
event = msgCtx.Events[0]
}
logger.Debugf("send to channel:%s event:%s users:%+v", channel, event.Hash, msgCtx.Users)
logger.Debugf("send to channel:%s event:%+v users:%+v", channel, event, msgCtx.Users)
s.Send(msgCtx)
}
}
@@ -833,12 +811,12 @@ func (e *Dispatch) HandleIbex(rule *models.AlertRule, event *models.AlertCurEven
if len(t.Host) == 0 {
sender.CallIbex(e.ctx, t.TplId, event.TargetIdent,
e.taskTplsCache, e.targetCache, e.userCache, event, "")
e.taskTplsCache, e.targetCache, e.userCache, event)
continue
}
for _, host := range t.Host {
sender.CallIbex(e.ctx, t.TplId, host,
e.taskTplsCache, e.targetCache, e.userCache, event, "")
e.taskTplsCache, e.targetCache, e.userCache, event)
}
}
}

View File

@@ -18,11 +18,11 @@ func LogEvent(event *models.AlertCurEvent, location string, err ...error) {
}
logger.Infof(
"alert_eval_%d event(%s %s) %s: sub_id:%d notify_rule_ids:%v cluster:%s %v%s@%d last_eval_time:%d %s",
event.RuleId,
"event(%s %s) %s: rule_id=%d sub_id:%d notify_rule_ids:%v cluster:%s %v%s@%d last_eval_time:%d %s",
event.Hash,
status,
location,
event.RuleId,
event.SubRuleId,
event.NotifyRuleIds,
event.Cluster,

View File

@@ -101,17 +101,17 @@ func (s *Scheduler) syncAlertRules() {
}
ds := s.datasourceCache.GetById(dsId)
if ds == nil {
logger.Debugf("alert_eval_%d datasource %d not found", rule.Id, dsId)
logger.Debugf("datasource %d not found", dsId)
continue
}
if ds.PluginType != ruleType {
logger.Debugf("alert_eval_%d datasource %d category is %s not %s", rule.Id, dsId, ds.PluginType, ruleType)
logger.Debugf("datasource %d category is %s not %s", dsId, ds.PluginType, ruleType)
continue
}
if ds.Status != "enabled" {
logger.Debugf("alert_eval_%d datasource %d status is %s", rule.Id, dsId, ds.Status)
logger.Debugf("datasource %d status is %s", dsId, ds.Status)
continue
}
processor := process.NewProcessor(s.aconf.Heartbeat.EngineName, rule, dsId, s.alertRuleCache, s.targetCache, s.targetsOfAlertRuleCache, s.busiGroupCache, s.alertMuteCache, s.datasourceCache, s.ctx, s.stats)
@@ -134,12 +134,12 @@ func (s *Scheduler) syncAlertRules() {
for _, dsId := range dsIds {
ds := s.datasourceCache.GetById(dsId)
if ds == nil {
logger.Debugf("alert_eval_%d datasource %d not found", rule.Id, dsId)
logger.Debugf("datasource %d not found", dsId)
continue
}
if ds.Status != "enabled" {
logger.Debugf("alert_eval_%d datasource %d status is %s", rule.Id, dsId, ds.Status)
logger.Debugf("datasource %d status is %s", dsId, ds.Status)
continue
}
processor := process.NewProcessor(s.aconf.Heartbeat.EngineName, rule, dsId, s.alertRuleCache, s.targetCache, s.targetsOfAlertRuleCache, s.busiGroupCache, s.alertMuteCache, s.datasourceCache, s.ctx, s.stats)

View File

@@ -11,7 +11,6 @@ import (
"strconv"
"strings"
"sync"
"text/template"
"time"
"github.com/ccfos/nightingale/v6/alert/astats"
@@ -25,7 +24,6 @@ import (
"github.com/ccfos/nightingale/v6/pkg/poster"
promsdk "github.com/ccfos/nightingale/v6/pkg/prom"
promql2 "github.com/ccfos/nightingale/v6/pkg/promql"
"github.com/ccfos/nightingale/v6/pkg/tplx"
"github.com/ccfos/nightingale/v6/pkg/unit"
"github.com/ccfos/nightingale/v6/prom"
"github.com/prometheus/common/model"
@@ -62,7 +60,6 @@ const (
CHECK_QUERY = "check_query_config"
GET_CLIENT = "get_client"
QUERY_DATA = "query_data"
EXEC_TEMPLATE = "exec_template"
)
const (
@@ -109,7 +106,7 @@ func NewAlertRuleWorker(rule *models.AlertRule, datasourceId int64, Processor *p
})
if err != nil {
logger.Errorf("alert_eval_%d datasource_%d add cron pattern error: %v", arw.Rule.Id, arw.DatasourceId, err)
logger.Errorf("alert rule %s add cron pattern error: %v", arw.Key(), err)
}
Processor.ScheduleEntry = arw.Scheduler.Entry(entryID)
@@ -152,9 +149,9 @@ func (arw *AlertRuleWorker) Eval() {
defer func() {
if len(message) == 0 {
logger.Infof("alert_eval_%d datasource_%d finished, duration:%v", arw.Rule.Id, arw.DatasourceId, time.Since(begin))
logger.Infof("rule_eval:%s finished, duration:%v", arw.Key(), time.Since(begin))
} else {
logger.Warningf("alert_eval_%d datasource_%d finished, duration:%v, message:%s", arw.Rule.Id, arw.DatasourceId, time.Since(begin), message)
logger.Infof("rule_eval:%s finished, duration:%v, message:%s", arw.Key(), time.Since(begin), message)
}
}()
@@ -189,7 +186,8 @@ func (arw *AlertRuleWorker) Eval() {
}
if err != nil {
message = fmt.Sprintf("failed to get anomaly points: %v", err)
logger.Errorf("rule_eval:%s get anomaly point err:%s", arw.Key(), err.Error())
message = "failed to get anomaly points"
return
}
@@ -236,7 +234,7 @@ func (arw *AlertRuleWorker) Eval() {
}
func (arw *AlertRuleWorker) Stop() {
logger.Infof("alert_eval_%d datasource_%d stopped", arw.Rule.Id, arw.DatasourceId)
logger.Infof("rule_eval:%s stopped", arw.Key())
close(arw.Quit)
c := arw.Scheduler.Stop()
<-c.Done()
@@ -252,7 +250,7 @@ func (arw *AlertRuleWorker) GetPromAnomalyPoint(ruleConfig string) ([]models.Ano
var rule *models.PromRuleConfig
if err := json.Unmarshal([]byte(ruleConfig), &rule); err != nil {
logger.Errorf("alert_eval_%d datasource_%d rule_config:%s, error:%v", arw.Rule.Id, arw.DatasourceId, ruleConfig, err)
logger.Errorf("rule_eval:%s rule_config:%s, error:%v", arw.Key(), ruleConfig, err)
arw.Processor.Stats.CounterRuleEvalErrorTotal.WithLabelValues(fmt.Sprintf("%v", arw.Processor.DatasourceId()), GET_RULE_CONFIG, arw.Processor.BusiGroupCache.GetNameByBusiGroupId(arw.Rule.GroupId), fmt.Sprintf("%v", arw.Rule.Id)).Inc()
arw.Processor.Stats.GaugeQuerySeriesCount.WithLabelValues(
fmt.Sprintf("%v", arw.Rule.Id),
@@ -263,7 +261,7 @@ func (arw *AlertRuleWorker) GetPromAnomalyPoint(ruleConfig string) ([]models.Ano
}
if rule == nil {
logger.Errorf("alert_eval_%d datasource_%d rule_config:%s, error:rule is nil", arw.Rule.Id, arw.DatasourceId, ruleConfig)
logger.Errorf("rule_eval:%s rule_config:%s, error:rule is nil", arw.Key(), ruleConfig)
arw.Processor.Stats.CounterRuleEvalErrorTotal.WithLabelValues(fmt.Sprintf("%v", arw.Processor.DatasourceId()), GET_RULE_CONFIG, arw.Processor.BusiGroupCache.GetNameByBusiGroupId(arw.Rule.GroupId), fmt.Sprintf("%v", arw.Rule.Id)).Inc()
arw.Processor.Stats.GaugeQuerySeriesCount.WithLabelValues(
fmt.Sprintf("%v", arw.Rule.Id),
@@ -278,7 +276,7 @@ func (arw *AlertRuleWorker) GetPromAnomalyPoint(ruleConfig string) ([]models.Ano
readerClient := arw.PromClients.GetCli(arw.DatasourceId)
if readerClient == nil {
logger.Warningf("alert_eval_%d datasource_%d error reader client is nil", arw.Rule.Id, arw.DatasourceId)
logger.Warningf("rule_eval:%s error reader client is nil", arw.Key())
arw.Processor.Stats.CounterRuleEvalErrorTotal.WithLabelValues(fmt.Sprintf("%v", arw.Processor.DatasourceId()), GET_CLIENT, arw.Processor.BusiGroupCache.GetNameByBusiGroupId(arw.Rule.GroupId), fmt.Sprintf("%v", arw.Rule.Id)).Inc()
arw.Processor.Stats.GaugeQuerySeriesCount.WithLabelValues(
fmt.Sprintf("%v", arw.Rule.Id),
@@ -314,13 +312,13 @@ func (arw *AlertRuleWorker) GetPromAnomalyPoint(ruleConfig string) ([]models.Ano
// 无变量
promql := strings.TrimSpace(query.PromQl)
if promql == "" {
logger.Warningf("alert_eval_%d datasource_%d promql is blank", arw.Rule.Id, arw.DatasourceId)
logger.Warningf("rule_eval:%s promql is blank", arw.Key())
arw.Processor.Stats.CounterRuleEvalErrorTotal.WithLabelValues(fmt.Sprintf("%v", arw.Processor.DatasourceId()), CHECK_QUERY, arw.Processor.BusiGroupCache.GetNameByBusiGroupId(arw.Rule.GroupId), fmt.Sprintf("%v", arw.Rule.Id)).Inc()
continue
}
if arw.PromClients.IsNil(arw.DatasourceId) {
logger.Warningf("alert_eval_%d datasource_%d error reader client is nil", arw.Rule.Id, arw.DatasourceId)
logger.Warningf("rule_eval:%s error reader client is nil", arw.Key())
arw.Processor.Stats.CounterRuleEvalErrorTotal.WithLabelValues(fmt.Sprintf("%v", arw.Processor.DatasourceId()), GET_CLIENT, arw.Processor.BusiGroupCache.GetNameByBusiGroupId(arw.Rule.GroupId), fmt.Sprintf("%v", arw.Rule.Id)).Inc()
continue
}
@@ -329,7 +327,7 @@ func (arw *AlertRuleWorker) GetPromAnomalyPoint(ruleConfig string) ([]models.Ano
arw.Processor.Stats.CounterQueryDataTotal.WithLabelValues(fmt.Sprintf("%d", arw.DatasourceId), fmt.Sprintf("%d", arw.Rule.Id)).Inc()
value, warnings, err := readerClient.Query(context.Background(), promql, time.Now())
if err != nil {
logger.Errorf("alert_eval_%d datasource_%d promql:%s, error:%v", arw.Rule.Id, arw.DatasourceId, promql, err)
logger.Errorf("rule_eval:%s promql:%s, error:%v", arw.Key(), promql, err)
arw.Processor.Stats.CounterQueryDataErrorTotal.WithLabelValues(fmt.Sprintf("%d", arw.DatasourceId)).Inc()
arw.Processor.Stats.CounterRuleEvalErrorTotal.WithLabelValues(fmt.Sprintf("%v", arw.Processor.DatasourceId()), QUERY_DATA, arw.Processor.BusiGroupCache.GetNameByBusiGroupId(arw.Rule.GroupId), fmt.Sprintf("%v", arw.Rule.Id)).Inc()
arw.Processor.Stats.GaugeQuerySeriesCount.WithLabelValues(
@@ -341,12 +339,12 @@ func (arw *AlertRuleWorker) GetPromAnomalyPoint(ruleConfig string) ([]models.Ano
}
if len(warnings) > 0 {
logger.Errorf("alert_eval_%d datasource_%d promql:%s, warnings:%v", arw.Rule.Id, arw.DatasourceId, promql, warnings)
logger.Errorf("rule_eval:%s promql:%s, warnings:%v", arw.Key(), promql, warnings)
arw.Processor.Stats.CounterQueryDataErrorTotal.WithLabelValues(fmt.Sprintf("%d", arw.DatasourceId)).Inc()
arw.Processor.Stats.CounterRuleEvalErrorTotal.WithLabelValues(fmt.Sprintf("%v", arw.Processor.DatasourceId()), QUERY_DATA, arw.Processor.BusiGroupCache.GetNameByBusiGroupId(arw.Rule.GroupId), fmt.Sprintf("%v", arw.Rule.Id)).Inc()
}
logger.Infof("alert_eval_%d datasource_%d query:%+v, value:%v", arw.Rule.Id, arw.DatasourceId, query, value)
logger.Infof("rule_eval:%s query:%+v, value:%v", arw.Key(), query, value)
points := models.ConvertAnomalyPoints(value)
arw.Processor.Stats.GaugeQuerySeriesCount.WithLabelValues(
fmt.Sprintf("%v", arw.Rule.Id),
@@ -388,10 +386,21 @@ type sample struct {
// 每个节点先查询无参数的 query, 即 mem_used_percent{} > curVal, 得到满足值变量的所有结果
// 结果中有满足本节点参数变量的值,加入异常点列表
// 参数变量的值不满足的组合,需要覆盖上层筛选中产生的异常点
// VarFillingAfterQuery 先查询再过滤变量,效率较高,但无法处理有聚合函数导致标签丢失的情况
//
// 修复说明 (Issue #2971):
// 原实现中使用参数变量组合作为 key 存储异常点,导致同一参数值下的多条时序数据互相覆盖。
// 修复方案:
// 1. 同一层内:使用完整的标签 hash 作为 key避免不同时序数据覆盖
// 2. 跨层级时:子层按参数变量组合前缀删除父层的所有相关告警,实现子筛选覆盖父筛选
func (arw *AlertRuleWorker) VarFillingAfterQuery(query models.PromQuery, readerClient promsdk.API) []models.AnomalyPoint {
varToLabel := ExtractVarMapping(query.PromQl)
fullQuery := removeVal(query.PromQl)
// 存储所有的异常点key 为参数变量的组合,可以实现子筛选对上一层筛选的覆盖
// 存储所有的异常点
// key 格式: {参数变量组合}@@{标签hash}
// 这样可以:
// 1. 同层内不同时序数据有不同的 key标签hash不同
// 2. 跨层时可以按参数变量组合前缀删除父层的告警
anomalyPointsMap := make(map[string]models.AnomalyPoint)
// 统一变量配置格式
VarConfigForCalc := &models.ChildVarConfig{
@@ -418,7 +427,16 @@ func (arw *AlertRuleWorker) VarFillingAfterQuery(query models.PromQuery, readerC
})
// 遍历变量配置链表
curNode := VarConfigForCalc
isFirstLayer := true
for curNode != nil {
// 当前层收集到的所有异常点,按参数组合分组
// key: 参数变量组合, value: 该组合下的所有异常点及其完整key
type pointWithKey struct {
point models.AnomalyPoint
fullKey string
}
currentLayerPointsByParam := make(map[string][]pointWithKey)
for _, param := range curNode.ParamVal {
// curQuery 当前节点的无参数 query用于时序库查询
curQuery := fullQuery
@@ -440,14 +458,14 @@ func (arw *AlertRuleWorker) VarFillingAfterQuery(query models.PromQuery, readerC
arw.Processor.Stats.CounterQueryDataTotal.WithLabelValues(fmt.Sprintf("%d", arw.DatasourceId), fmt.Sprintf("%d", arw.Rule.Id)).Inc()
value, _, err := readerClient.Query(context.Background(), curQuery, time.Now())
if err != nil {
logger.Errorf("alert_eval_%d datasource_%d promql:%s, error:%v", arw.Rule.Id, arw.DatasourceId, curQuery, err)
logger.Errorf("rule_eval:%s, promql:%s, error:%v", arw.Key(), curQuery, err)
continue
}
seqVals := getSamples(value)
// 得到参数变量的所有组合
paramPermutation, err := arw.getParamPermutation(param, ParamKeys, varToLabel, query.PromQl, readerClient)
if err != nil {
logger.Errorf("alert_eval_%d datasource_%d paramPermutation error:%v", arw.Rule.Id, arw.DatasourceId, err)
logger.Errorf("rule_eval:%s, paramPermutation error:%v", arw.Key(), err)
continue
}
// 判断哪些参数值符合条件
@@ -460,8 +478,13 @@ func (arw *AlertRuleWorker) VarFillingAfterQuery(query models.PromQuery, readerC
curRealQuery = fillVar(curRealQuery, paramKey, val)
}
if _, ok := paramPermutation[strings.Join(cur, JoinMark)]; ok {
anomalyPointsMap[strings.Join(cur, JoinMark)] = models.AnomalyPoint{
paramKey := strings.Join(cur, JoinMark)
if _, ok := paramPermutation[paramKey]; ok {
// 计算标签 hash确保不同的时序数据有不同的 key
tagHash := hash.GetTagHash(seqVals[i].Metric)
fullKey := paramKey + JoinMark + fmt.Sprintf("%d", tagHash)
point := models.AnomalyPoint{
Key: seqVals[i].Metric.String(),
Timestamp: seqVals[i].Timestamp.Unix(),
Value: float64(seqVals[i].Value),
@@ -469,17 +492,44 @@ func (arw *AlertRuleWorker) VarFillingAfterQuery(query models.PromQuery, readerC
Severity: query.Severity,
Query: curRealQuery,
}
// 生成异常点后,删除该参数组合
delete(paramPermutation, strings.Join(cur, JoinMark))
currentLayerPointsByParam[paramKey] = append(currentLayerPointsByParam[paramKey], pointWithKey{point: point, fullKey: fullKey})
}
}
// 剩余的参数组合为本层筛选不产生异常点的组合,需要覆盖上层筛选中产生的异常点
for k, _ := range paramPermutation {
delete(anomalyPointsMap, k)
// 初始化空的参数组合(用于子层覆盖父层的场景)
for paramKey := range paramPermutation {
if _, exists := currentLayerPointsByParam[paramKey]; !exists {
currentLayerPointsByParam[paramKey] = []pointWithKey{}
}
}
}
// 处理当前层的结果
for paramKey, pointsWithKeys := range currentLayerPointsByParam {
if !isFirstLayer {
// 非首层(子层):先删除父层中该参数组合的所有告警
// 这实现了 issue #2433 要求的子筛选覆盖父筛选功能
keysToDelete := make([]string, 0)
for k := range anomalyPointsMap {
// key 格式: {参数组合}@@{标签hash}
// 检查是否以当前参数组合开头(后面跟着 JoinMark
if strings.HasPrefix(k, paramKey+JoinMark) {
keysToDelete = append(keysToDelete, k)
}
}
for _, k := range keysToDelete {
delete(anomalyPointsMap, k)
}
}
// 添加当前层的所有异常点
for _, pwk := range pointsWithKeys {
anomalyPointsMap[pwk.fullKey] = pwk.point
}
}
curNode = curNode.ChildVarConfigs
isFirstLayer = false
}
anomalyPoints := make([]models.AnomalyPoint, 0)
@@ -580,14 +630,14 @@ func (arw *AlertRuleWorker) getParamPermutation(paramVal map[string]models.Param
case "host":
hostIdents, err := arw.getHostIdents(paramQuery)
if err != nil {
logger.Errorf("alert_eval_%d datasource_%d fail to get host idents, error:%v", arw.Rule.Id, arw.DatasourceId, err)
logger.Errorf("rule_eval:%s, fail to get host idents, error:%v", arw.Key(), err)
break
}
params = hostIdents
case "device":
deviceIdents, err := arw.getDeviceIdents(paramQuery)
if err != nil {
logger.Errorf("alert_eval_%d datasource_%d fail to get device idents, error:%v", arw.Rule.Id, arw.DatasourceId, err)
logger.Errorf("rule_eval:%s, fail to get device idents, error:%v", arw.Key(), err)
break
}
params = deviceIdents
@@ -596,12 +646,12 @@ func (arw *AlertRuleWorker) getParamPermutation(paramVal map[string]models.Param
var query []string
err := json.Unmarshal(q, &query)
if err != nil {
logger.Errorf("alert_eval_%d datasource_%d query:%s fail to unmarshalling into string slice, error:%v", arw.Rule.Id, arw.DatasourceId, paramQuery.Query, err)
logger.Errorf("query:%s fail to unmarshalling into string slice, error:%v", paramQuery.Query, err)
}
if len(query) == 0 {
paramsKeyAllLabel, err := getParamKeyAllLabel(varToLabel[paramKey], originPromql, readerClient, arw.DatasourceId, arw.Rule.Id, arw.Processor.Stats)
if err != nil {
logger.Errorf("alert_eval_%d datasource_%d fail to getParamKeyAllLabel, error:%v query:%s", arw.Rule.Id, arw.DatasourceId, err, paramQuery.Query)
logger.Errorf("rule_eval:%s, fail to getParamKeyAllLabel, error:%v query:%s", arw.Key(), err, paramQuery.Query)
}
params = paramsKeyAllLabel
} else {
@@ -615,7 +665,7 @@ func (arw *AlertRuleWorker) getParamPermutation(paramVal map[string]models.Param
return nil, fmt.Errorf("param key: %s, params is empty", paramKey)
}
logger.Infof("alert_eval_%d datasource_%d paramKey: %s, params: %v", arw.Rule.Id, arw.DatasourceId, paramKey, params)
logger.Infof("rule_eval:%s paramKey: %s, params: %v", arw.Key(), paramKey, params)
paramMap[paramKey] = params
}
@@ -766,7 +816,7 @@ func (arw *AlertRuleWorker) GetHostAnomalyPoint(ruleConfig string) ([]models.Ano
var rule *models.HostRuleConfig
if err := json.Unmarshal([]byte(ruleConfig), &rule); err != nil {
logger.Errorf("alert_eval_%d datasource_%d rule_config:%s, error:%v", arw.Rule.Id, arw.DatasourceId, ruleConfig, err)
logger.Errorf("rule_eval:%s rule_config:%s, error:%v", arw.Key(), ruleConfig, err)
arw.Processor.Stats.CounterRuleEvalErrorTotal.WithLabelValues(fmt.Sprintf("%v", arw.Processor.DatasourceId()), GET_RULE_CONFIG, arw.Processor.BusiGroupCache.GetNameByBusiGroupId(arw.Rule.GroupId), fmt.Sprintf("%v", arw.Rule.Id)).Inc()
arw.Processor.Stats.GaugeQuerySeriesCount.WithLabelValues(
fmt.Sprintf("%v", arw.Rule.Id),
@@ -777,7 +827,7 @@ func (arw *AlertRuleWorker) GetHostAnomalyPoint(ruleConfig string) ([]models.Ano
}
if rule == nil {
logger.Errorf("alert_eval_%d datasource_%d rule_config:%s, error:rule is nil", arw.Rule.Id, arw.DatasourceId, ruleConfig)
logger.Errorf("rule_eval:%s rule_config:%s, error:rule is nil", arw.Key(), ruleConfig)
arw.Processor.Stats.CounterRuleEvalErrorTotal.WithLabelValues(fmt.Sprintf("%v", arw.Processor.DatasourceId()), GET_RULE_CONFIG, arw.Processor.BusiGroupCache.GetNameByBusiGroupId(arw.Rule.GroupId), fmt.Sprintf("%v", arw.Rule.Id)).Inc()
arw.Processor.Stats.GaugeQuerySeriesCount.WithLabelValues(
fmt.Sprintf("%v", arw.Rule.Id),
@@ -800,7 +850,7 @@ func (arw *AlertRuleWorker) GetHostAnomalyPoint(ruleConfig string) ([]models.Ano
// 如果是中心节点, 将不再上报数据的主机 engineName 为空的机器,也加入到 targets 中
missEngineIdents, exists = arw.Processor.TargetsOfAlertRuleCache.Get("", arw.Rule.Id)
if !exists {
logger.Debugf("alert_eval_%d datasource_%d targets not found engineName:%s", arw.Rule.Id, arw.DatasourceId, arw.Processor.EngineName)
logger.Debugf("rule_eval:%s targets not found engineName:%s", arw.Key(), arw.Processor.EngineName)
arw.Processor.Stats.CounterRuleEvalErrorTotal.WithLabelValues(fmt.Sprintf("%v", arw.Processor.DatasourceId()), QUERY_DATA, arw.Processor.BusiGroupCache.GetNameByBusiGroupId(arw.Rule.GroupId), fmt.Sprintf("%v", arw.Rule.Id)).Inc()
}
}
@@ -808,7 +858,7 @@ func (arw *AlertRuleWorker) GetHostAnomalyPoint(ruleConfig string) ([]models.Ano
engineIdents, exists = arw.Processor.TargetsOfAlertRuleCache.Get(arw.Processor.EngineName, arw.Rule.Id)
if !exists {
logger.Warningf("alert_eval_%d datasource_%d targets not found engineName:%s", arw.Rule.Id, arw.DatasourceId, arw.Processor.EngineName)
logger.Warningf("rule_eval:%s targets not found engineName:%s", arw.Key(), arw.Processor.EngineName)
arw.Processor.Stats.CounterRuleEvalErrorTotal.WithLabelValues(fmt.Sprintf("%v", arw.Processor.DatasourceId()), QUERY_DATA, arw.Processor.BusiGroupCache.GetNameByBusiGroupId(arw.Rule.GroupId), fmt.Sprintf("%v", arw.Rule.Id)).Inc()
}
idents = append(idents, engineIdents...)
@@ -835,7 +885,7 @@ func (arw *AlertRuleWorker) GetHostAnomalyPoint(ruleConfig string) ([]models.Ano
"",
).Set(float64(len(missTargets)))
logger.Debugf("alert_eval_%d datasource_%d missTargets:%v", arw.Rule.Id, arw.DatasourceId, missTargets)
logger.Debugf("rule_eval:%s missTargets:%v", arw.Key(), missTargets)
targets := arw.Processor.TargetCache.Gets(missTargets)
for _, target := range targets {
m := make(map[string]string)
@@ -844,7 +894,7 @@ func (arw *AlertRuleWorker) GetHostAnomalyPoint(ruleConfig string) ([]models.Ano
}
m["ident"] = target.Ident
lst = append(lst, models.NewAnomalyPoint(trigger.Type, m, now, float64(now-target.BeatTime), trigger.Severity))
lst = append(lst, models.NewAnomalyPoint(trigger.Type, m, now, float64(now-target.UpdateAt), trigger.Severity))
}
case "offset":
idents, exists := arw.Processor.TargetsOfAlertRuleCache.Get(arw.Processor.EngineName, arw.Rule.Id)
@@ -854,7 +904,7 @@ func (arw *AlertRuleWorker) GetHostAnomalyPoint(ruleConfig string) ([]models.Ano
fmt.Sprintf("%v", arw.Processor.DatasourceId()),
"",
).Set(0)
logger.Warningf("alert_eval_%d datasource_%d targets not found", arw.Rule.Id, arw.DatasourceId)
logger.Warningf("rule_eval:%s targets not found", arw.Key())
arw.Processor.Stats.CounterRuleEvalErrorTotal.WithLabelValues(fmt.Sprintf("%v", arw.Processor.DatasourceId()), QUERY_DATA, arw.Processor.BusiGroupCache.GetNameByBusiGroupId(arw.Rule.GroupId), fmt.Sprintf("%v", arw.Rule.Id)).Inc()
continue
}
@@ -873,7 +923,7 @@ func (arw *AlertRuleWorker) GetHostAnomalyPoint(ruleConfig string) ([]models.Ano
continue
}
if target, exists := targetMap[ident]; exists {
if now-target.BeatTime > 120 {
if now-target.UpdateAt > 120 {
// means this target is not a active host, do not check offset
continue
}
@@ -885,7 +935,7 @@ func (arw *AlertRuleWorker) GetHostAnomalyPoint(ruleConfig string) ([]models.Ano
}
}
logger.Debugf("alert_eval_%d datasource_%d offsetIdents:%v", arw.Rule.Id, arw.DatasourceId, offsetIdents)
logger.Debugf("rule_eval:%s offsetIdents:%v", arw.Key(), offsetIdents)
arw.Processor.Stats.GaugeQuerySeriesCount.WithLabelValues(
fmt.Sprintf("%v", arw.Rule.Id),
fmt.Sprintf("%v", arw.Processor.DatasourceId()),
@@ -912,7 +962,7 @@ func (arw *AlertRuleWorker) GetHostAnomalyPoint(ruleConfig string) ([]models.Ano
fmt.Sprintf("%v", arw.Processor.DatasourceId()),
"",
).Set(0)
logger.Warningf("alert_eval_%d datasource_%d targets not found", arw.Rule.Id, arw.DatasourceId)
logger.Warningf("rule_eval:%s targets not found", arw.Key())
arw.Processor.Stats.CounterRuleEvalErrorTotal.WithLabelValues(fmt.Sprintf("%v", arw.Processor.DatasourceId()), QUERY_DATA, arw.Processor.BusiGroupCache.GetNameByBusiGroupId(arw.Rule.GroupId), fmt.Sprintf("%v", arw.Rule.Id)).Inc()
continue
}
@@ -924,7 +974,7 @@ func (arw *AlertRuleWorker) GetHostAnomalyPoint(ruleConfig string) ([]models.Ano
missTargets = append(missTargets, ident)
}
}
logger.Debugf("alert_eval_%d datasource_%d missTargets:%v", arw.Rule.Id, arw.DatasourceId, missTargets)
logger.Debugf("rule_eval:%s missTargets:%v", arw.Key(), missTargets)
arw.Processor.Stats.GaugeQuerySeriesCount.WithLabelValues(
fmt.Sprintf("%v", arw.Rule.Id),
fmt.Sprintf("%v", arw.Processor.DatasourceId()),
@@ -1120,7 +1170,7 @@ func ProcessJoins(ruleId int64, trigger models.Trigger, seriesTagIndexes map[str
// 有 join 条件,按条件依次合并
if len(seriesTagIndexes) < len(trigger.Joins)+1 {
logger.Errorf("alert_eval_%d queries' count: %d not match join condition's count: %d", ruleId, len(seriesTagIndexes), len(trigger.Joins))
logger.Errorf("rule_eval rid:%d queries' count: %d not match join condition's count: %d", ruleId, len(seriesTagIndexes), len(trigger.Joins))
return nil
}
@@ -1156,7 +1206,7 @@ func ProcessJoins(ruleId int64, trigger models.Trigger, seriesTagIndexes map[str
lastRehashed = exclude(curRehashed, lastRehashed)
last = flatten(lastRehashed)
default:
logger.Warningf("alert_eval_%d join type:%s not support", ruleId, trigger.Joins[i].JoinType)
logger.Warningf("rule_eval rid:%d join type:%s not support", ruleId, trigger.Joins[i].JoinType)
}
}
return last
@@ -1230,9 +1280,20 @@ func GetQueryRefAndUnit(query interface{}) (string, string, error) {
// 每个节点先填充参数再进行查询, 即先得到完整的 promql avg(mem_used_percent{host="127.0.0.1"}) > 5
// 再查询得到满足值变量的所有结果加入异常点列表
// 参数变量的值不满足的组合,需要覆盖上层筛选中产生的异常点
//
// 修复说明 (Issue #2971):
// 原实现中使用参数变量组合作为 key 存储异常点,导致同一参数值下的多条时序数据互相覆盖。
// 修复方案:
// 1. 同一层内:使用完整的标签 hash 作为 key避免不同时序数据覆盖
// 2. 跨层级时:子层按参数变量组合删除父层的所有相关告警,实现子筛选覆盖父筛选
func (arw *AlertRuleWorker) VarFillingBeforeQuery(query models.PromQuery, readerClient promsdk.API) []models.AnomalyPoint {
varToLabel := ExtractVarMapping(query.PromQl)
// 存储异常点的 mapkey 为参数变量的组合,可以实现子筛选对上一层筛选的覆盖
// 存储异常点的 map
// key 格式: {参数变量组合}@@{标签hash}
// 这样可以:
// 1. 同层内不同时序数据有不同的 key标签hash不同
// 2. 跨层时可以按参数变量组合前缀删除父层的告警
anomalyPointsMap := sync.Map{}
// 统一变量配置格式
VarConfigForCalc := &models.ChildVarConfig{
@@ -1257,11 +1318,19 @@ func (arw *AlertRuleWorker) VarFillingBeforeQuery(query models.PromQuery, reader
sort.Slice(ParamKeys, func(i, j int) bool {
return ParamKeys[i] < ParamKeys[j]
})
// 遍历变量配置链表
// 遍历变量配置链表(父层 -> 子层)
curNode := VarConfigForCalc
isFirstLayer := true
for curNode != nil {
// 当前层收集到的所有异常点,按参数组合分组
// key: 参数变量组合, value: 该组合下的所有异常点
currentLayerPointsByParam := make(map[string][]models.AnomalyPoint)
var currentLayerMutex sync.Mutex
for _, param := range curNode.ParamVal {
curPromql := query.PromQl
// 取出阈值变量
valMap := make(map[string]string)
for val, valQuery := range param {
@@ -1276,12 +1345,12 @@ func (arw *AlertRuleWorker) VarFillingBeforeQuery(query models.PromQuery, reader
// 得到参数变量的所有组合
paramPermutation, err := arw.getParamPermutation(param, ParamKeys, varToLabel, query.PromQl, readerClient)
if err != nil {
logger.Errorf("alert_eval_%d datasource_%d paramPermutation error:%v", arw.Rule.Id, arw.DatasourceId, err)
logger.Errorf("rule_eval:%s, paramPermutation error:%v", arw.Key(), err)
continue
}
keyToPromql := make(map[string]string)
for paramPermutationKeys, _ := range paramPermutation {
for paramPermutationKeys := range paramPermutation {
realPromql := curPromql
split := strings.Split(paramPermutationKeys, JoinMark)
for j := range ParamKeys {
@@ -1293,47 +1362,90 @@ func (arw *AlertRuleWorker) VarFillingBeforeQuery(query models.PromQuery, reader
// 并发查询
wg := sync.WaitGroup{}
semaphore := make(chan struct{}, 200)
for key, promql := range keyToPromql {
for paramKey, promql := range keyToPromql {
wg.Add(1)
semaphore <- struct{}{}
go func(key, promql string) {
go func(paramKey, promql string) {
defer func() {
<-semaphore
wg.Done()
}()
arw.Processor.Stats.CounterQueryDataTotal.WithLabelValues(fmt.Sprintf("%d", arw.DatasourceId), fmt.Sprintf("%d", arw.Rule.Id)).Inc()
arw.Processor.Stats.CounterQueryDataTotal.WithLabelValues(
fmt.Sprintf("%d", arw.DatasourceId),
fmt.Sprintf("%d", arw.Rule.Id),
).Inc()
value, _, err := readerClient.Query(context.Background(), promql, time.Now())
if err != nil {
logger.Errorf("alert_eval_%d datasource_%d promql:%s, error:%v", arw.Rule.Id, arw.DatasourceId, promql, err)
logger.Errorf("rule_eval:%s, promql:%s, error:%v", arw.Key(), promql, err)
return
}
logger.Infof("alert_eval_%d datasource_%d promql:%s, value:%+v", arw.Rule.Id, arw.DatasourceId, promql, value)
logger.Infof("rule_eval:%s, promql:%s, value:%+v", arw.Key(), promql, value)
points := models.ConvertAnomalyPoints(value)
if len(points) == 0 {
anomalyPointsMap.Delete(key)
// 查询无结果时,标记该参数组合需要清除(用于子层覆盖父层)
currentLayerMutex.Lock()
if _, exists := currentLayerPointsByParam[paramKey]; !exists {
currentLayerPointsByParam[paramKey] = []models.AnomalyPoint{}
}
currentLayerMutex.Unlock()
return
}
for i := 0; i < len(points); i++ {
points[i].Severity = query.Severity
points[i].Query = promql
points[i].ValuesUnit = map[string]unit.FormattedValue{
"v": unit.ValueFormatter(query.Unit, 2, points[i].Value),
}
// 每个异常点都需要生成 key子筛选使用 key 覆盖上层筛选,解决 issue https://github.com/ccfos/nightingale/issues/2433 提的问题
var cur []string
for _, paramKey := range ParamKeys {
val := string(points[i].Labels[model.LabelName(varToLabel[paramKey])])
cur = append(cur, val)
}
anomalyPointsMap.Store(strings.Join(cur, JoinMark), points[i])
}
}(key, promql)
// 收集当前层的异常点
currentLayerMutex.Lock()
currentLayerPointsByParam[paramKey] = append(currentLayerPointsByParam[paramKey], points...)
currentLayerMutex.Unlock()
}(paramKey, promql)
}
wg.Wait()
}
// 处理当前层的结果
for paramKey, points := range currentLayerPointsByParam {
if !isFirstLayer {
// 非首层(子层):先删除父层中该参数组合的所有告警
// 这实现了 issue #2433 要求的子筛选覆盖父筛选功能
keysToDelete := make([]string, 0)
anomalyPointsMap.Range(func(k, v any) bool {
keyStr := k.(string)
// key 格式: {参数组合}@@{标签hash}
// 检查是否以当前参数组合开头
if strings.HasPrefix(keyStr, paramKey+JoinMark) {
keysToDelete = append(keysToDelete, keyStr)
}
return true
})
for _, k := range keysToDelete {
anomalyPointsMap.Delete(k)
}
}
// 添加当前层的所有异常点
// 使用 参数组合 + 标签hash 作为 key保证同一参数值下的不同时序数据不会互相覆盖
for _, point := range points {
// 计算标签 hash确保不同的时序数据有不同的 key
tagHash := hash.GetTagHash(point.Labels)
fullKey := paramKey + JoinMark + fmt.Sprintf("%d", tagHash)
anomalyPointsMap.Store(fullKey, point)
}
}
curNode = curNode.ChildVarConfigs
isFirstLayer = false
}
// 收集所有异常点
anomalyPoints := make([]models.AnomalyPoint, 0)
anomalyPointsMap.Range(func(key, value any) bool {
if point, ok := value.(models.AnomalyPoint); ok {
@@ -1341,6 +1453,7 @@ func (arw *AlertRuleWorker) VarFillingBeforeQuery(query models.PromQuery, reader
}
return true
})
return anomalyPoints
}
@@ -1446,7 +1559,7 @@ func (arw *AlertRuleWorker) GetAnomalyPoint(rule *models.AlertRule, dsId int64)
recoverPoints := []models.AnomalyPoint{}
ruleConfig := strings.TrimSpace(rule.RuleConfig)
if ruleConfig == "" {
logger.Warningf("alert_eval_%d datasource_%d ruleConfig is blank", rule.Id, dsId)
logger.Warningf("rule_eval:%d ruleConfig is blank", rule.Id)
arw.Processor.Stats.CounterRuleEvalErrorTotal.WithLabelValues(fmt.Sprintf("%v", arw.Processor.DatasourceId()), GET_RULE_CONFIG, arw.Processor.BusiGroupCache.GetNameByBusiGroupId(arw.Rule.GroupId), fmt.Sprintf("%v", arw.Rule.Id)).Inc()
arw.Processor.Stats.GaugeQuerySeriesCount.WithLabelValues(
fmt.Sprintf("%v", arw.Rule.Id),
@@ -1454,15 +1567,15 @@ func (arw *AlertRuleWorker) GetAnomalyPoint(rule *models.AlertRule, dsId int64)
"",
).Set(0)
return points, recoverPoints, fmt.Errorf("alert_eval_%d datasource_%d ruleConfig is blank", rule.Id, dsId)
return points, recoverPoints, fmt.Errorf("rule_eval:%d ruleConfig is blank", rule.Id)
}
var ruleQuery models.RuleQuery
err := json.Unmarshal([]byte(ruleConfig), &ruleQuery)
if err != nil {
logger.Warningf("alert_eval_%d datasource_%d promql parse error:%s", rule.Id, dsId, err.Error())
logger.Warningf("rule_eval:%d promql parse error:%s", rule.Id, err.Error())
arw.Processor.Stats.CounterRuleEvalErrorTotal.WithLabelValues(fmt.Sprintf("%v", arw.Processor.DatasourceId()), GET_RULE_CONFIG, arw.Processor.BusiGroupCache.GetNameByBusiGroupId(arw.Rule.GroupId), fmt.Sprintf("%v", arw.Rule.Id)).Inc()
return points, recoverPoints, fmt.Errorf("alert_eval_%d datasource_%d promql parse error:%s", rule.Id, dsId, err.Error())
return points, recoverPoints, fmt.Errorf("rule_eval:%d promql parse error:%s", rule.Id, err.Error())
}
arw.Inhibit = ruleQuery.Inhibit
@@ -1474,7 +1587,7 @@ func (arw *AlertRuleWorker) GetAnomalyPoint(rule *models.AlertRule, dsId int64)
plug, exists := dscache.DsCache.Get(rule.Cate, dsId)
if !exists {
logger.Warningf("alert_eval_%d datasource_%d not exists", rule.Id, dsId)
logger.Warningf("rule_eval rid:%d datasource:%d not exists", rule.Id, dsId)
arw.Processor.Stats.CounterRuleEvalErrorTotal.WithLabelValues(fmt.Sprintf("%v", arw.Processor.DatasourceId()), GET_CLIENT, arw.Processor.BusiGroupCache.GetNameByBusiGroupId(arw.Rule.GroupId), fmt.Sprintf("%v", arw.Rule.Id)).Inc()
arw.Processor.Stats.GaugeQuerySeriesCount.WithLabelValues(
@@ -1483,24 +1596,14 @@ func (arw *AlertRuleWorker) GetAnomalyPoint(rule *models.AlertRule, dsId int64)
fmt.Sprintf("%v", i),
).Set(-2)
return points, recoverPoints, fmt.Errorf("alert_eval_%d datasource_%d not exists", rule.Id, dsId)
}
if err = ExecuteQueryTemplate(rule.Cate, query, nil); err != nil {
logger.Warningf("alert_eval_%d datasource_%d execute query template error: %v", rule.Id, dsId, err)
arw.Processor.Stats.CounterRuleEvalErrorTotal.WithLabelValues(fmt.Sprintf("%v", arw.Processor.DatasourceId()), EXEC_TEMPLATE, arw.Processor.BusiGroupCache.GetNameByBusiGroupId(arw.Rule.GroupId), fmt.Sprintf("%v", arw.Rule.Id)).Inc()
arw.Processor.Stats.GaugeQuerySeriesCount.WithLabelValues(
fmt.Sprintf("%v", arw.Rule.Id),
fmt.Sprintf("%v", arw.Processor.DatasourceId()),
fmt.Sprintf("%v", i),
).Set(-3)
return points, recoverPoints, fmt.Errorf("rule_eval:%d datasource:%d not exists", rule.Id, dsId)
}
ctx := context.WithValue(context.Background(), "delay", int64(rule.Delay))
series, err := plug.QueryData(ctx, query)
arw.Processor.Stats.CounterQueryDataTotal.WithLabelValues(fmt.Sprintf("%d", arw.DatasourceId), fmt.Sprintf("%d", rule.Id)).Inc()
if err != nil {
logger.Warningf("alert_eval_%d datasource_%d query data error: %v", rule.Id, dsId, err)
logger.Warningf("rule_eval rid:%d query data error: %v", rule.Id, err)
arw.Processor.Stats.CounterRuleEvalErrorTotal.WithLabelValues(fmt.Sprintf("%v", arw.Processor.DatasourceId()), GET_CLIENT, arw.Processor.BusiGroupCache.GetNameByBusiGroupId(arw.Rule.GroupId), fmt.Sprintf("%v", arw.Rule.Id)).Inc()
arw.Processor.Stats.GaugeQuerySeriesCount.WithLabelValues(
fmt.Sprintf("%v", arw.Rule.Id),
@@ -1508,7 +1611,7 @@ func (arw *AlertRuleWorker) GetAnomalyPoint(rule *models.AlertRule, dsId int64)
fmt.Sprintf("%v", i),
).Set(-1)
return points, recoverPoints, fmt.Errorf("alert_eval_%d datasource_%d query data error: %v", rule.Id, dsId, err)
return points, recoverPoints, fmt.Errorf("rule_eval:%d query data error: %v", rule.Id, err)
}
arw.Processor.Stats.GaugeQuerySeriesCount.WithLabelValues(
@@ -1518,7 +1621,7 @@ func (arw *AlertRuleWorker) GetAnomalyPoint(rule *models.AlertRule, dsId int64)
).Set(float64(len(series)))
// 此条日志很重要,是告警判断的现场值
logger.Infof("alert_eval_%d datasource_%d req:%+v resp:%v", rule.Id, dsId, query, series)
logger.Infof("rule_eval rid:%d req:%+v resp:%v", rule.Id, query, series)
for i := 0; i < len(series); i++ {
seriesHash := hash.GetHash(series[i].Metric, series[i].Ref)
tagHash := hash.GetTagHash(series[i].Metric)
@@ -1532,7 +1635,7 @@ func (arw *AlertRuleWorker) GetAnomalyPoint(rule *models.AlertRule, dsId int64)
}
ref, err := GetQueryRef(query)
if err != nil {
logger.Warningf("alert_eval_%d datasource_%d query:%+v get ref error:%s", rule.Id, dsId, query, err.Error())
logger.Warningf("rule_eval rid:%d query:%+v get ref error:%s", rule.Id, query, err.Error())
continue
}
seriesTagIndexes[ref] = seriesTagIndex
@@ -1542,7 +1645,7 @@ func (arw *AlertRuleWorker) GetAnomalyPoint(rule *models.AlertRule, dsId int64)
for _, query := range ruleQuery.Queries {
ref, unit, err := GetQueryRefAndUnit(query)
if err != nil {
logger.Warningf("alert_eval_%d datasource_%d query:%+v get ref and unit error:%s", rule.Id, dsId, query, err.Error())
logger.Warningf("rule_eval rid:%d query:%+v get ref and unit error:%s", rule.Id, query, err.Error())
continue
}
unitMap[ref] = unit
@@ -1565,12 +1668,12 @@ func (arw *AlertRuleWorker) GetAnomalyPoint(rule *models.AlertRule, dsId int64)
for _, seriesHash := range seriesHash {
series, exists := seriesStore[seriesHash]
if !exists {
logger.Warningf("alert_eval_%d datasource_%d series:%+v not found", rule.Id, dsId, series)
logger.Warningf("rule_eval rid:%d series:%+v not found", rule.Id, series)
continue
}
t, v, exists := series.Last()
if !exists {
logger.Warningf("alert_eval_%d datasource_%d series:%+v value not found", rule.Id, dsId, series)
logger.Warningf("rule_eval rid:%d series:%+v value not found", rule.Id, series)
continue
}
@@ -1601,12 +1704,12 @@ func (arw *AlertRuleWorker) GetAnomalyPoint(rule *models.AlertRule, dsId int64)
ts = int64(t)
sample = series
value = v
logger.Infof("alert_eval_%d datasource_%d origin series labels:%+v", rule.Id, dsId, series.Metric)
logger.Infof("rule_eval rid:%d origin series labels:%+v", rule.Id, series.Metric)
}
isTriggered := parser.CalcWithRid(trigger.Exp, m, rule.Id)
// 此条日志很重要,是告警判断的现场值
logger.Infof("alert_eval_%d datasource_%d trigger:%+v exp:%s res:%v m:%v", rule.Id, dsId, trigger, trigger.Exp, isTriggered, m)
logger.Infof("rule_eval rid:%d trigger:%+v exp:%s res:%v m:%v", rule.Id, trigger, trigger.Exp, isTriggered, m)
var values string
for k, v := range m {
@@ -1614,15 +1717,11 @@ func (arw *AlertRuleWorker) GetAnomalyPoint(rule *models.AlertRule, dsId int64)
continue
}
if u, exists := valuesUnitMap[k]; exists { // 配置了单位,优先用配置了单位的值
values += fmt.Sprintf("%s:%s ", k, u.Text)
} else {
switch v.(type) {
case float64:
values += fmt.Sprintf("%s:%.3f ", k, v)
case string:
values += fmt.Sprintf("%s:%s ", k, v)
}
switch v.(type) {
case float64:
values += fmt.Sprintf("%s:%.3f ", k, v)
case string:
values += fmt.Sprintf("%s:%s ", k, v)
}
}
@@ -1679,7 +1778,7 @@ func (arw *AlertRuleWorker) GetAnomalyPoint(rule *models.AlertRule, dsId int64)
// 检查是否超过 resolve_after 时间
if now-int64(lastTs) > int64(ruleQuery.NodataTrigger.ResolveAfter) {
logger.Infof("alert_eval_%d datasource_%d series:%+v resolve after %d seconds now:%d lastTs:%d", rule.Id, dsId, lastSeries, ruleQuery.NodataTrigger.ResolveAfter, now, int64(lastTs))
logger.Infof("rule_eval rid:%d series:%+v resolve after %d seconds now:%d lastTs:%d", rule.Id, lastSeries, ruleQuery.NodataTrigger.ResolveAfter, now, int64(lastTs))
delete(arw.LastSeriesStore, hash)
continue
}
@@ -1700,7 +1799,7 @@ func (arw *AlertRuleWorker) GetAnomalyPoint(rule *models.AlertRule, dsId int64)
TriggerType: models.TriggerTypeNodata,
}
points = append(points, point)
logger.Infof("alert_eval_%d datasource_%d nodata point:%+v", rule.Id, dsId, point)
logger.Infof("rule_eval rid:%d nodata point:%+v", rule.Id, point)
}
}
@@ -1715,61 +1814,3 @@ func (arw *AlertRuleWorker) GetAnomalyPoint(rule *models.AlertRule, dsId int64)
return points, recoverPoints, nil
}
// ExecuteQueryTemplate 根据数据源类型对 Query 进行模板渲染处理
// cate: 数据源类别,如 "mysql", "pgsql" 等
// query: 查询对象,如果是数据库类型的数据源,会处理其中的 sql 字段
// data: 模板数据对象,如果为 nil 则使用空结构体(不支持变量渲染),如果不为 nil 则使用传入的数据(支持变量渲染)
func ExecuteQueryTemplate(cate string, query interface{}, data interface{}) error {
// 检查 query 是否是 map且包含 sql 字段
queryMap, ok := query.(map[string]interface{})
if !ok {
return nil
}
sqlVal, exists := queryMap["sql"]
if !exists {
return nil
}
sqlStr, ok := sqlVal.(string)
if !ok {
return nil
}
// 调用 ExecuteSqlTemplate 处理 sql 字段
processedSQL, err := ExecuteSqlTemplate(sqlStr, data)
if err != nil {
return fmt.Errorf("execute sql template error: %w", err)
}
// 更新 query 中的 sql 字段
queryMap["sql"] = processedSQL
return nil
}
// ExecuteSqlTemplate 执行 query 中的 golang 模板语法函数
// query: 要处理的 query 字符串
// data: 模板数据对象,如果为 nil 则使用空结构体(不支持变量渲染),如果不为 nil 则使用传入的数据(支持变量渲染)
func ExecuteSqlTemplate(query string, data interface{}) (string, error) {
if !strings.Contains(query, "{{") || !strings.Contains(query, "}}") {
return query, nil
}
tmpl, err := template.New("query").Funcs(tplx.TemplateFuncMap).Parse(query)
if err != nil {
return "", fmt.Errorf("query tmpl parse error: %w", err)
}
var buf strings.Builder
templateData := data
if templateData == nil {
templateData = struct{}{}
}
if err := tmpl.Execute(&buf, templateData); err != nil {
return "", fmt.Errorf("query tmpl execute error: %w", err)
}
return buf.String(), nil
}

View File

@@ -41,28 +41,8 @@ func IsMuted(rule *models.AlertRule, event *models.AlertCurEvent, targetCache *m
// TimeSpanMuteStrategy 根据规则配置的告警生效时间段过滤,如果产生的告警不在规则配置的告警生效时间段内,则不告警,即被mute
// 时间范围左闭右开默认范围00:00-24:00
// 如果规则配置了时区,则在该时区下进行时间判断;如果时区为空,则使用系统时区
func TimeSpanMuteStrategy(rule *models.AlertRule, event *models.AlertCurEvent) bool {
// 确定使用的时区
var targetLoc *time.Location
var err error
timezone := rule.TimeZone
if timezone == "" {
// 如果时区为空,使用系统时区(保持原有逻辑)
targetLoc = time.Local
} else {
// 加载规则配置的时区
targetLoc, err = time.LoadLocation(timezone)
if err != nil {
// 如果时区加载失败,记录错误并使用系统时区
logger.Warningf("Failed to load timezone %s for rule %d, using system timezone: %v", timezone, rule.Id, err)
targetLoc = time.Local
}
}
// 将触发时间转换到目标时区
tm := time.Unix(event.TriggerTime, 0).In(targetLoc)
tm := time.Unix(event.TriggerTime, 0)
triggerTime := tm.Format("15:04")
triggerWeek := strconv.Itoa(int(tm.Weekday()))
@@ -122,7 +102,7 @@ func IdentNotExistsMuteStrategy(rule *models.AlertRule, event *models.AlertCurEv
// 如果是target_up的告警,且ident已经不存在了,直接过滤掉
// 这里的判断有点太粗暴了,但是目前没有更好的办法
if !exists && strings.Contains(rule.PromQl, "target_up") {
logger.Debugf("alert_eval_%d [IdentNotExistsMuteStrategy] mute: cluster:%s ident:%s", rule.Id, event.Cluster, ident)
logger.Debugf("[%s] mute: rule_eval:%d cluster:%s ident:%s", "IdentNotExistsMuteStrategy", rule.Id, event.Cluster, ident)
return true
}
return false
@@ -144,7 +124,7 @@ func BgNotMatchMuteStrategy(rule *models.AlertRule, event *models.AlertCurEvent,
// 对于包含ident的告警事件check一下ident所属bg和rule所属bg是否相同
// 如果告警规则选择了只在本BG生效那其他BG的机器就不能因此规则产生告警
if exists && !target.MatchGroupId(rule.GroupId) {
logger.Debugf("alert_eval_%d [BgNotMatchMuteStrategy] mute: cluster:%s", rule.Id, event.Cluster)
logger.Debugf("[%s] mute: rule_eval:%d cluster:%s", "BgNotMatchMuteStrategy", rule.Id, event.Cluster)
return true
}
return false

View File

@@ -1,380 +0,0 @@
package engine
import (
"fmt"
"time"
"github.com/ccfos/nightingale/v6/models"
"github.com/ccfos/nightingale/v6/pkg/ctx"
"github.com/google/uuid"
"github.com/toolkits/pkg/logger"
)
type WorkflowEngine struct {
ctx *ctx.Context
}
func NewWorkflowEngine(c *ctx.Context) *WorkflowEngine {
return &WorkflowEngine{ctx: c}
}
func (e *WorkflowEngine) Execute(pipeline *models.EventPipeline, event *models.AlertCurEvent, triggerCtx *models.WorkflowTriggerContext) (*models.AlertCurEvent, *models.WorkflowResult, error) {
startTime := time.Now()
wfCtx := e.initWorkflowContext(pipeline, event, triggerCtx)
nodes := pipeline.GetWorkflowNodes()
connections := pipeline.GetWorkflowConnections()
if len(nodes) == 0 {
return event, &models.WorkflowResult{
Event: event,
Status: models.ExecutionStatusSuccess,
Message: "no nodes to execute",
}, nil
}
nodeMap := make(map[string]*models.WorkflowNode)
for i := range nodes {
if nodes[i].RetryInterval == 0 {
nodes[i].RetryInterval = 1
}
if nodes[i].MaxRetries == 0 {
nodes[i].MaxRetries = 1
}
nodeMap[nodes[i].ID] = &nodes[i]
}
result := e.executeDAG(nodeMap, connections, wfCtx)
result.Event = wfCtx.Event
duration := time.Since(startTime).Milliseconds()
if triggerCtx != nil && triggerCtx.Mode != "" {
e.saveExecutionRecord(pipeline, wfCtx, result, triggerCtx, startTime.Unix(), duration)
}
return wfCtx.Event, result, nil
}
func (e *WorkflowEngine) initWorkflowContext(pipeline *models.EventPipeline, event *models.AlertCurEvent, triggerCtx *models.WorkflowTriggerContext) *models.WorkflowContext {
// 合并输入参数
inputs := pipeline.GetInputsMap()
if triggerCtx != nil && triggerCtx.InputsOverrides != nil {
for k, v := range triggerCtx.InputsOverrides {
inputs[k] = v
}
}
metadata := map[string]string{
"start_time": fmt.Sprintf("%d", time.Now().Unix()),
"pipeline_id": fmt.Sprintf("%d", pipeline.ID),
}
// 是否启用流式输出
stream := false
if triggerCtx != nil {
metadata["request_id"] = triggerCtx.RequestID
metadata["trigger_mode"] = triggerCtx.Mode
metadata["trigger_by"] = triggerCtx.TriggerBy
stream = triggerCtx.Stream
}
return &models.WorkflowContext{
Event: event,
Inputs: inputs,
Vars: make(map[string]interface{}), // 初始化空的 Vars供节点间传递数据
Metadata: metadata,
Stream: stream,
}
}
// executeDAG 使用 Kahn 算法执行 DAG
func (e *WorkflowEngine) executeDAG(nodeMap map[string]*models.WorkflowNode, connections models.Connections, wfCtx *models.WorkflowContext) *models.WorkflowResult {
result := &models.WorkflowResult{
Status: models.ExecutionStatusSuccess,
NodeResults: make([]*models.NodeExecutionResult, 0),
Stream: wfCtx.Stream, // 从上下文继承流式输出设置
}
// 计算每个节点的入度
inDegree := make(map[string]int)
for nodeID := range nodeMap {
inDegree[nodeID] = 0
}
// 遍历连接,计算入度
for _, nodeConns := range connections {
for _, targets := range nodeConns.Main {
for _, target := range targets {
inDegree[target.Node]++
}
}
}
// 找到所有入度为 0 的节点(起始节点)
queue := make([]string, 0)
for nodeID, degree := range inDegree {
if degree == 0 {
queue = append(queue, nodeID)
}
}
// 如果没有起始节点,说明存在循环依赖
if len(queue) == 0 && len(nodeMap) > 0 {
result.Status = models.ExecutionStatusFailed
result.Message = "workflow has circular dependency"
return result
}
// 记录已执行的节点
executed := make(map[string]bool)
// 记录节点的分支选择结果
branchResults := make(map[string]*int)
for len(queue) > 0 {
// 取出队首节点
nodeID := queue[0]
queue = queue[1:]
// 检查是否已执行
if executed[nodeID] {
continue
}
node, exists := nodeMap[nodeID]
if !exists {
continue
}
// 执行节点
nodeResult, nodeOutput := e.executeNode(node, wfCtx)
result.NodeResults = append(result.NodeResults, nodeResult)
if nodeOutput != nil && nodeOutput.Stream && nodeOutput.StreamChan != nil {
// 流式输出节点通常是最后一个节点
// 直接传递 StreamChan 给 WorkflowResult不阻塞等待
result.Stream = true
result.StreamChan = nodeOutput.StreamChan
result.Event = wfCtx.Event
result.Status = "streaming"
result.Message = fmt.Sprintf("streaming output from node: %s", node.Name)
// 更新节点状态为 streaming
nodeResult.Status = "streaming"
nodeResult.Message = "streaming in progress"
// 立即返回,让 API 层处理流式响应
return result
}
executed[nodeID] = true
// 保存分支结果
if nodeResult.BranchIndex != nil {
branchResults[nodeID] = nodeResult.BranchIndex
}
// 检查执行状态
if nodeResult.Status == "failed" {
if !node.ContinueOnFail {
result.Status = models.ExecutionStatusFailed
result.ErrorNode = nodeID
result.Message = fmt.Sprintf("node %s failed: %s", node.Name, nodeResult.Error)
}
}
// 检查是否终止
if nodeResult.Status == "terminated" {
result.Message = fmt.Sprintf("workflow terminated at node %s", node.Name)
return result
}
// 更新后继节点的入度
if nodeConns, ok := connections[nodeID]; ok {
for outputIndex, targets := range nodeConns.Main {
// 检查是否应该走这个分支
if !e.shouldFollowBranch(nodeID, outputIndex, branchResults) {
continue
}
for _, target := range targets {
inDegree[target.Node]--
if inDegree[target.Node] == 0 {
queue = append(queue, target.Node)
}
}
}
}
}
return result
}
// executeNode 执行单个节点
// 返回:节点执行结果、节点输出(用于流式输出检测)
func (e *WorkflowEngine) executeNode(node *models.WorkflowNode, wfCtx *models.WorkflowContext) (*models.NodeExecutionResult, *models.NodeOutput) {
startTime := time.Now()
nodeResult := &models.NodeExecutionResult{
NodeID: node.ID,
NodeName: node.Name,
NodeType: node.Type,
StartedAt: startTime.Unix(),
}
var nodeOutput *models.NodeOutput
// 跳过禁用的节点
if node.Disabled {
nodeResult.Status = "skipped"
nodeResult.Message = "node is disabled"
nodeResult.FinishedAt = time.Now().Unix()
nodeResult.DurationMs = time.Since(startTime).Milliseconds()
return nodeResult, nil
}
// 获取处理器
processor, err := models.GetProcessorByType(node.Type, node.Config)
if err != nil {
nodeResult.Status = "failed"
nodeResult.Error = fmt.Sprintf("failed to get processor: %v", err)
nodeResult.FinishedAt = time.Now().Unix()
nodeResult.DurationMs = time.Since(startTime).Milliseconds()
return nodeResult, nil
}
// 执行处理器(带重试)
var retries int
maxRetries := node.MaxRetries
if !node.RetryOnFail {
maxRetries = 0
}
for retries <= maxRetries {
// 检查是否为分支处理器
if branchProcessor, ok := processor.(models.BranchProcessor); ok {
output, err := branchProcessor.ProcessWithBranch(e.ctx, wfCtx)
if err != nil {
if retries < maxRetries {
retries++
time.Sleep(time.Duration(node.RetryInterval) * time.Second)
continue
}
nodeResult.Status = "failed"
nodeResult.Error = err.Error()
} else {
nodeResult.Status = "success"
if output != nil {
nodeOutput = output
if output.WfCtx != nil {
wfCtx = output.WfCtx
}
nodeResult.Message = output.Message
nodeResult.BranchIndex = output.BranchIndex
if output.Terminate {
nodeResult.Status = "terminated"
}
}
}
break
}
// 普通处理器
newWfCtx, msg, err := processor.Process(e.ctx, wfCtx)
if err != nil {
if retries < maxRetries {
retries++
time.Sleep(time.Duration(node.RetryInterval) * time.Second)
continue
}
nodeResult.Status = "failed"
nodeResult.Error = err.Error()
} else {
nodeResult.Status = "success"
nodeResult.Message = msg
if newWfCtx != nil {
wfCtx = newWfCtx
// 检测流式输出标记
if newWfCtx.Stream && newWfCtx.StreamChan != nil {
nodeOutput = &models.NodeOutput{
WfCtx: newWfCtx,
Message: msg,
Stream: true,
StreamChan: newWfCtx.StreamChan,
}
}
}
// 如果事件被 drop返回 nil 或 Event 为 nil标记为终止
if newWfCtx == nil || newWfCtx.Event == nil {
nodeResult.Status = "terminated"
nodeResult.Message = msg
}
}
break
}
nodeResult.FinishedAt = time.Now().Unix()
nodeResult.DurationMs = time.Since(startTime).Milliseconds()
logger.Infof("workflow: executed node %s (type=%s) status=%s msg=%s duration=%dms",
node.Name, node.Type, nodeResult.Status, nodeResult.Message, nodeResult.DurationMs)
return nodeResult, nodeOutput
}
// shouldFollowBranch 判断是否应该走某个分支
func (e *WorkflowEngine) shouldFollowBranch(nodeID string, outputIndex int, branchResults map[string]*int) bool {
branchIndex, hasBranch := branchResults[nodeID]
if !hasBranch {
// 没有分支结果,说明不是分支节点,只走第一个输出
return outputIndex == 0
}
if branchIndex == nil {
// branchIndex 为 nil走默认分支通常是最后一个
return true
}
// 只走选中的分支
return outputIndex == *branchIndex
}
func (e *WorkflowEngine) saveExecutionRecord(pipeline *models.EventPipeline, wfCtx *models.WorkflowContext, result *models.WorkflowResult, triggerCtx *models.WorkflowTriggerContext, startTime int64, duration int64) {
executionID := triggerCtx.RequestID
if executionID == "" {
executionID = uuid.New().String()
}
execution := &models.EventPipelineExecution{
ID: executionID,
PipelineID: pipeline.ID,
PipelineName: pipeline.Name,
Mode: triggerCtx.Mode,
Status: result.Status,
ErrorMessage: result.Message,
ErrorNode: result.ErrorNode,
CreatedAt: startTime,
FinishedAt: time.Now().Unix(),
DurationMs: duration,
TriggerBy: triggerCtx.TriggerBy,
}
if wfCtx.Event != nil {
execution.EventID = wfCtx.Event.Id
}
if err := execution.SetNodeResults(result.NodeResults); err != nil {
logger.Errorf("workflow: failed to set node results: pipeline_id=%d, error=%v", pipeline.ID, err)
}
if err := execution.SetInputsSnapshot(wfCtx.Inputs); err != nil {
logger.Errorf("workflow: failed to set inputs snapshot: pipeline_id=%d, error=%v", pipeline.ID, err)
}
if err := models.CreateEventPipelineExecution(e.ctx, execution); err != nil {
logger.Errorf("workflow: failed to save execution record: pipeline_id=%d, error=%v", pipeline.ID, err)
}
}

View File

@@ -5,7 +5,6 @@ import (
_ "github.com/ccfos/nightingale/v6/alert/pipeline/processor/callback"
_ "github.com/ccfos/nightingale/v6/alert/pipeline/processor/eventdrop"
_ "github.com/ccfos/nightingale/v6/alert/pipeline/processor/eventupdate"
_ "github.com/ccfos/nightingale/v6/alert/pipeline/processor/logic"
_ "github.com/ccfos/nightingale/v6/alert/pipeline/processor/relabel"
)

View File

@@ -55,24 +55,23 @@ func (c *AISummaryConfig) Init(settings interface{}) (models.Processor, error) {
return result, err
}
func (c *AISummaryConfig) Process(ctx *ctx.Context, wfCtx *models.WorkflowContext) (*models.WorkflowContext, string, error) {
event := wfCtx.Event
func (c *AISummaryConfig) Process(ctx *ctx.Context, event *models.AlertCurEvent) (*models.AlertCurEvent, string, error) {
if c.Client == nil {
if err := c.initHTTPClient(); err != nil {
return wfCtx, "", fmt.Errorf("failed to initialize HTTP client: %v processor: %v", err, c)
return event, "", fmt.Errorf("failed to initialize HTTP client: %v processor: %v", err, c)
}
}
// 准备告警事件信息
eventInfo, err := c.prepareEventInfo(wfCtx)
eventInfo, err := c.prepareEventInfo(event)
if err != nil {
return wfCtx, "", fmt.Errorf("failed to prepare event info: %v processor: %v", err, c)
return event, "", fmt.Errorf("failed to prepare event info: %v processor: %v", err, c)
}
// 调用AI模型生成总结
summary, err := c.generateAISummary(eventInfo)
if err != nil {
return wfCtx, "", fmt.Errorf("failed to generate AI summary: %v processor: %v", err, c)
return event, "", fmt.Errorf("failed to generate AI summary: %v processor: %v", err, c)
}
// 将总结添加到annotations字段
@@ -84,11 +83,11 @@ func (c *AISummaryConfig) Process(ctx *ctx.Context, wfCtx *models.WorkflowContex
// 更新Annotations字段
b, err := json.Marshal(event.AnnotationsJSON)
if err != nil {
return wfCtx, "", fmt.Errorf("failed to marshal annotations: %v processor: %v", err, c)
return event, "", fmt.Errorf("failed to marshal annotations: %v processor: %v", err, c)
}
event.Annotations = string(b)
return wfCtx, "", nil
return event, "", nil
}
func (c *AISummaryConfig) initHTTPClient() error {
@@ -111,10 +110,9 @@ func (c *AISummaryConfig) initHTTPClient() error {
return nil
}
func (c *AISummaryConfig) prepareEventInfo(wfCtx *models.WorkflowContext) (string, error) {
func (c *AISummaryConfig) prepareEventInfo(event *models.AlertCurEvent) (string, error) {
var defs = []string{
"{{$event := .Event}}",
"{{$inputs := .Inputs}}",
"{{$event := .}}",
}
text := strings.Join(append(defs, c.PromptTemplate), "")
@@ -124,7 +122,7 @@ func (c *AISummaryConfig) prepareEventInfo(wfCtx *models.WorkflowContext) (strin
}
var body bytes.Buffer
err = t.Execute(&body, wfCtx)
err = t.Execute(&body, event)
if err != nil {
return "", fmt.Errorf("failed to execute prompt template: %v", err)
}

View File

@@ -42,14 +42,8 @@ func TestAISummaryConfig_Process(t *testing.T) {
},
}
// 创建 WorkflowContext
wfCtx := &models.WorkflowContext{
Event: event,
Inputs: map[string]string{},
}
// 测试模板处理
eventInfo, err := config.prepareEventInfo(wfCtx)
eventInfo, err := config.prepareEventInfo(event)
assert.NoError(t, err)
assert.Contains(t, eventInfo, "Test Rule")
assert.Contains(t, eventInfo, "1")
@@ -60,18 +54,18 @@ func TestAISummaryConfig_Process(t *testing.T) {
assert.NotNil(t, processor)
// 测试处理函数
result, _, err := processor.Process(&ctx.Context{}, wfCtx)
result, _, err := processor.Process(&ctx.Context{}, event)
assert.NoError(t, err)
assert.NotNil(t, result)
assert.NotEmpty(t, result.Event.AnnotationsJSON["ai_summary"])
assert.NotEmpty(t, result.AnnotationsJSON["ai_summary"])
// 展示处理结果
t.Log("\n=== 处理结果 ===")
t.Logf("告警规则: %s", result.Event.RuleName)
t.Logf("严重程度: %d", result.Event.Severity)
t.Logf("标签: %v", result.Event.TagsMap)
t.Logf("原始注释: %v", result.Event.AnnotationsJSON["description"])
t.Logf("AI总结: %s", result.Event.AnnotationsJSON["ai_summary"])
t.Logf("告警规则: %s", result.RuleName)
t.Logf("严重程度: %d", result.Severity)
t.Logf("标签: %v", result.TagsMap)
t.Logf("原始注释: %v", result.AnnotationsJSON["description"])
t.Logf("AI总结: %s", result.AnnotationsJSON["ai_summary"])
}
func TestConvertCustomParam(t *testing.T) {

View File

@@ -11,7 +11,6 @@ import (
"time"
"github.com/ccfos/nightingale/v6/alert/pipeline/processor/common"
"github.com/ccfos/nightingale/v6/alert/pipeline/processor/utils"
"github.com/ccfos/nightingale/v6/models"
"github.com/ccfos/nightingale/v6/pkg/ctx"
"github.com/toolkits/pkg/logger"
@@ -44,8 +43,7 @@ func (c *CallbackConfig) Init(settings interface{}) (models.Processor, error) {
return result, err
}
func (c *CallbackConfig) Process(ctx *ctx.Context, wfCtx *models.WorkflowContext) (*models.WorkflowContext, string, error) {
event := wfCtx.Event
func (c *CallbackConfig) Process(ctx *ctx.Context, event *models.AlertCurEvent) (*models.AlertCurEvent, string, error) {
if c.Client == nil {
transport := &http.Transport{
TLSClientConfig: &tls.Config{InsecureSkipVerify: c.SkipSSLVerify},
@@ -54,7 +52,7 @@ func (c *CallbackConfig) Process(ctx *ctx.Context, wfCtx *models.WorkflowContext
if c.Proxy != "" {
proxyURL, err := url.Parse(c.Proxy)
if err != nil {
return wfCtx, "", fmt.Errorf("failed to parse proxy url: %v processor: %v", err, c)
return event, "", fmt.Errorf("failed to parse proxy url: %v processor: %v", err, c)
} else {
transport.Proxy = http.ProxyURL(proxyURL)
}
@@ -72,19 +70,14 @@ func (c *CallbackConfig) Process(ctx *ctx.Context, wfCtx *models.WorkflowContext
headers[k] = v
}
url, err := utils.TplRender(wfCtx, c.URL)
if err != nil {
return wfCtx, "", fmt.Errorf("failed to render url template: %v processor: %v", err, c)
}
body, err := json.Marshal(event)
if err != nil {
return wfCtx, "", fmt.Errorf("failed to marshal event: %v processor: %v", err, c)
return event, "", fmt.Errorf("failed to marshal event: %v processor: %v", err, c)
}
req, err := http.NewRequest("POST", url, strings.NewReader(string(body)))
req, err := http.NewRequest("POST", c.URL, strings.NewReader(string(body)))
if err != nil {
return wfCtx, "", fmt.Errorf("failed to create request: %v processor: %v", err, c)
return event, "", fmt.Errorf("failed to create request: %v processor: %v", err, c)
}
for k, v := range headers {
@@ -97,14 +90,14 @@ func (c *CallbackConfig) Process(ctx *ctx.Context, wfCtx *models.WorkflowContext
resp, err := c.Client.Do(req)
if err != nil {
return wfCtx, "", fmt.Errorf("failed to send request: %v processor: %v", err, c)
return event, "", fmt.Errorf("failed to send request: %v processor: %v", err, c)
}
b, err := io.ReadAll(resp.Body)
if err != nil {
return wfCtx, "", fmt.Errorf("failed to read response body: %v processor: %v", err, c)
return event, "", fmt.Errorf("failed to read response body: %v processor: %v", err, c)
}
logger.Debugf("callback processor response body: %s", string(b))
return wfCtx, "callback success", nil
return event, "callback success", nil
}

View File

@@ -26,38 +26,35 @@ func (c *EventDropConfig) Init(settings interface{}) (models.Processor, error) {
return result, err
}
func (c *EventDropConfig) Process(ctx *ctx.Context, wfCtx *models.WorkflowContext) (*models.WorkflowContext, string, error) {
func (c *EventDropConfig) Process(ctx *ctx.Context, event *models.AlertCurEvent) (*models.AlertCurEvent, string, error) {
// 使用背景是可以根据此处理器,实现对事件进行更加灵活的过滤的逻辑
// 在标签过滤和属性过滤都不满足需求时可以使用
// 如果模板执行结果为 true则删除该事件
event := wfCtx.Event
var defs = []string{
"{{ $event := .Event }}",
"{{ $labels := .Event.TagsMap }}",
"{{ $value := .Event.TriggerValue }}",
"{{ $inputs := .Inputs }}",
"{{ $event := . }}",
"{{ $labels := .TagsMap }}",
"{{ $value := .TriggerValue }}",
}
text := strings.Join(append(defs, c.Content), "")
tpl, err := texttemplate.New("eventdrop").Funcs(tplx.TemplateFuncMap).Parse(text)
if err != nil {
return wfCtx, "", fmt.Errorf("processor failed to parse template: %v processor: %v", err, c)
return event, "", fmt.Errorf("processor failed to parse template: %v processor: %v", err, c)
}
var body bytes.Buffer
if err = tpl.Execute(&body, wfCtx); err != nil {
return wfCtx, "", fmt.Errorf("processor failed to execute template: %v processor: %v", err, c)
if err = tpl.Execute(&body, event); err != nil {
return event, "", fmt.Errorf("processor failed to execute template: %v processor: %v", err, c)
}
result := strings.TrimSpace(body.String())
logger.Infof("processor eventdrop result: %v", result)
if result == "true" {
wfCtx.Event = nil
logger.Infof("processor eventdrop drop event: %s", event.Hash)
return wfCtx, "drop event success", nil
logger.Infof("processor eventdrop drop event: %v", event)
return nil, "drop event success", nil
}
return wfCtx, "drop event failed", nil
return event, "drop event failed", nil
}

View File

@@ -31,8 +31,7 @@ func (c *EventUpdateConfig) Init(settings interface{}) (models.Processor, error)
return result, err
}
func (c *EventUpdateConfig) Process(ctx *ctx.Context, wfCtx *models.WorkflowContext) (*models.WorkflowContext, string, error) {
event := wfCtx.Event
func (c *EventUpdateConfig) Process(ctx *ctx.Context, event *models.AlertCurEvent) (*models.AlertCurEvent, string, error) {
if c.Client == nil {
transport := &http.Transport{
TLSClientConfig: &tls.Config{InsecureSkipVerify: c.SkipSSLVerify},
@@ -41,7 +40,7 @@ func (c *EventUpdateConfig) Process(ctx *ctx.Context, wfCtx *models.WorkflowCont
if c.Proxy != "" {
proxyURL, err := url.Parse(c.Proxy)
if err != nil {
return wfCtx, "", fmt.Errorf("failed to parse proxy url: %v processor: %v", err, c)
return event, "", fmt.Errorf("failed to parse proxy url: %v processor: %v", err, c)
} else {
transport.Proxy = http.ProxyURL(proxyURL)
}
@@ -61,12 +60,12 @@ func (c *EventUpdateConfig) Process(ctx *ctx.Context, wfCtx *models.WorkflowCont
body, err := json.Marshal(event)
if err != nil {
return wfCtx, "", fmt.Errorf("failed to marshal event: %v processor: %v", err, c)
return event, "", fmt.Errorf("failed to marshal event: %v processor: %v", err, c)
}
req, err := http.NewRequest("POST", c.URL, strings.NewReader(string(body)))
if err != nil {
return wfCtx, "", fmt.Errorf("failed to create request: %v processor: %v", err, c)
return event, "", fmt.Errorf("failed to create request: %v processor: %v", err, c)
}
for k, v := range headers {
@@ -79,7 +78,7 @@ func (c *EventUpdateConfig) Process(ctx *ctx.Context, wfCtx *models.WorkflowCont
resp, err := c.Client.Do(req)
if err != nil {
return wfCtx, "", fmt.Errorf("failed to send request: %v processor: %v", err, c)
return event, "", fmt.Errorf("failed to send request: %v processor: %v", err, c)
}
b, err := io.ReadAll(resp.Body)
@@ -90,8 +89,8 @@ func (c *EventUpdateConfig) Process(ctx *ctx.Context, wfCtx *models.WorkflowCont
err = json.Unmarshal(b, &event)
if err != nil {
return wfCtx, "", fmt.Errorf("failed to unmarshal response body: %v processor: %v", err, c)
return event, "", fmt.Errorf("failed to unmarshal response body: %v processor: %v", err, c)
}
return wfCtx, "", nil
return event, "", nil
}

View File

@@ -1,197 +0,0 @@
package logic
import (
"bytes"
"fmt"
"strings"
"text/template"
alertCommon "github.com/ccfos/nightingale/v6/alert/common"
"github.com/ccfos/nightingale/v6/alert/pipeline/processor/common"
"github.com/ccfos/nightingale/v6/models"
"github.com/ccfos/nightingale/v6/pkg/ctx"
"github.com/ccfos/nightingale/v6/pkg/tplx"
)
// 判断模式常量
const (
ConditionModeExpression = "expression" // 表达式模式(默认)
ConditionModeTags = "tags" // 标签/属性模式
)
// IfConfig If 条件处理器配置
type IfConfig struct {
// 判断模式expression表达式或 tags标签/属性)
Mode string `json:"mode,omitempty"`
// 表达式模式配置
// 条件表达式(支持 Go 模板语法)
// 例如:{{ if eq .Severity 1 }}true{{ end }}
Condition string `json:"condition,omitempty"`
// 标签/属性模式配置
LabelKeys []models.TagFilter `json:"label_keys,omitempty"` // 适用标签
Attributes []models.TagFilter `json:"attributes,omitempty"` // 适用属性
// 内部使用,解析后的过滤器
parsedLabelKeys []models.TagFilter `json:"-"`
parsedAttributes []models.TagFilter `json:"-"`
}
func init() {
models.RegisterProcessor("logic.if", &IfConfig{})
}
func (c *IfConfig) Init(settings interface{}) (models.Processor, error) {
result, err := common.InitProcessor[*IfConfig](settings)
if err != nil {
return nil, err
}
// 解析标签过滤器
if len(result.LabelKeys) > 0 {
// Deep copy to avoid concurrent map writes on cached objects
labelKeysCopy := make([]models.TagFilter, len(result.LabelKeys))
copy(labelKeysCopy, result.LabelKeys)
for i := range labelKeysCopy {
if labelKeysCopy[i].Func == "" {
labelKeysCopy[i].Func = labelKeysCopy[i].Op
}
}
result.parsedLabelKeys, err = models.ParseTagFilter(labelKeysCopy)
if err != nil {
return nil, fmt.Errorf("failed to parse label_keys: %v", err)
}
}
// 解析属性过滤器
if len(result.Attributes) > 0 {
// Deep copy to avoid concurrent map writes on cached objects
attributesCopy := make([]models.TagFilter, len(result.Attributes))
copy(attributesCopy, result.Attributes)
for i := range attributesCopy {
if attributesCopy[i].Func == "" {
attributesCopy[i].Func = attributesCopy[i].Op
}
}
result.parsedAttributes, err = models.ParseTagFilter(attributesCopy)
if err != nil {
return nil, fmt.Errorf("failed to parse attributes: %v", err)
}
}
return result, nil
}
// Process 实现 Processor 接口(兼容旧模式)
func (c *IfConfig) Process(ctx *ctx.Context, wfCtx *models.WorkflowContext) (*models.WorkflowContext, string, error) {
result, err := c.evaluateCondition(wfCtx)
if err != nil {
return wfCtx, "", fmt.Errorf("if processor: failed to evaluate condition: %v", err)
}
if result {
return wfCtx, "condition matched (true branch)", nil
}
return wfCtx, "condition not matched (false branch)", nil
}
// ProcessWithBranch 实现 BranchProcessor 接口
func (c *IfConfig) ProcessWithBranch(ctx *ctx.Context, wfCtx *models.WorkflowContext) (*models.NodeOutput, error) {
result, err := c.evaluateCondition(wfCtx)
if err != nil {
return nil, fmt.Errorf("if processor: failed to evaluate condition: %v", err)
}
output := &models.NodeOutput{
WfCtx: wfCtx,
}
if result {
// 条件为 true走输出 0true 分支)
branchIndex := 0
output.BranchIndex = &branchIndex
output.Message = "condition matched (true branch)"
} else {
// 条件为 false走输出 1false 分支)
branchIndex := 1
output.BranchIndex = &branchIndex
output.Message = "condition not matched (false branch)"
}
return output, nil
}
// evaluateCondition 评估条件
func (c *IfConfig) evaluateCondition(wfCtx *models.WorkflowContext) (bool, error) {
mode := c.Mode
if mode == "" {
mode = ConditionModeExpression // 默认表达式模式
}
switch mode {
case ConditionModeTags:
return c.evaluateTagsCondition(wfCtx.Event)
default:
return c.evaluateExpressionCondition(wfCtx)
}
}
// evaluateExpressionCondition 评估表达式条件
func (c *IfConfig) evaluateExpressionCondition(wfCtx *models.WorkflowContext) (bool, error) {
if c.Condition == "" {
return true, nil
}
// 构建模板数据
var defs = []string{
"{{ $event := .Event }}",
"{{ $labels := .Event.TagsMap }}",
"{{ $value := .Event.TriggerValue }}",
"{{ $inputs := .Inputs }}",
}
text := strings.Join(append(defs, c.Condition), "")
tpl, err := template.New("if_condition").Funcs(tplx.TemplateFuncMap).Parse(text)
if err != nil {
return false, err
}
var buf bytes.Buffer
if err = tpl.Execute(&buf, wfCtx); err != nil {
return false, err
}
result := strings.TrimSpace(strings.ToLower(buf.String()))
return result == "true" || result == "1", nil
}
// evaluateTagsCondition 评估标签/属性条件
func (c *IfConfig) evaluateTagsCondition(event *models.AlertCurEvent) (bool, error) {
// 如果没有配置任何过滤条件,默认返回 true
if len(c.parsedLabelKeys) == 0 && len(c.parsedAttributes) == 0 {
return true, nil
}
// 匹配标签 (TagsMap)
if len(c.parsedLabelKeys) > 0 {
tagsMap := event.TagsMap
if tagsMap == nil {
tagsMap = make(map[string]string)
}
if !alertCommon.MatchTags(tagsMap, c.parsedLabelKeys) {
return false, nil
}
}
// 匹配属性 (JsonTagsAndValue - 所有 JSON 字段)
if len(c.parsedAttributes) > 0 {
attributesMap := event.JsonTagsAndValue()
if !alertCommon.MatchTags(attributesMap, c.parsedAttributes) {
return false, nil
}
}
return true, nil
}

View File

@@ -1,224 +0,0 @@
package logic
import (
"bytes"
"fmt"
"strings"
"text/template"
alertCommon "github.com/ccfos/nightingale/v6/alert/common"
"github.com/ccfos/nightingale/v6/alert/pipeline/processor/common"
"github.com/ccfos/nightingale/v6/models"
"github.com/ccfos/nightingale/v6/pkg/ctx"
"github.com/ccfos/nightingale/v6/pkg/tplx"
)
// SwitchCase Switch 分支定义
type SwitchCase struct {
// 判断模式expression表达式或 tags标签/属性)
Mode string `json:"mode,omitempty"`
// 表达式模式配置
// 条件表达式(支持 Go 模板语法)
Condition string `json:"condition,omitempty"`
// 标签/属性模式配置
LabelKeys []models.TagFilter `json:"label_keys,omitempty"` // 适用标签
Attributes []models.TagFilter `json:"attributes,omitempty"` // 适用属性
// 分支名称(可选,用于日志)
Name string `json:"name,omitempty"`
// 内部使用,解析后的过滤器
parsedLabelKeys []models.TagFilter `json:"-"`
parsedAttributes []models.TagFilter `json:"-"`
}
// SwitchConfig Switch 多分支处理器配置
type SwitchConfig struct {
// 分支条件列表
// 按顺序匹配,第一个为 true 的分支将被选中
Cases []SwitchCase `json:"cases"`
// 是否允许多个分支同时匹配(默认 false只走第一个匹配的
AllowMultiple bool `json:"allow_multiple,omitempty"`
}
func init() {
models.RegisterProcessor("logic.switch", &SwitchConfig{})
}
func (c *SwitchConfig) Init(settings interface{}) (models.Processor, error) {
result, err := common.InitProcessor[*SwitchConfig](settings)
if err != nil {
return nil, err
}
// 解析每个 case 的标签和属性过滤器
for i := range result.Cases {
if len(result.Cases[i].LabelKeys) > 0 {
// Deep copy to avoid concurrent map writes on cached objects
labelKeysCopy := make([]models.TagFilter, len(result.Cases[i].LabelKeys))
copy(labelKeysCopy, result.Cases[i].LabelKeys)
for j := range labelKeysCopy {
if labelKeysCopy[j].Func == "" {
labelKeysCopy[j].Func = labelKeysCopy[j].Op
}
}
result.Cases[i].parsedLabelKeys, err = models.ParseTagFilter(labelKeysCopy)
if err != nil {
return nil, fmt.Errorf("failed to parse label_keys for case[%d]: %v", i, err)
}
}
if len(result.Cases[i].Attributes) > 0 {
// Deep copy to avoid concurrent map writes on cached objects
attributesCopy := make([]models.TagFilter, len(result.Cases[i].Attributes))
copy(attributesCopy, result.Cases[i].Attributes)
for j := range attributesCopy {
if attributesCopy[j].Func == "" {
attributesCopy[j].Func = attributesCopy[j].Op
}
}
result.Cases[i].parsedAttributes, err = models.ParseTagFilter(attributesCopy)
if err != nil {
return nil, fmt.Errorf("failed to parse attributes for case[%d]: %v", i, err)
}
}
}
return result, nil
}
// Process 实现 Processor 接口(兼容旧模式)
func (c *SwitchConfig) Process(ctx *ctx.Context, wfCtx *models.WorkflowContext) (*models.WorkflowContext, string, error) {
index, caseName, err := c.evaluateCases(wfCtx)
if err != nil {
return wfCtx, "", fmt.Errorf("switch processor: failed to evaluate cases: %v", err)
}
if index >= 0 {
if caseName != "" {
return wfCtx, fmt.Sprintf("matched case[%d]: %s", index, caseName), nil
}
return wfCtx, fmt.Sprintf("matched case[%d]", index), nil
}
// 走默认分支(最后一个输出)
return wfCtx, "no case matched, using default branch", nil
}
// ProcessWithBranch 实现 BranchProcessor 接口
func (c *SwitchConfig) ProcessWithBranch(ctx *ctx.Context, wfCtx *models.WorkflowContext) (*models.NodeOutput, error) {
index, caseName, err := c.evaluateCases(wfCtx)
if err != nil {
return nil, fmt.Errorf("switch processor: failed to evaluate cases: %v", err)
}
output := &models.NodeOutput{
WfCtx: wfCtx,
}
if index >= 0 {
output.BranchIndex = &index
if caseName != "" {
output.Message = fmt.Sprintf("matched case[%d]: %s", index, caseName)
} else {
output.Message = fmt.Sprintf("matched case[%d]", index)
}
} else {
// 默认分支的索引是 cases 数量(即最后一个输出端口)
defaultIndex := len(c.Cases)
output.BranchIndex = &defaultIndex
output.Message = "no case matched, using default branch"
}
return output, nil
}
// evaluateCases 评估所有分支条件
// 返回匹配的分支索引和分支名称,如果没有匹配返回 -1
func (c *SwitchConfig) evaluateCases(wfCtx *models.WorkflowContext) (int, string, error) {
for i := range c.Cases {
matched, err := c.evaluateCaseCondition(&c.Cases[i], wfCtx)
if err != nil {
return -1, "", fmt.Errorf("case[%d] evaluation error: %v", i, err)
}
if matched {
return i, c.Cases[i].Name, nil
}
}
return -1, "", nil
}
// evaluateCaseCondition 评估单个分支条件
func (c *SwitchConfig) evaluateCaseCondition(caseItem *SwitchCase, wfCtx *models.WorkflowContext) (bool, error) {
mode := caseItem.Mode
if mode == "" {
mode = ConditionModeExpression // 默认表达式模式
}
switch mode {
case ConditionModeTags:
return c.evaluateTagsCondition(caseItem, wfCtx.Event)
default:
return c.evaluateExpressionCondition(caseItem.Condition, wfCtx)
}
}
// evaluateExpressionCondition 评估表达式条件
func (c *SwitchConfig) evaluateExpressionCondition(condition string, wfCtx *models.WorkflowContext) (bool, error) {
if condition == "" {
return false, nil
}
var defs = []string{
"{{ $event := .Event }}",
"{{ $labels := .Event.TagsMap }}",
"{{ $value := .Event.TriggerValue }}",
"{{ $inputs := .Inputs }}",
}
text := strings.Join(append(defs, condition), "")
tpl, err := template.New("switch_condition").Funcs(tplx.TemplateFuncMap).Parse(text)
if err != nil {
return false, err
}
var buf bytes.Buffer
if err = tpl.Execute(&buf, wfCtx); err != nil {
return false, err
}
result := strings.TrimSpace(strings.ToLower(buf.String()))
return result == "true" || result == "1", nil
}
// evaluateTagsCondition 评估标签/属性条件
func (c *SwitchConfig) evaluateTagsCondition(caseItem *SwitchCase, event *models.AlertCurEvent) (bool, error) {
// 如果没有配置任何过滤条件,默认返回 false不匹配
if len(caseItem.parsedLabelKeys) == 0 && len(caseItem.parsedAttributes) == 0 {
return false, nil
}
// 匹配标签 (TagsMap)
if len(caseItem.parsedLabelKeys) > 0 {
tagsMap := event.TagsMap
if tagsMap == nil {
tagsMap = make(map[string]string)
}
if !alertCommon.MatchTags(tagsMap, caseItem.parsedLabelKeys) {
return false, nil
}
}
// 匹配属性 (JsonTagsAndValue - 所有 JSON 字段)
if len(caseItem.parsedAttributes) > 0 {
attributesMap := event.JsonTagsAndValue()
if !alertCommon.MatchTags(attributesMap, caseItem.parsedAttributes) {
return false, nil
}
}
return true, nil
}

View File

@@ -42,7 +42,7 @@ func (r *RelabelConfig) Init(settings interface{}) (models.Processor, error) {
return result, err
}
func (r *RelabelConfig) Process(ctx *ctx.Context, wfCtx *models.WorkflowContext) (*models.WorkflowContext, string, error) {
func (r *RelabelConfig) Process(ctx *ctx.Context, event *models.AlertCurEvent) (*models.AlertCurEvent, string, error) {
sourceLabels := make([]model.LabelName, len(r.SourceLabels))
for i := range r.SourceLabels {
sourceLabels[i] = model.LabelName(strings.ReplaceAll(r.SourceLabels[i], ".", REPLACE_DOT))
@@ -63,8 +63,8 @@ func (r *RelabelConfig) Process(ctx *ctx.Context, wfCtx *models.WorkflowContext)
},
}
EventRelabel(wfCtx.Event, relabelConfigs)
return wfCtx, "", nil
EventRelabel(event, relabelConfigs)
return event, "", nil
}
func EventRelabel(event *models.AlertCurEvent, relabelConfigs []*pconf.RelabelConfig) {

View File

@@ -1,32 +0,0 @@
package utils
import (
"bytes"
"fmt"
"strings"
"text/template"
"github.com/ccfos/nightingale/v6/models"
"github.com/ccfos/nightingale/v6/pkg/tplx"
)
func TplRender(wfCtx *models.WorkflowContext, content string) (string, error) {
var defs = []string{
"{{ $event := .Event }}",
"{{ $labels := .Event.TagsMap }}",
"{{ $value := .Event.TriggerValue }}",
"{{ $inputs := .Inputs }}",
}
text := strings.Join(append(defs, content), "")
tpl, err := template.New("tpl").Funcs(tplx.TemplateFuncMap).Parse(text)
if err != nil {
return "", fmt.Errorf("failed to parse template: %v", err)
}
var body bytes.Buffer
if err = tpl.Execute(&body, wfCtx); err != nil {
return "", fmt.Errorf("failed to execute template: %v", err)
}
return strings.TrimSpace(body.String()), nil
}

View File

@@ -131,7 +131,7 @@ func (p *Processor) Handle(anomalyPoints []models.AnomalyPoint, from string, inh
p.inhibit = inhibit
cachedRule := p.alertRuleCache.Get(p.rule.Id)
if cachedRule == nil {
logger.Warningf("alert_eval_%d datasource_%d handle error: rule not found, maybe rule has been deleted, anomalyPoints:%+v", p.rule.Id, p.datasourceId, anomalyPoints)
logger.Warningf("process handle error: rule not found %+v rule_id:%d maybe rule has been deleted", anomalyPoints, p.rule.Id)
p.Stats.CounterRuleEvalErrorTotal.WithLabelValues(fmt.Sprintf("%v", p.DatasourceId()), "handle_event", p.BusiGroupCache.GetNameByBusiGroupId(p.rule.GroupId), fmt.Sprintf("%v", p.rule.Id)).Inc()
return
}
@@ -156,14 +156,14 @@ func (p *Processor) Handle(anomalyPoints []models.AnomalyPoint, from string, inh
eventCopy := event.DeepCopy()
event = dispatch.HandleEventPipeline(cachedRule.PipelineConfigs, eventCopy, event, dispatch.EventProcessorCache, p.ctx, cachedRule.Id, "alert_rule")
if event == nil {
logger.Infof("alert_eval_%d datasource_%d is muted drop by pipeline event:%s", p.rule.Id, p.datasourceId, eventCopy.Hash)
logger.Infof("rule_eval:%s is muted drop by pipeline event:%v", p.Key(), eventCopy)
continue
}
// event mute
isMuted, detail, muteId := mute.IsMuted(cachedRule, event, p.TargetCache, p.alertMuteCache)
if isMuted {
logger.Infof("alert_eval_%d datasource_%d is muted, detail:%s event:%s", p.rule.Id, p.datasourceId, detail, event.Hash)
logger.Infof("rule_eval:%s is muted, detail:%s event:%v", p.Key(), detail, event)
p.Stats.CounterMuteTotal.WithLabelValues(
fmt.Sprintf("%v", event.GroupName),
fmt.Sprintf("%v", p.rule.Id),
@@ -174,7 +174,7 @@ func (p *Processor) Handle(anomalyPoints []models.AnomalyPoint, from string, inh
}
if dispatch.EventMuteHook(event) {
logger.Infof("alert_eval_%d datasource_%d is muted by hook event:%s", p.rule.Id, p.datasourceId, event.Hash)
logger.Infof("rule_eval:%s is muted by hook event:%v", p.Key(), event)
p.Stats.CounterMuteTotal.WithLabelValues(
fmt.Sprintf("%v", event.GroupName),
fmt.Sprintf("%v", p.rule.Id),
@@ -247,7 +247,7 @@ func (p *Processor) BuildEvent(anomalyPoint models.AnomalyPoint, from string, no
if err := json.Unmarshal([]byte(p.rule.Annotations), &event.AnnotationsJSON); err != nil {
event.AnnotationsJSON = make(map[string]string) // 解析失败时使用空 map
logger.Warningf("alert_eval_%d datasource_%d unmarshal annotations json failed: %v", p.rule.Id, p.datasourceId, err)
logger.Warningf("unmarshal annotations json failed: %v, rule: %d", err, p.rule.Id)
}
if event.TriggerValues != "" && strings.Count(event.TriggerValues, "$") > 1 {
@@ -272,7 +272,7 @@ func (p *Processor) BuildEvent(anomalyPoint models.AnomalyPoint, from string, no
pt.GroupNames = p.BusiGroupCache.GetNamesByBusiGroupIds(pt.GroupIds)
event.Target = pt
} else {
logger.Infof("alert_eval_%d datasource_%d fill event target error, ident: %s doesn't exist in cache.", p.rule.Id, p.datasourceId, event.TargetIdent)
logger.Infof("fill event target error, ident: %s doesn't exist in cache.", event.TargetIdent)
}
}
@@ -371,19 +371,19 @@ func (p *Processor) RecoverSingle(byRecover bool, hash string, now int64, value
lastPendingEvent, has := p.pendingsUseByRecover.Get(hash)
if !has {
// 说明没有产生过异常点,就不需要恢复了
logger.Debugf("alert_eval_%d datasource_%d event:%s do not has pending event, not recover", p.rule.Id, p.datasourceId, event.Hash)
logger.Debugf("rule_eval:%s event:%v do not has pending event, not recover", p.Key(), event)
return
}
if now-lastPendingEvent.LastEvalTime < cachedRule.RecoverDuration {
logger.Debugf("alert_eval_%d datasource_%d event:%s not recover", p.rule.Id, p.datasourceId, event.Hash)
logger.Debugf("rule_eval:%s event:%v not recover", p.Key(), event)
return
}
}
// 如果设置了恢复条件,则不能在此处恢复,必须依靠 recoverPoint 来恢复
if event.RecoverConfig.JudgeType != models.Origin && !byRecover {
logger.Debugf("alert_eval_%d datasource_%d event:%s not recover", p.rule.Id, p.datasourceId, event.Hash)
logger.Debugf("rule_eval:%s event:%v not recover", p.Key(), event)
return
}
@@ -460,7 +460,7 @@ func (p *Processor) handleEvent(events []*models.AlertCurEvent) {
func (p *Processor) inhibitEvent(events []*models.AlertCurEvent, highSeverity int) {
for _, event := range events {
if p.inhibit && event.Severity > highSeverity {
logger.Debugf("alert_eval_%d datasource_%d event:%s inhibit highSeverity:%d", p.rule.Id, p.datasourceId, event.Hash, highSeverity)
logger.Debugf("rule_eval:%s event:%+v inhibit highSeverity:%d", p.Key(), event, highSeverity)
continue
}
p.fireEvent(event)
@@ -476,7 +476,7 @@ func (p *Processor) fireEvent(event *models.AlertCurEvent) {
message := "unknown"
defer func() {
logger.Infof("alert_eval_%d datasource_%d event-hash-%s %s", p.rule.Id, p.datasourceId, event.Hash, message)
logger.Infof("rule_eval:%s event-hash-%s %s", p.Key(), event.Hash, message)
}()
if fired, has := p.fires.Get(event.Hash); has {
@@ -527,7 +527,7 @@ func (p *Processor) pushEventToQueue(e *models.AlertCurEvent) {
dispatch.LogEvent(e, "push_queue")
if !queue.EventQueue.PushFront(e) {
logger.Warningf("alert_eval_%d datasource_%d event_push_queue: queue is full, event:%s", p.rule.Id, p.datasourceId, e.Hash)
logger.Warningf("event_push_queue: queue is full, event:%+v", e)
p.Stats.CounterRuleEvalErrorTotal.WithLabelValues(fmt.Sprintf("%v", p.DatasourceId()), "push_event_queue", p.BusiGroupCache.GetNameByBusiGroupId(p.rule.GroupId), fmt.Sprintf("%v", p.rule.Id)).Inc()
}
}
@@ -538,7 +538,7 @@ func (p *Processor) RecoverAlertCurEventFromDb() {
curEvents, err := models.AlertCurEventGetByRuleIdAndDsId(p.ctx, p.rule.Id, p.datasourceId)
if err != nil {
logger.Errorf("alert_eval_%d datasource_%d recover event from db failed, err:%s", p.rule.Id, p.datasourceId, err)
logger.Errorf("recover event from db for rule:%s failed, err:%s", p.Key(), err)
p.Stats.CounterRuleEvalErrorTotal.WithLabelValues(fmt.Sprintf("%v", p.DatasourceId()), "get_recover_event", p.BusiGroupCache.GetNameByBusiGroupId(p.rule.GroupId), fmt.Sprintf("%v", p.rule.Id)).Inc()
p.fires = NewAlertCurEventMap(nil)
return

View File

@@ -22,11 +22,10 @@ type Router struct {
AlertStats *astats.Stats
Ctx *ctx.Context
ExternalProcessors *process.ExternalProcessorsType
LogDir string
}
func New(httpConfig httpx.Config, alert aconf.Alert, amc *memsto.AlertMuteCacheType, tc *memsto.TargetCacheType, bgc *memsto.BusiGroupCacheType,
astats *astats.Stats, ctx *ctx.Context, externalProcessors *process.ExternalProcessorsType, logDir string) *Router {
astats *astats.Stats, ctx *ctx.Context, externalProcessors *process.ExternalProcessorsType) *Router {
return &Router{
HTTP: httpConfig,
Alert: alert,
@@ -36,7 +35,6 @@ func New(httpConfig httpx.Config, alert aconf.Alert, amc *memsto.AlertMuteCacheT
AlertStats: astats,
Ctx: ctx,
ExternalProcessors: externalProcessors,
LogDir: logDir,
}
}
@@ -52,9 +50,6 @@ func (rt *Router) Config(r *gin.Engine) {
service.POST("/event", rt.pushEventToQueue)
service.POST("/event-persist", rt.eventPersist)
service.POST("/make-event", rt.makeEvent)
service.GET("/event-detail/:hash", rt.eventDetail)
service.GET("/alert-eval-detail/:id", rt.alertEvalDetail)
service.GET("/trace-logs/:traceid", rt.traceLogs)
}
func Render(c *gin.Context, data, msg interface{}) {

View File

@@ -1,28 +0,0 @@
package router
import (
"fmt"
"github.com/ccfos/nightingale/v6/pkg/loggrep"
"github.com/ccfos/nightingale/v6/pkg/ginx"
"github.com/gin-gonic/gin"
)
func (rt *Router) alertEvalDetail(c *gin.Context) {
id := ginx.UrlParamStr(c, "id")
if !loggrep.IsValidRuleID(id) {
ginx.Bomb(200, "invalid rule id format")
}
instance := fmt.Sprintf("%s:%d", rt.Alert.Heartbeat.IP, rt.HTTP.Port)
keyword := fmt.Sprintf("alert_eval_%s", id)
logs, err := loggrep.GrepLogDir(rt.LogDir, keyword)
ginx.Dangerous(err)
ginx.NewRender(c).Data(loggrep.EventDetailResp{
Logs: logs,
Instance: instance,
}, nil)
}

View File

@@ -13,9 +13,9 @@ import (
"github.com/ccfos/nightingale/v6/alert/queue"
"github.com/ccfos/nightingale/v6/models"
"github.com/ccfos/nightingale/v6/pkg/poster"
"github.com/ccfos/nightingale/v6/pkg/ginx"
"github.com/gin-gonic/gin"
"github.com/toolkits/pkg/ginx"
"github.com/toolkits/pkg/logger"
)
@@ -75,7 +75,7 @@ func (rt *Router) pushEventToQueue(c *gin.Context) {
dispatch.LogEvent(event, "http_push_queue")
if !queue.EventQueue.PushFront(event) {
msg := fmt.Sprintf("event:%s push_queue err: queue is full", event.Hash)
msg := fmt.Sprintf("event:%+v push_queue err: queue is full", event)
ginx.Bomb(200, msg)
logger.Warningf(msg)
}
@@ -105,21 +105,21 @@ func (rt *Router) makeEvent(c *gin.Context) {
for i := 0; i < len(events); i++ {
node, err := naming.DatasourceHashRing.GetNode(strconv.FormatInt(events[i].DatasourceId, 10), fmt.Sprintf("%d", events[i].RuleId))
if err != nil {
logger.Warningf("event(rule_id=%d ds_id=%d) get node err:%v", events[i].RuleId, events[i].DatasourceId, err)
logger.Warningf("event:%+v get node err:%v", events[i], err)
ginx.Bomb(200, "event node not exists")
}
if node != rt.Alert.Heartbeat.Endpoint {
err := forwardEvent(events[i], node)
if err != nil {
logger.Warningf("event(rule_id=%d ds_id=%d) forward err:%v", events[i].RuleId, events[i].DatasourceId, err)
logger.Warningf("event:%+v forward err:%v", events[i], err)
ginx.Bomb(200, "event forward error")
}
continue
}
ruleWorker, exists := rt.ExternalProcessors.GetExternalAlertRule(events[i].DatasourceId, events[i].RuleId)
logger.Debugf("handle event(rule_id=%d ds_id=%d) exists:%v", events[i].RuleId, events[i].DatasourceId, exists)
logger.Debugf("handle event:%+v exists:%v", events[i], exists)
if !exists {
ginx.Bomb(200, "rule not exists")
}
@@ -143,6 +143,6 @@ func forwardEvent(event *eventForm, instance string) error {
if err != nil {
return err
}
logger.Infof("forward event: result=succ url=%s code=%d rule_id=%d response=%s", ur, code, event.RuleId, string(res))
logger.Infof("forward event: result=succ url=%s code=%d event:%v response=%s", ur, code, event, string(res))
return nil
}

View File

@@ -1,27 +0,0 @@
package router
import (
"fmt"
"github.com/ccfos/nightingale/v6/pkg/loggrep"
"github.com/ccfos/nightingale/v6/pkg/ginx"
"github.com/gin-gonic/gin"
)
func (rt *Router) eventDetail(c *gin.Context) {
hash := ginx.UrlParamStr(c, "hash")
if !loggrep.IsValidHash(hash) {
ginx.Bomb(200, "invalid hash format")
}
instance := fmt.Sprintf("%s:%d", rt.Alert.Heartbeat.IP, rt.HTTP.Port)
logs, err := loggrep.GrepLogDir(rt.LogDir, hash)
ginx.Dangerous(err)
ginx.NewRender(c).Data(loggrep.EventDetailResp{
Logs: logs,
Instance: instance,
}, nil)
}

View File

@@ -1,28 +0,0 @@
package router
import (
"fmt"
"github.com/ccfos/nightingale/v6/pkg/ginx"
"github.com/ccfos/nightingale/v6/pkg/loggrep"
"github.com/gin-gonic/gin"
)
func (rt *Router) traceLogs(c *gin.Context) {
traceId := ginx.UrlParamStr(c, "traceid")
if !loggrep.IsValidTraceID(traceId) {
ginx.Bomb(200, "invalid trace id format")
}
instance := fmt.Sprintf("%s:%d", rt.Alert.Heartbeat.IP, rt.HTTP.Port)
keyword := "trace_id=" + traceId
logs, err := loggrep.GrepLatestLogFiles(rt.LogDir, keyword)
ginx.Dangerous(err)
ginx.NewRender(c).Data(loggrep.EventDetailResp{
Logs: logs,
Instance: instance,
}, nil)
}

View File

@@ -135,7 +135,9 @@ func (c *DefaultCallBacker) CallBack(ctx CallBackContext) {
func doSendAndRecord(ctx *ctx.Context, url, token string, body interface{}, channel string,
stats *astats.Stats, events []*models.AlertCurEvent) {
start := time.Now()
res, err := doSend(url, body, channel, stats)
res = fmt.Sprintf("duration: %d ms %s", time.Since(start).Milliseconds(), res)
NotifyRecord(ctx, events, 0, channel, token, res, err)
}
@@ -169,11 +171,11 @@ func doSend(url string, body interface{}, channel string, stats *astats.Stats) (
start := time.Now()
res, code, err := poster.PostJSON(url, time.Second*5, body, 3)
res = []byte(fmt.Sprintf("duration: %d ms status_code:%d, response:%s", time.Since(start).Milliseconds(), code, string(res)))
res = []byte(fmt.Sprintf("duration: %d ms %s", time.Since(start).Milliseconds(), res))
if err != nil {
logger.Errorf("%s_sender: result=fail url=%s code=%d error=%v req:%v response=%s", channel, url, code, err, body, string(res))
stats.AlertNotifyErrorTotal.WithLabelValues(channel).Inc()
return string(res), err
return "", err
}
logger.Infof("%s_sender: result=succ url=%s code=%d req:%v response=%s", channel, url, code, body, string(res))
@@ -205,6 +207,6 @@ func PushCallbackEvent(ctx *ctx.Context, webhook *models.Webhook, event *models.
succ := queue.eventQueue.Push(event)
if !succ {
logger.Warningf("Write channel(%s) full, current channel size: %d event:%s", webhook.Url, queue.eventQueue.Len(), event.Hash)
logger.Warningf("Write channel(%s) full, current channel size: %d event:%v", webhook.Url, queue.eventQueue.Len(), event)
}
}

View File

@@ -30,14 +30,14 @@ type IbexCallBacker struct {
func (c *IbexCallBacker) CallBack(ctx CallBackContext) {
if len(ctx.CallBackURL) == 0 || len(ctx.Events) == 0 {
logger.Warningf("event_callback_ibex: url or events is empty, url: %s", ctx.CallBackURL)
logger.Warningf("event_callback_ibex: url or events is empty, url: %s, events: %+v", ctx.CallBackURL, ctx.Events)
return
}
event := ctx.Events[0]
if event.IsRecovered {
logger.Infof("event_callback_ibex: event is recovered, event: %s", event.Hash)
logger.Infof("event_callback_ibex: event is recovered, event: %+v", event)
return
}
@@ -45,9 +45,9 @@ func (c *IbexCallBacker) CallBack(ctx CallBackContext) {
}
func (c *IbexCallBacker) handleIbex(ctx *ctx.Context, url string, event *models.AlertCurEvent) {
logger.Infof("event_callback_ibex: url: %s, event: %s", url, event.Hash)
logger.Infof("event_callback_ibex: url: %s, event: %+v", url, event)
if imodels.DB() == nil && ctx.IsCenter {
logger.Warningf("event_callback_ibex: db is nil, event: %s", event.Hash)
logger.Warningf("event_callback_ibex: db is nil, event: %+v", event)
return
}
@@ -66,7 +66,7 @@ func (c *IbexCallBacker) handleIbex(ctx *ctx.Context, url string, event *models.
id, err := strconv.ParseInt(idstr, 10, 64)
if err != nil {
logger.Errorf("event_callback_ibex: failed to parse url: %s event: %s", url, event.Hash)
logger.Errorf("event_callback_ibex: failed to parse url: %s event: %+v", url, event)
return
}
@@ -82,37 +82,34 @@ func (c *IbexCallBacker) handleIbex(ctx *ctx.Context, url string, event *models.
}
if host == "" {
logger.Errorf("event_callback_ibex: failed to get host, id: %d, event: %s", id, event.Hash)
logger.Errorf("event_callback_ibex: failed to get host, id: %d, event: %+v", id, event)
return
}
CallIbex(ctx, id, host, c.taskTplCache, c.targetCache, c.userCache, event, "")
CallIbex(ctx, id, host, c.taskTplCache, c.targetCache, c.userCache, event)
}
func CallIbex(ctx *ctx.Context, id int64, host string,
taskTplCache *memsto.TaskTplCache, targetCache *memsto.TargetCacheType,
userCache *memsto.UserCacheType, event *models.AlertCurEvent, args string) (int64, error) {
logger.Infof("event_callback_ibex: id: %d, host: %s, args: %s, event: %s", id, host, args, event.Hash)
userCache *memsto.UserCacheType, event *models.AlertCurEvent) {
logger.Infof("event_callback_ibex: id: %d, host: %s, event: %+v", id, host, event)
tpl := taskTplCache.Get(id)
if tpl == nil {
err := fmt.Errorf("event_callback_ibex: no such tpl(%d), event: %s", id, event.Hash)
logger.Errorf("%s", err)
return 0, err
logger.Errorf("event_callback_ibex: no such tpl(%d), event: %+v", id, event)
return
}
// check perm
// tpl.GroupId - host - account 三元组校验权限
can, err := CanDoIbex(tpl.UpdateBy, tpl, host, targetCache, userCache)
can, err := canDoIbex(tpl.UpdateBy, tpl, host, targetCache, userCache)
if err != nil {
err = fmt.Errorf("event_callback_ibex: check perm fail: %v, event: %s", err, event.Hash)
logger.Errorf("%s", err)
return 0, err
logger.Errorf("event_callback_ibex: check perm fail: %v, event: %+v", err, event)
return
}
if !can {
err = fmt.Errorf("event_callback_ibex: user(%s) no permission, event: %s", tpl.UpdateBy, event.Hash)
logger.Errorf("%s", err)
return 0, err
logger.Errorf("event_callback_ibex: user(%s) no permission, event: %+v", tpl.UpdateBy, event)
return
}
tagsMap := make(map[string]string)
@@ -136,16 +133,11 @@ func CallIbex(ctx *ctx.Context, id int64, host string,
tags, err := json.Marshal(tagsMap)
if err != nil {
err = fmt.Errorf("event_callback_ibex: failed to marshal tags to json: %v, event: %s", tagsMap, event.Hash)
logger.Errorf("%s", err)
return 0, err
logger.Errorf("event_callback_ibex: failed to marshal tags to json: %v, event: %+v", tagsMap, event)
return
}
// call ibex
taskArgs := tpl.Args
if args != "" {
taskArgs = args
}
in := models.TaskForm{
Title: tpl.Title + " FH: " + host,
Account: tpl.Account,
@@ -154,7 +146,7 @@ func CallIbex(ctx *ctx.Context, id int64, host string,
Timeout: tpl.Timeout,
Pause: tpl.Pause,
Script: tpl.Script,
Args: taskArgs,
Args: tpl.Args,
Stdin: string(tags),
Action: "start",
Creator: tpl.UpdateBy,
@@ -164,9 +156,8 @@ func CallIbex(ctx *ctx.Context, id int64, host string,
id, err = TaskAdd(in, tpl.UpdateBy, ctx.IsCenter)
if err != nil {
err = fmt.Errorf("event_callback_ibex: call ibex fail: %v, event: %s", err, event.Hash)
logger.Errorf("%s", err)
return 0, err
logger.Errorf("event_callback_ibex: call ibex fail: %v, event: %+v", err, event)
return
}
// write db
@@ -187,14 +178,11 @@ func CallIbex(ctx *ctx.Context, id int64, host string,
}
if err = record.Add(ctx); err != nil {
err = fmt.Errorf("event_callback_ibex: persist task_record fail: %v, event: %s", err, event.Hash)
logger.Errorf("%s", err)
return id, err
logger.Errorf("event_callback_ibex: persist task_record fail: %v, event: %+v", err, event)
}
return id, nil
}
func CanDoIbex(username string, tpl *models.TaskTpl, host string, targetCache *memsto.TargetCacheType, userCache *memsto.UserCacheType) (bool, error) {
func canDoIbex(username string, tpl *models.TaskTpl, host string, targetCache *memsto.TargetCacheType, userCache *memsto.UserCacheType) (bool, error) {
user := userCache.GetByUsername(username)
if user != nil && user.IsAdmin() {
return true, nil

View File

@@ -89,7 +89,7 @@ func alertingCallScript(ctx *ctx.Context, stdinBytes []byte, notifyScript models
err, isTimeout := sys.WrapTimeout(cmd, time.Duration(config.Timeout)*time.Second)
res := buf.String()
res = fmt.Sprintf("send_time: %s duration: %d ms %s", time.Now().Format("2006-01-02 15:04:05"), time.Since(start).Milliseconds(), res)
res = fmt.Sprintf("duration: %d ms %s", time.Since(start).Milliseconds(), res)
// 截断超出长度的输出
if len(res) > 512 {

View File

@@ -13,53 +13,10 @@ import (
"github.com/ccfos/nightingale/v6/alert/astats"
"github.com/ccfos/nightingale/v6/models"
"github.com/ccfos/nightingale/v6/pkg/ctx"
"github.com/ccfos/nightingale/v6/pkg/poster"
"github.com/toolkits/pkg/logger"
)
// webhookClientCache 缓存 http.Client避免每次请求都创建新的 Client 导致连接泄露
var webhookClientCache sync.Map // key: clientKey (string), value: *http.Client
// 相同配置的 webhook 会复用同一个 Client
func getWebhookClient(webhook *models.Webhook) *http.Client {
clientKey := webhook.Hash()
if client, ok := webhookClientCache.Load(clientKey); ok {
return client.(*http.Client)
}
// 创建新的 Client
transport := &http.Transport{
TLSClientConfig: &tls.Config{InsecureSkipVerify: webhook.SkipVerify},
MaxIdleConns: 100,
MaxIdleConnsPerHost: 10,
IdleConnTimeout: 90 * time.Second,
}
if poster.UseProxy(webhook.Url) {
transport.Proxy = http.ProxyFromEnvironment
}
timeout := webhook.Timeout
if timeout <= 0 {
timeout = 10
}
newClient := &http.Client{
Timeout: time.Duration(timeout) * time.Second,
Transport: transport,
}
// 使用 LoadOrStore 确保并发安全,避免重复创建
actual, loaded := webhookClientCache.LoadOrStore(clientKey, newClient)
if loaded {
return actual.(*http.Client)
}
return newClient
}
func sendWebhook(webhook *models.Webhook, event interface{}, stats *astats.Stats) (bool, string, error) {
channel := "webhook"
if webhook.Type == models.RuleCallback {
@@ -72,7 +29,7 @@ func sendWebhook(webhook *models.Webhook, event interface{}, stats *astats.Stats
}
bs, err := json.Marshal(event)
if err != nil {
logger.Errorf("%s alertingWebhook failed to marshal event err:%v", channel, err)
logger.Errorf("%s alertingWebhook failed to marshal event:%+v err:%v", channel, event, err)
return false, "", err
}
@@ -98,13 +55,25 @@ func sendWebhook(webhook *models.Webhook, event interface{}, stats *astats.Stats
req.Header.Set(conf.Headers[i], conf.Headers[i+1])
}
}
// 使用全局 Client 缓存,避免每次请求都创建新的 Client 导致连接泄露
client := getWebhookClient(conf)
insecureSkipVerify := false
if webhook != nil {
insecureSkipVerify = webhook.SkipVerify
}
if conf.Client == nil {
logger.Warningf("event_%s, event:%s, url: [%s], error: [%s]", channel, string(bs), conf.Url, "client is nil")
conf.Client = &http.Client{
Timeout: time.Duration(conf.Timeout) * time.Second,
Transport: &http.Transport{
TLSClientConfig: &tls.Config{InsecureSkipVerify: insecureSkipVerify},
},
}
}
stats.AlertNotifyTotal.WithLabelValues(channel).Inc()
var resp *http.Response
var body []byte
resp, err = client.Do(req)
resp, err = conf.Client.Do(req)
if err != nil {
stats.AlertNotifyErrorTotal.WithLabelValues(channel).Inc()
@@ -119,11 +88,11 @@ func sendWebhook(webhook *models.Webhook, event interface{}, stats *astats.Stats
if resp.StatusCode == 429 {
logger.Errorf("event_%s_fail, url: %s, response code: %d, body: %s event:%s", channel, conf.Url, resp.StatusCode, string(body), string(bs))
return true, fmt.Sprintf("status_code:%d, response:%s", resp.StatusCode, string(body)), fmt.Errorf("status code is 429")
return true, string(body), fmt.Errorf("status code is 429")
}
logger.Debugf("event_%s_succ, url: %s, response code: %d, body: %s event:%s", channel, conf.Url, resp.StatusCode, string(body), string(bs))
return false, fmt.Sprintf("status_code:%d, response:%s", resp.StatusCode, string(body)), nil
return false, string(body), nil
}
func SingleSendWebhooks(ctx *ctx.Context, webhooks map[string]*models.Webhook, event *models.AlertCurEvent, stats *astats.Stats) {
@@ -132,7 +101,7 @@ func SingleSendWebhooks(ctx *ctx.Context, webhooks map[string]*models.Webhook, e
for retryCount < 3 {
start := time.Now()
needRetry, res, err := sendWebhook(conf, event, stats)
res = fmt.Sprintf("send_time: %s duration: %d ms %s", time.Now().Format("2006-01-02 15:04:05"), time.Since(start).Milliseconds(), res)
res = fmt.Sprintf("duration: %d ms %s", time.Since(start).Milliseconds(), res)
NotifyRecord(ctx, []*models.AlertCurEvent{event}, 0, "webhook", conf.Url, res, err)
if !needRetry {
break
@@ -145,7 +114,7 @@ func SingleSendWebhooks(ctx *ctx.Context, webhooks map[string]*models.Webhook, e
func BatchSendWebhooks(ctx *ctx.Context, webhooks map[string]*models.Webhook, event *models.AlertCurEvent, stats *astats.Stats) {
for _, conf := range webhooks {
logger.Infof("push event:%s to queue:%v", event.Hash, conf)
logger.Infof("push event:%+v to queue:%v", event, conf)
PushEvent(ctx, conf, event, stats)
}
}
@@ -183,7 +152,7 @@ func PushEvent(ctx *ctx.Context, webhook *models.Webhook, event *models.AlertCur
succ := queue.eventQueue.Push(event)
if !succ {
stats.AlertNotifyErrorTotal.WithLabelValues("push_event_queue").Inc()
logger.Warningf("Write channel(%s) full, current channel size: %d event:%s", webhook.Url, queue.eventQueue.Len(), event.Hash)
logger.Warningf("Write channel(%s) full, current channel size: %d event:%v", webhook.Url, queue.eventQueue.Len(), event)
}
}
@@ -204,7 +173,7 @@ func StartConsumer(ctx *ctx.Context, queue *WebhookQueue, popSize int, webhook *
for retryCount < webhook.RetryCount {
start := time.Now()
needRetry, res, err := sendWebhook(webhook, events, stats)
res = fmt.Sprintf("send_time: %s duration: %d ms %s", time.Now().Format("2006-01-02 15:04:05"), time.Since(start).Milliseconds(), res)
res = fmt.Sprintf("duration: %d ms %s", time.Since(start).Milliseconds(), res)
go NotifyRecord(ctx, events, 0, "webhook", webhook.Url, res, err)
if !needRetry {
break

View File

@@ -1,26 +1,20 @@
package cconf
import (
"time"
"github.com/ccfos/nightingale/v6/pkg/httpx"
)
import "time"
type Center struct {
Plugins []Plugin
MetricsYamlFile string
OpsYamlFile string
BuiltinIntegrationsDir string
I18NHeaderKey string
MetricDesc MetricDescType
AnonymousAccess AnonymousAccess
UseFileAssets bool
FlashDuty FlashDuty
EventHistoryGroupView bool
CleanNotifyRecordDay int
CleanPipelineExecutionDay int
MigrateBusiGroupLabel bool
RSA httpx.RSAConfig
Plugins []Plugin
MetricsYamlFile string
OpsYamlFile string
BuiltinIntegrationsDir string
I18NHeaderKey string
MetricDesc MetricDescType
AnonymousAccess AnonymousAccess
UseFileAssets bool
FlashDuty FlashDuty
EventHistoryGroupView bool
CleanNotifyRecordDay int
MigrateBusiGroupLabel bool
}
type Plugin struct {

View File

@@ -55,10 +55,4 @@ var Plugins = []Plugin{
Type: "opensearch",
TypeName: "OpenSearch",
},
{
Id: 10,
Category: "logging",
Type: "victorialogs",
TypeName: "VictoriaLogs",
},
}

View File

@@ -134,12 +134,11 @@ func Initialize(configDir string, cryptoKey string) (func(), error) {
go version.GetGithubVersion()
go cron.CleanNotifyRecord(ctx, config.Center.CleanNotifyRecordDay)
go cron.CleanPipelineExecution(ctx, config.Center.CleanPipelineExecutionDay)
alertrtRouter := alertrt.New(config.HTTP, config.Alert, alertMuteCache, targetCache, busiGroupCache, alertStats, ctx, externalProcessors, config.Log.Dir)
alertrtRouter := alertrt.New(config.HTTP, config.Alert, alertMuteCache, targetCache, busiGroupCache, alertStats, ctx, externalProcessors)
centerRouter := centerrt.New(config.HTTP, config.Center, config.Alert, config.Ibex,
cconf.Operations, dsCache, notifyConfigCache, promClients,
redis, sso, ctx, metas, idents, targetCache, userCache, userGroupCache, userTokenCache, config.Log.Dir)
redis, sso, ctx, metas, idents, targetCache, userCache, userGroupCache, userTokenCache)
pushgwRouter := pushgwrt.New(config.HTTP, config.Pushgw, config.Alert, targetCache, busiGroupCache, idents, metas, writers, ctx)
r := httpx.GinEngine(config.Global.RunMode, config.HTTP, configCvalCache.PrintBodyPaths, configCvalCache.PrintAccessLog)

View File

@@ -271,8 +271,10 @@ func Init(ctx *ctx.Context, builtinIntegrationsDir string) {
}
for _, metric := range metrics {
time.Sleep(time.Microsecond)
metric.UUID = time.Now().UnixMicro()
if metric.UUID == 0 {
time.Sleep(time.Microsecond)
metric.UUID = time.Now().UnixMicro()
}
metric.ID = metric.UUID
metric.CreatedBy = SYSTEM
metric.UpdatedBy = SYSTEM

View File

@@ -118,7 +118,7 @@ func (s *Set) updateTargets(m map[string]models.HostMeta) error {
}
start := time.Now()
err := storage.MSet(context.Background(), s.redis, newMap, 7*24*time.Hour)
err := storage.MSet(context.Background(), s.redis, newMap)
if err != nil {
cstats.RedisOperationLatency.WithLabelValues("mset_target_meta", "fail").Observe(time.Since(start).Seconds())
return err
@@ -127,7 +127,7 @@ func (s *Set) updateTargets(m map[string]models.HostMeta) error {
}
if len(extendMap) > 0 {
err = storage.MSet(context.Background(), s.redis, extendMap, 7*24*time.Hour)
err = storage.MSet(context.Background(), s.redis, extendMap)
if err != nil {
cstats.RedisOperationLatency.WithLabelValues("mset_target_extend", "fail").Observe(time.Since(start).Seconds())
return err

View File

@@ -24,11 +24,11 @@ import (
"github.com/ccfos/nightingale/v6/prom"
"github.com/ccfos/nightingale/v6/pushgw/idents"
"github.com/ccfos/nightingale/v6/storage"
"github.com/ccfos/nightingale/v6/pkg/ginx"
"gorm.io/gorm"
"github.com/gin-gonic/gin"
"github.com/rakyll/statik/fs"
"github.com/toolkits/pkg/ginx"
"github.com/toolkits/pkg/logger"
"github.com/toolkits/pkg/runner"
)
@@ -51,7 +51,6 @@ type Router struct {
UserGroupCache *memsto.UserGroupCacheType
UserTokenCache *memsto.UserTokenCacheType
Ctx *ctx.Context
LogDir string
HeartbeatHook HeartbeatHookFunc
TargetDeleteHook models.TargetDeleteHookFunc
@@ -62,7 +61,7 @@ func New(httpConfig httpx.Config, center cconf.Center, alert aconf.Alert, ibex c
operations cconf.Operation, ds *memsto.DatasourceCacheType, ncc *memsto.NotifyConfigCacheType,
pc *prom.PromClientMap, redis storage.Redis,
sso *sso.SsoClient, ctx *ctx.Context, metaSet *metas.Set, idents *idents.Set,
tc *memsto.TargetCacheType, uc *memsto.UserCacheType, ugc *memsto.UserGroupCacheType, utc *memsto.UserTokenCacheType, logDir string) *Router {
tc *memsto.TargetCacheType, uc *memsto.UserCacheType, ugc *memsto.UserGroupCacheType, utc *memsto.UserTokenCacheType) *Router {
return &Router{
HTTP: httpConfig,
Center: center,
@@ -81,7 +80,6 @@ func New(httpConfig httpx.Config, center cconf.Center, alert aconf.Alert, ibex c
UserGroupCache: ugc,
UserTokenCache: utc,
Ctx: ctx,
LogDir: logDir,
HeartbeatHook: func(ident string) map[string]interface{} { return nil },
TargetDeleteHook: func(tx *gorm.DB, idents []string) error { return nil },
AlertRuleModifyHook: func(ar *models.AlertRule) {},
@@ -213,8 +211,8 @@ func (rt *Router) Config(r *gin.Engine) {
pages.GET("/datasource/brief", rt.auth(), rt.user(), rt.datasourceBriefs)
pages.POST("/datasource/query", rt.auth(), rt.user(), rt.datasourceQuery)
pages.POST("/ds-query", rt.auth(), rt.user(), rt.QueryData)
pages.POST("/logs-query", rt.auth(), rt.user(), rt.QueryLogV2)
pages.POST("/ds-query", rt.auth(), rt.QueryData)
pages.POST("/logs-query", rt.auth(), rt.QueryLogV2)
pages.POST("/tdengine-databases", rt.auth(), rt.tdengineDatabases)
pages.POST("/tdengine-tables", rt.auth(), rt.tdengineTables)
@@ -253,12 +251,10 @@ func (rt *Router) Config(r *gin.Engine) {
pages.GET("/auth/redirect/cas", rt.loginRedirectCas)
pages.GET("/auth/redirect/oauth", rt.loginRedirectOAuth)
pages.GET("/auth/redirect/dingtalk", rt.loginRedirectDingTalk)
pages.GET("/auth/redirect/feishu", rt.loginRedirectFeiShu)
pages.GET("/auth/callback", rt.loginCallback)
pages.GET("/auth/callback/cas", rt.loginCallbackCas)
pages.GET("/auth/callback/oauth", rt.loginCallbackOAuth)
pages.GET("/auth/callback/dingtalk", rt.loginCallbackDingTalk)
pages.GET("/auth/callback/feishu", rt.loginCallbackFeiShu)
pages.GET("/auth/perms", rt.allPerms)
pages.GET("/metrics/desc", rt.metricsDescGetFile)
@@ -370,7 +366,6 @@ func (rt *Router) Config(r *gin.Engine) {
// pages.GET("/alert-rules/builtin/alerts-cates", rt.auth(), rt.user(), rt.builtinAlertCateGets)
// pages.GET("/alert-rules/builtin/list", rt.auth(), rt.user(), rt.builtinAlertRules)
pages.GET("/alert-rules/callbacks", rt.auth(), rt.user(), rt.alertRuleCallbacks)
pages.GET("/timezones", rt.auth(), rt.user(), rt.timezonesGet)
pages.GET("/busi-groups/alert-rules", rt.auth(), rt.user(), rt.perm("/alert-rules"), rt.alertRuleGetsByGids)
pages.GET("/busi-group/:id/alert-rules", rt.auth(), rt.user(), rt.perm("/alert-rules"), rt.alertRuleGets)
@@ -394,8 +389,8 @@ func (rt *Router) Config(r *gin.Engine) {
pages.GET("/busi-group/:id/recording-rules", rt.auth(), rt.user(), rt.perm("/recording-rules"), rt.recordingRuleGets)
pages.POST("/busi-group/:id/recording-rules", rt.auth(), rt.user(), rt.perm("/recording-rules/add"), rt.bgrw(), rt.recordingRuleAddByFE)
pages.DELETE("/busi-group/:id/recording-rules", rt.auth(), rt.user(), rt.perm("/recording-rules/del"), rt.bgrw(), rt.recordingRuleDel)
pages.PUT("/busi-group/:id/recording-rule/:rrid", rt.auth(), rt.user(), rt.perm("/recording-rules/put"), rt.bgrw(), rt.recordingRulePutByFE)
pages.GET("/recording-rule/:rrid", rt.auth(), rt.user(), rt.perm("/recording-rules"), rt.recordingRuleGet)
pages.PUT("/recording-rule/:rrid", rt.auth(), rt.user(), rt.perm("/recording-rules"), rt.recordingRulePutByFE)
pages.PUT("/busi-group/:id/recording-rules/fields", rt.auth(), rt.user(), rt.perm("/recording-rules/put"), rt.recordingRulePutFields)
pages.GET("/busi-groups/alert-mutes", rt.auth(), rt.user(), rt.perm("/alert-mutes"), rt.alertMuteGetsByGids)
@@ -419,9 +414,6 @@ func (rt *Router) Config(r *gin.Engine) {
pages.GET("/alert-cur-event/:eid", rt.alertCurEventGet)
pages.GET("/alert-his-event/:eid", rt.alertHisEventGet)
pages.GET("/event-notify-records/:eid", rt.notificationRecordList)
pages.GET("/event-detail/:hash", rt.eventDetailPage)
pages.GET("/alert-eval-detail/:id", rt.alertEvalDetailPage)
pages.GET("/trace-logs/:traceid", rt.traceLogsPage)
// card logic
pages.GET("/alert-cur-events/list", rt.auth(), rt.user(), rt.alertCurEventsList)
@@ -566,19 +558,6 @@ func (rt *Router) Config(r *gin.Engine) {
pages.POST("/event-pipeline-tryrun", rt.auth(), rt.user(), rt.perm("/event-pipelines"), rt.tryRunEventPipeline)
pages.POST("/event-processor-tryrun", rt.auth(), rt.user(), rt.perm("/event-pipelines"), rt.tryRunEventProcessor)
// API 触发工作流
pages.POST("/event-pipeline/:id/trigger", rt.auth(), rt.user(), rt.perm("/event-pipelines"), rt.triggerEventPipelineByAPI)
// SSE 流式执行工作流
pages.POST("/event-pipeline/:id/stream", rt.auth(), rt.user(), rt.perm("/event-pipelines"), rt.streamEventPipeline)
// 事件Pipeline执行记录路由
pages.GET("/event-pipeline-executions", rt.auth(), rt.user(), rt.perm("/event-pipelines"), rt.listAllEventPipelineExecutions)
pages.GET("/event-pipeline/:id/executions", rt.auth(), rt.user(), rt.perm("/event-pipelines"), rt.listEventPipelineExecutions)
pages.GET("/event-pipeline/:id/execution/:exec_id", rt.auth(), rt.user(), rt.perm("/event-pipelines"), rt.getEventPipelineExecution)
pages.GET("/event-pipeline-execution/:exec_id", rt.auth(), rt.user(), rt.perm("/event-pipelines"), rt.getEventPipelineExecution)
pages.GET("/event-pipeline/:id/execution-stats", rt.auth(), rt.user(), rt.perm("/event-pipelines"), rt.getEventPipelineExecutionStats)
pages.POST("/event-pipeline-executions/clean", rt.auth(), rt.user(), rt.admin(), rt.cleanEventPipelineExecutions)
pages.POST("/notify-channel-configs", rt.auth(), rt.user(), rt.perm("/notification-channels/add"), rt.notifyChannelsAdd)
pages.DELETE("/notify-channel-configs", rt.auth(), rt.user(), rt.perm("/notification-channels/del"), rt.notifyChannelsDel)
pages.PUT("/notify-channel-config/:id", rt.auth(), rt.user(), rt.perm("/notification-channels/put"), rt.notifyChannelPut)
@@ -590,14 +569,6 @@ func (rt *Router) Config(r *gin.Engine) {
pages.GET("/pagerduty-service-list/:id", rt.auth(), rt.user(), rt.pagerDutyNotifyServicesGet)
pages.GET("/notify-channel-config", rt.auth(), rt.user(), rt.notifyChannelGetBy)
pages.GET("/notify-channel-config/idents", rt.notifyChannelIdentsGet)
// saved view 查询条件保存相关路由
pages.GET("/saved-views", rt.auth(), rt.user(), rt.savedViewGets)
pages.POST("/saved-views", rt.auth(), rt.user(), rt.savedViewAdd)
pages.PUT("/saved-view/:id", rt.auth(), rt.user(), rt.savedViewPut)
pages.DELETE("/saved-view/:id", rt.auth(), rt.user(), rt.savedViewDel)
pages.POST("/saved-view/:id/favorite", rt.auth(), rt.user(), rt.savedViewFavoriteAdd)
pages.DELETE("/saved-view/:id/favorite", rt.auth(), rt.user(), rt.savedViewFavoriteDel)
}
r.GET("/api/n9e/versions", func(c *gin.Context) {
@@ -654,7 +625,6 @@ func (rt *Router) Config(r *gin.Engine) {
service.GET("/busi-groups", rt.busiGroupGetsByService)
service.GET("/datasources", rt.datasourceGetsByService)
service.GET("/datasource-rsa-config", rt.datasourceRsaConfigGet)
service.GET("/datasource-ids", rt.getDatasourceIds)
service.POST("/server-heartbeat", rt.serverHeartbeat)
service.GET("/servers-active", rt.serversActive)
@@ -711,9 +681,6 @@ func (rt *Router) Config(r *gin.Engine) {
service.GET("/message-templates", rt.messageTemplateGets)
service.GET("/event-pipelines", rt.eventPipelinesListByService)
service.POST("/event-pipeline/:id/trigger", rt.triggerEventPipelineByService)
service.POST("/event-pipeline/:id/stream", rt.streamEventPipelineByService)
service.POST("/event-pipeline-execution", rt.eventPipelineExecutionAdd)
// 手机号加密存储配置接口
service.POST("/users/phone/encrypt", rt.usersPhoneEncrypt)

View File

@@ -4,9 +4,9 @@ import (
"net/http"
"github.com/ccfos/nightingale/v6/models"
"github.com/ccfos/nightingale/v6/pkg/ginx"
"github.com/gin-gonic/gin"
"github.com/toolkits/pkg/ginx"
)
// no param

View File

@@ -10,9 +10,9 @@ import (
"github.com/ccfos/nightingale/v6/models"
"github.com/ccfos/nightingale/v6/pkg/ctx"
"github.com/ccfos/nightingale/v6/pkg/strx"
"github.com/ccfos/nightingale/v6/pkg/ginx"
"github.com/gin-gonic/gin"
"github.com/toolkits/pkg/ginx"
"github.com/toolkits/pkg/logger"
)

View File

@@ -1,168 +0,0 @@
package router
import (
"encoding/json"
"fmt"
"io"
"net/http"
"sort"
"strconv"
"strings"
"time"
"github.com/ccfos/nightingale/v6/models"
"github.com/ccfos/nightingale/v6/pkg/loggrep"
"github.com/ccfos/nightingale/v6/pkg/ginx"
"github.com/gin-gonic/gin"
)
// alertEvalDetailPage renders an HTML log viewer page for alert rule evaluation logs.
func (rt *Router) alertEvalDetailPage(c *gin.Context) {
id := ginx.UrlParamStr(c, "id")
if !loggrep.IsValidRuleID(id) {
c.String(http.StatusBadRequest, "invalid rule id format")
return
}
logs, instance, err := rt.getAlertEvalLogs(id)
if err != nil {
c.String(http.StatusInternalServerError, "Error: %v", err)
return
}
c.Header("Content-Type", "text/html; charset=utf-8")
err = loggrep.RenderAlertEvalHTML(c.Writer, loggrep.AlertEvalPageData{
RuleID: id,
Instance: instance,
Logs: logs,
Total: len(logs),
})
if err != nil {
c.String(http.StatusInternalServerError, "render error: %v", err)
}
}
// alertEvalDetailJSON returns JSON for alert rule evaluation logs.
func (rt *Router) alertEvalDetailJSON(c *gin.Context) {
id := ginx.UrlParamStr(c, "id")
if !loggrep.IsValidRuleID(id) {
ginx.Bomb(200, "invalid rule id format")
}
logs, instance, err := rt.getAlertEvalLogs(id)
ginx.Dangerous(err)
ginx.NewRender(c).Data(loggrep.EventDetailResp{
Logs: logs,
Instance: instance,
}, nil)
}
// getAlertEvalLogs resolves the target instance(s) and retrieves alert eval logs.
func (rt *Router) getAlertEvalLogs(id string) ([]string, string, error) {
ruleId, _ := strconv.ParseInt(id, 10, 64)
rule, err := models.AlertRuleGetById(rt.Ctx, ruleId)
if err != nil {
return nil, "", err
}
if rule == nil {
return nil, "", fmt.Errorf("no such alert rule")
}
instance := fmt.Sprintf("%s:%d", rt.Alert.Heartbeat.IP, rt.HTTP.Port)
keyword := fmt.Sprintf("alert_eval_%s", id)
// Get datasource IDs for this rule
dsIds := rt.DatasourceCache.GetIDsByDsCateAndQueries(rule.Cate, rule.DatasourceQueries)
if len(dsIds) == 0 {
// No datasources found (e.g. host rule), try local grep
logs, err := loggrep.GrepLogDir(rt.LogDir, keyword)
return logs, instance, err
}
// Find unique target nodes via hash ring, with DB fallback
nodeSet := make(map[string]struct{})
for _, dsId := range dsIds {
node, err := rt.getNodeForDatasource(dsId, id)
if err != nil {
continue
}
nodeSet[node] = struct{}{}
}
if len(nodeSet) == 0 {
// Hash ring not ready, grep locally
logs, err := loggrep.GrepLogDir(rt.LogDir, keyword)
return logs, instance, err
}
// Collect logs from all target nodes
var allLogs []string
var instances []string
for node := range nodeSet {
if node == instance {
logs, err := loggrep.GrepLogDir(rt.LogDir, keyword)
if err == nil {
allLogs = append(allLogs, logs...)
instances = append(instances, node)
}
} else {
logs, nodeAddr, err := rt.forwardAlertEvalDetail(node, id)
if err == nil {
allLogs = append(allLogs, logs...)
instances = append(instances, nodeAddr)
}
}
}
// Sort logs by timestamp descending
sort.Slice(allLogs, func(i, j int) bool {
return allLogs[i] > allLogs[j]
})
if len(allLogs) > loggrep.MaxLogLines {
allLogs = allLogs[:loggrep.MaxLogLines]
}
return allLogs, strings.Join(instances, ", "), nil
}
func (rt *Router) forwardAlertEvalDetail(node, id string) ([]string, string, error) {
url := fmt.Sprintf("http://%s/v1/n9e/alert-eval-detail/%s", node, id)
req, err := http.NewRequest("GET", url, nil)
if err != nil {
return nil, node, err
}
for user, pass := range rt.HTTP.APIForService.BasicAuth {
req.SetBasicAuth(user, pass)
break
}
client := &http.Client{Timeout: 15 * time.Second}
resp, err := client.Do(req)
if err != nil {
return nil, node, fmt.Errorf("forward to %s failed: %v", node, err)
}
defer resp.Body.Close()
body, err := io.ReadAll(io.LimitReader(resp.Body, 10*1024*1024)) // 10MB limit
if err != nil {
return nil, node, err
}
var result struct {
Dat loggrep.EventDetailResp `json:"dat"`
Err string `json:"err"`
}
if err := json.Unmarshal(body, &result); err != nil {
return nil, node, err
}
if result.Err != "" {
return nil, node, fmt.Errorf("%s", result.Err)
}
return result.Dat.Logs, result.Dat.Instance, nil
}

View File

@@ -8,9 +8,9 @@ import (
"github.com/ccfos/nightingale/v6/models"
"github.com/ccfos/nightingale/v6/pkg/ctx"
"github.com/ccfos/nightingale/v6/pkg/ginx"
"github.com/gin-gonic/gin"
"github.com/toolkits/pkg/ginx"
"github.com/toolkits/pkg/logger"
"golang.org/x/exp/slices"
)

View File

@@ -16,12 +16,12 @@ import (
"github.com/ccfos/nightingale/v6/pkg/strx"
"github.com/ccfos/nightingale/v6/pushgw/pconf"
"github.com/ccfos/nightingale/v6/pushgw/writer"
"github.com/ccfos/nightingale/v6/pkg/ginx"
"github.com/gin-gonic/gin"
"github.com/jinzhu/copier"
"github.com/pkg/errors"
"github.com/prometheus/prometheus/prompb"
"github.com/toolkits/pkg/ginx"
"github.com/toolkits/pkg/i18n"
)
@@ -36,7 +36,6 @@ func (rt *Router) alertRuleGets(c *gin.Context) {
for i := 0; i < len(ars); i++ {
ars[i].FillNotifyGroups(rt.Ctx, cache)
}
models.FillUpdateByNicknames(rt.Ctx, ars)
}
ginx.NewRender(c).Data(ars, err)
}
@@ -77,6 +76,7 @@ func (rt *Router) alertRuleGetsByGids(c *gin.Context) {
if err == nil {
cache := make(map[int64]*models.UserGroup)
rids := make([]int64, 0, len(ars))
names := make([]string, 0, len(ars))
for i := 0; i < len(ars); i++ {
ars[i].FillNotifyGroups(rt.Ctx, cache)
@@ -85,6 +85,7 @@ func (rt *Router) alertRuleGetsByGids(c *gin.Context) {
}
rids = append(rids, ars[i].Id)
names = append(names, ars[i].UpdateBy)
}
stime, etime := GetAlertCueEventTimeRange(c)
@@ -95,7 +96,14 @@ func (rt *Router) alertRuleGetsByGids(c *gin.Context) {
}
}
models.FillUpdateByNicknames(rt.Ctx, ars)
users := models.UserMapGet(rt.Ctx, "username in (?)", names)
if users != nil {
for i := 0; i < len(ars); i++ {
if user, exist := users[ars[i].UpdateBy]; exist {
ars[i].UpdateByNickname = user.Nickname
}
}
}
}
ginx.NewRender(c).Data(ars, err)
}
@@ -127,7 +135,6 @@ func (rt *Router) alertRulesGetByService(c *gin.Context) {
ars[i].DatasourceIdsJson = rt.DatasourceCache.GetIDsByDsCateAndQueries(ars[i].Cate, ars[i].DatasourceQueries)
}
}
models.FillUpdateByNicknames(rt.Ctx, ars)
}
ginx.NewRender(c).Data(ars, err)
}
@@ -882,27 +889,3 @@ func (rt *Router) batchAlertRuleClone(c *gin.Context) {
ginx.NewRender(c).Data(reterr, nil)
}
func (rt *Router) timezonesGet(c *gin.Context) {
// 返回常用时区列表(按时差去重,每个时差只保留一个代表性时区)
timezones := []string{
"UTC",
"Asia/Shanghai", // UTC+8 (代表 Asia/Hong_Kong, Asia/Singapore 等)
"Asia/Tokyo", // UTC+9 (代表 Asia/Seoul 等)
"Asia/Dubai", // UTC+4
"Asia/Kolkata", // UTC+5:30
"Asia/Bangkok", // UTC+7 (代表 Asia/Jakarta 等)
"Europe/London", // UTC+0 (代表 UTC)
"Europe/Paris", // UTC+1 (代表 Europe/Berlin, Europe/Rome, Europe/Madrid 等)
"Europe/Moscow", // UTC+3
"America/New_York", // UTC-5 (代表 America/Toronto 等)
"America/Chicago", // UTC-6 (代表 America/Mexico_City 等)
"America/Denver", // UTC-7
"America/Los_Angeles", // UTC-8
"America/Sao_Paulo", // UTC-3
"Australia/Sydney", // UTC+10 (代表 Australia/Melbourne 等)
"Pacific/Auckland", // UTC+12
}
ginx.NewRender(c).Data(timezones, nil)
}

View File

@@ -9,9 +9,9 @@ import (
"github.com/ccfos/nightingale/v6/alert/common"
"github.com/ccfos/nightingale/v6/models"
"github.com/ccfos/nightingale/v6/pkg/strx"
"github.com/ccfos/nightingale/v6/pkg/ginx"
"github.com/gin-gonic/gin"
"github.com/toolkits/pkg/ginx"
"github.com/toolkits/pkg/i18n"
)
@@ -30,7 +30,6 @@ func (rt *Router) alertSubscribeGets(c *gin.Context) {
ginx.Dangerous(lst[i].FillDatasourceIds(rt.Ctx))
ginx.Dangerous(lst[i].DB2FE())
}
models.FillUpdateByNicknames(rt.Ctx, lst)
ginx.NewRender(c).Data(lst, err)
}
@@ -67,7 +66,6 @@ func (rt *Router) alertSubscribeGetsByGids(c *gin.Context) {
ginx.Dangerous(lst[i].FillDatasourceIds(rt.Ctx))
ginx.Dangerous(lst[i].DB2FE())
}
models.FillUpdateByNicknames(rt.Ctx, lst)
ginx.NewRender(c).Data(lst, err)
}

View File

@@ -7,9 +7,9 @@ import (
"github.com/ccfos/nightingale/v6/models"
"github.com/ccfos/nightingale/v6/pkg/strx"
"github.com/ccfos/nightingale/v6/pkg/ginx"
"github.com/gin-gonic/gin"
"github.com/toolkits/pkg/ginx"
"github.com/toolkits/pkg/i18n"
)
@@ -260,9 +260,6 @@ func (rt *Router) boardGets(c *gin.Context) {
query := ginx.QueryStr(c, "query", "")
boards, err := models.BoardGetsByGroupId(rt.Ctx, bgid, query)
if err == nil {
models.FillUpdateByNicknames(rt.Ctx, boards)
}
ginx.NewRender(c).Data(boards, err)
}
@@ -276,9 +273,6 @@ func (rt *Router) publicBoardGets(c *gin.Context) {
ginx.Dangerous(err)
boards, err := models.BoardGets(rt.Ctx, "", "public=1 and (public_cate in (?) or id in (?))", []int64{0, 1}, boardIds)
if err == nil {
models.FillUpdateByNicknames(rt.Ctx, boards)
}
ginx.NewRender(c).Data(boards, err)
}
@@ -318,7 +312,6 @@ func (rt *Router) boardGetsByGids(c *gin.Context) {
boards[i].Bgids = ids
}
}
models.FillUpdateByNicknames(rt.Ctx, boards)
ginx.NewRender(c).Data(boards, err)
}

View File

@@ -8,10 +8,10 @@ import (
"strings"
"github.com/ccfos/nightingale/v6/models"
"github.com/ccfos/nightingale/v6/pkg/ginx"
"github.com/gin-gonic/gin"
"github.com/toolkits/pkg/file"
"github.com/toolkits/pkg/ginx"
"github.com/toolkits/pkg/logger"
"github.com/toolkits/pkg/runner"
)

View File

@@ -5,9 +5,9 @@ import (
"github.com/ccfos/nightingale/v6/models"
"github.com/ccfos/nightingale/v6/pkg/ctx"
"github.com/ccfos/nightingale/v6/pkg/ginx"
"github.com/gin-gonic/gin"
"github.com/toolkits/pkg/ginx"
"gorm.io/gorm"
)

View File

@@ -3,8 +3,8 @@ package router
import (
"github.com/ccfos/nightingale/v6/models"
"github.com/ccfos/nightingale/v6/pkg/prom"
"github.com/ccfos/nightingale/v6/pkg/ginx"
"github.com/gin-gonic/gin"
"github.com/toolkits/pkg/ginx"
)
func (rt *Router) metricFilterGets(c *gin.Context) {
@@ -27,8 +27,6 @@ func (rt *Router) metricFilterGets(c *gin.Context) {
}
}
models.FillUpdateByNicknames(rt.Ctx, arr)
ginx.NewRender(c).Data(arr, err)
}

View File

@@ -7,9 +7,9 @@ import (
"github.com/ccfos/nightingale/v6/center/integration"
"github.com/ccfos/nightingale/v6/models"
"github.com/ccfos/nightingale/v6/pkg/ginx"
"github.com/gin-gonic/gin"
"github.com/toolkits/pkg/ginx"
"github.com/toolkits/pkg/i18n"
)

View File

@@ -9,8 +9,8 @@ import (
"github.com/BurntSushi/toml"
"github.com/ccfos/nightingale/v6/center/integration"
"github.com/ccfos/nightingale/v6/models"
"github.com/ccfos/nightingale/v6/pkg/ginx"
"github.com/gin-gonic/gin"
"github.com/toolkits/pkg/ginx"
"github.com/toolkits/pkg/i18n"
)

View File

@@ -5,9 +5,9 @@ import (
"github.com/ccfos/nightingale/v6/models"
"github.com/ccfos/nightingale/v6/pkg/strx"
"github.com/ccfos/nightingale/v6/pkg/ginx"
"github.com/gin-gonic/gin"
"github.com/toolkits/pkg/ginx"
"github.com/toolkits/pkg/logger"
)
@@ -119,9 +119,6 @@ func (rt *Router) busiGroupGets(c *gin.Context) {
if len(lst) == 0 {
lst = []models.BusiGroup{}
}
if err == nil {
models.FillUpdateByNicknames(rt.Ctx, lst)
}
ginx.NewRender(c).Data(lst, err)
}

View File

@@ -5,9 +5,9 @@ import (
"time"
"github.com/ccfos/nightingale/v6/storage"
"github.com/ccfos/nightingale/v6/pkg/ginx"
"github.com/gin-gonic/gin"
captcha "github.com/mojocn/base64Captcha"
"github.com/toolkits/pkg/ginx"
"github.com/toolkits/pkg/logger"
)

View File

@@ -5,9 +5,9 @@ import (
"github.com/ccfos/nightingale/v6/models"
"github.com/ccfos/nightingale/v6/pkg/strx"
"github.com/ccfos/nightingale/v6/pkg/ginx"
"github.com/gin-gonic/gin"
"github.com/toolkits/pkg/ginx"
)
func (rt *Router) chartShareGets(c *gin.Context) {

View File

@@ -4,9 +4,9 @@ import (
"encoding/json"
"github.com/ccfos/nightingale/v6/models"
"github.com/ccfos/nightingale/v6/pkg/ginx"
"github.com/gin-gonic/gin"
"github.com/toolkits/pkg/ginx"
)
func (rt *Router) notifyChannelsGets(c *gin.Context) {

View File

@@ -4,9 +4,9 @@ import (
"time"
"github.com/ccfos/nightingale/v6/models"
"github.com/ccfos/nightingale/v6/pkg/ginx"
"github.com/gin-gonic/gin"
"github.com/toolkits/pkg/ginx"
)
const EMBEDDEDDASHBOARD = "embedded-dashboards"
@@ -15,9 +15,6 @@ func (rt *Router) configsGet(c *gin.Context) {
prefix := ginx.QueryStr(c, "prefix", "")
limit := ginx.QueryInt(c, "limit", 10)
configs, err := models.ConfigsGets(rt.Ctx, prefix, limit, ginx.Offset(c, limit))
if err == nil {
models.FillUpdateByNicknames(rt.Ctx, configs)
}
ginx.NewRender(c).Data(configs, err)
}

View File

@@ -2,9 +2,9 @@ package router
import (
"github.com/ccfos/nightingale/v6/pkg/secu"
"github.com/ccfos/nightingale/v6/pkg/ginx"
"github.com/gin-gonic/gin"
"github.com/toolkits/pkg/ginx"
)
type confPropCrypto struct {

View File

@@ -7,9 +7,9 @@ import (
"github.com/ccfos/nightingale/v6/models"
"github.com/ccfos/nightingale/v6/pkg/ctx"
"github.com/ccfos/nightingale/v6/pkg/ginx"
"github.com/gin-gonic/gin"
"github.com/toolkits/pkg/ginx"
)
func checkAnnotationPermission(c *gin.Context, ctx *ctx.Context, dashboardId int64) {

View File

@@ -3,20 +3,19 @@ package router
import (
"context"
"crypto/tls"
"encoding/base64"
"encoding/json"
"fmt"
"io"
"net/http"
"net/url"
"strings"
"time"
"github.com/ccfos/nightingale/v6/datasource/opensearch"
"github.com/ccfos/nightingale/v6/dskit/clickhouse"
"github.com/ccfos/nightingale/v6/models"
"github.com/ccfos/nightingale/v6/pkg/ginx"
"github.com/gin-gonic/gin"
"github.com/toolkits/pkg/ginx"
"github.com/toolkits/pkg/i18n"
"github.com/toolkits/pkg/logger"
)
@@ -53,41 +52,9 @@ func (rt *Router) datasourceList(c *gin.Context) {
func (rt *Router) datasourceGetsByService(c *gin.Context) {
typ := ginx.QueryStr(c, "typ", "")
lst, err := models.GetDatasourcesGetsBy(rt.Ctx, typ, "", "", "")
openRsa := rt.Center.RSA.OpenRSA
for _, item := range lst {
if err := item.Encrypt(openRsa, rt.HTTP.RSA.RSAPublicKey); err != nil {
logger.Errorf("datasource %+v encrypt failed: %v", item, err)
continue
}
}
ginx.NewRender(c).Data(lst, err)
}
func (rt *Router) datasourceRsaConfigGet(c *gin.Context) {
if rt.Center.RSA.OpenRSA {
publicKey := ""
privateKey := ""
if len(rt.HTTP.RSA.RSAPublicKey) > 0 {
publicKey = base64.StdEncoding.EncodeToString(rt.HTTP.RSA.RSAPublicKey)
}
if len(rt.HTTP.RSA.RSAPrivateKey) > 0 {
privateKey = base64.StdEncoding.EncodeToString(rt.HTTP.RSA.RSAPrivateKey)
}
logger.Debugf("OpenRSA=%v", rt.Center.RSA.OpenRSA)
ginx.NewRender(c).Data(models.RsaConfig{
OpenRSA: rt.Center.RSA.OpenRSA,
RSAPublicKey: publicKey,
RSAPrivateKey: privateKey,
RSAPassWord: rt.HTTP.RSA.RSAPassWord,
}, nil)
} else {
ginx.NewRender(c).Data(models.RsaConfig{
OpenRSA: rt.Center.RSA.OpenRSA,
}, nil)
}
}
func (rt *Router) datasourceBriefs(c *gin.Context) {
var dss []*models.Datasource
list, err := models.GetDatasourcesGetsBy(rt.Ctx, "", "", "", "")
@@ -230,37 +197,6 @@ func (rt *Router) datasourceUpsert(c *gin.Context) {
}
}
if req.PluginType == models.ELASTICSEARCH {
skipAuto := false
// 若用户输入了versionversion字符串存在且不为空则不自动获取
if req.SettingsJson != nil {
if v, ok := req.SettingsJson["version"]; ok {
switch vv := v.(type) {
case string:
if strings.TrimSpace(vv) != "" {
skipAuto = true
}
default:
if strings.TrimSpace(fmt.Sprint(vv)) != "" {
skipAuto = true
}
}
}
}
if !skipAuto {
version, err := getElasticsearchVersion(req, 10*time.Second)
if err != nil {
logger.Warningf("failed to get elasticsearch version: %v", err)
} else {
if req.SettingsJson == nil {
req.SettingsJson = make(map[string]interface{})
}
req.SettingsJson["version"] = version
}
}
}
if req.Id == 0 {
req.CreatedBy = username
req.Status = "enabled"
@@ -276,7 +212,7 @@ func (rt *Router) datasourceUpsert(c *gin.Context) {
}
err = req.Add(rt.Ctx)
} else {
err = req.Update(rt.Ctx, "name", "identifier", "description", "cluster_name", "settings", "http", "auth", "updated_by", "updated_at", "is_default", "weight")
err = req.Update(rt.Ctx, "name", "identifier", "description", "cluster_name", "settings", "http", "auth", "updated_by", "updated_at", "is_default")
}
Render(c, nil, err)
@@ -293,15 +229,11 @@ func DatasourceCheck(c *gin.Context, ds models.Datasource) error {
}
}
// 使用 TLS 配置(支持 mTLS
tlsConfig, err := ds.HTTPJson.TLS.TLSConfig()
if err != nil {
return fmt.Errorf("failed to create TLS config: %v", err)
}
client := &http.Client{
Transport: &http.Transport{
TLSClientConfig: tlsConfig,
TLSClientConfig: &tls.Config{
InsecureSkipVerify: ds.HTTPJson.TLS.SkipTlsVerify,
},
},
}
@@ -459,82 +391,3 @@ func (rt *Router) datasourceQuery(c *gin.Context) {
}
ginx.NewRender(c).Data(req, err)
}
// getElasticsearchVersion 该函数尝试从提供的Elasticsearch数据源中获取版本号遍历所有URL
// 直到成功获取版本号或所有URL均尝试失败为止。
func getElasticsearchVersion(ds models.Datasource, timeout time.Duration) (string, error) {
client := &http.Client{
Timeout: timeout,
Transport: &http.Transport{
TLSClientConfig: &tls.Config{
InsecureSkipVerify: ds.HTTPJson.TLS.SkipTlsVerify,
},
},
}
urls := make([]string, 0)
if len(ds.HTTPJson.Urls) > 0 {
urls = append(urls, ds.HTTPJson.Urls...)
}
if ds.HTTPJson.Url != "" {
urls = append(urls, ds.HTTPJson.Url)
}
if len(urls) == 0 {
return "", fmt.Errorf("no url provided")
}
var lastErr error
for _, raw := range urls {
baseURL := strings.TrimRight(raw, "/") + "/"
req, err := http.NewRequest("GET", baseURL, nil)
if err != nil {
lastErr = err
continue
}
if ds.AuthJson.BasicAuthUser != "" {
req.SetBasicAuth(ds.AuthJson.BasicAuthUser, ds.AuthJson.BasicAuthPassword)
}
for k, v := range ds.HTTPJson.Headers {
req.Header.Set(k, v)
}
resp, err := client.Do(req)
if err != nil {
lastErr = err
continue
}
body, err := io.ReadAll(resp.Body)
resp.Body.Close()
if err != nil {
lastErr = err
continue
}
if resp.StatusCode != 200 {
lastErr = fmt.Errorf("request to %s failed with status: %d body:%s", baseURL, resp.StatusCode, string(body))
continue
}
var result map[string]interface{}
if err := json.Unmarshal(body, &result); err != nil {
lastErr = err
continue
}
if version, ok := result["version"].(map[string]interface{}); ok {
if number, ok := version["number"].(string); ok && number != "" {
return number, nil
}
}
lastErr = fmt.Errorf("version not found in response from %s", baseURL)
}
if lastErr != nil {
return "", lastErr
}
return "", fmt.Errorf("failed to get elasticsearch version")
}

View File

@@ -6,10 +6,10 @@ import (
"github.com/ccfos/nightingale/v6/dscache"
"github.com/ccfos/nightingale/v6/dskit/types"
"github.com/ccfos/nightingale/v6/models"
"github.com/ccfos/nightingale/v6/pkg/ginx"
"github.com/ccfos/nightingale/v6/pkg/logx"
"github.com/gin-gonic/gin"
"github.com/toolkits/pkg/ginx"
"github.com/toolkits/pkg/logger"
)
func (rt *Router) ShowDatabases(c *gin.Context) {
@@ -18,7 +18,7 @@ func (rt *Router) ShowDatabases(c *gin.Context) {
plug, exists := dscache.DsCache.Get(f.Cate, f.DatasourceId)
if !exists {
logx.Warningf(c.Request.Context(), "cluster:%d not exists", f.DatasourceId)
logger.Warningf("cluster:%d not exists", f.DatasourceId)
ginx.Bomb(200, "cluster not exists")
}
@@ -48,7 +48,7 @@ func (rt *Router) ShowTables(c *gin.Context) {
plug, exists := dscache.DsCache.Get(f.Cate, f.DatasourceId)
if !exists {
logx.Warningf(c.Request.Context(), "cluster:%d not exists", f.DatasourceId)
logger.Warningf("cluster:%d not exists", f.DatasourceId)
ginx.Bomb(200, "cluster not exists")
}
@@ -78,7 +78,7 @@ func (rt *Router) DescribeTable(c *gin.Context) {
plug, exists := dscache.DsCache.Get(f.Cate, f.DatasourceId)
if !exists {
logx.Warningf(c.Request.Context(), "cluster:%d not exists", f.DatasourceId)
logger.Warningf("cluster:%d not exists", f.DatasourceId)
ginx.Bomb(200, "cluster not exists")
}
// 只接受一个入参

View File

@@ -5,15 +5,14 @@ import (
"github.com/ccfos/nightingale/v6/models"
"github.com/ccfos/nightingale/v6/pkg/ctx"
"github.com/ccfos/nightingale/v6/pkg/ginx"
"github.com/gin-gonic/gin"
"github.com/toolkits/pkg/ginx"
)
func (rt *Router) embeddedProductGets(c *gin.Context) {
products, err := models.EmbeddedProductGets(rt.Ctx)
ginx.Dangerous(err)
models.FillUpdateByNicknames(rt.Ctx, products)
// 获取当前用户可访问的Group ID 列表
me := c.MustGet("user").(*models.User)

View File

@@ -3,10 +3,10 @@ package router
import (
"github.com/ccfos/nightingale/v6/datasource/es"
"github.com/ccfos/nightingale/v6/dscache"
"github.com/ccfos/nightingale/v6/pkg/ginx"
"github.com/ccfos/nightingale/v6/pkg/logx"
"github.com/gin-gonic/gin"
"github.com/toolkits/pkg/ginx"
"github.com/toolkits/pkg/logger"
)
type IndexReq struct {
@@ -34,7 +34,7 @@ func (rt *Router) QueryIndices(c *gin.Context) {
plug, exists := dscache.DsCache.Get(f.Cate, f.DatasourceId)
if !exists {
logx.Warningf(c.Request.Context(), "cluster:%d not exists", f.DatasourceId)
logger.Warningf("cluster:%d not exists", f.DatasourceId)
ginx.Bomb(200, "cluster not exists")
}
@@ -50,7 +50,7 @@ func (rt *Router) QueryFields(c *gin.Context) {
plug, exists := dscache.DsCache.Get(f.Cate, f.DatasourceId)
if !exists {
logx.Warningf(c.Request.Context(), "cluster:%d not exists", f.DatasourceId)
logger.Warningf("cluster:%d not exists", f.DatasourceId)
ginx.Bomb(200, "cluster not exists")
}
@@ -66,7 +66,7 @@ func (rt *Router) QueryESVariable(c *gin.Context) {
plug, exists := dscache.DsCache.Get(f.Cate, f.DatasourceId)
if !exists {
logx.Warningf(c.Request.Context(), "cluster:%d not exists", f.DatasourceId)
logger.Warningf("cluster:%d not exists", f.DatasourceId)
ginx.Bomb(200, "cluster not exists")
}

View File

@@ -5,8 +5,8 @@ import (
"time"
"github.com/ccfos/nightingale/v6/models"
"github.com/ccfos/nightingale/v6/pkg/ginx"
"github.com/gin-gonic/gin"
"github.com/toolkits/pkg/ginx"
)
// 创建 ES Index Pattern
@@ -69,10 +69,6 @@ func (rt *Router) esIndexPatternGetList(c *gin.Context) {
lst, err = models.EsIndexPatternGets(rt.Ctx, "")
}
if err == nil {
models.FillUpdateByNicknames(rt.Ctx, lst)
}
ginx.NewRender(c).Data(lst, err)
}

View File

@@ -1,149 +0,0 @@
package router
import (
"encoding/json"
"fmt"
"io"
"net/http"
"strconv"
"time"
"github.com/ccfos/nightingale/v6/alert/naming"
"github.com/ccfos/nightingale/v6/models"
"github.com/ccfos/nightingale/v6/pkg/loggrep"
"github.com/ccfos/nightingale/v6/pkg/ginx"
"github.com/gin-gonic/gin"
)
// eventDetailPage renders an HTML log viewer page (for pages group).
func (rt *Router) eventDetailPage(c *gin.Context) {
hash := ginx.UrlParamStr(c, "hash")
if !loggrep.IsValidHash(hash) {
c.String(http.StatusBadRequest, "invalid hash format")
return
}
logs, instance, err := rt.getEventLogs(hash)
if err != nil {
c.String(http.StatusInternalServerError, "Error: %v", err)
return
}
c.Header("Content-Type", "text/html; charset=utf-8")
err = loggrep.RenderHTML(c.Writer, loggrep.PageData{
Hash: hash,
Instance: instance,
Logs: logs,
Total: len(logs),
})
if err != nil {
c.String(http.StatusInternalServerError, "render error: %v", err)
}
}
// eventDetailJSON returns JSON (for service group).
func (rt *Router) eventDetailJSON(c *gin.Context) {
hash := ginx.UrlParamStr(c, "hash")
if !loggrep.IsValidHash(hash) {
ginx.Bomb(200, "invalid hash format")
}
logs, instance, err := rt.getEventLogs(hash)
ginx.Dangerous(err)
ginx.NewRender(c).Data(loggrep.EventDetailResp{
Logs: logs,
Instance: instance,
}, nil)
}
// getNodeForDatasource returns the alert engine instance responsible for the given
// datasource and primary key. It first checks the local hashring, and falls back
// to querying the database for active instances if the hashring is empty
// (e.g. when the datasource belongs to another engine cluster).
func (rt *Router) getNodeForDatasource(datasourceId int64, pk string) (string, error) {
dsIdStr := strconv.FormatInt(datasourceId, 10)
node, err := naming.DatasourceHashRing.GetNode(dsIdStr, pk)
if err == nil {
return node, nil
}
// Hashring is empty for this datasource (likely belongs to another engine cluster).
// Query the DB for active instances.
servers, dbErr := models.AlertingEngineGetsInstances(rt.Ctx,
"datasource_id = ? and clock > ?",
datasourceId, time.Now().Unix()-30)
if dbErr != nil {
return "", dbErr
}
if len(servers) == 0 {
return "", fmt.Errorf("no active instances for datasource %d", datasourceId)
}
ring := naming.NewConsistentHashRing(int32(naming.NodeReplicas), servers)
return ring.Get(pk)
}
// getEventLogs resolves the target instance and retrieves logs.
func (rt *Router) getEventLogs(hash string) ([]string, string, error) {
event, err := models.AlertHisEventGetByHash(rt.Ctx, hash)
if err != nil {
return nil, "", err
}
if event == nil {
return nil, "", fmt.Errorf("no such alert event")
}
ruleId := strconv.FormatInt(event.RuleId, 10)
instance := fmt.Sprintf("%s:%d", rt.Alert.Heartbeat.IP, rt.HTTP.Port)
node, err := rt.getNodeForDatasource(event.DatasourceId, ruleId)
if err != nil || node == instance {
// hashring not ready or target is self, handle locally
logs, err := loggrep.GrepLogDir(rt.LogDir, hash)
return logs, instance, err
}
// forward to the target alert instance
return rt.forwardEventDetail(node, hash)
}
func (rt *Router) forwardEventDetail(node, hash string) ([]string, string, error) {
url := fmt.Sprintf("http://%s/v1/n9e/event-detail/%s", node, hash)
req, err := http.NewRequest("GET", url, nil)
if err != nil {
return nil, node, err
}
for user, pass := range rt.HTTP.APIForService.BasicAuth {
req.SetBasicAuth(user, pass)
break
}
client := &http.Client{Timeout: 15 * time.Second}
resp, err := client.Do(req)
if err != nil {
return nil, node, fmt.Errorf("forward to %s failed: %v", node, err)
}
defer resp.Body.Close()
body, err := io.ReadAll(io.LimitReader(resp.Body, 10*1024*1024)) // 10MB limit
if err != nil {
return nil, node, err
}
var result struct {
Dat loggrep.EventDetailResp `json:"dat"`
Err string `json:"err"`
}
if err := json.Unmarshal(body, &result); err != nil {
return nil, node, err
}
if result.Err != "" {
return nil, node, fmt.Errorf("%s", result.Err)
}
return result.Dat.Logs, result.Dat.Instance, nil
}

View File

@@ -1,19 +1,14 @@
package router
import (
"encoding/json"
"fmt"
"net/http"
"time"
"github.com/ccfos/nightingale/v6/alert/pipeline/engine"
"github.com/ccfos/nightingale/v6/models"
"github.com/ccfos/nightingale/v6/pkg/ginx"
"github.com/gin-gonic/gin"
"github.com/google/uuid"
"github.com/toolkits/pkg/ginx"
"github.com/toolkits/pkg/i18n"
"github.com/toolkits/pkg/logger"
)
// 获取事件Pipeline列表
@@ -32,38 +27,18 @@ func (rt *Router) eventPipelinesList(c *gin.Context) {
for _, tid := range pipeline.TeamIds {
pipeline.TeamNames = append(pipeline.TeamNames, ugMap[tid])
}
// 兼容处理:自动填充工作流字段
pipeline.FillWorkflowFields()
}
models.FillUpdateByNicknames(rt.Ctx, pipelines)
gids, err := models.MyGroupIdsMap(rt.Ctx, me.Id)
ginx.Dangerous(err)
if me.IsAdmin() {
for _, pipeline := range pipelines {
if pipeline.TriggerMode == "" {
pipeline.TriggerMode = models.TriggerModeEvent
}
if pipeline.UseCase == "" {
pipeline.UseCase = models.UseCaseEventPipeline
}
}
ginx.NewRender(c).Data(pipelines, nil)
return
}
res := make([]*models.EventPipeline, 0)
for _, pipeline := range pipelines {
if pipeline.TriggerMode == "" {
pipeline.TriggerMode = models.TriggerModeEvent
}
if pipeline.UseCase == "" {
pipeline.UseCase = models.UseCaseEventPipeline
}
for _, tid := range pipeline.TeamIds {
if _, ok := gids[tid]; ok {
res = append(res, pipeline)
@@ -86,15 +61,6 @@ func (rt *Router) getEventPipeline(c *gin.Context) {
err = pipeline.FillTeamNames(rt.Ctx)
ginx.Dangerous(err)
// 兼容处理:自动填充工作流字段
pipeline.FillWorkflowFields()
if pipeline.TriggerMode == "" {
pipeline.TriggerMode = models.TriggerModeEvent
}
if pipeline.UseCase == "" {
pipeline.UseCase = models.UseCaseEventPipeline
}
ginx.NewRender(c).Data(pipeline, nil)
}
@@ -165,9 +131,7 @@ func (rt *Router) tryRunEventPipeline(c *gin.Context) {
var f struct {
EventId int64 `json:"event_id"`
PipelineConfig models.EventPipeline `json:"pipeline_config"`
InputVariables map[string]string `json:"input_variables,omitempty"`
}
ginx.BindJSON(c, &f)
hisEvent, err := models.AlertHisEventGetById(rt.Ctx, f.EventId)
@@ -177,33 +141,30 @@ func (rt *Router) tryRunEventPipeline(c *gin.Context) {
event := hisEvent.ToCur()
lang := c.GetHeader("X-Language")
me := c.MustGet("user").(*models.User)
var result string
for _, p := range f.PipelineConfig.ProcessorConfigs {
processor, err := models.GetProcessorByType(p.Typ, p.Config)
if err != nil {
ginx.Bomb(http.StatusBadRequest, "get processor: %+v err: %+v", p, err)
}
event, result, err = processor.Process(rt.Ctx, event)
if err != nil {
ginx.Bomb(http.StatusBadRequest, "processor: %+v err: %+v", p, err)
}
// 统一使用工作流引擎执行(兼容线性模式和工作流模式)
workflowEngine := engine.NewWorkflowEngine(rt.Ctx)
triggerCtx := &models.WorkflowTriggerContext{
Mode: models.TriggerModeAPI,
TriggerBy: me.Username,
InputsOverrides: f.InputVariables,
}
resultEvent, result, err := workflowEngine.Execute(&f.PipelineConfig, event, triggerCtx)
if err != nil {
ginx.Bomb(http.StatusBadRequest, "pipeline execute error: %v", err)
if event == nil {
ginx.NewRender(c).Data(map[string]interface{}{
"event": event,
"result": i18n.Sprintf(lang, "event is dropped"),
}, nil)
return
}
}
m := map[string]interface{}{
"event": resultEvent,
"result": i18n.Sprintf(lang, result.Message),
"status": result.Status,
"node_results": result.NodeResults,
"event": event,
"result": i18n.Sprintf(lang, result),
}
if resultEvent == nil {
m["result"] = i18n.Sprintf(lang, "event is dropped")
}
ginx.NewRender(c).Data(m, nil)
}
@@ -225,18 +186,14 @@ func (rt *Router) tryRunEventProcessor(c *gin.Context) {
if err != nil {
ginx.Bomb(200, "get processor err: %+v", err)
}
wfCtx := &models.WorkflowContext{
Event: event,
Vars: make(map[string]interface{}),
}
wfCtx, res, err := processor.Process(rt.Ctx, wfCtx)
event, res, err := processor.Process(rt.Ctx, event)
if err != nil {
ginx.Bomb(200, "processor err: %+v", err)
}
lang := c.GetHeader("X-Language")
ginx.NewRender(c).Data(map[string]interface{}{
"event": wfCtx.Event,
"event": event,
"result": i18n.Sprintf(lang, res),
}, nil)
}
@@ -266,10 +223,6 @@ func (rt *Router) tryRunEventProcessorByNotifyRule(c *gin.Context) {
ginx.Bomb(http.StatusBadRequest, "processors not found")
}
wfCtx := &models.WorkflowContext{
Event: event,
Vars: make(map[string]interface{}),
}
for _, pl := range pipelines {
for _, p := range pl.ProcessorConfigs {
processor, err := models.GetProcessorByType(p.Typ, p.Config)
@@ -277,14 +230,14 @@ func (rt *Router) tryRunEventProcessorByNotifyRule(c *gin.Context) {
ginx.Bomb(http.StatusBadRequest, "get processor: %+v err: %+v", p, err)
}
wfCtx, _, err = processor.Process(rt.Ctx, wfCtx)
event, _, err := processor.Process(rt.Ctx, event)
if err != nil {
ginx.Bomb(http.StatusBadRequest, "processor: %+v err: %+v", p, err)
}
if wfCtx == nil || wfCtx.Event == nil {
if event == nil {
lang := c.GetHeader("X-Language")
ginx.NewRender(c).Data(map[string]interface{}{
"event": nil,
"event": event,
"result": i18n.Sprintf(lang, "event is dropped"),
}, nil)
return
@@ -292,348 +245,10 @@ func (rt *Router) tryRunEventProcessorByNotifyRule(c *gin.Context) {
}
}
ginx.NewRender(c).Data(wfCtx.Event, nil)
ginx.NewRender(c).Data(event, nil)
}
func (rt *Router) eventPipelinesListByService(c *gin.Context) {
pipelines, err := models.ListEventPipelines(rt.Ctx)
ginx.NewRender(c).Data(pipelines, err)
}
type EventPipelineRequest struct {
// 事件数据(可选,如果不传则使用空事件)
Event *models.AlertCurEvent `json:"event,omitempty"`
// 输入参数覆盖
InputsOverrides map[string]string `json:"inputs_overrides,omitempty"`
Username string `json:"username,omitempty"`
}
// executePipelineTrigger 执行 Pipeline 触发的公共逻辑
func (rt *Router) executePipelineTrigger(pipeline *models.EventPipeline, req *EventPipelineRequest, triggerBy string) (string, error) {
// 准备事件数据
var event *models.AlertCurEvent
if req.Event != nil {
event = req.Event
} else {
// 创建空事件
event = &models.AlertCurEvent{
TriggerTime: time.Now().Unix(),
}
}
// 生成执行ID
executionID := uuid.New().String()
// 创建触发上下文
triggerCtx := &models.WorkflowTriggerContext{
Mode: models.TriggerModeAPI,
TriggerBy: triggerBy,
InputsOverrides: req.InputsOverrides,
RequestID: executionID,
}
// 异步执行工作流
go func() {
workflowEngine := engine.NewWorkflowEngine(rt.Ctx)
_, _, err := workflowEngine.Execute(pipeline, event, triggerCtx)
if err != nil {
logger.Errorf("async workflow execute error: pipeline_id=%d execution_id=%s err=%v",
pipeline.ID, executionID, err)
}
}()
return executionID, nil
}
// triggerEventPipelineByService Service 调用触发工作流执行
func (rt *Router) triggerEventPipelineByService(c *gin.Context) {
pipelineID := ginx.UrlParamInt64(c, "id")
var f EventPipelineRequest
ginx.BindJSON(c, &f)
// 获取 Pipeline
pipeline, err := models.GetEventPipeline(rt.Ctx, pipelineID)
if err != nil {
ginx.Bomb(http.StatusNotFound, "pipeline not found: %v", err)
}
executionID, err := rt.executePipelineTrigger(pipeline, &f, f.Username)
if err != nil {
ginx.Bomb(http.StatusBadRequest, "%v", err)
}
ginx.NewRender(c).Data(gin.H{
"execution_id": executionID,
"message": "workflow execution started",
}, nil)
}
// triggerEventPipelineByAPI API 触发工作流执行
func (rt *Router) triggerEventPipelineByAPI(c *gin.Context) {
pipelineID := ginx.UrlParamInt64(c, "id")
var f EventPipelineRequest
ginx.BindJSON(c, &f)
// 获取 Pipeline
pipeline, err := models.GetEventPipeline(rt.Ctx, pipelineID)
if err != nil {
ginx.Bomb(http.StatusNotFound, "pipeline not found: %v", err)
}
// 检查权限
me := c.MustGet("user").(*models.User)
ginx.Dangerous(me.CheckGroupPermission(rt.Ctx, pipeline.TeamIds))
executionID, err := rt.executePipelineTrigger(pipeline, &f, me.Username)
if err != nil {
ginx.Bomb(http.StatusBadRequest, err.Error())
}
ginx.NewRender(c).Data(gin.H{
"execution_id": executionID,
"message": "workflow execution started",
}, nil)
}
func (rt *Router) listAllEventPipelineExecutions(c *gin.Context) {
pipelineId := ginx.QueryInt64(c, "pipeline_id", 0)
pipelineName := ginx.QueryStr(c, "pipeline_name", "")
mode := ginx.QueryStr(c, "mode", "")
status := ginx.QueryStr(c, "status", "")
limit := ginx.QueryInt(c, "limit", 20)
offset := ginx.QueryInt(c, "p", 1)
if limit <= 0 || limit > 1000 {
limit = 20
}
if offset <= 0 {
offset = 1
}
executions, total, err := models.ListAllEventPipelineExecutions(rt.Ctx, pipelineId, pipelineName, mode, status, limit, (offset-1)*limit)
ginx.Dangerous(err)
ginx.NewRender(c).Data(gin.H{
"list": executions,
"total": total,
}, nil)
}
func (rt *Router) listEventPipelineExecutions(c *gin.Context) {
pipelineID := ginx.UrlParamInt64(c, "id")
mode := ginx.QueryStr(c, "mode", "")
status := ginx.QueryStr(c, "status", "")
limit := ginx.QueryInt(c, "limit", 20)
offset := ginx.QueryInt(c, "p", 1)
if limit <= 0 || limit > 1000 {
limit = 20
}
if offset <= 0 {
offset = 1
}
executions, total, err := models.ListEventPipelineExecutions(rt.Ctx, pipelineID, mode, status, limit, (offset-1)*limit)
ginx.Dangerous(err)
ginx.NewRender(c).Data(gin.H{
"list": executions,
"total": total,
}, nil)
}
func (rt *Router) getEventPipelineExecution(c *gin.Context) {
execID := ginx.UrlParamStr(c, "exec_id")
detail, err := models.GetEventPipelineExecutionDetail(rt.Ctx, execID)
if err != nil {
ginx.Bomb(http.StatusNotFound, "execution not found: %v", err)
}
ginx.NewRender(c).Data(detail, nil)
}
func (rt *Router) getEventPipelineExecutionStats(c *gin.Context) {
pipelineID := ginx.UrlParamInt64(c, "id")
stats, err := models.GetEventPipelineExecutionStatistics(rt.Ctx, pipelineID)
ginx.Dangerous(err)
ginx.NewRender(c).Data(stats, nil)
}
func (rt *Router) cleanEventPipelineExecutions(c *gin.Context) {
var f struct {
BeforeDays int `json:"before_days"`
}
ginx.BindJSON(c, &f)
if f.BeforeDays <= 0 {
f.BeforeDays = 30
}
beforeTime := time.Now().AddDate(0, 0, -f.BeforeDays).Unix()
affected, err := models.DeleteEventPipelineExecutions(rt.Ctx, beforeTime)
ginx.Dangerous(err)
ginx.NewRender(c).Data(gin.H{
"deleted": affected,
}, nil)
}
func (rt *Router) streamEventPipeline(c *gin.Context) {
pipelineID := ginx.UrlParamInt64(c, "id")
var f EventPipelineRequest
ginx.BindJSON(c, &f)
pipeline, err := models.GetEventPipeline(rt.Ctx, pipelineID)
if err != nil {
ginx.Bomb(http.StatusNotFound, "pipeline not found: %v", err)
}
me := c.MustGet("user").(*models.User)
ginx.Dangerous(me.CheckGroupPermission(rt.Ctx, pipeline.TeamIds))
var event *models.AlertCurEvent
if f.Event != nil {
event = f.Event
} else {
event = &models.AlertCurEvent{
TriggerTime: time.Now().Unix(),
}
}
triggerCtx := &models.WorkflowTriggerContext{
Mode: models.TriggerModeAPI,
TriggerBy: me.Username,
InputsOverrides: f.InputsOverrides,
RequestID: uuid.New().String(),
Stream: true, // 流式端点强制启用流式输出
}
workflowEngine := engine.NewWorkflowEngine(rt.Ctx)
_, result, err := workflowEngine.Execute(pipeline, event, triggerCtx)
if err != nil {
ginx.Bomb(http.StatusInternalServerError, "execute failed: %v", err)
}
if result.Stream && result.StreamChan != nil {
rt.handleStreamResponse(c, result, triggerCtx.RequestID)
return
}
ginx.NewRender(c).Data(result, nil)
}
func (rt *Router) handleStreamResponse(c *gin.Context, result *models.WorkflowResult, requestID string) {
// 设置 SSE 响应头
c.Header("Content-Type", "text/event-stream")
c.Header("Cache-Control", "no-cache")
c.Header("Connection", "keep-alive")
c.Header("X-Accel-Buffering", "no") // 禁用 nginx 缓冲
c.Header("X-Request-ID", requestID)
flusher, ok := c.Writer.(http.Flusher)
if !ok {
ginx.Bomb(http.StatusInternalServerError, "streaming not supported")
return
}
// 发送初始连接成功消息
initData := fmt.Sprintf(`{"type":"connected","request_id":"%s","timestamp":%d}`, requestID, time.Now().UnixMilli())
fmt.Fprintf(c.Writer, "data: %s\n\n", initData)
flusher.Flush()
// 从 channel 读取并发送 SSE
timeout := time.After(30 * time.Minute) // 最长流式输出时间
for {
select {
case chunk, ok := <-result.StreamChan:
if !ok {
// channel 关闭,发送结束标记
return
}
data, err := json.Marshal(chunk)
if err != nil {
logger.Errorf("stream: failed to marshal chunk: %v", err)
continue
}
fmt.Fprintf(c.Writer, "data: %s\n\n", data)
flusher.Flush()
if chunk.Done {
return
}
case <-c.Request.Context().Done():
// 客户端断开连接
logger.Infof("stream: client disconnected, request_id=%s", requestID)
return
case <-timeout:
logger.Errorf("stream: timeout, request_id=%s", requestID)
return
}
}
}
func (rt *Router) streamEventPipelineByService(c *gin.Context) {
pipelineID := ginx.UrlParamInt64(c, "id")
var f EventPipelineRequest
ginx.BindJSON(c, &f)
pipeline, err := models.GetEventPipeline(rt.Ctx, pipelineID)
if err != nil {
ginx.Bomb(http.StatusNotFound, "pipeline not found: %v", err)
}
var event *models.AlertCurEvent
if f.Event != nil {
event = f.Event
} else {
event = &models.AlertCurEvent{
TriggerTime: time.Now().Unix(),
}
}
triggerCtx := &models.WorkflowTriggerContext{
Mode: models.TriggerModeAPI,
TriggerBy: f.Username,
InputsOverrides: f.InputsOverrides,
RequestID: uuid.New().String(),
Stream: true, // 流式端点强制启用流式输出
}
workflowEngine := engine.NewWorkflowEngine(rt.Ctx)
_, result, err := workflowEngine.Execute(pipeline, event, triggerCtx)
if err != nil {
ginx.Bomb(http.StatusInternalServerError, "execute failed: %v", err)
}
// 检查是否是流式输出
if result.Stream && result.StreamChan != nil {
rt.handleStreamResponse(c, result, triggerCtx.RequestID)
return
}
ginx.NewRender(c).Data(result, nil)
}
// eventPipelineExecutionAdd 接收 edge 节点同步的 Pipeline 执行记录
func (rt *Router) eventPipelineExecutionAdd(c *gin.Context) {
var execution models.EventPipelineExecution
ginx.BindJSON(c, &execution)
if execution.ID == "" {
ginx.Bomb(http.StatusBadRequest, "id is required")
}
if execution.PipelineID <= 0 {
ginx.Bomb(http.StatusBadRequest, "pipeline_id is required")
}
ginx.NewRender(c).Message(models.DB(rt.Ctx).Create(&execution).Error)
}

View File

@@ -7,9 +7,9 @@ import (
"github.com/ccfos/nightingale/v6/models"
"github.com/ccfos/nightingale/v6/pkg/ctx"
"github.com/ccfos/nightingale/v6/pkg/ginx"
"github.com/gin-gonic/gin"
"github.com/toolkits/pkg/ginx"
)
const defaultLimit = 300

View File

@@ -15,9 +15,9 @@ import (
"github.com/ccfos/nightingale/v6/models"
"github.com/ccfos/nightingale/v6/pkg/ctx"
"github.com/ccfos/nightingale/v6/pushgw/idents"
"github.com/ccfos/nightingale/v6/pkg/ginx"
"github.com/gin-gonic/gin"
"github.com/toolkits/pkg/ginx"
"github.com/toolkits/pkg/logger"
)

View File

@@ -12,18 +12,17 @@ import (
"github.com/ccfos/nightingale/v6/models"
"github.com/ccfos/nightingale/v6/pkg/cas"
"github.com/ccfos/nightingale/v6/pkg/dingtalk"
"github.com/ccfos/nightingale/v6/pkg/feishu"
"github.com/ccfos/nightingale/v6/pkg/ldapx"
"github.com/ccfos/nightingale/v6/pkg/logx"
"github.com/ccfos/nightingale/v6/pkg/oauth2x"
"github.com/ccfos/nightingale/v6/pkg/oidcx"
"github.com/ccfos/nightingale/v6/pkg/secu"
"github.com/ccfos/nightingale/v6/pkg/ginx"
"github.com/dgrijalva/jwt-go"
"github.com/gin-gonic/gin"
"github.com/pelletier/go-toml/v2"
"github.com/pkg/errors"
"github.com/toolkits/pkg/ginx"
"github.com/toolkits/pkg/logger"
"gorm.io/gorm"
)
@@ -37,9 +36,7 @@ type loginForm struct {
func (rt *Router) loginPost(c *gin.Context) {
var f loginForm
ginx.BindJSON(c, &f)
rctx := c.Request.Context()
logx.Infof(rctx, "username:%s login from:%s", f.Username, c.ClientIP())
logger.Infof("username:%s login from:%s", f.Username, c.ClientIP())
if rt.HTTP.ShowCaptcha.Enable {
if !CaptchaVerify(f.Captchaid, f.Verifyvalue) {
@@ -52,25 +49,23 @@ func (rt *Router) loginPost(c *gin.Context) {
if rt.HTTP.RSA.OpenRSA {
decPassWord, err := secu.Decrypt(f.Password, rt.HTTP.RSA.RSAPrivateKey, rt.HTTP.RSA.RSAPassWord)
if err != nil {
logx.Errorf(rctx, "RSA Decrypt failed: %v username: %s", err, f.Username)
logger.Errorf("RSA Decrypt failed: %v username: %s", err, f.Username)
ginx.NewRender(c).Message(err)
return
}
authPassWord = decPassWord
}
reqCtx := rt.Ctx.WithContext(rctx)
var user *models.User
var err error
lc := rt.Sso.LDAP.Copy()
if lc.Enable {
user, err = ldapx.LdapLogin(reqCtx, f.Username, authPassWord, lc.DefaultRoles, lc.DefaultTeams, lc)
user, err = ldapx.LdapLogin(rt.Ctx, f.Username, authPassWord, lc.DefaultRoles, lc.DefaultTeams, lc)
if err != nil {
logx.Debugf(rctx, "ldap login failed: %v username: %s", err, f.Username)
logger.Debugf("ldap login failed: %v username: %s", err, f.Username)
var errLoginInN9e error
// to use n9e as the minimum guarantee for login
if user, errLoginInN9e = models.PassLogin(reqCtx, rt.Redis, f.Username, authPassWord); errLoginInN9e != nil {
if user, errLoginInN9e = models.PassLogin(rt.Ctx, rt.Redis, f.Username, authPassWord); errLoginInN9e != nil {
ginx.NewRender(c).Message("ldap login failed: %v; n9e login failed: %v", err, errLoginInN9e)
return
}
@@ -78,7 +73,7 @@ func (rt *Router) loginPost(c *gin.Context) {
user.RolesLst = strings.Fields(user.Roles)
}
} else {
user, err = models.PassLogin(reqCtx, rt.Redis, f.Username, authPassWord)
user, err = models.PassLogin(rt.Ctx, rt.Redis, f.Username, authPassWord)
ginx.Dangerous(err)
}
@@ -102,8 +97,7 @@ func (rt *Router) loginPost(c *gin.Context) {
}
func (rt *Router) logoutPost(c *gin.Context) {
rctx := c.Request.Context()
logx.Infof(rctx, "username:%s logout from:%s", c.GetString("username"), c.ClientIP())
logger.Infof("username:%s logout from:%s", c.GetString("username"), c.ClientIP())
metadata, err := rt.extractTokenMetadata(c.Request)
if err != nil {
ginx.NewRender(c, http.StatusBadRequest).Message("failed to parse jwt token")
@@ -122,7 +116,7 @@ func (rt *Router) logoutPost(c *gin.Context) {
// 获取用户的 id_token
idToken, err := rt.fetchIdToken(c.Request.Context(), user.Id)
if err != nil {
logx.Debugf(rctx, "fetch id_token failed: %v, user_id: %d", err, user.Id)
logger.Debugf("fetch id_token failed: %v, user_id: %d", err, user.Id)
idToken = "" // 如果获取失败,使用空字符串
}
@@ -225,7 +219,7 @@ func (rt *Router) refreshPost(c *gin.Context) {
// 注意:这里不会获取新的 id_token只是延长 Redis 中现有 id_token 的 TTL
if idToken, err := rt.fetchIdToken(c.Request.Context(), userid); err == nil && idToken != "" {
if err := rt.saveIdToken(c.Request.Context(), userid, idToken); err != nil {
logx.Debugf(c.Request.Context(), "refresh id_token ttl failed: %v, user_id: %d", err, userid)
logger.Debugf("refresh id_token ttl failed: %v, user_id: %d", err, userid)
}
}
@@ -276,13 +270,12 @@ type CallbackOutput struct {
}
func (rt *Router) loginCallback(c *gin.Context) {
rctx := c.Request.Context()
code := ginx.QueryStr(c, "code", "")
state := ginx.QueryStr(c, "state", "")
ret, err := rt.Sso.OIDC.Callback(rt.Redis, rctx, code, state)
ret, err := rt.Sso.OIDC.Callback(rt.Redis, c.Request.Context(), code, state)
if err != nil {
logx.Errorf(rctx, "sso_callback fail. code:%s, state:%s, get ret: %+v. error: %v", code, state, ret, err)
logger.Errorf("sso_callback fail. code:%s, state:%s, get ret: %+v. error: %v", code, state, ret, err)
ginx.NewRender(c).Data(CallbackOutput{}, err)
return
}
@@ -305,7 +298,7 @@ func (rt *Router) loginCallback(c *gin.Context) {
for _, gid := range rt.Sso.OIDC.DefaultTeams {
err = models.UserGroupMemberAdd(rt.Ctx, gid, user.Id)
if err != nil {
logx.Errorf(rctx, "user:%v UserGroupMemberAdd: %s", user, err)
logger.Errorf("user:%v UserGroupMemberAdd: %s", user, err)
}
}
}
@@ -315,12 +308,12 @@ func (rt *Router) loginCallback(c *gin.Context) {
userIdentity := fmt.Sprintf("%d-%s", user.Id, user.Username)
ts, err := rt.createTokens(rt.HTTP.JWTAuth.SigningKey, userIdentity)
ginx.Dangerous(err)
ginx.Dangerous(rt.createAuth(rctx, userIdentity, ts))
ginx.Dangerous(rt.createAuth(c.Request.Context(), userIdentity, ts))
// 保存 id_token 到 Redis用于登出时使用
if ret.IdToken != "" {
if err := rt.saveIdToken(rctx, user.Id, ret.IdToken); err != nil {
logx.Errorf(rctx, "save id_token failed: %v, user_id: %d", err, user.Id)
if err := rt.saveIdToken(c.Request.Context(), user.Id, ret.IdToken); err != nil {
logger.Errorf("save id_token failed: %v, user_id: %d", err, user.Id)
}
}
@@ -361,7 +354,7 @@ func (rt *Router) loginRedirectCas(c *gin.Context) {
}
if !rt.Sso.CAS.Enable {
logx.Errorf(c.Request.Context(), "cas is not enable")
logger.Error("cas is not enable")
ginx.NewRender(c).Data("", nil)
return
}
@@ -376,18 +369,17 @@ func (rt *Router) loginRedirectCas(c *gin.Context) {
}
func (rt *Router) loginCallbackCas(c *gin.Context) {
rctx := c.Request.Context()
ticket := ginx.QueryStr(c, "ticket", "")
state := ginx.QueryStr(c, "state", "")
ret, err := rt.Sso.CAS.ValidateServiceTicket(rctx, ticket, state, rt.Redis)
ret, err := rt.Sso.CAS.ValidateServiceTicket(c.Request.Context(), ticket, state, rt.Redis)
if err != nil {
logx.Errorf(rctx, "ValidateServiceTicket: %s", err)
logger.Errorf("ValidateServiceTicket: %s", err)
ginx.NewRender(c).Data("", err)
return
}
user, err := models.UserGet(rt.Ctx, "username=?", ret.Username)
if err != nil {
logx.Errorf(rctx, "UserGet: %s", err)
logger.Errorf("UserGet: %s", err)
}
ginx.Dangerous(err)
if user != nil {
@@ -406,10 +398,10 @@ func (rt *Router) loginCallbackCas(c *gin.Context) {
userIdentity := fmt.Sprintf("%d-%s", user.Id, user.Username)
ts, err := rt.createTokens(rt.HTTP.JWTAuth.SigningKey, userIdentity)
if err != nil {
logx.Errorf(rctx, "createTokens: %s", err)
logger.Errorf("createTokens: %s", err)
}
ginx.Dangerous(err)
ginx.Dangerous(rt.createAuth(rctx, userIdentity, ts))
ginx.Dangerous(rt.createAuth(c.Request.Context(), userIdentity, ts))
redirect := "/"
if ret.Redirect != "/login" {
@@ -482,13 +474,12 @@ func (rt *Router) loginRedirectDingTalk(c *gin.Context) {
}
func (rt *Router) loginCallbackDingTalk(c *gin.Context) {
rctx := c.Request.Context()
code := ginx.QueryStr(c, "code", "")
state := ginx.QueryStr(c, "state", "")
ret, err := rt.Sso.DingTalk.Callback(rt.Redis, rctx, code, state)
ret, err := rt.Sso.DingTalk.Callback(rt.Redis, c.Request.Context(), code, state)
if err != nil {
logx.Errorf(rctx, "sso_callback DingTalk fail. code:%s, state:%s, get ret: %+v. error: %v", code, state, ret, err)
logger.Errorf("sso_callback DingTalk fail. code:%s, state:%s, get ret: %+v. error: %v", code, state, ret, err)
ginx.NewRender(c).Data(CallbackOutput{}, err)
return
}
@@ -528,104 +519,13 @@ func (rt *Router) loginCallbackDingTalk(c *gin.Context) {
}
func (rt *Router) loginRedirectFeiShu(c *gin.Context) {
redirect := ginx.QueryStr(c, "redirect", "/")
v, exists := c.Get("userid")
if exists {
userid := v.(int64)
user, err := models.UserGetById(rt.Ctx, userid)
ginx.Dangerous(err)
if user == nil {
ginx.Bomb(200, "user not found")
}
if user.Username != "" { // already login
ginx.NewRender(c).Data(redirect, nil)
return
}
}
if rt.Sso.FeiShu == nil || !rt.Sso.FeiShu.Enable {
ginx.NewRender(c).Data("", nil)
return
}
redirect, err := rt.Sso.FeiShu.Authorize(rt.Redis, redirect)
ginx.Dangerous(err)
ginx.NewRender(c).Data(redirect, err)
}
func (rt *Router) loginCallbackFeiShu(c *gin.Context) {
rctx := c.Request.Context()
code := ginx.QueryStr(c, "code", "")
state := ginx.QueryStr(c, "state", "")
ret, err := rt.Sso.FeiShu.Callback(rt.Redis, rctx, code, state)
if err != nil {
logx.Errorf(rctx, "sso_callback FeiShu fail. code:%s, state:%s, get ret: %+v. error: %v", code, state, ret, err)
ginx.NewRender(c).Data(CallbackOutput{}, err)
return
}
user, err := models.UserGet(rt.Ctx, "username=?", ret.Username)
ginx.Dangerous(err)
if user != nil {
if rt.Sso.FeiShu != nil && rt.Sso.FeiShu.FeiShuConfig != nil && rt.Sso.FeiShu.FeiShuConfig.CoverAttributes {
updatedFields := user.UpdateSsoFields(feishu.SsoTypeName, ret.Nickname, ret.Phone, ret.Email)
ginx.Dangerous(user.Update(rt.Ctx, "update_at", updatedFields...))
}
} else {
user = new(models.User)
defaultRoles := []string{}
defaultUserGroups := []int64{}
if rt.Sso.FeiShu != nil && rt.Sso.FeiShu.FeiShuConfig != nil {
defaultRoles = rt.Sso.FeiShu.FeiShuConfig.DefaultRoles
defaultUserGroups = rt.Sso.FeiShu.FeiShuConfig.DefaultUserGroups
}
user.FullSsoFields(feishu.SsoTypeName, ret.Username, ret.Nickname, ret.Phone, ret.Email, defaultRoles)
ginx.Dangerous(user.Add(rt.Ctx))
if len(defaultUserGroups) > 0 {
err = user.AddToUserGroups(rt.Ctx, defaultUserGroups)
if err != nil {
logx.Errorf(rctx, "sso feishu add user group error %v %v", ret, err)
}
}
}
// set user login state
userIdentity := fmt.Sprintf("%d-%s", user.Id, user.Username)
ts, err := rt.createTokens(rt.HTTP.JWTAuth.SigningKey, userIdentity)
ginx.Dangerous(err)
ginx.Dangerous(rt.createAuth(c.Request.Context(), userIdentity, ts))
redirect := "/"
if ret.Redirect != "/login" {
redirect = ret.Redirect
}
ginx.NewRender(c).Data(CallbackOutput{
Redirect: redirect,
User: user,
AccessToken: ts.AccessToken,
RefreshToken: ts.RefreshToken,
}, nil)
}
func (rt *Router) loginCallbackOAuth(c *gin.Context) {
rctx := c.Request.Context()
code := ginx.QueryStr(c, "code", "")
state := ginx.QueryStr(c, "state", "")
ret, err := rt.Sso.OAuth2.Callback(rt.Redis, rctx, code, state)
ret, err := rt.Sso.OAuth2.Callback(rt.Redis, c.Request.Context(), code, state)
if err != nil {
logx.Debugf(rctx, "sso.callback() get ret %+v error %v", ret, err)
logger.Debugf("sso.callback() get ret %+v error %v", ret, err)
ginx.NewRender(c).Data(CallbackOutput{}, err)
return
}
@@ -669,11 +569,10 @@ type SsoConfigOutput struct {
CasDisplayName string `json:"casDisplayName"`
OauthDisplayName string `json:"oauthDisplayName"`
DingTalkDisplayName string `json:"dingTalkDisplayName"`
FeiShuDisplayName string `json:"feishuDisplayName"`
}
func (rt *Router) ssoConfigNameGet(c *gin.Context) {
var oidcDisplayName, casDisplayName, oauthDisplayName, dingTalkDisplayName, feiShuDisplayName string
var oidcDisplayName, casDisplayName, oauthDisplayName, dingTalkDisplayName string
if rt.Sso.OIDC != nil {
oidcDisplayName = rt.Sso.OIDC.GetDisplayName()
}
@@ -690,16 +589,11 @@ func (rt *Router) ssoConfigNameGet(c *gin.Context) {
dingTalkDisplayName = rt.Sso.DingTalk.GetDisplayName()
}
if rt.Sso.FeiShu != nil {
feiShuDisplayName = rt.Sso.FeiShu.GetDisplayName()
}
ginx.NewRender(c).Data(SsoConfigOutput{
OidcDisplayName: oidcDisplayName,
CasDisplayName: casDisplayName,
OauthDisplayName: oauthDisplayName,
DingTalkDisplayName: dingTalkDisplayName,
FeiShuDisplayName: feiShuDisplayName,
}, nil)
}
@@ -714,7 +608,6 @@ func (rt *Router) ssoConfigGets(c *gin.Context) {
// TODO: dingTalkExist 为了兼容当前前端配置, 后期单点登陆统一调整后不在预先设置默认内容
dingTalkExist := false
feiShuExist := false
for _, config := range lst {
var ssoReqConfig models.SsoConfig
ssoReqConfig.Id = config.Id
@@ -725,10 +618,6 @@ func (rt *Router) ssoConfigGets(c *gin.Context) {
dingTalkExist = true
err := json.Unmarshal([]byte(config.Content), &ssoReqConfig.SettingJson)
ginx.Dangerous(err)
case feishu.SsoTypeName:
feiShuExist = true
err := json.Unmarshal([]byte(config.Content), &ssoReqConfig.SettingJson)
ginx.Dangerous(err)
default:
ssoReqConfig.Content = config.Content
}
@@ -741,11 +630,6 @@ func (rt *Router) ssoConfigGets(c *gin.Context) {
ssoConfig.Name = dingtalk.SsoTypeName
ssoConfigs = append(ssoConfigs, ssoConfig)
}
if !feiShuExist {
var ssoConfig models.SsoConfig
ssoConfig.Name = feishu.SsoTypeName
ssoConfigs = append(ssoConfigs, ssoConfig)
}
ginx.NewRender(c).Data(ssoConfigs, nil)
}
@@ -773,23 +657,6 @@ func (rt *Router) ssoConfigUpdate(c *gin.Context) {
err = f.Update(rt.Ctx)
}
ginx.Dangerous(err)
case feishu.SsoTypeName:
f.Name = ssoConfig.Name
setting, err := json.Marshal(ssoConfig.SettingJson)
ginx.Dangerous(err)
f.Content = string(setting)
f.UpdateAt = time.Now().Unix()
sso, err := f.Query(rt.Ctx)
if !errors.Is(err, gorm.ErrRecordNotFound) {
ginx.Dangerous(err)
}
if errors.Is(err, gorm.ErrRecordNotFound) {
err = f.Create(rt.Ctx)
} else {
f.Id = sso.Id
err = f.Update(rt.Ctx)
}
ginx.Dangerous(err)
default:
f.Id = ssoConfig.Id
f.Name = ssoConfig.Name
@@ -828,14 +695,6 @@ func (rt *Router) ssoConfigUpdate(c *gin.Context) {
rt.Sso.DingTalk = dingtalk.New(config)
}
rt.Sso.DingTalk.Reload(config)
case feishu.SsoTypeName:
var config feishu.Config
err := json.Unmarshal([]byte(f.Content), &config)
ginx.Dangerous(err)
if rt.Sso.FeiShu == nil {
rt.Sso.FeiShu = feishu.New(config)
}
rt.Sso.FeiShu.Reload(config)
}
ginx.NewRender(c).Message(nil)

View File

@@ -12,10 +12,10 @@ import (
"github.com/ccfos/nightingale/v6/pkg/slice"
"github.com/ccfos/nightingale/v6/pkg/strx"
"github.com/ccfos/nightingale/v6/pkg/tplx"
"github.com/ccfos/nightingale/v6/pkg/ginx"
"github.com/gin-gonic/gin"
"github.com/google/uuid"
"github.com/toolkits/pkg/ginx"
)
func (rt *Router) messageTemplatesAdd(c *gin.Context) {
@@ -154,7 +154,6 @@ func (rt *Router) messageTemplatesGet(c *gin.Context) {
lst, err := models.MessageTemplatesGetBy(rt.Ctx, notifyChannelIdents)
ginx.Dangerous(err)
models.FillUpdateByNicknames(rt.Ctx, lst)
if me.IsAdmin() {
ginx.NewRender(c).Data(lst, nil)

View File

@@ -2,9 +2,9 @@ package router
import (
"github.com/ccfos/nightingale/v6/center/cconf"
"github.com/ccfos/nightingale/v6/pkg/ginx"
"github.com/gin-gonic/gin"
"github.com/toolkits/pkg/ginx"
)
func (rt *Router) metricsDescGetFile(c *gin.Context) {

View File

@@ -4,9 +4,9 @@ import (
"net/http"
"github.com/ccfos/nightingale/v6/models"
"github.com/ccfos/nightingale/v6/pkg/ginx"
"github.com/gin-gonic/gin"
"github.com/toolkits/pkg/ginx"
)
// no param

View File

@@ -9,9 +9,9 @@ import (
"github.com/ccfos/nightingale/v6/alert/mute"
"github.com/ccfos/nightingale/v6/models"
"github.com/ccfos/nightingale/v6/pkg/strx"
"github.com/ccfos/nightingale/v6/pkg/ginx"
"github.com/gin-gonic/gin"
"github.com/toolkits/pkg/ginx"
"github.com/toolkits/pkg/i18n"
)
@@ -22,9 +22,6 @@ func (rt *Router) alertMuteGetsByBG(c *gin.Context) {
query := ginx.QueryStr(c, "query", "")
expired := ginx.QueryInt(c, "expired", -1)
lst, err := models.AlertMuteGets(rt.Ctx, prods, bgid, -1, expired, query)
if err == nil {
models.FillUpdateByNicknames(rt.Ctx, lst)
}
ginx.NewRender(c).Data(lst, err)
}
@@ -50,9 +47,6 @@ func (rt *Router) alertMuteGetsByGids(c *gin.Context) {
}
lst, err := models.AlertMuteGetsByBGIds(rt.Ctx, gids)
if err == nil {
models.FillUpdateByNicknames(rt.Ctx, lst)
}
ginx.NewRender(c).Data(lst, err)
}
@@ -64,9 +58,6 @@ func (rt *Router) alertMuteGets(c *gin.Context) {
disabled := ginx.QueryInt(c, "disabled", -1)
expired := ginx.QueryInt(c, "expired", -1)
lst, err := models.AlertMuteGets(rt.Ctx, prods, bgid, disabled, expired, query)
if err == nil {
models.FillUpdateByNicknames(rt.Ctx, lst)
}
ginx.NewRender(c).Data(lst, err)
}

View File

@@ -11,11 +11,11 @@ import (
"github.com/ccfos/nightingale/v6/center/cstats"
"github.com/ccfos/nightingale/v6/models"
"github.com/ccfos/nightingale/v6/pkg/ginx"
"github.com/gin-gonic/gin"
"github.com/golang-jwt/jwt"
"github.com/google/uuid"
"github.com/toolkits/pkg/ginx"
)
const (

View File

@@ -6,9 +6,9 @@ import (
"github.com/ccfos/nightingale/v6/alert/sender"
"github.com/ccfos/nightingale/v6/models"
"github.com/ccfos/nightingale/v6/pkg/ctx"
"github.com/ccfos/nightingale/v6/pkg/ginx"
"github.com/gin-gonic/gin"
"github.com/toolkits/pkg/ginx"
"github.com/toolkits/pkg/logger"
)

View File

@@ -11,8 +11,8 @@ import (
"time"
"github.com/ccfos/nightingale/v6/models"
"github.com/ccfos/nightingale/v6/pkg/ginx"
"github.com/gin-gonic/gin"
"github.com/toolkits/pkg/ginx"
)
func (rt *Router) notifyChannelsAdd(c *gin.Context) {
@@ -118,9 +118,6 @@ func (rt *Router) notifyChannelGetBy(c *gin.Context) {
func (rt *Router) notifyChannelsGet(c *gin.Context) {
lst, err := models.NotifyChannelsGet(rt.Ctx, "", nil)
if err == nil {
models.FillUpdateByNicknames(rt.Ctx, lst)
}
ginx.NewRender(c).Data(lst, err)
}

View File

@@ -10,10 +10,10 @@ import (
"github.com/ccfos/nightingale/v6/memsto"
"github.com/ccfos/nightingale/v6/models"
"github.com/ccfos/nightingale/v6/pkg/tplx"
"github.com/ccfos/nightingale/v6/pkg/ginx"
"github.com/gin-gonic/gin"
"github.com/pelletier/go-toml/v2"
"github.com/toolkits/pkg/ginx"
"github.com/toolkits/pkg/str"
)

View File

@@ -10,9 +10,9 @@ import (
"github.com/ccfos/nightingale/v6/models"
"github.com/ccfos/nightingale/v6/pkg/ctx"
"github.com/ccfos/nightingale/v6/pkg/slice"
"github.com/ccfos/nightingale/v6/pkg/ginx"
"github.com/gin-gonic/gin"
"github.com/toolkits/pkg/ginx"
"github.com/toolkits/pkg/logger"
)
@@ -118,7 +118,6 @@ func (rt *Router) notifyRulesGet(c *gin.Context) {
lst, err := models.NotifyRulesGet(rt.Ctx, "", nil)
ginx.Dangerous(err)
models.FillUpdateByNicknames(rt.Ctx, lst)
if me.IsAdmin() {
ginx.NewRender(c).Data(lst, nil)
return
@@ -222,7 +221,7 @@ func SendNotifyChannelMessage(ctx *ctx.Context, userCache *memsto.UserCacheType,
return "", fmt.Errorf("failed to send flashduty notify: %v", err)
}
}
logger.Infof("channel_name: %v, event:%s, tplContent:%s, customParams:%v, respBody: %v, err: %v", notifyChannel.Name, events[0].Hash, tplContent, customParams, resp, err)
logger.Infof("channel_name: %v, event:%+v, tplContent:%s, customParams:%v, respBody: %v, err: %v", notifyChannel.Name, events[0], tplContent, customParams, resp, err)
return resp, nil
case "pagerduty":
client, err := models.GetHTTPClient(notifyChannel)
@@ -236,7 +235,7 @@ func SendNotifyChannelMessage(ctx *ctx.Context, userCache *memsto.UserCacheType,
return "", fmt.Errorf("failed to send pagerduty notify: %v", err)
}
}
logger.Infof("channel_name: %v, event:%s, tplContent:%s, customParams:%v, respBody: %v, err: %v", notifyChannel.Name, events[0].Hash, tplContent, customParams, resp, err)
logger.Infof("channel_name: %v, event:%+v, tplContent:%s, customParams:%v, respBody: %v, err: %v", notifyChannel.Name, events[0], tplContent, customParams, resp, err)
return resp, nil
case "http":
client, err := models.GetHTTPClient(notifyChannel)
@@ -254,7 +253,7 @@ func SendNotifyChannelMessage(ctx *ctx.Context, userCache *memsto.UserCacheType,
if dispatch.NeedBatchContacts(notifyChannel.RequestConfig.HTTPRequestConfig) || len(sendtos) == 0 {
resp, err = notifyChannel.SendHTTP(events, tplContent, customParams, sendtos, client)
logger.Infof("channel_name: %v, event:%s, sendtos:%+v, tplContent:%s, customParams:%v, respBody: %v, err: %v", notifyChannel.Name, events[0].Hash, sendtos, tplContent, customParams, resp, err)
logger.Infof("channel_name: %v, event:%+v, sendtos:%+v, tplContent:%s, customParams:%v, respBody: %v, err: %v", notifyChannel.Name, events[0], sendtos, tplContent, customParams, resp, err)
if err != nil {
return "", fmt.Errorf("failed to send http notify: %v", err)
}
@@ -262,7 +261,7 @@ func SendNotifyChannelMessage(ctx *ctx.Context, userCache *memsto.UserCacheType,
} else {
for i := range sendtos {
resp, err = notifyChannel.SendHTTP(events, tplContent, customParams, []string{sendtos[i]}, client)
logger.Infof("channel_name: %v, event:%s, tplContent:%s, customParams:%v, sendto:%+v, respBody: %v, err: %v", notifyChannel.Name, events[0].Hash, tplContent, customParams, sendtos[i], resp, err)
logger.Infof("channel_name: %v, event:%+v, tplContent:%s, customParams:%v, sendto:%+v, respBody: %v, err: %v", notifyChannel.Name, events[0], tplContent, customParams, sendtos[i], resp, err)
if err != nil {
return "", fmt.Errorf("failed to send http notify: %v", err)
}
@@ -281,7 +280,7 @@ func SendNotifyChannelMessage(ctx *ctx.Context, userCache *memsto.UserCacheType,
return resp, nil
case "script":
resp, _, err := notifyChannel.SendScript(events, tplContent, customParams, sendtos)
logger.Infof("channel_name: %v, event:%s, tplContent:%s, customParams:%v, respBody: %v, err: %v", notifyChannel.Name, events[0].Hash, tplContent, customParams, resp, err)
logger.Infof("channel_name: %v, event:%+v, tplContent:%s, customParams:%v, respBody: %v, err: %v", notifyChannel.Name, events[0], tplContent, customParams, resp, err)
return resp, err
default:
logger.Errorf("unsupported request type: %v", notifyChannel.RequestType)

View File

@@ -11,9 +11,9 @@ import (
"github.com/ccfos/nightingale/v6/center/cconf"
"github.com/ccfos/nightingale/v6/models"
"github.com/ccfos/nightingale/v6/pkg/tplx"
"github.com/ccfos/nightingale/v6/pkg/ginx"
"github.com/gin-gonic/gin"
"github.com/toolkits/pkg/ginx"
"github.com/toolkits/pkg/str"
)
@@ -25,14 +25,11 @@ func (rt *Router) notifyTplGets(c *gin.Context) {
m[models.EmailSubject] = struct{}{}
lst, err := models.NotifyTplGets(rt.Ctx)
ginx.Dangerous(err)
for i := 0; i < len(lst); i++ {
if _, exists := m[lst[i].Channel]; exists {
lst[i].BuiltIn = true
}
}
models.FillUpdateByNicknames(rt.Ctx, lst)
ginx.NewRender(c).Data(lst, err)
}
@@ -203,9 +200,6 @@ func (rt *Router) messageTemplateGets(c *gin.Context) {
ident := ginx.QueryStr(c, "ident", "")
tpls, err := models.MessageTemplateGets(rt.Ctx, id, name, ident)
if err == nil {
models.FillUpdateByNicknames(rt.Ctx, tpls)
}
ginx.NewRender(c).Data(tpls, err)
}

View File

@@ -3,9 +3,9 @@ package router
import (
"github.com/ccfos/nightingale/v6/datasource/opensearch"
"github.com/ccfos/nightingale/v6/dscache"
"github.com/ccfos/nightingale/v6/pkg/ginx"
"github.com/gin-gonic/gin"
"github.com/toolkits/pkg/ginx"
"github.com/toolkits/pkg/logger"
)

View File

@@ -2,6 +2,7 @@ package router
import (
"context"
"crypto/tls"
"fmt"
"net"
"net/http"
@@ -12,13 +13,12 @@ import (
"sync"
"time"
"github.com/ccfos/nightingale/v6/pkg/logx"
"github.com/ccfos/nightingale/v6/pkg/poster"
pkgprom "github.com/ccfos/nightingale/v6/pkg/prom"
"github.com/ccfos/nightingale/v6/prom"
"github.com/ccfos/nightingale/v6/pkg/ginx"
"github.com/gin-gonic/gin"
"github.com/prometheus/common/model"
"github.com/toolkits/pkg/ginx"
"github.com/toolkits/pkg/logger"
"github.com/toolkits/pkg/net/httplib"
)
@@ -39,16 +39,15 @@ func (rt *Router) promBatchQueryRange(c *gin.Context) {
var f BatchQueryForm
ginx.Dangerous(c.BindJSON(&f))
lst, err := PromBatchQueryRange(c.Request.Context(), rt.PromClients, f)
lst, err := PromBatchQueryRange(rt.PromClients, f)
ginx.NewRender(c).Data(lst, err)
}
func PromBatchQueryRange(ctx context.Context, pc *prom.PromClientMap, f BatchQueryForm) ([]model.Value, error) {
func PromBatchQueryRange(pc *prom.PromClientMap, f BatchQueryForm) ([]model.Value, error) {
var lst []model.Value
cli := pc.GetCli(f.DatasourceId)
if cli == nil {
logx.Warningf(ctx, "no such datasource id: %d", f.DatasourceId)
return lst, fmt.Errorf("no such datasource id: %d", f.DatasourceId)
}
@@ -59,9 +58,8 @@ func PromBatchQueryRange(ctx context.Context, pc *prom.PromClientMap, f BatchQue
Step: time.Duration(item.Step) * time.Second,
}
resp, _, err := cli.QueryRange(ctx, item.Query, r)
resp, _, err := cli.QueryRange(context.Background(), item.Query, r)
if err != nil {
logx.Warningf(ctx, "query range error: query:%s err:%v", item.Query, err)
return lst, err
}
@@ -84,23 +82,22 @@ func (rt *Router) promBatchQueryInstant(c *gin.Context) {
var f BatchInstantForm
ginx.Dangerous(c.BindJSON(&f))
lst, err := PromBatchQueryInstant(c.Request.Context(), rt.PromClients, f)
lst, err := PromBatchQueryInstant(rt.PromClients, f)
ginx.NewRender(c).Data(lst, err)
}
func PromBatchQueryInstant(ctx context.Context, pc *prom.PromClientMap, f BatchInstantForm) ([]model.Value, error) {
func PromBatchQueryInstant(pc *prom.PromClientMap, f BatchInstantForm) ([]model.Value, error) {
var lst []model.Value
cli := pc.GetCli(f.DatasourceId)
if cli == nil {
logx.Warningf(ctx, "no such datasource id: %d", f.DatasourceId)
logger.Warningf("no such datasource id: %d", f.DatasourceId)
return lst, fmt.Errorf("no such datasource id: %d", f.DatasourceId)
}
for _, item := range f.Queries {
resp, _, err := cli.Query(ctx, item.Query, time.Unix(item.Time, 0))
resp, _, err := cli.Query(context.Background(), item.Query, time.Unix(item.Time, 0))
if err != nil {
logx.Warningf(ctx, "query instant error: query:%s err:%v", item.Query, err)
return lst, err
}
@@ -172,15 +169,8 @@ func (rt *Router) dsProxy(c *gin.Context) {
transport, has := transportGet(dsId, ds.UpdatedAt)
if !has {
// 使用 TLS 配置(支持 mTLS
tlsConfig, err := ds.HTTPJson.TLS.TLSConfig()
if err != nil {
c.String(http.StatusInternalServerError, "failed to create TLS config: %s", err.Error())
return
}
transport = &http.Transport{
TLSClientConfig: tlsConfig,
TLSClientConfig: &tls.Config{InsecureSkipVerify: ds.HTTPJson.TLS.SkipTlsVerify},
Proxy: http.ProxyFromEnvironment,
DialContext: (&net.Dialer{
Timeout: time.Duration(ds.HTTPJson.DialTimeout) * time.Millisecond,
@@ -193,7 +183,7 @@ func (rt *Router) dsProxy(c *gin.Context) {
modifyResponse := func(r *http.Response) error {
if r.StatusCode == http.StatusUnauthorized {
logx.Warningf(c.Request.Context(), "proxy path:%s unauthorized access ", c.Request.URL.Path)
logger.Warningf("proxy path:%s unauthorized access ", c.Request.URL.Path)
return fmt.Errorf("unauthorized access")
}

View File

@@ -5,17 +5,14 @@ import (
"sort"
"sync"
"github.com/ccfos/nightingale/v6/alert/eval"
"github.com/ccfos/nightingale/v6/dscache"
"github.com/ccfos/nightingale/v6/models"
"github.com/ccfos/nightingale/v6/pkg/logx"
"github.com/ccfos/nightingale/v6/pkg/ginx"
"github.com/gin-gonic/gin"
"github.com/toolkits/pkg/ginx"
"github.com/toolkits/pkg/logger"
)
type CheckDsPermFunc func(c *gin.Context, dsId int64, cate string, q interface{}) bool
var CheckDsPerm CheckDsPermFunc = func(c *gin.Context, dsId int64, cate string, q interface{}) bool {
func CheckDsPerm(c *gin.Context, dsId int64, cate string, q interface{}) bool {
// todo: 后续需要根据 cate 判断是否需要权限
return true
}
@@ -47,7 +44,6 @@ func QueryLogBatchConcurrently(anonymousAccess bool, ctx *gin.Context, f QueryFr
var mu sync.Mutex
var wg sync.WaitGroup
var errs []error
rctx := ctx.Request.Context()
for _, q := range f.Queries {
if !anonymousAccess && !CheckDsPerm(ctx, q.Did, q.DsCate, q) {
@@ -56,27 +52,20 @@ func QueryLogBatchConcurrently(anonymousAccess bool, ctx *gin.Context, f QueryFr
plug, exists := dscache.DsCache.Get(q.DsCate, q.Did)
if !exists {
logx.Warningf(rctx, "cluster:%d not exists query:%+v", q.Did, q)
logger.Warningf("cluster:%d not exists query:%+v", q.Did, q)
return LogResp{}, fmt.Errorf("cluster not exists")
}
// 根据数据源类型对 Query 进行模板渲染处理
err := eval.ExecuteQueryTemplate(q.DsCate, q.Query, nil)
if err != nil {
logx.Warningf(rctx, "query template execute error: %v", err)
return LogResp{}, fmt.Errorf("query template execute error: %v", err)
}
wg.Add(1)
go func(query Query) {
defer wg.Done()
data, total, err := plug.QueryLog(rctx, query.Query)
data, total, err := plug.QueryLog(ctx.Request.Context(), query.Query)
mu.Lock()
defer mu.Unlock()
if err != nil {
errMsg := fmt.Sprintf("query data error: %v query:%v\n ", err, query)
logx.Warningf(rctx, "%s", errMsg)
logger.Warningf(errMsg)
errs = append(errs, err)
return
}
@@ -122,7 +111,6 @@ func QueryDataConcurrently(anonymousAccess bool, ctx *gin.Context, f models.Quer
var mu sync.Mutex
var wg sync.WaitGroup
var errs []error
rctx := ctx.Request.Context()
for _, q := range f.Queries {
if !anonymousAccess && !CheckDsPerm(ctx, f.DatasourceId, f.Cate, q) {
@@ -131,7 +119,7 @@ func QueryDataConcurrently(anonymousAccess bool, ctx *gin.Context, f models.Quer
plug, exists := dscache.DsCache.Get(f.Cate, f.DatasourceId)
if !exists {
logx.Warningf(rctx, "cluster:%d not exists", f.DatasourceId)
logger.Warningf("cluster:%d not exists", f.DatasourceId)
return nil, fmt.Errorf("cluster not exists")
}
@@ -139,16 +127,16 @@ func QueryDataConcurrently(anonymousAccess bool, ctx *gin.Context, f models.Quer
go func(query interface{}) {
defer wg.Done()
data, err := plug.QueryData(rctx, query)
data, err := plug.QueryData(ctx.Request.Context(), query)
if err != nil {
logx.Warningf(rctx, "query data error: req:%+v err:%v", query, err)
logger.Warningf("query data error: req:%+v err:%v", query, err)
mu.Lock()
errs = append(errs, err)
mu.Unlock()
return
}
logx.Debugf(rctx, "query data: req:%+v resp:%+v", query, data)
logger.Debugf("query data: req:%+v resp:%+v", query, data)
mu.Lock()
resp = append(resp, data...)
mu.Unlock()
@@ -194,7 +182,6 @@ func QueryLogConcurrently(anonymousAccess bool, ctx *gin.Context, f models.Query
var mu sync.Mutex
var wg sync.WaitGroup
var errs []error
rctx := ctx.Request.Context()
for _, q := range f.Queries {
if !anonymousAccess && !CheckDsPerm(ctx, f.DatasourceId, f.Cate, q) {
@@ -203,7 +190,7 @@ func QueryLogConcurrently(anonymousAccess bool, ctx *gin.Context, f models.Query
plug, exists := dscache.DsCache.Get(f.Cate, f.DatasourceId)
if !exists {
logx.Warningf(rctx, "cluster:%d not exists query:%+v", f.DatasourceId, f)
logger.Warningf("cluster:%d not exists query:%+v", f.DatasourceId, f)
return LogResp{}, fmt.Errorf("cluster not exists")
}
@@ -211,11 +198,11 @@ func QueryLogConcurrently(anonymousAccess bool, ctx *gin.Context, f models.Query
go func(query interface{}) {
defer wg.Done()
data, total, err := plug.QueryLog(rctx, query)
logx.Debugf(rctx, "query log: req:%+v resp:%+v", query, data)
data, total, err := plug.QueryLog(ctx.Request.Context(), query)
logger.Debugf("query log: req:%+v resp:%+v", query, data)
if err != nil {
errMsg := fmt.Sprintf("query data error: %v query:%v\n ", err, query)
logx.Warningf(rctx, "%s", errMsg)
logger.Warningf(errMsg)
mu.Lock()
errs = append(errs, err)
mu.Unlock()
@@ -253,7 +240,6 @@ func (rt *Router) QueryLogV2(c *gin.Context) {
func (rt *Router) QueryLog(c *gin.Context) {
var f models.QueryParam
ginx.BindJSON(c, &f)
rctx := c.Request.Context()
var resp []interface{}
for _, q := range f.Queries {
@@ -263,13 +249,13 @@ func (rt *Router) QueryLog(c *gin.Context) {
plug, exists := dscache.DsCache.Get("elasticsearch", f.DatasourceId)
if !exists {
logx.Warningf(rctx, "cluster:%d not exists", f.DatasourceId)
logger.Warningf("cluster:%d not exists", f.DatasourceId)
ginx.Bomb(200, "cluster not exists")
}
data, _, err := plug.QueryLog(rctx, q)
data, _, err := plug.QueryLog(c.Request.Context(), q)
if err != nil {
logx.Warningf(rctx, "query data error: %v", err)
logger.Warningf("query data error: %v", err)
ginx.Bomb(200, "err:%v", err)
continue
}

View File

@@ -7,17 +7,14 @@ import (
"github.com/ccfos/nightingale/v6/models"
"github.com/ccfos/nightingale/v6/pkg/strx"
"github.com/ccfos/nightingale/v6/pkg/ginx"
"github.com/gin-gonic/gin"
"github.com/toolkits/pkg/ginx"
)
func (rt *Router) recordingRuleGets(c *gin.Context) {
busiGroupId := ginx.UrlParamInt64(c, "id")
ars, err := models.RecordingRuleGets(rt.Ctx, busiGroupId)
if err == nil {
models.FillUpdateByNicknames(rt.Ctx, ars)
}
ginx.NewRender(c).Data(ars, err)
}
@@ -42,9 +39,6 @@ func (rt *Router) recordingRuleGetsByGids(c *gin.Context) {
}
ars, err := models.RecordingRuleGetsByBGIds(rt.Ctx, gids)
if err == nil {
models.FillUpdateByNicknames(rt.Ctx, ars)
}
ginx.NewRender(c).Data(ars, err)
}
@@ -118,7 +112,6 @@ func (rt *Router) recordingRulePutByFE(c *gin.Context) {
}
rt.bgrwCheck(c, ar.GroupId)
rt.bgroCheck(c, f.GroupId)
f.UpdateBy = c.MustGet("username").(string)
ginx.NewRender(c).Message(ar.Update(rt.Ctx, f))

View File

@@ -6,9 +6,9 @@ import (
"github.com/ccfos/nightingale/v6/center/cconf"
"github.com/ccfos/nightingale/v6/models"
"github.com/ccfos/nightingale/v6/pkg/ginx"
"github.com/gin-gonic/gin"
"github.com/toolkits/pkg/ginx"
)
func (rt *Router) rolesGets(c *gin.Context) {

View File

@@ -5,8 +5,8 @@ import (
"github.com/ccfos/nightingale/v6/center/cconf"
"github.com/ccfos/nightingale/v6/models"
"github.com/ccfos/nightingale/v6/pkg/ginx"
"github.com/gin-gonic/gin"
"github.com/toolkits/pkg/ginx"
"github.com/toolkits/pkg/i18n"
)

View File

@@ -1,145 +0,0 @@
package router
import (
"net/http"
"github.com/ccfos/nightingale/v6/models"
"github.com/ccfos/nightingale/v6/pkg/slice"
"github.com/ccfos/nightingale/v6/pkg/ginx"
"github.com/gin-gonic/gin"
)
func (rt *Router) savedViewGets(c *gin.Context) {
page := ginx.QueryStr(c, "page", "")
me := c.MustGet("user").(*models.User)
lst, err := models.SavedViewGets(rt.Ctx, page)
if err != nil {
ginx.NewRender(c).Data(nil, err)
return
}
models.FillUpdateByNicknames(rt.Ctx, lst)
userGids, err := models.MyGroupIds(rt.Ctx, me.Id)
if err != nil {
ginx.NewRender(c).Data(nil, err)
return
}
favoriteMap, err := models.SavedViewFavoriteGetByUserId(rt.Ctx, me.Id)
if err != nil {
ginx.NewRender(c).Data(nil, err)
return
}
favoriteViews := make([]models.SavedView, 0)
normalViews := make([]models.SavedView, 0)
for _, view := range lst {
visible := view.CreateBy == me.Username ||
view.PublicCate == 2 ||
(view.PublicCate == 1 && slice.HaveIntersection[int64](userGids, view.Gids))
if !visible {
continue
}
view.IsFavorite = favoriteMap[view.Id]
// 收藏的排前面
if view.IsFavorite {
favoriteViews = append(favoriteViews, view)
} else {
normalViews = append(normalViews, view)
}
}
ginx.NewRender(c).Data(append(favoriteViews, normalViews...), nil)
}
func (rt *Router) savedViewAdd(c *gin.Context) {
var f models.SavedView
ginx.BindJSON(c, &f)
me := c.MustGet("user").(*models.User)
f.Id = 0
f.CreateBy = me.Username
f.UpdateBy = me.Username
err := models.SavedViewAdd(rt.Ctx, &f)
ginx.NewRender(c).Data(f.Id, err)
}
func (rt *Router) savedViewPut(c *gin.Context) {
id := ginx.UrlParamInt64(c, "id")
view, err := models.SavedViewGetById(rt.Ctx, id)
if err != nil {
ginx.NewRender(c).Data(nil, err)
return
}
if view == nil {
ginx.NewRender(c, http.StatusNotFound).Message("saved view not found")
return
}
me := c.MustGet("user").(*models.User)
// 只有创建者可以更新
if view.CreateBy != me.Username && !me.IsAdmin() {
ginx.NewRender(c, http.StatusForbidden).Message("forbidden")
return
}
var f models.SavedView
ginx.BindJSON(c, &f)
view.Name = f.Name
view.Filter = f.Filter
view.PublicCate = f.PublicCate
view.Gids = f.Gids
err = models.SavedViewUpdate(rt.Ctx, view, me.Username)
ginx.NewRender(c).Message(err)
}
func (rt *Router) savedViewDel(c *gin.Context) {
id := ginx.UrlParamInt64(c, "id")
view, err := models.SavedViewGetById(rt.Ctx, id)
if err != nil {
ginx.NewRender(c).Data(nil, err)
return
}
if view == nil {
ginx.NewRender(c, http.StatusNotFound).Message("saved view not found")
return
}
me := c.MustGet("user").(*models.User)
// 只有创建者或管理员可以删除
if view.CreateBy != me.Username && !me.IsAdmin() {
ginx.NewRender(c, http.StatusForbidden).Message("forbidden")
return
}
err = models.SavedViewDel(rt.Ctx, id)
ginx.NewRender(c).Message(err)
}
func (rt *Router) savedViewFavoriteAdd(c *gin.Context) {
id := ginx.UrlParamInt64(c, "id")
me := c.MustGet("user").(*models.User)
err := models.UserViewFavoriteAdd(rt.Ctx, id, me.Id)
ginx.NewRender(c).Message(err)
}
func (rt *Router) savedViewFavoriteDel(c *gin.Context) {
id := ginx.UrlParamInt64(c, "id")
me := c.MustGet("user").(*models.User)
err := models.UserViewFavoriteDel(rt.Ctx, id, me.Id)
ginx.NewRender(c).Message(err)
}

View File

@@ -5,10 +5,10 @@ import (
"github.com/ccfos/nightingale/v6/pkg/flashduty"
"github.com/ccfos/nightingale/v6/pkg/ormx"
"github.com/ccfos/nightingale/v6/pkg/secu"
"github.com/ccfos/nightingale/v6/pkg/ginx"
"github.com/google/uuid"
"github.com/gin-gonic/gin"
"github.com/toolkits/pkg/ginx"
"github.com/toolkits/pkg/logger"
)

View File

@@ -4,9 +4,9 @@ import (
"time"
"github.com/ccfos/nightingale/v6/models"
"github.com/ccfos/nightingale/v6/pkg/ginx"
"github.com/gin-gonic/gin"
"github.com/toolkits/pkg/ginx"
)
func (rt *Router) serversGet(c *gin.Context) {

View File

@@ -5,10 +5,10 @@ import (
"time"
"github.com/ccfos/nightingale/v6/models"
"github.com/ccfos/nightingale/v6/pkg/ginx"
"github.com/google/uuid"
"github.com/gin-gonic/gin"
"github.com/toolkits/pkg/ginx"
)
// sourceTokenAdd 生成新的源令牌

View File

@@ -13,10 +13,10 @@ import (
"github.com/ccfos/nightingale/v6/pkg/strx"
"github.com/ccfos/nightingale/v6/pushgw/idents"
"github.com/ccfos/nightingale/v6/storage"
"github.com/ccfos/nightingale/v6/pkg/ginx"
"github.com/gin-gonic/gin"
"github.com/prometheus/common/model"
"github.com/toolkits/pkg/ginx"
"github.com/toolkits/pkg/logger"
)
@@ -38,16 +38,6 @@ func (rt *Router) targetGetsByHostFilter(c *gin.Context) {
total, err := models.TargetCountByFilter(rt.Ctx, query)
ginx.Dangerous(err)
models.FillTargetsBeatTime(rt.Redis, hosts)
now := time.Now().Unix()
for i := 0; i < len(hosts); i++ {
if now-hosts[i].BeatTime < 60 {
hosts[i].TargetUp = 2
} else if now-hosts[i].BeatTime < 180 {
hosts[i].TargetUp = 1
}
}
ginx.NewRender(c).Data(gin.H{
"list": hosts,
"total": total,
@@ -91,24 +81,9 @@ func (rt *Router) targetGets(c *gin.Context) {
models.BuildTargetWhereWithBgids(bgids),
models.BuildTargetWhereWithDsIds(dsIds),
models.BuildTargetWhereWithQuery(query),
models.BuildTargetWhereWithDowntime(downtime),
models.BuildTargetWhereWithHosts(hosts),
}
// downtime 筛选:从缓存获取心跳时间,选择较小的集合用 IN 或 NOT IN 过滤
if downtime != 0 {
downtimeOpt, hasMatch := rt.downtimeFilter(downtime)
if !hasMatch {
ginx.NewRender(c).Data(gin.H{
"list": []*models.Target{},
"total": 0,
}, nil)
return
}
if downtimeOpt != nil {
options = append(options, downtimeOpt)
}
}
total, err := models.TargetTotal(rt.Ctx, options...)
ginx.Dangerous(err)
@@ -127,17 +102,14 @@ func (rt *Router) targetGets(c *gin.Context) {
now := time.Now()
cache := make(map[int64]*models.BusiGroup)
// 从 Redis 补全 BeatTime
models.FillTargetsBeatTime(rt.Redis, list)
var keys []string
for i := 0; i < len(list); i++ {
ginx.Dangerous(list[i].FillGroup(rt.Ctx, cache))
keys = append(keys, models.WrapIdent(list[i].Ident))
if now.Unix()-list[i].BeatTime < 60 {
if now.Unix()-list[i].UpdateAt < 60 {
list[i].TargetUp = 2
} else if now.Unix()-list[i].BeatTime < 180 {
} else if now.Unix()-list[i].UpdateAt < 180 {
list[i].TargetUp = 1
}
}
@@ -176,43 +148,6 @@ func (rt *Router) targetGets(c *gin.Context) {
}, nil)
}
// downtimeFilter 从缓存获取心跳时间,生成 downtime 筛选条件
// 选择匹配集和非匹配集中较小的一方,用 IN 或 NOT IN 来减少 SQL 参数量
// 返回值:
// - option: 筛选条件nil 表示所有 target 都符合条件(无需过滤)
// - hasMatch: 是否有符合条件的 targetfalse 表示无匹配应返回空结果
func (rt *Router) downtimeFilter(downtime int64) (option models.BuildTargetWhereOption, hasMatch bool) {
now := time.Now().Unix()
targets := rt.TargetCache.GetAll()
var matchIdents, nonMatchIdents []string
for _, target := range targets {
matched := false
if downtime > 0 {
matched = target.BeatTime < now-downtime
} else if downtime < 0 {
matched = target.BeatTime > now+downtime
}
if matched {
matchIdents = append(matchIdents, target.Ident)
} else {
nonMatchIdents = append(nonMatchIdents, target.Ident)
}
}
if len(matchIdents) == 0 {
return nil, false
}
if len(nonMatchIdents) == 0 {
return nil, true
}
if len(matchIdents) <= len(nonMatchIdents) {
return models.BuildTargetWhereWithIdents(matchIdents), true
}
return models.BuildTargetWhereExcludeIdents(nonMatchIdents), true
}
func (rt *Router) targetExtendInfoByIdent(c *gin.Context) {
ident := ginx.QueryStr(c, "ident", "")
key := models.WrapExtendIdent(ident)

View File

@@ -7,9 +7,9 @@ import (
"github.com/ccfos/nightingale/v6/alert/sender"
"github.com/ccfos/nightingale/v6/models"
"github.com/ccfos/nightingale/v6/pkg/strx"
"github.com/ccfos/nightingale/v6/pkg/ginx"
"github.com/gin-gonic/gin"
"github.com/toolkits/pkg/ginx"
"github.com/toolkits/pkg/i18n"
)

View File

@@ -8,9 +8,9 @@ import (
"github.com/ccfos/nightingale/v6/models"
"github.com/ccfos/nightingale/v6/pkg/strx"
"github.com/ccfos/nightingale/v6/pkg/ginx"
"github.com/gin-gonic/gin"
"github.com/toolkits/pkg/ginx"
"github.com/toolkits/pkg/i18n"
"github.com/toolkits/pkg/str"
)
@@ -25,7 +25,6 @@ func (rt *Router) taskTplGets(c *gin.Context) {
list, err := models.TaskTplGets(rt.Ctx, []int64{groupId}, query, limit, ginx.Offset(c, limit))
ginx.Dangerous(err)
models.FillUpdateByNicknames(rt.Ctx, list)
ginx.NewRender(c).Data(gin.H{
"total": total,
@@ -61,7 +60,6 @@ func (rt *Router) taskTplGetsByGids(c *gin.Context) {
list, err := models.TaskTplGets(rt.Ctx, gids, query, limit, ginx.Offset(c, limit))
ginx.Dangerous(err)
models.FillUpdateByNicknames(rt.Ctx, list)
ginx.NewRender(c).Data(gin.H{
"total": total,

View File

@@ -2,14 +2,13 @@ package router
import (
"fmt"
"net/http"
"github.com/ccfos/nightingale/v6/center/cconf"
"github.com/ccfos/nightingale/v6/datasource/tdengine"
"github.com/ccfos/nightingale/v6/dscache"
"github.com/ccfos/nightingale/v6/models"
"github.com/ccfos/nightingale/v6/pkg/ginx"
"github.com/gin-gonic/gin"
"github.com/toolkits/pkg/ginx"
"net/http"
)
type databasesQueryForm struct {

View File

@@ -1,136 +0,0 @@
package router
import (
"encoding/json"
"fmt"
"io"
"net/http"
"time"
"github.com/ccfos/nightingale/v6/models"
"github.com/ccfos/nightingale/v6/pkg/ginx"
"github.com/ccfos/nightingale/v6/pkg/loggrep"
"github.com/toolkits/pkg/logger"
"github.com/gin-gonic/gin"
)
// traceLogsPage renders an HTML log viewer page for trace logs.
func (rt *Router) traceLogsPage(c *gin.Context) {
traceId := ginx.UrlParamStr(c, "traceid")
if !loggrep.IsValidTraceID(traceId) {
c.String(http.StatusBadRequest, "invalid trace id format")
return
}
logs, instance, err := rt.getTraceLogs(traceId)
if err != nil {
c.String(http.StatusInternalServerError, "Error: %v", err)
return
}
c.Header("Content-Type", "text/html; charset=utf-8")
err = loggrep.RenderTraceLogsHTML(c.Writer, loggrep.TraceLogsPageData{
TraceID: traceId,
Instance: instance,
Logs: logs,
Total: len(logs),
})
if err != nil {
c.String(http.StatusInternalServerError, "render error: %v", err)
}
}
// traceLogsJSON returns JSON for trace logs.
func (rt *Router) traceLogsJSON(c *gin.Context) {
traceId := ginx.UrlParamStr(c, "traceid")
if !loggrep.IsValidTraceID(traceId) {
ginx.Bomb(200, "invalid trace id format")
}
logs, instance, err := rt.getTraceLogs(traceId)
ginx.Dangerous(err)
ginx.NewRender(c).Data(loggrep.EventDetailResp{
Logs: logs,
Instance: instance,
}, nil)
}
// getTraceLogs finds the same-engine instances and queries each one
// until trace logs are found. Trace logs belong to a single instance.
func (rt *Router) getTraceLogs(traceId string) ([]string, string, error) {
keyword := "trace_id=" + traceId
instance := fmt.Sprintf("%s:%d", rt.Alert.Heartbeat.IP, rt.HTTP.Port)
engineName := rt.Alert.Heartbeat.EngineName
// try local first
logs, err := loggrep.GrepLatestLogFiles(rt.LogDir, keyword)
if err == nil && len(logs) > 0 {
return logs, instance, nil
}
// find all instances with the same engineName
servers, err := models.AlertingEngineGetsInstances(rt.Ctx,
"engine_cluster = ? and clock > ?",
engineName, time.Now().Unix()-30)
if err != nil {
return nil, "", err
}
// loop through remote instances until we find logs
for _, node := range servers {
if node == instance {
continue // already tried local
}
logs, nodeAddr, err := rt.forwardTraceLogs(node, traceId)
if err != nil {
logger.Errorf("forwardTraceLogs failed: %v", err)
continue
}
if len(logs) > 0 {
return logs, nodeAddr, nil
}
}
return nil, instance, nil
}
func (rt *Router) forwardTraceLogs(node, traceId string) ([]string, string, error) {
url := fmt.Sprintf("http://%s/v1/n9e/trace-logs/%s", node, traceId)
req, err := http.NewRequest("GET", url, nil)
if err != nil {
return nil, node, err
}
for user, pass := range rt.HTTP.APIForService.BasicAuth {
req.SetBasicAuth(user, pass)
break
}
client := &http.Client{Timeout: 15 * time.Second}
resp, err := client.Do(req)
if err != nil {
return nil, node, fmt.Errorf("forward to %s failed: %v", node, err)
}
defer resp.Body.Close()
body, err := io.ReadAll(io.LimitReader(resp.Body, 10*1024*1024))
if err != nil {
return nil, node, err
}
var result struct {
Dat loggrep.EventDetailResp `json:"dat"`
Err string `json:"err"`
}
if err := json.Unmarshal(body, &result); err != nil {
return nil, node, err
}
if result.Err != "" {
return nil, node, fmt.Errorf("%s", result.Err)
}
return result.Dat.Logs, result.Dat.Instance, nil
}

View File

@@ -9,9 +9,9 @@ import (
"github.com/ccfos/nightingale/v6/pkg/flashduty"
"github.com/ccfos/nightingale/v6/pkg/ormx"
"github.com/ccfos/nightingale/v6/pkg/secu"
"github.com/ccfos/nightingale/v6/pkg/ginx"
"github.com/gin-gonic/gin"
"github.com/toolkits/pkg/ginx"
"github.com/toolkits/pkg/logger"
"gorm.io/gorm"
)

View File

@@ -7,9 +7,9 @@ import (
"github.com/ccfos/nightingale/v6/models"
"github.com/ccfos/nightingale/v6/pkg/flashduty"
"github.com/ccfos/nightingale/v6/pkg/strx"
"github.com/ccfos/nightingale/v6/pkg/ginx"
"github.com/gin-gonic/gin"
"github.com/toolkits/pkg/ginx"
"github.com/toolkits/pkg/logger"
)
@@ -27,9 +27,6 @@ func (rt *Router) userGroupGets(c *gin.Context) {
me := c.MustGet("user").(*models.User)
lst, err := me.UserGroups(rt.Ctx, limit, query)
if err == nil {
models.FillUpdateByNicknames(rt.Ctx, lst)
}
ginx.NewRender(c).Data(lst, err)
}

View File

@@ -5,9 +5,9 @@ import (
"time"
"github.com/ccfos/nightingale/v6/models"
"github.com/ccfos/nightingale/v6/pkg/ginx"
"github.com/gin-gonic/gin"
"github.com/toolkits/pkg/ginx"
)
func (rt *Router) userVariableConfigGets(context *gin.Context) {

View File

@@ -12,7 +12,6 @@ import (
"github.com/ccfos/nightingale/v6/pkg/cas"
"github.com/ccfos/nightingale/v6/pkg/ctx"
"github.com/ccfos/nightingale/v6/pkg/dingtalk"
"github.com/ccfos/nightingale/v6/pkg/feishu"
"github.com/ccfos/nightingale/v6/pkg/ldapx"
"github.com/ccfos/nightingale/v6/pkg/oauth2x"
"github.com/ccfos/nightingale/v6/pkg/oidcx"
@@ -28,7 +27,6 @@ type SsoClient struct {
CAS *cas.SsoClient
OAuth2 *oauth2x.SsoClient
DingTalk *dingtalk.SsoClient
FeiShu *feishu.SsoClient
LastUpdateTime int64
configCache *memsto.ConfigCache
configLastUpdateTime int64
@@ -205,13 +203,6 @@ func Init(center cconf.Center, ctx *ctx.Context, configCache *memsto.ConfigCache
log.Fatalf("init %s failed: %s", dingtalk.SsoTypeName, err)
}
ssoClient.DingTalk = dingtalk.New(config)
case feishu.SsoTypeName:
var config feishu.Config
err := json.Unmarshal([]byte(cfg.Content), &config)
if err != nil {
log.Fatalf("init %s failed: %s", feishu.SsoTypeName, err)
}
ssoClient.FeiShu = feishu.New(config)
}
}
@@ -300,22 +291,6 @@ func (s *SsoClient) reload(ctx *ctx.Context) error {
s.DingTalk = nil
}
if feiShuConfig, ok := ssoConfigMap[feishu.SsoTypeName]; ok {
var config feishu.Config
err := json.Unmarshal([]byte(feiShuConfig.Content), &config)
if err != nil {
logger.Warningf("reload %s failed: %s", feishu.SsoTypeName, err)
} else {
if s.FeiShu != nil {
s.FeiShu.Reload(config)
} else {
s.FeiShu = feishu.New(config)
}
}
} else {
s.FeiShu = nil
}
s.LastUpdateTime = lastUpdateTime
s.configLastUpdateTime = lastCacheUpdateTime
return nil

View File

@@ -71,10 +71,7 @@ CREATE TABLE `datasource`
`updated_at` bigint not null default 0,
`updated_by` varchar(64) not null default '',
PRIMARY KEY (`id`)
) ENGINE = InnoDB DEFAULT CHARSET = utf8mb4;
-- datasource add weight field
alter table `datasource` add `weight` int not null default 0;
) ENGINE = InnoDB DEFAULT CHARSET = utf8mb4;
CREATE TABLE `builtin_cate` (
`id` bigint unsigned not null auto_increment,

View File

@@ -87,7 +87,7 @@ func Initialize(configDir string, cryptoKey string) (func(), error) {
alert.Start(config.Alert, config.Pushgw, syncStats, alertStats, externalProcessors, targetCache, busiGroupCache, alertMuteCache,
alertRuleCache, notifyConfigCache, taskTplsCache, dsCache, ctx, promClients, userCache, userGroupCache, notifyRuleCache, notifyChannelCache, messageTemplateCache, configCvalCache)
alertrtRouter := alertrt.New(config.HTTP, config.Alert, alertMuteCache, targetCache, busiGroupCache, alertStats, ctx, externalProcessors, config.Log.Dir)
alertrtRouter := alertrt.New(config.HTTP, config.Alert, alertMuteCache, targetCache, busiGroupCache, alertStats, ctx, externalProcessors)
alertrtRouter.Config(r)

View File

@@ -1,67 +0,0 @@
package cron
import (
"time"
"github.com/ccfos/nightingale/v6/models"
"github.com/ccfos/nightingale/v6/pkg/ctx"
"github.com/robfig/cron/v3"
"github.com/toolkits/pkg/logger"
)
const (
defaultBatchSize = 100 // 每批删除数量
defaultSleepMs = 10 // 每批删除后休眠时间(毫秒)
)
// cleanPipelineExecutionInBatches 分批删除执行记录,避免大批量删除影响数据库性能
func cleanPipelineExecutionInBatches(ctx *ctx.Context, day int) {
threshold := time.Now().Unix() - 86400*int64(day)
var totalDeleted int64
for {
deleted, err := models.DeleteEventPipelineExecutionsInBatches(ctx, threshold, defaultBatchSize)
if err != nil {
logger.Errorf("Failed to clean pipeline execution records in batch: %v", err)
return
}
totalDeleted += deleted
// 如果本批删除数量小于 batchSize说明已删除完毕
if deleted < int64(defaultBatchSize) {
break
}
// 休眠一段时间,降低数据库压力
time.Sleep(time.Duration(defaultSleepMs) * time.Millisecond)
}
if totalDeleted > 0 {
logger.Infof("Cleaned %d pipeline execution records older than %d days", totalDeleted, day)
}
}
// CleanPipelineExecution starts a cron job to clean old pipeline execution records in batches
// Runs daily at 6:00 AM
// day: 数据保留天数,默认 7 天
// 使用分批删除方式,每批 100 条,间隔 10ms避免大批量删除影响数据库性能
func CleanPipelineExecution(ctx *ctx.Context, day int) {
c := cron.New()
if day < 1 {
day = 7 // default retention: 7 days
}
_, err := c.AddFunc("0 6 * * *", func() {
cleanPipelineExecutionInBatches(ctx, day)
})
if err != nil {
logger.Errorf("Failed to add clean pipeline execution cron job: %v", err)
return
}
c.Start()
logger.Infof("Pipeline execution cleanup cron started, retention: %d days, batch_size: %d, sleep_ms: %d", day, defaultBatchSize, defaultSleepMs)
}

View File

@@ -12,8 +12,6 @@ import (
"github.com/mitchellh/mapstructure"
"github.com/toolkits/pkg/logger"
"github.com/ccfos/nightingale/v6/pkg/logx"
)
const (
@@ -180,9 +178,14 @@ func (c *Clickhouse) QueryData(ctx context.Context, query interface{}) ([]models
rows, err := c.QueryTimeseries(ctx, ckQueryParam)
if err != nil {
logx.Warningf(ctx, "query:%+v get data err:%v", ckQueryParam, err)
logger.Warningf("query:%+v get data err:%v", ckQueryParam, err)
return nil, err
}
if err != nil {
logger.Warningf("query:%+v get data err:%v", ckQueryParam, err)
return []models.DataResp{}, err
}
data := make([]models.DataResp, 0)
for i := range rows {
data = append(data, models.DataResp{
@@ -211,7 +214,7 @@ func (c *Clickhouse) QueryLog(ctx context.Context, query interface{}) ([]interfa
rows, err := c.Query(ctx, ckQueryParam)
if err != nil {
logx.Warningf(ctx, "query:%+v get data err:%v", ckQueryParam, err)
logger.Warningf("query:%+v get data err:%v", ckQueryParam, err)
return nil, 0, err
}

View File

@@ -17,7 +17,6 @@ import (
"github.com/ccfos/nightingale/v6/memsto"
"github.com/ccfos/nightingale/v6/models"
"github.com/ccfos/nightingale/v6/pkg/logx"
)
type FixedField string
@@ -27,10 +26,6 @@ const (
FieldId FixedField = "_id"
)
// LabelSeparator 用于分隔多个标签的分隔符
// 使用 ASCII 控制字符 Record Separator (0x1E),避免与用户数据中的 "--" 冲突
const LabelSeparator = "\x1e"
type Query struct {
Ref string `json:"ref" mapstructure:"ref"`
IndexType string `json:"index_type" mapstructure:"index_type"` // 普通索引:index 索引模式:index_pattern
@@ -133,7 +128,7 @@ func TransferData(metric, ref string, m map[string][][]float64) []models.DataRes
}
data.Metric["__name__"] = model.LabelValue(metric)
labels := strings.Split(k, LabelSeparator)
labels := strings.Split(k, "--")
for _, label := range labels {
arr := strings.SplitN(label, "=", 2)
if len(arr) == 2 {
@@ -202,7 +197,7 @@ func GetBuckets(labelKey string, keys []string, arr []interface{}, metrics *Metr
case json.Number, string:
if !getTs {
if labels != "" {
newlabels = fmt.Sprintf("%s%s%s=%v", labels, LabelSeparator, labelKey, keyValue)
newlabels = fmt.Sprintf("%s--%s=%v", labels, labelKey, keyValue)
} else {
newlabels = fmt.Sprintf("%s=%v", labelKey, keyValue)
}
@@ -391,8 +386,8 @@ func QueryData(ctx context.Context, queryParam interface{}, cliTimeout int64, ve
now := time.Now().Unix()
var start, end int64
if param.End != 0 && param.Start != 0 {
end = param.End
start = param.Start
end = param.End - param.End%param.Interval
start = param.Start - param.Start%param.Interval
} else {
end = now
start = end - param.Interval
@@ -544,7 +539,7 @@ func QueryData(ctx context.Context, queryParam interface{}, cliTimeout int64, ve
source, _ := queryString.Source()
b, _ := json.Marshal(source)
logx.Debugf(ctx, "query_data q:%+v indexArr:%+v tsAggr:%+v query_string:%s", param, indexArr, tsAggr, string(b))
logger.Debugf("query_data q:%+v indexArr:%+v tsAggr:%+v query_string:%s", param, indexArr, tsAggr, string(b))
searchSource := elastic.NewSearchSource().
Query(queryString).
@@ -552,29 +547,21 @@ func QueryData(ctx context.Context, queryParam interface{}, cliTimeout int64, ve
searchSourceString, err := searchSource.Source()
if err != nil {
logx.Warningf(ctx, "query_data searchSource:%s to string error:%v", searchSourceString, err)
logger.Warningf("query_data searchSource:%s to string error:%v", searchSourceString, err)
}
jsonSearchSource, err := json.Marshal(searchSourceString)
if err != nil {
logx.Warningf(ctx, "query_data searchSource:%s to json error:%v", searchSourceString, err)
logger.Warningf("query_data searchSource:%s to json error:%v", searchSourceString, err)
}
result, err := search(ctx, indexArr, searchSource, param.Timeout, param.MaxShard)
if err != nil {
logx.Warningf(ctx, "query_data searchSource:%s query_data error:%v", searchSourceString, err)
logger.Warningf("query_data searchSource:%s query_data error:%v", searchSourceString, err)
return nil, err
}
// 检查是否有 shard failures有部分数据时仅记录警告继续处理
if shardErr := checkShardFailures(ctx, result.Shards, "query_data", searchSourceString); shardErr != nil {
if len(result.Aggregations["ts"]) == 0 {
return nil, shardErr
}
// 有部分数据checkShardFailures 已记录警告,继续处理
}
logx.Infof(ctx, "query_data searchSource:%s resp:%s", string(jsonSearchSource), string(result.Aggregations["ts"]))
logger.Debugf("query_data searchSource:%s resp:%s", string(jsonSearchSource), string(result.Aggregations["ts"]))
js, err := simplejson.NewJson(result.Aggregations["ts"])
if err != nil {
@@ -611,40 +598,6 @@ func QueryData(ctx context.Context, queryParam interface{}, cliTimeout int64, ve
return items, nil
}
// checkShardFailures 检查 ES 查询结果中的 shard failures返回格式化的错误信息
func checkShardFailures(ctx context.Context, shards *elastic.ShardsInfo, logPrefix string, queryContext interface{}) error {
if shards == nil || shards.Failed == 0 || len(shards.Failures) == 0 {
return nil
}
var failureReasons []string
for _, failure := range shards.Failures {
reason := ""
if failure.Reason != nil {
if reasonType, ok := failure.Reason["type"].(string); ok {
reason = reasonType
}
if reasonMsg, ok := failure.Reason["reason"].(string); ok {
if reason != "" {
reason += ": " + reasonMsg
} else {
reason = reasonMsg
}
}
}
if reason != "" {
failureReasons = append(failureReasons, fmt.Sprintf("index=%s shard=%d: %s", failure.Index, failure.Shard, reason))
}
}
if len(failureReasons) > 0 {
errMsg := fmt.Sprintf("elasticsearch shard failures (%d/%d failed): %s", shards.Failed, shards.Total, strings.Join(failureReasons, "; "))
logx.Warningf(ctx, "%s query:%v %s", logPrefix, queryContext, errMsg)
return fmt.Errorf("%s", errMsg)
}
return nil
}
func HitFilter(typ string) bool {
switch typ {
case "keyword", "date", "long", "integer", "short", "byte", "double", "float", "half_float", "scaled_float", "unsigned_long":
@@ -721,34 +674,28 @@ func QueryLog(ctx context.Context, queryParam interface{}, timeout int64, versio
} else {
source = source.From(param.P).Sort(param.DateField, param.Ascending)
}
sourceBytes, _ := json.Marshal(source)
result, err := search(ctx, indexArr, source, param.Timeout, param.MaxShard)
if err != nil {
logx.Warningf(ctx, "query_log source:%s error:%v", string(sourceBytes), err)
logger.Warningf("query data error:%v", err)
return nil, 0, err
}
// 检查是否有 shard failures有部分数据时仅记录警告继续处理
if shardErr := checkShardFailures(ctx, result.Shards, "query_log", string(sourceBytes)); shardErr != nil {
if len(result.Hits.Hits) == 0 {
return nil, 0, shardErr
}
// 有部分数据checkShardFailures 已记录警告,继续处理
}
total := result.TotalHits()
var ret []interface{}
logx.Debugf(ctx, "query_log source:%s len:%d total:%d", string(sourceBytes), len(result.Hits.Hits), total)
b, _ := json.Marshal(source)
logger.Debugf("query data result query source:%s len:%d total:%d", string(b), len(result.Hits.Hits), total)
resultBytes, _ := json.Marshal(result)
logx.Debugf(ctx, "query_log source:%s result:%s", string(sourceBytes), string(resultBytes))
logger.Debugf("query data result query source:%s result:%s", string(b), string(resultBytes))
if strings.HasPrefix(version, "6") {
for i := 0; i < len(result.Hits.Hits); i++ {
var x map[string]interface{}
err := json.Unmarshal(result.Hits.Hits[i].Source, &x)
if err != nil {
logx.Warningf(ctx, "Unmarshal source error:%v", err)
logger.Warningf("Unmarshal source error:%v", err)
continue
}

View File

@@ -67,13 +67,6 @@ func init() {
PluginType: "pgsql",
PluginTypeName: "PostgreSQL",
}
DatasourceTypes[7] = DatasourceType{
Id: 7,
Category: "logging",
PluginType: "victorialogs",
PluginTypeName: "VictoriaLogs",
}
}
type NewDatasourceFn func(settings map[string]interface{}) (Datasource, error)
@@ -133,5 +126,4 @@ type DatasourceInfo struct {
CreatedAt int64 `json:"created_at"`
UpdatedAt int64 `json:"updated_at"`
IsDefault bool `json:"is_default"`
Weight int `json:"weight"`
}

Some files were not shown because too many files have changed in this diff Show More