mirror of
https://github.com/ccfos/nightingale.git
synced 2026-03-03 06:29:16 +00:00
Compare commits
1 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
9867a9519c |
@@ -31,9 +31,7 @@
|
||||
|
||||
Nightingale is an open-source monitoring project that focuses on alerting. Similar to Grafana, Nightingale also connects with various existing data sources. However, while Grafana emphasizes visualization, Nightingale places greater emphasis on the alerting engine, as well as the processing and distribution of alarms.
|
||||
|
||||
> 💡 Nightingale has now officially launched the [MCP-Server](https://github.com/n9e/n9e-mcp-server/). This MCP Server enables AI assistants to interact with the Nightingale API using natural language, facilitating alert management, monitoring, and observability tasks.
|
||||
>
|
||||
> The Nightingale project was initially developed and open-sourced by DiDi.inc. On May 11, 2022, it was donated to the Open Source Development Committee of the China Computer Federation (CCF ODTC).
|
||||
> The Nightingale project was initially developed and open-sourced by DiDi.inc. On May 11, 2022, it was donated to the Open Source Development Committee of the China Computer Federation (CCF ODC).
|
||||
|
||||

|
||||
|
||||
|
||||
@@ -3,7 +3,7 @@
|
||||
<img src="doc/img/Nightingale_L_V.png" alt="nightingale - cloud native monitoring" width="100" /></a>
|
||||
</p>
|
||||
<p align="center">
|
||||
<b>开源监控告警管理专家</b>
|
||||
<b>开源告警管理专家</b>
|
||||
</p>
|
||||
|
||||
<p align="center">
|
||||
@@ -33,8 +33,7 @@
|
||||
|
||||
夜莺侧重于监控告警,类似于 Grafana 的数据源集成方式,夜莺也是对接多种既有的数据源,不过 Grafana 侧重于可视化,夜莺则是侧重于告警引擎、告警事件的处理和分发。
|
||||
|
||||
> - 💡夜莺正式推出了 [MCP-Server](https://github.com/n9e/n9e-mcp-server/),此 MCP Server 允许 AI 助手通过自然语言与夜莺 API 交互,实现告警管理、监控和可观测性任务。
|
||||
> - 夜莺监控项目,最初由滴滴开发和开源,并于 2022 年 5 月 11 日,捐赠予中国计算机学会开源发展技术委员会(CCF ODTC),为 CCF ODTC 成立后接受捐赠的第一个开源项目。
|
||||
> 夜莺监控项目,最初由滴滴开发和开源,并于 2022 年 5 月 11 日,捐赠予中国计算机学会开源发展技术委员会(CCF ODTC),为 CCF ODTC 成立后接受捐赠的第一个开源项目。
|
||||
|
||||

|
||||
|
||||
|
||||
@@ -79,7 +79,7 @@ func Initialize(configDir string, cryptoKey string) (func(), error) {
|
||||
|
||||
r := httpx.GinEngine(config.Global.RunMode, config.HTTP,
|
||||
configCvalCache.PrintBodyPaths, configCvalCache.PrintAccessLog)
|
||||
rt := router.New(config.HTTP, config.Alert, alertMuteCache, targetCache, busiGroupCache, alertStats, ctx, externalProcessors, config.Log.Dir)
|
||||
rt := router.New(config.HTTP, config.Alert, alertMuteCache, targetCache, busiGroupCache, alertStats, ctx, externalProcessors)
|
||||
|
||||
if config.Ibex.Enable {
|
||||
ibex.ServerStart(false, nil, redis, config.HTTP.APIForService.BasicAuth, config.Alert.Heartbeat, &config.CenterApi, r, nil, config.Ibex, config.HTTP.Port)
|
||||
|
||||
@@ -8,6 +8,7 @@ import (
|
||||
"time"
|
||||
|
||||
"github.com/ccfos/nightingale/v6/alert/aconf"
|
||||
"github.com/ccfos/nightingale/v6/alert/common"
|
||||
"github.com/ccfos/nightingale/v6/alert/queue"
|
||||
"github.com/ccfos/nightingale/v6/memsto"
|
||||
"github.com/ccfos/nightingale/v6/models"
|
||||
@@ -98,12 +99,12 @@ func (e *Consumer) consumeOne(event *models.AlertCurEvent) {
|
||||
e.dispatch.Astats.CounterAlertsTotal.WithLabelValues(event.Cluster, eventType, event.GroupName).Inc()
|
||||
|
||||
if err := event.ParseRule("rule_name"); err != nil {
|
||||
logger.Warningf("alert_eval_%d datasource_%d failed to parse rule name: %v", event.RuleId, event.DatasourceId, err)
|
||||
logger.Warningf("ruleid:%d failed to parse rule name: %v", event.RuleId, err)
|
||||
event.RuleName = fmt.Sprintf("failed to parse rule name: %v", err)
|
||||
}
|
||||
|
||||
if err := event.ParseRule("annotations"); err != nil {
|
||||
logger.Warningf("alert_eval_%d datasource_%d failed to parse annotations: %v", event.RuleId, event.DatasourceId, err)
|
||||
logger.Warningf("ruleid:%d failed to parse annotations: %v", event.RuleId, err)
|
||||
event.Annotations = fmt.Sprintf("failed to parse annotations: %v", err)
|
||||
event.AnnotationsJSON["error"] = event.Annotations
|
||||
}
|
||||
@@ -111,7 +112,7 @@ func (e *Consumer) consumeOne(event *models.AlertCurEvent) {
|
||||
e.queryRecoveryVal(event)
|
||||
|
||||
if err := event.ParseRule("rule_note"); err != nil {
|
||||
logger.Warningf("alert_eval_%d datasource_%d failed to parse rule note: %v", event.RuleId, event.DatasourceId, err)
|
||||
logger.Warningf("ruleid:%d failed to parse rule note: %v", event.RuleId, err)
|
||||
event.RuleNote = fmt.Sprintf("failed to parse rule note: %v", err)
|
||||
}
|
||||
|
||||
@@ -130,7 +131,7 @@ func (e *Consumer) persist(event *models.AlertCurEvent) {
|
||||
var err error
|
||||
event.Id, err = poster.PostByUrlsWithResp[int64](e.ctx, "/v1/n9e/event-persist", event)
|
||||
if err != nil {
|
||||
logger.Errorf("event:%s persist err:%v", event.Hash, err)
|
||||
logger.Errorf("event:%+v persist err:%v", event, err)
|
||||
e.dispatch.Astats.CounterRuleEvalErrorTotal.WithLabelValues(fmt.Sprintf("%v", event.DatasourceId), "persist_event", event.GroupName, fmt.Sprintf("%v", event.RuleId)).Inc()
|
||||
}
|
||||
return
|
||||
@@ -138,7 +139,7 @@ func (e *Consumer) persist(event *models.AlertCurEvent) {
|
||||
|
||||
err := models.EventPersist(e.ctx, event)
|
||||
if err != nil {
|
||||
logger.Errorf("event:%s persist err:%v", event.Hash, err)
|
||||
logger.Errorf("event%+v persist err:%v", event, err)
|
||||
e.dispatch.Astats.CounterRuleEvalErrorTotal.WithLabelValues(fmt.Sprintf("%v", event.DatasourceId), "persist_event", event.GroupName, fmt.Sprintf("%v", event.RuleId)).Inc()
|
||||
}
|
||||
}
|
||||
@@ -156,12 +157,12 @@ func (e *Consumer) queryRecoveryVal(event *models.AlertCurEvent) {
|
||||
|
||||
promql = strings.TrimSpace(promql)
|
||||
if promql == "" {
|
||||
logger.Warningf("alert_eval_%d datasource_%d promql is blank", event.RuleId, event.DatasourceId)
|
||||
logger.Warningf("rule_eval:%s promql is blank", getKey(event))
|
||||
return
|
||||
}
|
||||
|
||||
if e.promClients.IsNil(event.DatasourceId) {
|
||||
logger.Warningf("alert_eval_%d datasource_%d error reader client is nil", event.RuleId, event.DatasourceId)
|
||||
logger.Warningf("rule_eval:%s error reader client is nil", getKey(event))
|
||||
return
|
||||
}
|
||||
|
||||
@@ -170,7 +171,7 @@ func (e *Consumer) queryRecoveryVal(event *models.AlertCurEvent) {
|
||||
var warnings promsdk.Warnings
|
||||
value, warnings, err := readerClient.Query(e.ctx.Ctx, promql, time.Now())
|
||||
if err != nil {
|
||||
logger.Errorf("alert_eval_%d datasource_%d promql:%s, error:%v", event.RuleId, event.DatasourceId, promql, err)
|
||||
logger.Errorf("rule_eval:%s promql:%s, error:%v", getKey(event), promql, err)
|
||||
event.AnnotationsJSON["recovery_promql_error"] = fmt.Sprintf("promql:%s error:%v", promql, err)
|
||||
|
||||
b, err := json.Marshal(event.AnnotationsJSON)
|
||||
@@ -184,12 +185,12 @@ func (e *Consumer) queryRecoveryVal(event *models.AlertCurEvent) {
|
||||
}
|
||||
|
||||
if len(warnings) > 0 {
|
||||
logger.Errorf("alert_eval_%d datasource_%d promql:%s, warnings:%v", event.RuleId, event.DatasourceId, promql, warnings)
|
||||
logger.Errorf("rule_eval:%s promql:%s, warnings:%v", getKey(event), promql, warnings)
|
||||
}
|
||||
|
||||
anomalyPoints := models.ConvertAnomalyPoints(value)
|
||||
if len(anomalyPoints) == 0 {
|
||||
logger.Warningf("alert_eval_%d datasource_%d promql:%s, result is empty", event.RuleId, event.DatasourceId, promql)
|
||||
logger.Warningf("rule_eval:%s promql:%s, result is empty", getKey(event), promql)
|
||||
event.AnnotationsJSON["recovery_promql_error"] = fmt.Sprintf("promql:%s error:%s", promql, "result is empty")
|
||||
} else {
|
||||
event.AnnotationsJSON["recovery_value"] = fmt.Sprintf("%v", anomalyPoints[0].Value)
|
||||
@@ -204,3 +205,6 @@ func (e *Consumer) queryRecoveryVal(event *models.AlertCurEvent) {
|
||||
}
|
||||
}
|
||||
|
||||
func getKey(event *models.AlertCurEvent) string {
|
||||
return common.RuleKey(event.DatasourceId, event.RuleId)
|
||||
}
|
||||
|
||||
@@ -16,7 +16,6 @@ import (
|
||||
"github.com/ccfos/nightingale/v6/alert/astats"
|
||||
"github.com/ccfos/nightingale/v6/alert/common"
|
||||
"github.com/ccfos/nightingale/v6/alert/pipeline"
|
||||
"github.com/ccfos/nightingale/v6/alert/pipeline/engine"
|
||||
"github.com/ccfos/nightingale/v6/alert/sender"
|
||||
"github.com/ccfos/nightingale/v6/memsto"
|
||||
"github.com/ccfos/nightingale/v6/models"
|
||||
@@ -171,7 +170,7 @@ func (e *Dispatch) HandleEventWithNotifyRule(eventOrigin *models.AlertCurEvent)
|
||||
// 深拷贝新的 event,避免并发修改 event 冲突
|
||||
eventCopy := eventOrigin.DeepCopy()
|
||||
|
||||
logger.Infof("notify rule ids: %v, event: %s", notifyRuleId, eventCopy.Hash)
|
||||
logger.Infof("notify rule ids: %v, event: %+v", notifyRuleId, eventCopy)
|
||||
notifyRule := e.notifyRuleCache.Get(notifyRuleId)
|
||||
if notifyRule == nil {
|
||||
continue
|
||||
@@ -185,7 +184,7 @@ func (e *Dispatch) HandleEventWithNotifyRule(eventOrigin *models.AlertCurEvent)
|
||||
|
||||
eventCopy = HandleEventPipeline(notifyRule.PipelineConfigs, eventOrigin, eventCopy, e.eventProcessorCache, e.ctx, notifyRuleId, "notify_rule")
|
||||
if ShouldSkipNotify(e.ctx, eventCopy, notifyRuleId) {
|
||||
logger.Infof("notify_id: %d, event:%s, should skip notify", notifyRuleId, eventCopy.Hash)
|
||||
logger.Infof("notify_id: %d, event:%+v, should skip notify", notifyRuleId, eventCopy)
|
||||
continue
|
||||
}
|
||||
|
||||
@@ -193,7 +192,7 @@ func (e *Dispatch) HandleEventWithNotifyRule(eventOrigin *models.AlertCurEvent)
|
||||
for i := range notifyRule.NotifyConfigs {
|
||||
err := NotifyRuleMatchCheck(¬ifyRule.NotifyConfigs[i], eventCopy)
|
||||
if err != nil {
|
||||
logger.Errorf("notify_id: %d, event:%s, channel_id:%d, template_id: %d, notify_config:%+v, err:%v", notifyRuleId, eventCopy.Hash, notifyRule.NotifyConfigs[i].ChannelID, notifyRule.NotifyConfigs[i].TemplateID, notifyRule.NotifyConfigs[i], err)
|
||||
logger.Errorf("notify_id: %d, event:%+v, channel_id:%d, template_id: %d, notify_config:%+v, err:%v", notifyRuleId, eventCopy, notifyRule.NotifyConfigs[i].ChannelID, notifyRule.NotifyConfigs[i].TemplateID, notifyRule.NotifyConfigs[i], err)
|
||||
continue
|
||||
}
|
||||
|
||||
@@ -201,12 +200,12 @@ func (e *Dispatch) HandleEventWithNotifyRule(eventOrigin *models.AlertCurEvent)
|
||||
messageTemplate := e.messageTemplateCache.Get(notifyRule.NotifyConfigs[i].TemplateID)
|
||||
if notifyChannel == nil {
|
||||
sender.NotifyRecord(e.ctx, []*models.AlertCurEvent{eventCopy}, notifyRuleId, fmt.Sprintf("notify_channel_id:%d", notifyRule.NotifyConfigs[i].ChannelID), "", "", errors.New("notify_channel not found"))
|
||||
logger.Warningf("notify_id: %d, event:%s, channel_id:%d, template_id: %d, notify_channel not found", notifyRuleId, eventCopy.Hash, notifyRule.NotifyConfigs[i].ChannelID, notifyRule.NotifyConfigs[i].TemplateID)
|
||||
logger.Warningf("notify_id: %d, event:%+v, channel_id:%d, template_id: %d, notify_channel not found", notifyRuleId, eventCopy, notifyRule.NotifyConfigs[i].ChannelID, notifyRule.NotifyConfigs[i].TemplateID)
|
||||
continue
|
||||
}
|
||||
|
||||
if notifyChannel.RequestType != "flashduty" && notifyChannel.RequestType != "pagerduty" && messageTemplate == nil {
|
||||
logger.Warningf("notify_id: %d, channel_name: %v, event:%s, template_id: %d, message_template not found", notifyRuleId, notifyChannel.Ident, eventCopy.Hash, notifyRule.NotifyConfigs[i].TemplateID)
|
||||
logger.Warningf("notify_id: %d, channel_name: %v, event:%+v, template_id: %d, message_template not found", notifyRuleId, notifyChannel.Ident, eventCopy, notifyRule.NotifyConfigs[i].TemplateID)
|
||||
sender.NotifyRecord(e.ctx, []*models.AlertCurEvent{eventCopy}, notifyRuleId, notifyChannel.Name, "", "", errors.New("message_template not found"))
|
||||
|
||||
continue
|
||||
@@ -232,8 +231,6 @@ func shouldSkipNotify(ctx *ctx.Context, event *models.AlertCurEvent, notifyRuleI
|
||||
}
|
||||
|
||||
func HandleEventPipeline(pipelineConfigs []models.PipelineConfig, eventOrigin, event *models.AlertCurEvent, eventProcessorCache *memsto.EventProcessorCacheType, ctx *ctx.Context, id int64, from string) *models.AlertCurEvent {
|
||||
workflowEngine := engine.NewWorkflowEngine(ctx)
|
||||
|
||||
for _, pipelineConfig := range pipelineConfigs {
|
||||
if !pipelineConfig.Enable {
|
||||
continue
|
||||
@@ -241,37 +238,32 @@ func HandleEventPipeline(pipelineConfigs []models.PipelineConfig, eventOrigin, e
|
||||
|
||||
eventPipeline := eventProcessorCache.Get(pipelineConfig.PipelineId)
|
||||
if eventPipeline == nil {
|
||||
logger.Warningf("processor_by_%s_id:%d pipeline_id:%d, event pipeline not found, event: %s", from, id, pipelineConfig.PipelineId, event.Hash)
|
||||
logger.Warningf("processor_by_%s_id:%d pipeline_id:%d, event pipeline not found, event: %+v", from, id, pipelineConfig.PipelineId, event)
|
||||
continue
|
||||
}
|
||||
|
||||
if !PipelineApplicable(eventPipeline, event) {
|
||||
logger.Debugf("processor_by_%s_id:%d pipeline_id:%d, event pipeline not applicable, event: %s", from, id, pipelineConfig.PipelineId, event.Hash)
|
||||
logger.Debugf("processor_by_%s_id:%d pipeline_id:%d, event pipeline not applicable, event: %+v", from, id, pipelineConfig.PipelineId, event)
|
||||
continue
|
||||
}
|
||||
|
||||
// 统一使用工作流引擎执行(兼容线性模式和工作流模式)
|
||||
triggerCtx := &models.WorkflowTriggerContext{
|
||||
Mode: models.TriggerModeEvent,
|
||||
TriggerBy: from + "_" + strconv.FormatInt(id, 10),
|
||||
}
|
||||
processors := eventProcessorCache.GetProcessorsById(pipelineConfig.PipelineId)
|
||||
for _, processor := range processors {
|
||||
var res string
|
||||
var err error
|
||||
logger.Infof("processor_by_%s_id:%d pipeline_id:%d, before processor:%+v, event: %+v", from, id, pipelineConfig.PipelineId, processor, event)
|
||||
event, res, err = processor.Process(ctx, event)
|
||||
if event == nil {
|
||||
logger.Infof("processor_by_%s_id:%d pipeline_id:%d, event dropped, after processor:%+v, event: %+v", from, id, pipelineConfig.PipelineId, processor, eventOrigin)
|
||||
|
||||
resultEvent, result, err := workflowEngine.Execute(eventPipeline, event, triggerCtx)
|
||||
if err != nil {
|
||||
logger.Errorf("processor_by_%s_id:%d pipeline_id:%d, pipeline execute error: %v", from, id, pipelineConfig.PipelineId, err)
|
||||
continue
|
||||
}
|
||||
|
||||
if resultEvent == nil {
|
||||
logger.Infof("processor_by_%s_id:%d pipeline_id:%d, event dropped, event: %s", from, id, pipelineConfig.PipelineId, eventOrigin.Hash)
|
||||
if from == "notify_rule" {
|
||||
sender.NotifyRecord(ctx, []*models.AlertCurEvent{eventOrigin}, id, "", "", result.Message, fmt.Errorf("processor_by_%s_id:%d pipeline_id:%d, drop by pipeline", from, id, pipelineConfig.PipelineId))
|
||||
if from == "notify_rule" {
|
||||
// alert_rule 获取不到 eventId 记录没有意义
|
||||
sender.NotifyRecord(ctx, []*models.AlertCurEvent{eventOrigin}, id, "", "", res, fmt.Errorf("processor_by_%s_id:%d pipeline_id:%d, drop by processor", from, id, pipelineConfig.PipelineId))
|
||||
}
|
||||
return nil
|
||||
}
|
||||
return nil
|
||||
logger.Infof("processor_by_%s_id:%d pipeline_id:%d, after processor:%+v, event: %+v, res:%v, err:%v", from, id, pipelineConfig.PipelineId, processor, event, res, err)
|
||||
}
|
||||
|
||||
event = resultEvent
|
||||
logger.Infof("processor_by_%s_id:%d pipeline_id:%d, pipeline executed, status:%s, message:%s", from, id, pipelineConfig.PipelineId, result.Status, result.Message)
|
||||
}
|
||||
|
||||
event.FE2DB()
|
||||
@@ -290,18 +282,15 @@ func PipelineApplicable(pipeline *models.EventPipeline, event *models.AlertCurEv
|
||||
|
||||
tagMatch := true
|
||||
if len(pipeline.LabelFilters) > 0 {
|
||||
// Deep copy to avoid concurrent map writes on cached objects
|
||||
labelFiltersCopy := make([]models.TagFilter, len(pipeline.LabelFilters))
|
||||
copy(labelFiltersCopy, pipeline.LabelFilters)
|
||||
for i := range labelFiltersCopy {
|
||||
if labelFiltersCopy[i].Func == "" {
|
||||
labelFiltersCopy[i].Func = labelFiltersCopy[i].Op
|
||||
for i := range pipeline.LabelFilters {
|
||||
if pipeline.LabelFilters[i].Func == "" {
|
||||
pipeline.LabelFilters[i].Func = pipeline.LabelFilters[i].Op
|
||||
}
|
||||
}
|
||||
|
||||
tagFilters, err := models.ParseTagFilter(labelFiltersCopy)
|
||||
tagFilters, err := models.ParseTagFilter(pipeline.LabelFilters)
|
||||
if err != nil {
|
||||
logger.Errorf("pipeline applicable failed to parse tag filter: %v event:%s pipeline:%+v", err, event.Hash, pipeline)
|
||||
logger.Errorf("pipeline applicable failed to parse tag filter: %v event:%+v pipeline:%+v", err, event, pipeline)
|
||||
return false
|
||||
}
|
||||
tagMatch = common.MatchTags(event.TagsMap, tagFilters)
|
||||
@@ -309,13 +298,9 @@ func PipelineApplicable(pipeline *models.EventPipeline, event *models.AlertCurEv
|
||||
|
||||
attributesMatch := true
|
||||
if len(pipeline.AttrFilters) > 0 {
|
||||
// Deep copy to avoid concurrent map writes on cached objects
|
||||
attrFiltersCopy := make([]models.TagFilter, len(pipeline.AttrFilters))
|
||||
copy(attrFiltersCopy, pipeline.AttrFilters)
|
||||
|
||||
tagFilters, err := models.ParseTagFilter(attrFiltersCopy)
|
||||
tagFilters, err := models.ParseTagFilter(pipeline.AttrFilters)
|
||||
if err != nil {
|
||||
logger.Errorf("pipeline applicable failed to parse tag filter: %v event:%s pipeline:%+v err:%v", tagFilters, event.Hash, pipeline, err)
|
||||
logger.Errorf("pipeline applicable failed to parse tag filter: %v event:%+v pipeline:%+v err:%v", tagFilters, event, pipeline, err)
|
||||
return false
|
||||
}
|
||||
|
||||
@@ -394,18 +379,15 @@ func NotifyRuleMatchCheck(notifyConfig *models.NotifyConfig, event *models.Alert
|
||||
|
||||
tagMatch := true
|
||||
if len(notifyConfig.LabelKeys) > 0 {
|
||||
// Deep copy to avoid concurrent map writes on cached objects
|
||||
labelKeysCopy := make([]models.TagFilter, len(notifyConfig.LabelKeys))
|
||||
copy(labelKeysCopy, notifyConfig.LabelKeys)
|
||||
for i := range labelKeysCopy {
|
||||
if labelKeysCopy[i].Func == "" {
|
||||
labelKeysCopy[i].Func = labelKeysCopy[i].Op
|
||||
for i := range notifyConfig.LabelKeys {
|
||||
if notifyConfig.LabelKeys[i].Func == "" {
|
||||
notifyConfig.LabelKeys[i].Func = notifyConfig.LabelKeys[i].Op
|
||||
}
|
||||
}
|
||||
|
||||
tagFilters, err := models.ParseTagFilter(labelKeysCopy)
|
||||
tagFilters, err := models.ParseTagFilter(notifyConfig.LabelKeys)
|
||||
if err != nil {
|
||||
logger.Errorf("notify send failed to parse tag filter: %v event:%s notify_config:%+v", err, event.Hash, notifyConfig)
|
||||
logger.Errorf("notify send failed to parse tag filter: %v event:%+v notify_config:%+v", err, event, notifyConfig)
|
||||
return fmt.Errorf("failed to parse tag filter: %v", err)
|
||||
}
|
||||
tagMatch = common.MatchTags(event.TagsMap, tagFilters)
|
||||
@@ -417,13 +399,9 @@ func NotifyRuleMatchCheck(notifyConfig *models.NotifyConfig, event *models.Alert
|
||||
|
||||
attributesMatch := true
|
||||
if len(notifyConfig.Attributes) > 0 {
|
||||
// Deep copy to avoid concurrent map writes on cached objects
|
||||
attributesCopy := make([]models.TagFilter, len(notifyConfig.Attributes))
|
||||
copy(attributesCopy, notifyConfig.Attributes)
|
||||
|
||||
tagFilters, err := models.ParseTagFilter(attributesCopy)
|
||||
tagFilters, err := models.ParseTagFilter(notifyConfig.Attributes)
|
||||
if err != nil {
|
||||
logger.Errorf("notify send failed to parse tag filter: %v event:%s notify_config:%+v err:%v", tagFilters, event.Hash, notifyConfig, err)
|
||||
logger.Errorf("notify send failed to parse tag filter: %v event:%+v notify_config:%+v err:%v", tagFilters, event, notifyConfig, err)
|
||||
return fmt.Errorf("failed to parse tag filter: %v", err)
|
||||
}
|
||||
|
||||
@@ -434,7 +412,7 @@ func NotifyRuleMatchCheck(notifyConfig *models.NotifyConfig, event *models.Alert
|
||||
return fmt.Errorf("event attributes not match attributes filter")
|
||||
}
|
||||
|
||||
logger.Infof("notify send timeMatch:%v severityMatch:%v tagMatch:%v attributesMatch:%v event:%s notify_config:%+v", timeMatch, severityMatch, tagMatch, attributesMatch, event.Hash, notifyConfig)
|
||||
logger.Infof("notify send timeMatch:%v severityMatch:%v tagMatch:%v attributesMatch:%v event:%+v notify_config:%+v", timeMatch, severityMatch, tagMatch, attributesMatch, event, notifyConfig)
|
||||
return nil
|
||||
}
|
||||
|
||||
@@ -546,8 +524,8 @@ func SendNotifyRuleMessage(ctx *ctx.Context, userCache *memsto.UserCacheType, us
|
||||
for i := range flashDutyChannelIDs {
|
||||
start := time.Now()
|
||||
respBody, err := notifyChannel.SendFlashDuty(events, flashDutyChannelIDs[i], notifyChannelCache.GetHttpClient(notifyChannel.ID))
|
||||
respBody = fmt.Sprintf("send_time: %s duration: %d ms %s", time.Now().Format("2006-01-02 15:04:05"), time.Since(start).Milliseconds(), respBody)
|
||||
logger.Infof("duty_sender notify_id: %d, channel_name: %v, event:%s, IntegrationUrl: %v dutychannel_id: %v, respBody: %v, err: %v", notifyRuleId, notifyChannel.Name, events[0].Hash, notifyChannel.RequestConfig.FlashDutyRequestConfig.IntegrationUrl, flashDutyChannelIDs[i], respBody, err)
|
||||
respBody = fmt.Sprintf("duration: %d ms %s", time.Since(start).Milliseconds(), respBody)
|
||||
logger.Infof("duty_sender notify_id: %d, channel_name: %v, event:%+v, IntegrationUrl: %v dutychannel_id: %v, respBody: %v, err: %v", notifyRuleId, notifyChannel.Name, events[0], notifyChannel.RequestConfig.FlashDutyRequestConfig.IntegrationUrl, flashDutyChannelIDs[i], respBody, err)
|
||||
sender.NotifyRecord(ctx, events, notifyRuleId, notifyChannel.Name, strconv.FormatInt(flashDutyChannelIDs[i], 10), respBody, err)
|
||||
}
|
||||
|
||||
@@ -555,8 +533,8 @@ func SendNotifyRuleMessage(ctx *ctx.Context, userCache *memsto.UserCacheType, us
|
||||
for _, routingKey := range pagerdutyRoutingKeys {
|
||||
start := time.Now()
|
||||
respBody, err := notifyChannel.SendPagerDuty(events, routingKey, siteInfo.SiteUrl, notifyChannelCache.GetHttpClient(notifyChannel.ID))
|
||||
respBody = fmt.Sprintf("send_time: %s duration: %d ms %s", time.Now().Format("2006-01-02 15:04:05"), time.Since(start).Milliseconds(), respBody)
|
||||
logger.Infof("pagerduty_sender notify_id: %d, channel_name: %v, event:%s, respBody: %v, err: %v", notifyRuleId, notifyChannel.Name, events[0].Hash, respBody, err)
|
||||
respBody = fmt.Sprintf("duration: %d ms %s", time.Since(start).Milliseconds(), respBody)
|
||||
logger.Infof("pagerduty_sender notify_id: %d, channel_name: %v, event:%+v, respBody: %v, err: %v", notifyRuleId, notifyChannel.Name, events[0], respBody, err)
|
||||
sender.NotifyRecord(ctx, events, notifyRuleId, notifyChannel.Name, "", respBody, err)
|
||||
}
|
||||
|
||||
@@ -586,11 +564,11 @@ func SendNotifyRuleMessage(ctx *ctx.Context, userCache *memsto.UserCacheType, us
|
||||
case "script":
|
||||
start := time.Now()
|
||||
target, res, err := notifyChannel.SendScript(events, tplContent, customParams, sendtos)
|
||||
res = fmt.Sprintf("send_time: %s duration: %d ms %s", time.Now().Format("2006-01-02 15:04:05"), time.Since(start).Milliseconds(), res)
|
||||
logger.Infof("script_sender notify_id: %d, channel_name: %v, event:%s, tplContent:%s, customParams:%v, target:%s, res:%s, err:%v", notifyRuleId, notifyChannel.Name, events[0].Hash, tplContent, customParams, target, res, err)
|
||||
res = fmt.Sprintf("duration: %d ms %s", time.Since(start).Milliseconds(), res)
|
||||
logger.Infof("script_sender notify_id: %d, channel_name: %v, event:%+v, tplContent:%s, customParams:%v, target:%s, res:%s, err:%v", notifyRuleId, notifyChannel.Name, events[0], tplContent, customParams, target, res, err)
|
||||
sender.NotifyRecord(ctx, events, notifyRuleId, notifyChannel.Name, target, res, err)
|
||||
default:
|
||||
logger.Warningf("notify_id: %d, channel_name: %v, event:%s send type not found", notifyRuleId, notifyChannel.Name, events[0].Hash)
|
||||
logger.Warningf("notify_id: %d, channel_name: %v, event:%+v send type not found", notifyRuleId, notifyChannel.Name, events[0])
|
||||
}
|
||||
}
|
||||
|
||||
@@ -734,7 +712,7 @@ func (e *Dispatch) Send(rule *models.AlertRule, event *models.AlertCurEvent, not
|
||||
event = msgCtx.Events[0]
|
||||
}
|
||||
|
||||
logger.Debugf("send to channel:%s event:%s users:%+v", channel, event.Hash, msgCtx.Users)
|
||||
logger.Debugf("send to channel:%s event:%+v users:%+v", channel, event, msgCtx.Users)
|
||||
s.Send(msgCtx)
|
||||
}
|
||||
}
|
||||
@@ -833,12 +811,12 @@ func (e *Dispatch) HandleIbex(rule *models.AlertRule, event *models.AlertCurEven
|
||||
|
||||
if len(t.Host) == 0 {
|
||||
sender.CallIbex(e.ctx, t.TplId, event.TargetIdent,
|
||||
e.taskTplsCache, e.targetCache, e.userCache, event, "")
|
||||
e.taskTplsCache, e.targetCache, e.userCache, event)
|
||||
continue
|
||||
}
|
||||
for _, host := range t.Host {
|
||||
sender.CallIbex(e.ctx, t.TplId, host,
|
||||
e.taskTplsCache, e.targetCache, e.userCache, event, "")
|
||||
e.taskTplsCache, e.targetCache, e.userCache, event)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -18,11 +18,11 @@ func LogEvent(event *models.AlertCurEvent, location string, err ...error) {
|
||||
}
|
||||
|
||||
logger.Infof(
|
||||
"alert_eval_%d event(%s %s) %s: sub_id:%d notify_rule_ids:%v cluster:%s %v%s@%d last_eval_time:%d %s",
|
||||
event.RuleId,
|
||||
"event(%s %s) %s: rule_id=%d sub_id:%d notify_rule_ids:%v cluster:%s %v%s@%d last_eval_time:%d %s",
|
||||
event.Hash,
|
||||
status,
|
||||
location,
|
||||
event.RuleId,
|
||||
event.SubRuleId,
|
||||
event.NotifyRuleIds,
|
||||
event.Cluster,
|
||||
|
||||
@@ -101,17 +101,17 @@ func (s *Scheduler) syncAlertRules() {
|
||||
}
|
||||
ds := s.datasourceCache.GetById(dsId)
|
||||
if ds == nil {
|
||||
logger.Debugf("alert_eval_%d datasource %d not found", rule.Id, dsId)
|
||||
logger.Debugf("datasource %d not found", dsId)
|
||||
continue
|
||||
}
|
||||
|
||||
if ds.PluginType != ruleType {
|
||||
logger.Debugf("alert_eval_%d datasource %d category is %s not %s", rule.Id, dsId, ds.PluginType, ruleType)
|
||||
logger.Debugf("datasource %d category is %s not %s", dsId, ds.PluginType, ruleType)
|
||||
continue
|
||||
}
|
||||
|
||||
if ds.Status != "enabled" {
|
||||
logger.Debugf("alert_eval_%d datasource %d status is %s", rule.Id, dsId, ds.Status)
|
||||
logger.Debugf("datasource %d status is %s", dsId, ds.Status)
|
||||
continue
|
||||
}
|
||||
processor := process.NewProcessor(s.aconf.Heartbeat.EngineName, rule, dsId, s.alertRuleCache, s.targetCache, s.targetsOfAlertRuleCache, s.busiGroupCache, s.alertMuteCache, s.datasourceCache, s.ctx, s.stats)
|
||||
@@ -134,12 +134,12 @@ func (s *Scheduler) syncAlertRules() {
|
||||
for _, dsId := range dsIds {
|
||||
ds := s.datasourceCache.GetById(dsId)
|
||||
if ds == nil {
|
||||
logger.Debugf("alert_eval_%d datasource %d not found", rule.Id, dsId)
|
||||
logger.Debugf("datasource %d not found", dsId)
|
||||
continue
|
||||
}
|
||||
|
||||
if ds.Status != "enabled" {
|
||||
logger.Debugf("alert_eval_%d datasource %d status is %s", rule.Id, dsId, ds.Status)
|
||||
logger.Debugf("datasource %d status is %s", dsId, ds.Status)
|
||||
continue
|
||||
}
|
||||
processor := process.NewProcessor(s.aconf.Heartbeat.EngineName, rule, dsId, s.alertRuleCache, s.targetCache, s.targetsOfAlertRuleCache, s.busiGroupCache, s.alertMuteCache, s.datasourceCache, s.ctx, s.stats)
|
||||
|
||||
@@ -11,7 +11,6 @@ import (
|
||||
"strconv"
|
||||
"strings"
|
||||
"sync"
|
||||
"text/template"
|
||||
"time"
|
||||
|
||||
"github.com/ccfos/nightingale/v6/alert/astats"
|
||||
@@ -25,7 +24,6 @@ import (
|
||||
"github.com/ccfos/nightingale/v6/pkg/poster"
|
||||
promsdk "github.com/ccfos/nightingale/v6/pkg/prom"
|
||||
promql2 "github.com/ccfos/nightingale/v6/pkg/promql"
|
||||
"github.com/ccfos/nightingale/v6/pkg/tplx"
|
||||
"github.com/ccfos/nightingale/v6/pkg/unit"
|
||||
"github.com/ccfos/nightingale/v6/prom"
|
||||
"github.com/prometheus/common/model"
|
||||
@@ -62,7 +60,6 @@ const (
|
||||
CHECK_QUERY = "check_query_config"
|
||||
GET_CLIENT = "get_client"
|
||||
QUERY_DATA = "query_data"
|
||||
EXEC_TEMPLATE = "exec_template"
|
||||
)
|
||||
|
||||
const (
|
||||
@@ -109,7 +106,7 @@ func NewAlertRuleWorker(rule *models.AlertRule, datasourceId int64, Processor *p
|
||||
})
|
||||
|
||||
if err != nil {
|
||||
logger.Errorf("alert_eval_%d datasource_%d add cron pattern error: %v", arw.Rule.Id, arw.DatasourceId, err)
|
||||
logger.Errorf("alert rule %s add cron pattern error: %v", arw.Key(), err)
|
||||
}
|
||||
|
||||
Processor.ScheduleEntry = arw.Scheduler.Entry(entryID)
|
||||
@@ -152,9 +149,9 @@ func (arw *AlertRuleWorker) Eval() {
|
||||
|
||||
defer func() {
|
||||
if len(message) == 0 {
|
||||
logger.Infof("alert_eval_%d datasource_%d finished, duration:%v", arw.Rule.Id, arw.DatasourceId, time.Since(begin))
|
||||
logger.Infof("rule_eval:%s finished, duration:%v", arw.Key(), time.Since(begin))
|
||||
} else {
|
||||
logger.Warningf("alert_eval_%d datasource_%d finished, duration:%v, message:%s", arw.Rule.Id, arw.DatasourceId, time.Since(begin), message)
|
||||
logger.Infof("rule_eval:%s finished, duration:%v, message:%s", arw.Key(), time.Since(begin), message)
|
||||
}
|
||||
}()
|
||||
|
||||
@@ -189,7 +186,8 @@ func (arw *AlertRuleWorker) Eval() {
|
||||
}
|
||||
|
||||
if err != nil {
|
||||
message = fmt.Sprintf("failed to get anomaly points: %v", err)
|
||||
logger.Errorf("rule_eval:%s get anomaly point err:%s", arw.Key(), err.Error())
|
||||
message = "failed to get anomaly points"
|
||||
return
|
||||
}
|
||||
|
||||
@@ -236,7 +234,7 @@ func (arw *AlertRuleWorker) Eval() {
|
||||
}
|
||||
|
||||
func (arw *AlertRuleWorker) Stop() {
|
||||
logger.Infof("alert_eval_%d datasource_%d stopped", arw.Rule.Id, arw.DatasourceId)
|
||||
logger.Infof("rule_eval:%s stopped", arw.Key())
|
||||
close(arw.Quit)
|
||||
c := arw.Scheduler.Stop()
|
||||
<-c.Done()
|
||||
@@ -252,7 +250,7 @@ func (arw *AlertRuleWorker) GetPromAnomalyPoint(ruleConfig string) ([]models.Ano
|
||||
|
||||
var rule *models.PromRuleConfig
|
||||
if err := json.Unmarshal([]byte(ruleConfig), &rule); err != nil {
|
||||
logger.Errorf("alert_eval_%d datasource_%d rule_config:%s, error:%v", arw.Rule.Id, arw.DatasourceId, ruleConfig, err)
|
||||
logger.Errorf("rule_eval:%s rule_config:%s, error:%v", arw.Key(), ruleConfig, err)
|
||||
arw.Processor.Stats.CounterRuleEvalErrorTotal.WithLabelValues(fmt.Sprintf("%v", arw.Processor.DatasourceId()), GET_RULE_CONFIG, arw.Processor.BusiGroupCache.GetNameByBusiGroupId(arw.Rule.GroupId), fmt.Sprintf("%v", arw.Rule.Id)).Inc()
|
||||
arw.Processor.Stats.GaugeQuerySeriesCount.WithLabelValues(
|
||||
fmt.Sprintf("%v", arw.Rule.Id),
|
||||
@@ -263,7 +261,7 @@ func (arw *AlertRuleWorker) GetPromAnomalyPoint(ruleConfig string) ([]models.Ano
|
||||
}
|
||||
|
||||
if rule == nil {
|
||||
logger.Errorf("alert_eval_%d datasource_%d rule_config:%s, error:rule is nil", arw.Rule.Id, arw.DatasourceId, ruleConfig)
|
||||
logger.Errorf("rule_eval:%s rule_config:%s, error:rule is nil", arw.Key(), ruleConfig)
|
||||
arw.Processor.Stats.CounterRuleEvalErrorTotal.WithLabelValues(fmt.Sprintf("%v", arw.Processor.DatasourceId()), GET_RULE_CONFIG, arw.Processor.BusiGroupCache.GetNameByBusiGroupId(arw.Rule.GroupId), fmt.Sprintf("%v", arw.Rule.Id)).Inc()
|
||||
arw.Processor.Stats.GaugeQuerySeriesCount.WithLabelValues(
|
||||
fmt.Sprintf("%v", arw.Rule.Id),
|
||||
@@ -278,7 +276,7 @@ func (arw *AlertRuleWorker) GetPromAnomalyPoint(ruleConfig string) ([]models.Ano
|
||||
readerClient := arw.PromClients.GetCli(arw.DatasourceId)
|
||||
|
||||
if readerClient == nil {
|
||||
logger.Warningf("alert_eval_%d datasource_%d error reader client is nil", arw.Rule.Id, arw.DatasourceId)
|
||||
logger.Warningf("rule_eval:%s error reader client is nil", arw.Key())
|
||||
arw.Processor.Stats.CounterRuleEvalErrorTotal.WithLabelValues(fmt.Sprintf("%v", arw.Processor.DatasourceId()), GET_CLIENT, arw.Processor.BusiGroupCache.GetNameByBusiGroupId(arw.Rule.GroupId), fmt.Sprintf("%v", arw.Rule.Id)).Inc()
|
||||
arw.Processor.Stats.GaugeQuerySeriesCount.WithLabelValues(
|
||||
fmt.Sprintf("%v", arw.Rule.Id),
|
||||
@@ -314,13 +312,13 @@ func (arw *AlertRuleWorker) GetPromAnomalyPoint(ruleConfig string) ([]models.Ano
|
||||
// 无变量
|
||||
promql := strings.TrimSpace(query.PromQl)
|
||||
if promql == "" {
|
||||
logger.Warningf("alert_eval_%d datasource_%d promql is blank", arw.Rule.Id, arw.DatasourceId)
|
||||
logger.Warningf("rule_eval:%s promql is blank", arw.Key())
|
||||
arw.Processor.Stats.CounterRuleEvalErrorTotal.WithLabelValues(fmt.Sprintf("%v", arw.Processor.DatasourceId()), CHECK_QUERY, arw.Processor.BusiGroupCache.GetNameByBusiGroupId(arw.Rule.GroupId), fmt.Sprintf("%v", arw.Rule.Id)).Inc()
|
||||
continue
|
||||
}
|
||||
|
||||
if arw.PromClients.IsNil(arw.DatasourceId) {
|
||||
logger.Warningf("alert_eval_%d datasource_%d error reader client is nil", arw.Rule.Id, arw.DatasourceId)
|
||||
logger.Warningf("rule_eval:%s error reader client is nil", arw.Key())
|
||||
arw.Processor.Stats.CounterRuleEvalErrorTotal.WithLabelValues(fmt.Sprintf("%v", arw.Processor.DatasourceId()), GET_CLIENT, arw.Processor.BusiGroupCache.GetNameByBusiGroupId(arw.Rule.GroupId), fmt.Sprintf("%v", arw.Rule.Id)).Inc()
|
||||
continue
|
||||
}
|
||||
@@ -329,7 +327,7 @@ func (arw *AlertRuleWorker) GetPromAnomalyPoint(ruleConfig string) ([]models.Ano
|
||||
arw.Processor.Stats.CounterQueryDataTotal.WithLabelValues(fmt.Sprintf("%d", arw.DatasourceId), fmt.Sprintf("%d", arw.Rule.Id)).Inc()
|
||||
value, warnings, err := readerClient.Query(context.Background(), promql, time.Now())
|
||||
if err != nil {
|
||||
logger.Errorf("alert_eval_%d datasource_%d promql:%s, error:%v", arw.Rule.Id, arw.DatasourceId, promql, err)
|
||||
logger.Errorf("rule_eval:%s promql:%s, error:%v", arw.Key(), promql, err)
|
||||
arw.Processor.Stats.CounterQueryDataErrorTotal.WithLabelValues(fmt.Sprintf("%d", arw.DatasourceId)).Inc()
|
||||
arw.Processor.Stats.CounterRuleEvalErrorTotal.WithLabelValues(fmt.Sprintf("%v", arw.Processor.DatasourceId()), QUERY_DATA, arw.Processor.BusiGroupCache.GetNameByBusiGroupId(arw.Rule.GroupId), fmt.Sprintf("%v", arw.Rule.Id)).Inc()
|
||||
arw.Processor.Stats.GaugeQuerySeriesCount.WithLabelValues(
|
||||
@@ -341,12 +339,12 @@ func (arw *AlertRuleWorker) GetPromAnomalyPoint(ruleConfig string) ([]models.Ano
|
||||
}
|
||||
|
||||
if len(warnings) > 0 {
|
||||
logger.Errorf("alert_eval_%d datasource_%d promql:%s, warnings:%v", arw.Rule.Id, arw.DatasourceId, promql, warnings)
|
||||
logger.Errorf("rule_eval:%s promql:%s, warnings:%v", arw.Key(), promql, warnings)
|
||||
arw.Processor.Stats.CounterQueryDataErrorTotal.WithLabelValues(fmt.Sprintf("%d", arw.DatasourceId)).Inc()
|
||||
arw.Processor.Stats.CounterRuleEvalErrorTotal.WithLabelValues(fmt.Sprintf("%v", arw.Processor.DatasourceId()), QUERY_DATA, arw.Processor.BusiGroupCache.GetNameByBusiGroupId(arw.Rule.GroupId), fmt.Sprintf("%v", arw.Rule.Id)).Inc()
|
||||
}
|
||||
|
||||
logger.Infof("alert_eval_%d datasource_%d query:%+v, value:%v", arw.Rule.Id, arw.DatasourceId, query, value)
|
||||
logger.Infof("rule_eval:%s query:%+v, value:%v", arw.Key(), query, value)
|
||||
points := models.ConvertAnomalyPoints(value)
|
||||
arw.Processor.Stats.GaugeQuerySeriesCount.WithLabelValues(
|
||||
fmt.Sprintf("%v", arw.Rule.Id),
|
||||
@@ -388,10 +386,21 @@ type sample struct {
|
||||
// 每个节点先查询无参数的 query, 即 mem_used_percent{} > curVal, 得到满足值变量的所有结果
|
||||
// 结果中有满足本节点参数变量的值,加入异常点列表
|
||||
// 参数变量的值不满足的组合,需要覆盖上层筛选中产生的异常点
|
||||
// VarFillingAfterQuery 先查询再过滤变量,效率较高,但无法处理有聚合函数导致标签丢失的情况
|
||||
//
|
||||
// 修复说明 (Issue #2971):
|
||||
// 原实现中使用参数变量组合作为 key 存储异常点,导致同一参数值下的多条时序数据互相覆盖。
|
||||
// 修复方案:
|
||||
// 1. 同一层内:使用完整的标签 hash 作为 key,避免不同时序数据覆盖
|
||||
// 2. 跨层级时:子层按参数变量组合前缀删除父层的所有相关告警,实现子筛选覆盖父筛选
|
||||
func (arw *AlertRuleWorker) VarFillingAfterQuery(query models.PromQuery, readerClient promsdk.API) []models.AnomalyPoint {
|
||||
varToLabel := ExtractVarMapping(query.PromQl)
|
||||
fullQuery := removeVal(query.PromQl)
|
||||
// 存储所有的异常点,key 为参数变量的组合,可以实现子筛选对上一层筛选的覆盖
|
||||
// 存储所有的异常点
|
||||
// key 格式: {参数变量组合}@@{标签hash}
|
||||
// 这样可以:
|
||||
// 1. 同层内不同时序数据有不同的 key(标签hash不同)
|
||||
// 2. 跨层时可以按参数变量组合前缀删除父层的告警
|
||||
anomalyPointsMap := make(map[string]models.AnomalyPoint)
|
||||
// 统一变量配置格式
|
||||
VarConfigForCalc := &models.ChildVarConfig{
|
||||
@@ -418,7 +427,16 @@ func (arw *AlertRuleWorker) VarFillingAfterQuery(query models.PromQuery, readerC
|
||||
})
|
||||
// 遍历变量配置链表
|
||||
curNode := VarConfigForCalc
|
||||
isFirstLayer := true
|
||||
for curNode != nil {
|
||||
// 当前层收集到的所有异常点,按参数组合分组
|
||||
// key: 参数变量组合, value: 该组合下的所有异常点及其完整key
|
||||
type pointWithKey struct {
|
||||
point models.AnomalyPoint
|
||||
fullKey string
|
||||
}
|
||||
currentLayerPointsByParam := make(map[string][]pointWithKey)
|
||||
|
||||
for _, param := range curNode.ParamVal {
|
||||
// curQuery 当前节点的无参数 query,用于时序库查询
|
||||
curQuery := fullQuery
|
||||
@@ -440,14 +458,14 @@ func (arw *AlertRuleWorker) VarFillingAfterQuery(query models.PromQuery, readerC
|
||||
arw.Processor.Stats.CounterQueryDataTotal.WithLabelValues(fmt.Sprintf("%d", arw.DatasourceId), fmt.Sprintf("%d", arw.Rule.Id)).Inc()
|
||||
value, _, err := readerClient.Query(context.Background(), curQuery, time.Now())
|
||||
if err != nil {
|
||||
logger.Errorf("alert_eval_%d datasource_%d promql:%s, error:%v", arw.Rule.Id, arw.DatasourceId, curQuery, err)
|
||||
logger.Errorf("rule_eval:%s, promql:%s, error:%v", arw.Key(), curQuery, err)
|
||||
continue
|
||||
}
|
||||
seqVals := getSamples(value)
|
||||
// 得到参数变量的所有组合
|
||||
paramPermutation, err := arw.getParamPermutation(param, ParamKeys, varToLabel, query.PromQl, readerClient)
|
||||
if err != nil {
|
||||
logger.Errorf("alert_eval_%d datasource_%d paramPermutation error:%v", arw.Rule.Id, arw.DatasourceId, err)
|
||||
logger.Errorf("rule_eval:%s, paramPermutation error:%v", arw.Key(), err)
|
||||
continue
|
||||
}
|
||||
// 判断哪些参数值符合条件
|
||||
@@ -460,8 +478,13 @@ func (arw *AlertRuleWorker) VarFillingAfterQuery(query models.PromQuery, readerC
|
||||
curRealQuery = fillVar(curRealQuery, paramKey, val)
|
||||
}
|
||||
|
||||
if _, ok := paramPermutation[strings.Join(cur, JoinMark)]; ok {
|
||||
anomalyPointsMap[strings.Join(cur, JoinMark)] = models.AnomalyPoint{
|
||||
paramKey := strings.Join(cur, JoinMark)
|
||||
if _, ok := paramPermutation[paramKey]; ok {
|
||||
// 计算标签 hash,确保不同的时序数据有不同的 key
|
||||
tagHash := hash.GetTagHash(seqVals[i].Metric)
|
||||
fullKey := paramKey + JoinMark + fmt.Sprintf("%d", tagHash)
|
||||
|
||||
point := models.AnomalyPoint{
|
||||
Key: seqVals[i].Metric.String(),
|
||||
Timestamp: seqVals[i].Timestamp.Unix(),
|
||||
Value: float64(seqVals[i].Value),
|
||||
@@ -469,17 +492,44 @@ func (arw *AlertRuleWorker) VarFillingAfterQuery(query models.PromQuery, readerC
|
||||
Severity: query.Severity,
|
||||
Query: curRealQuery,
|
||||
}
|
||||
// 生成异常点后,删除该参数组合
|
||||
delete(paramPermutation, strings.Join(cur, JoinMark))
|
||||
currentLayerPointsByParam[paramKey] = append(currentLayerPointsByParam[paramKey], pointWithKey{point: point, fullKey: fullKey})
|
||||
}
|
||||
}
|
||||
|
||||
// 剩余的参数组合为本层筛选不产生异常点的组合,需要覆盖上层筛选中产生的异常点
|
||||
for k, _ := range paramPermutation {
|
||||
delete(anomalyPointsMap, k)
|
||||
// 初始化空的参数组合(用于子层覆盖父层的场景)
|
||||
for paramKey := range paramPermutation {
|
||||
if _, exists := currentLayerPointsByParam[paramKey]; !exists {
|
||||
currentLayerPointsByParam[paramKey] = []pointWithKey{}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// 处理当前层的结果
|
||||
for paramKey, pointsWithKeys := range currentLayerPointsByParam {
|
||||
if !isFirstLayer {
|
||||
// 非首层(子层):先删除父层中该参数组合的所有告警
|
||||
// 这实现了 issue #2433 要求的子筛选覆盖父筛选功能
|
||||
keysToDelete := make([]string, 0)
|
||||
for k := range anomalyPointsMap {
|
||||
// key 格式: {参数组合}@@{标签hash}
|
||||
// 检查是否以当前参数组合开头(后面跟着 JoinMark)
|
||||
if strings.HasPrefix(k, paramKey+JoinMark) {
|
||||
keysToDelete = append(keysToDelete, k)
|
||||
}
|
||||
}
|
||||
for _, k := range keysToDelete {
|
||||
delete(anomalyPointsMap, k)
|
||||
}
|
||||
}
|
||||
|
||||
// 添加当前层的所有异常点
|
||||
for _, pwk := range pointsWithKeys {
|
||||
anomalyPointsMap[pwk.fullKey] = pwk.point
|
||||
}
|
||||
}
|
||||
|
||||
curNode = curNode.ChildVarConfigs
|
||||
isFirstLayer = false
|
||||
}
|
||||
|
||||
anomalyPoints := make([]models.AnomalyPoint, 0)
|
||||
@@ -580,14 +630,14 @@ func (arw *AlertRuleWorker) getParamPermutation(paramVal map[string]models.Param
|
||||
case "host":
|
||||
hostIdents, err := arw.getHostIdents(paramQuery)
|
||||
if err != nil {
|
||||
logger.Errorf("alert_eval_%d datasource_%d fail to get host idents, error:%v", arw.Rule.Id, arw.DatasourceId, err)
|
||||
logger.Errorf("rule_eval:%s, fail to get host idents, error:%v", arw.Key(), err)
|
||||
break
|
||||
}
|
||||
params = hostIdents
|
||||
case "device":
|
||||
deviceIdents, err := arw.getDeviceIdents(paramQuery)
|
||||
if err != nil {
|
||||
logger.Errorf("alert_eval_%d datasource_%d fail to get device idents, error:%v", arw.Rule.Id, arw.DatasourceId, err)
|
||||
logger.Errorf("rule_eval:%s, fail to get device idents, error:%v", arw.Key(), err)
|
||||
break
|
||||
}
|
||||
params = deviceIdents
|
||||
@@ -596,12 +646,12 @@ func (arw *AlertRuleWorker) getParamPermutation(paramVal map[string]models.Param
|
||||
var query []string
|
||||
err := json.Unmarshal(q, &query)
|
||||
if err != nil {
|
||||
logger.Errorf("alert_eval_%d datasource_%d query:%s fail to unmarshalling into string slice, error:%v", arw.Rule.Id, arw.DatasourceId, paramQuery.Query, err)
|
||||
logger.Errorf("query:%s fail to unmarshalling into string slice, error:%v", paramQuery.Query, err)
|
||||
}
|
||||
if len(query) == 0 {
|
||||
paramsKeyAllLabel, err := getParamKeyAllLabel(varToLabel[paramKey], originPromql, readerClient, arw.DatasourceId, arw.Rule.Id, arw.Processor.Stats)
|
||||
if err != nil {
|
||||
logger.Errorf("alert_eval_%d datasource_%d fail to getParamKeyAllLabel, error:%v query:%s", arw.Rule.Id, arw.DatasourceId, err, paramQuery.Query)
|
||||
logger.Errorf("rule_eval:%s, fail to getParamKeyAllLabel, error:%v query:%s", arw.Key(), err, paramQuery.Query)
|
||||
}
|
||||
params = paramsKeyAllLabel
|
||||
} else {
|
||||
@@ -615,7 +665,7 @@ func (arw *AlertRuleWorker) getParamPermutation(paramVal map[string]models.Param
|
||||
return nil, fmt.Errorf("param key: %s, params is empty", paramKey)
|
||||
}
|
||||
|
||||
logger.Infof("alert_eval_%d datasource_%d paramKey: %s, params: %v", arw.Rule.Id, arw.DatasourceId, paramKey, params)
|
||||
logger.Infof("rule_eval:%s paramKey: %s, params: %v", arw.Key(), paramKey, params)
|
||||
paramMap[paramKey] = params
|
||||
}
|
||||
|
||||
@@ -766,7 +816,7 @@ func (arw *AlertRuleWorker) GetHostAnomalyPoint(ruleConfig string) ([]models.Ano
|
||||
|
||||
var rule *models.HostRuleConfig
|
||||
if err := json.Unmarshal([]byte(ruleConfig), &rule); err != nil {
|
||||
logger.Errorf("alert_eval_%d datasource_%d rule_config:%s, error:%v", arw.Rule.Id, arw.DatasourceId, ruleConfig, err)
|
||||
logger.Errorf("rule_eval:%s rule_config:%s, error:%v", arw.Key(), ruleConfig, err)
|
||||
arw.Processor.Stats.CounterRuleEvalErrorTotal.WithLabelValues(fmt.Sprintf("%v", arw.Processor.DatasourceId()), GET_RULE_CONFIG, arw.Processor.BusiGroupCache.GetNameByBusiGroupId(arw.Rule.GroupId), fmt.Sprintf("%v", arw.Rule.Id)).Inc()
|
||||
arw.Processor.Stats.GaugeQuerySeriesCount.WithLabelValues(
|
||||
fmt.Sprintf("%v", arw.Rule.Id),
|
||||
@@ -777,7 +827,7 @@ func (arw *AlertRuleWorker) GetHostAnomalyPoint(ruleConfig string) ([]models.Ano
|
||||
}
|
||||
|
||||
if rule == nil {
|
||||
logger.Errorf("alert_eval_%d datasource_%d rule_config:%s, error:rule is nil", arw.Rule.Id, arw.DatasourceId, ruleConfig)
|
||||
logger.Errorf("rule_eval:%s rule_config:%s, error:rule is nil", arw.Key(), ruleConfig)
|
||||
arw.Processor.Stats.CounterRuleEvalErrorTotal.WithLabelValues(fmt.Sprintf("%v", arw.Processor.DatasourceId()), GET_RULE_CONFIG, arw.Processor.BusiGroupCache.GetNameByBusiGroupId(arw.Rule.GroupId), fmt.Sprintf("%v", arw.Rule.Id)).Inc()
|
||||
arw.Processor.Stats.GaugeQuerySeriesCount.WithLabelValues(
|
||||
fmt.Sprintf("%v", arw.Rule.Id),
|
||||
@@ -800,7 +850,7 @@ func (arw *AlertRuleWorker) GetHostAnomalyPoint(ruleConfig string) ([]models.Ano
|
||||
// 如果是中心节点, 将不再上报数据的主机 engineName 为空的机器,也加入到 targets 中
|
||||
missEngineIdents, exists = arw.Processor.TargetsOfAlertRuleCache.Get("", arw.Rule.Id)
|
||||
if !exists {
|
||||
logger.Debugf("alert_eval_%d datasource_%d targets not found engineName:%s", arw.Rule.Id, arw.DatasourceId, arw.Processor.EngineName)
|
||||
logger.Debugf("rule_eval:%s targets not found engineName:%s", arw.Key(), arw.Processor.EngineName)
|
||||
arw.Processor.Stats.CounterRuleEvalErrorTotal.WithLabelValues(fmt.Sprintf("%v", arw.Processor.DatasourceId()), QUERY_DATA, arw.Processor.BusiGroupCache.GetNameByBusiGroupId(arw.Rule.GroupId), fmt.Sprintf("%v", arw.Rule.Id)).Inc()
|
||||
}
|
||||
}
|
||||
@@ -808,7 +858,7 @@ func (arw *AlertRuleWorker) GetHostAnomalyPoint(ruleConfig string) ([]models.Ano
|
||||
|
||||
engineIdents, exists = arw.Processor.TargetsOfAlertRuleCache.Get(arw.Processor.EngineName, arw.Rule.Id)
|
||||
if !exists {
|
||||
logger.Warningf("alert_eval_%d datasource_%d targets not found engineName:%s", arw.Rule.Id, arw.DatasourceId, arw.Processor.EngineName)
|
||||
logger.Warningf("rule_eval:%s targets not found engineName:%s", arw.Key(), arw.Processor.EngineName)
|
||||
arw.Processor.Stats.CounterRuleEvalErrorTotal.WithLabelValues(fmt.Sprintf("%v", arw.Processor.DatasourceId()), QUERY_DATA, arw.Processor.BusiGroupCache.GetNameByBusiGroupId(arw.Rule.GroupId), fmt.Sprintf("%v", arw.Rule.Id)).Inc()
|
||||
}
|
||||
idents = append(idents, engineIdents...)
|
||||
@@ -835,7 +885,7 @@ func (arw *AlertRuleWorker) GetHostAnomalyPoint(ruleConfig string) ([]models.Ano
|
||||
"",
|
||||
).Set(float64(len(missTargets)))
|
||||
|
||||
logger.Debugf("alert_eval_%d datasource_%d missTargets:%v", arw.Rule.Id, arw.DatasourceId, missTargets)
|
||||
logger.Debugf("rule_eval:%s missTargets:%v", arw.Key(), missTargets)
|
||||
targets := arw.Processor.TargetCache.Gets(missTargets)
|
||||
for _, target := range targets {
|
||||
m := make(map[string]string)
|
||||
@@ -844,7 +894,7 @@ func (arw *AlertRuleWorker) GetHostAnomalyPoint(ruleConfig string) ([]models.Ano
|
||||
}
|
||||
m["ident"] = target.Ident
|
||||
|
||||
lst = append(lst, models.NewAnomalyPoint(trigger.Type, m, now, float64(now-target.BeatTime), trigger.Severity))
|
||||
lst = append(lst, models.NewAnomalyPoint(trigger.Type, m, now, float64(now-target.UpdateAt), trigger.Severity))
|
||||
}
|
||||
case "offset":
|
||||
idents, exists := arw.Processor.TargetsOfAlertRuleCache.Get(arw.Processor.EngineName, arw.Rule.Id)
|
||||
@@ -854,7 +904,7 @@ func (arw *AlertRuleWorker) GetHostAnomalyPoint(ruleConfig string) ([]models.Ano
|
||||
fmt.Sprintf("%v", arw.Processor.DatasourceId()),
|
||||
"",
|
||||
).Set(0)
|
||||
logger.Warningf("alert_eval_%d datasource_%d targets not found", arw.Rule.Id, arw.DatasourceId)
|
||||
logger.Warningf("rule_eval:%s targets not found", arw.Key())
|
||||
arw.Processor.Stats.CounterRuleEvalErrorTotal.WithLabelValues(fmt.Sprintf("%v", arw.Processor.DatasourceId()), QUERY_DATA, arw.Processor.BusiGroupCache.GetNameByBusiGroupId(arw.Rule.GroupId), fmt.Sprintf("%v", arw.Rule.Id)).Inc()
|
||||
continue
|
||||
}
|
||||
@@ -873,7 +923,7 @@ func (arw *AlertRuleWorker) GetHostAnomalyPoint(ruleConfig string) ([]models.Ano
|
||||
continue
|
||||
}
|
||||
if target, exists := targetMap[ident]; exists {
|
||||
if now-target.BeatTime > 120 {
|
||||
if now-target.UpdateAt > 120 {
|
||||
// means this target is not a active host, do not check offset
|
||||
continue
|
||||
}
|
||||
@@ -885,7 +935,7 @@ func (arw *AlertRuleWorker) GetHostAnomalyPoint(ruleConfig string) ([]models.Ano
|
||||
}
|
||||
}
|
||||
|
||||
logger.Debugf("alert_eval_%d datasource_%d offsetIdents:%v", arw.Rule.Id, arw.DatasourceId, offsetIdents)
|
||||
logger.Debugf("rule_eval:%s offsetIdents:%v", arw.Key(), offsetIdents)
|
||||
arw.Processor.Stats.GaugeQuerySeriesCount.WithLabelValues(
|
||||
fmt.Sprintf("%v", arw.Rule.Id),
|
||||
fmt.Sprintf("%v", arw.Processor.DatasourceId()),
|
||||
@@ -912,7 +962,7 @@ func (arw *AlertRuleWorker) GetHostAnomalyPoint(ruleConfig string) ([]models.Ano
|
||||
fmt.Sprintf("%v", arw.Processor.DatasourceId()),
|
||||
"",
|
||||
).Set(0)
|
||||
logger.Warningf("alert_eval_%d datasource_%d targets not found", arw.Rule.Id, arw.DatasourceId)
|
||||
logger.Warningf("rule_eval:%s targets not found", arw.Key())
|
||||
arw.Processor.Stats.CounterRuleEvalErrorTotal.WithLabelValues(fmt.Sprintf("%v", arw.Processor.DatasourceId()), QUERY_DATA, arw.Processor.BusiGroupCache.GetNameByBusiGroupId(arw.Rule.GroupId), fmt.Sprintf("%v", arw.Rule.Id)).Inc()
|
||||
continue
|
||||
}
|
||||
@@ -924,7 +974,7 @@ func (arw *AlertRuleWorker) GetHostAnomalyPoint(ruleConfig string) ([]models.Ano
|
||||
missTargets = append(missTargets, ident)
|
||||
}
|
||||
}
|
||||
logger.Debugf("alert_eval_%d datasource_%d missTargets:%v", arw.Rule.Id, arw.DatasourceId, missTargets)
|
||||
logger.Debugf("rule_eval:%s missTargets:%v", arw.Key(), missTargets)
|
||||
arw.Processor.Stats.GaugeQuerySeriesCount.WithLabelValues(
|
||||
fmt.Sprintf("%v", arw.Rule.Id),
|
||||
fmt.Sprintf("%v", arw.Processor.DatasourceId()),
|
||||
@@ -1120,7 +1170,7 @@ func ProcessJoins(ruleId int64, trigger models.Trigger, seriesTagIndexes map[str
|
||||
|
||||
// 有 join 条件,按条件依次合并
|
||||
if len(seriesTagIndexes) < len(trigger.Joins)+1 {
|
||||
logger.Errorf("alert_eval_%d queries' count: %d not match join condition's count: %d", ruleId, len(seriesTagIndexes), len(trigger.Joins))
|
||||
logger.Errorf("rule_eval rid:%d queries' count: %d not match join condition's count: %d", ruleId, len(seriesTagIndexes), len(trigger.Joins))
|
||||
return nil
|
||||
}
|
||||
|
||||
@@ -1156,7 +1206,7 @@ func ProcessJoins(ruleId int64, trigger models.Trigger, seriesTagIndexes map[str
|
||||
lastRehashed = exclude(curRehashed, lastRehashed)
|
||||
last = flatten(lastRehashed)
|
||||
default:
|
||||
logger.Warningf("alert_eval_%d join type:%s not support", ruleId, trigger.Joins[i].JoinType)
|
||||
logger.Warningf("rule_eval rid:%d join type:%s not support", ruleId, trigger.Joins[i].JoinType)
|
||||
}
|
||||
}
|
||||
return last
|
||||
@@ -1230,9 +1280,20 @@ func GetQueryRefAndUnit(query interface{}) (string, string, error) {
|
||||
// 每个节点先填充参数再进行查询, 即先得到完整的 promql avg(mem_used_percent{host="127.0.0.1"}) > 5
|
||||
// 再查询得到满足值变量的所有结果加入异常点列表
|
||||
// 参数变量的值不满足的组合,需要覆盖上层筛选中产生的异常点
|
||||
//
|
||||
// 修复说明 (Issue #2971):
|
||||
// 原实现中使用参数变量组合作为 key 存储异常点,导致同一参数值下的多条时序数据互相覆盖。
|
||||
// 修复方案:
|
||||
// 1. 同一层内:使用完整的标签 hash 作为 key,避免不同时序数据覆盖
|
||||
// 2. 跨层级时:子层按参数变量组合删除父层的所有相关告警,实现子筛选覆盖父筛选
|
||||
func (arw *AlertRuleWorker) VarFillingBeforeQuery(query models.PromQuery, readerClient promsdk.API) []models.AnomalyPoint {
|
||||
varToLabel := ExtractVarMapping(query.PromQl)
|
||||
// 存储异常点的 map,key 为参数变量的组合,可以实现子筛选对上一层筛选的覆盖
|
||||
|
||||
// 存储异常点的 map
|
||||
// key 格式: {参数变量组合}@@{标签hash}
|
||||
// 这样可以:
|
||||
// 1. 同层内不同时序数据有不同的 key(标签hash不同)
|
||||
// 2. 跨层时可以按参数变量组合前缀删除父层的告警
|
||||
anomalyPointsMap := sync.Map{}
|
||||
// 统一变量配置格式
|
||||
VarConfigForCalc := &models.ChildVarConfig{
|
||||
@@ -1257,11 +1318,19 @@ func (arw *AlertRuleWorker) VarFillingBeforeQuery(query models.PromQuery, reader
|
||||
sort.Slice(ParamKeys, func(i, j int) bool {
|
||||
return ParamKeys[i] < ParamKeys[j]
|
||||
})
|
||||
// 遍历变量配置链表
|
||||
|
||||
// 遍历变量配置链表(父层 -> 子层)
|
||||
curNode := VarConfigForCalc
|
||||
isFirstLayer := true
|
||||
for curNode != nil {
|
||||
// 当前层收集到的所有异常点,按参数组合分组
|
||||
// key: 参数变量组合, value: 该组合下的所有异常点
|
||||
currentLayerPointsByParam := make(map[string][]models.AnomalyPoint)
|
||||
var currentLayerMutex sync.Mutex
|
||||
|
||||
for _, param := range curNode.ParamVal {
|
||||
curPromql := query.PromQl
|
||||
|
||||
// 取出阈值变量
|
||||
valMap := make(map[string]string)
|
||||
for val, valQuery := range param {
|
||||
@@ -1276,12 +1345,12 @@ func (arw *AlertRuleWorker) VarFillingBeforeQuery(query models.PromQuery, reader
|
||||
// 得到参数变量的所有组合
|
||||
paramPermutation, err := arw.getParamPermutation(param, ParamKeys, varToLabel, query.PromQl, readerClient)
|
||||
if err != nil {
|
||||
logger.Errorf("alert_eval_%d datasource_%d paramPermutation error:%v", arw.Rule.Id, arw.DatasourceId, err)
|
||||
logger.Errorf("rule_eval:%s, paramPermutation error:%v", arw.Key(), err)
|
||||
continue
|
||||
}
|
||||
|
||||
keyToPromql := make(map[string]string)
|
||||
for paramPermutationKeys, _ := range paramPermutation {
|
||||
for paramPermutationKeys := range paramPermutation {
|
||||
realPromql := curPromql
|
||||
split := strings.Split(paramPermutationKeys, JoinMark)
|
||||
for j := range ParamKeys {
|
||||
@@ -1293,47 +1362,90 @@ func (arw *AlertRuleWorker) VarFillingBeforeQuery(query models.PromQuery, reader
|
||||
// 并发查询
|
||||
wg := sync.WaitGroup{}
|
||||
semaphore := make(chan struct{}, 200)
|
||||
for key, promql := range keyToPromql {
|
||||
for paramKey, promql := range keyToPromql {
|
||||
wg.Add(1)
|
||||
semaphore <- struct{}{}
|
||||
go func(key, promql string) {
|
||||
go func(paramKey, promql string) {
|
||||
defer func() {
|
||||
<-semaphore
|
||||
wg.Done()
|
||||
}()
|
||||
arw.Processor.Stats.CounterQueryDataTotal.WithLabelValues(fmt.Sprintf("%d", arw.DatasourceId), fmt.Sprintf("%d", arw.Rule.Id)).Inc()
|
||||
|
||||
arw.Processor.Stats.CounterQueryDataTotal.WithLabelValues(
|
||||
fmt.Sprintf("%d", arw.DatasourceId),
|
||||
fmt.Sprintf("%d", arw.Rule.Id),
|
||||
).Inc()
|
||||
|
||||
value, _, err := readerClient.Query(context.Background(), promql, time.Now())
|
||||
if err != nil {
|
||||
logger.Errorf("alert_eval_%d datasource_%d promql:%s, error:%v", arw.Rule.Id, arw.DatasourceId, promql, err)
|
||||
logger.Errorf("rule_eval:%s, promql:%s, error:%v", arw.Key(), promql, err)
|
||||
return
|
||||
}
|
||||
logger.Infof("alert_eval_%d datasource_%d promql:%s, value:%+v", arw.Rule.Id, arw.DatasourceId, promql, value)
|
||||
logger.Infof("rule_eval:%s, promql:%s, value:%+v", arw.Key(), promql, value)
|
||||
|
||||
points := models.ConvertAnomalyPoints(value)
|
||||
if len(points) == 0 {
|
||||
anomalyPointsMap.Delete(key)
|
||||
// 查询无结果时,标记该参数组合需要清除(用于子层覆盖父层)
|
||||
currentLayerMutex.Lock()
|
||||
if _, exists := currentLayerPointsByParam[paramKey]; !exists {
|
||||
currentLayerPointsByParam[paramKey] = []models.AnomalyPoint{}
|
||||
}
|
||||
currentLayerMutex.Unlock()
|
||||
return
|
||||
}
|
||||
|
||||
for i := 0; i < len(points); i++ {
|
||||
points[i].Severity = query.Severity
|
||||
points[i].Query = promql
|
||||
points[i].ValuesUnit = map[string]unit.FormattedValue{
|
||||
"v": unit.ValueFormatter(query.Unit, 2, points[i].Value),
|
||||
}
|
||||
// 每个异常点都需要生成 key,子筛选使用 key 覆盖上层筛选,解决 issue https://github.com/ccfos/nightingale/issues/2433 提的问题
|
||||
var cur []string
|
||||
for _, paramKey := range ParamKeys {
|
||||
val := string(points[i].Labels[model.LabelName(varToLabel[paramKey])])
|
||||
cur = append(cur, val)
|
||||
}
|
||||
anomalyPointsMap.Store(strings.Join(cur, JoinMark), points[i])
|
||||
}
|
||||
}(key, promql)
|
||||
|
||||
// 收集当前层的异常点
|
||||
currentLayerMutex.Lock()
|
||||
currentLayerPointsByParam[paramKey] = append(currentLayerPointsByParam[paramKey], points...)
|
||||
currentLayerMutex.Unlock()
|
||||
}(paramKey, promql)
|
||||
}
|
||||
wg.Wait()
|
||||
}
|
||||
|
||||
// 处理当前层的结果
|
||||
for paramKey, points := range currentLayerPointsByParam {
|
||||
if !isFirstLayer {
|
||||
// 非首层(子层):先删除父层中该参数组合的所有告警
|
||||
// 这实现了 issue #2433 要求的子筛选覆盖父筛选功能
|
||||
keysToDelete := make([]string, 0)
|
||||
anomalyPointsMap.Range(func(k, v any) bool {
|
||||
keyStr := k.(string)
|
||||
// key 格式: {参数组合}@@{标签hash}
|
||||
// 检查是否以当前参数组合开头
|
||||
if strings.HasPrefix(keyStr, paramKey+JoinMark) {
|
||||
keysToDelete = append(keysToDelete, keyStr)
|
||||
}
|
||||
return true
|
||||
})
|
||||
for _, k := range keysToDelete {
|
||||
anomalyPointsMap.Delete(k)
|
||||
}
|
||||
}
|
||||
|
||||
// 添加当前层的所有异常点
|
||||
// 使用 参数组合 + 标签hash 作为 key,保证同一参数值下的不同时序数据不会互相覆盖
|
||||
for _, point := range points {
|
||||
// 计算标签 hash,确保不同的时序数据有不同的 key
|
||||
tagHash := hash.GetTagHash(point.Labels)
|
||||
fullKey := paramKey + JoinMark + fmt.Sprintf("%d", tagHash)
|
||||
anomalyPointsMap.Store(fullKey, point)
|
||||
}
|
||||
}
|
||||
|
||||
curNode = curNode.ChildVarConfigs
|
||||
isFirstLayer = false
|
||||
}
|
||||
|
||||
// 收集所有异常点
|
||||
anomalyPoints := make([]models.AnomalyPoint, 0)
|
||||
anomalyPointsMap.Range(func(key, value any) bool {
|
||||
if point, ok := value.(models.AnomalyPoint); ok {
|
||||
@@ -1341,6 +1453,7 @@ func (arw *AlertRuleWorker) VarFillingBeforeQuery(query models.PromQuery, reader
|
||||
}
|
||||
return true
|
||||
})
|
||||
|
||||
return anomalyPoints
|
||||
}
|
||||
|
||||
@@ -1446,7 +1559,7 @@ func (arw *AlertRuleWorker) GetAnomalyPoint(rule *models.AlertRule, dsId int64)
|
||||
recoverPoints := []models.AnomalyPoint{}
|
||||
ruleConfig := strings.TrimSpace(rule.RuleConfig)
|
||||
if ruleConfig == "" {
|
||||
logger.Warningf("alert_eval_%d datasource_%d ruleConfig is blank", rule.Id, dsId)
|
||||
logger.Warningf("rule_eval:%d ruleConfig is blank", rule.Id)
|
||||
arw.Processor.Stats.CounterRuleEvalErrorTotal.WithLabelValues(fmt.Sprintf("%v", arw.Processor.DatasourceId()), GET_RULE_CONFIG, arw.Processor.BusiGroupCache.GetNameByBusiGroupId(arw.Rule.GroupId), fmt.Sprintf("%v", arw.Rule.Id)).Inc()
|
||||
arw.Processor.Stats.GaugeQuerySeriesCount.WithLabelValues(
|
||||
fmt.Sprintf("%v", arw.Rule.Id),
|
||||
@@ -1454,15 +1567,15 @@ func (arw *AlertRuleWorker) GetAnomalyPoint(rule *models.AlertRule, dsId int64)
|
||||
"",
|
||||
).Set(0)
|
||||
|
||||
return points, recoverPoints, fmt.Errorf("alert_eval_%d datasource_%d ruleConfig is blank", rule.Id, dsId)
|
||||
return points, recoverPoints, fmt.Errorf("rule_eval:%d ruleConfig is blank", rule.Id)
|
||||
}
|
||||
|
||||
var ruleQuery models.RuleQuery
|
||||
err := json.Unmarshal([]byte(ruleConfig), &ruleQuery)
|
||||
if err != nil {
|
||||
logger.Warningf("alert_eval_%d datasource_%d promql parse error:%s", rule.Id, dsId, err.Error())
|
||||
logger.Warningf("rule_eval:%d promql parse error:%s", rule.Id, err.Error())
|
||||
arw.Processor.Stats.CounterRuleEvalErrorTotal.WithLabelValues(fmt.Sprintf("%v", arw.Processor.DatasourceId()), GET_RULE_CONFIG, arw.Processor.BusiGroupCache.GetNameByBusiGroupId(arw.Rule.GroupId), fmt.Sprintf("%v", arw.Rule.Id)).Inc()
|
||||
return points, recoverPoints, fmt.Errorf("alert_eval_%d datasource_%d promql parse error:%s", rule.Id, dsId, err.Error())
|
||||
return points, recoverPoints, fmt.Errorf("rule_eval:%d promql parse error:%s", rule.Id, err.Error())
|
||||
}
|
||||
|
||||
arw.Inhibit = ruleQuery.Inhibit
|
||||
@@ -1474,7 +1587,7 @@ func (arw *AlertRuleWorker) GetAnomalyPoint(rule *models.AlertRule, dsId int64)
|
||||
|
||||
plug, exists := dscache.DsCache.Get(rule.Cate, dsId)
|
||||
if !exists {
|
||||
logger.Warningf("alert_eval_%d datasource_%d not exists", rule.Id, dsId)
|
||||
logger.Warningf("rule_eval rid:%d datasource:%d not exists", rule.Id, dsId)
|
||||
arw.Processor.Stats.CounterRuleEvalErrorTotal.WithLabelValues(fmt.Sprintf("%v", arw.Processor.DatasourceId()), GET_CLIENT, arw.Processor.BusiGroupCache.GetNameByBusiGroupId(arw.Rule.GroupId), fmt.Sprintf("%v", arw.Rule.Id)).Inc()
|
||||
|
||||
arw.Processor.Stats.GaugeQuerySeriesCount.WithLabelValues(
|
||||
@@ -1483,24 +1596,14 @@ func (arw *AlertRuleWorker) GetAnomalyPoint(rule *models.AlertRule, dsId int64)
|
||||
fmt.Sprintf("%v", i),
|
||||
).Set(-2)
|
||||
|
||||
return points, recoverPoints, fmt.Errorf("alert_eval_%d datasource_%d not exists", rule.Id, dsId)
|
||||
}
|
||||
|
||||
if err = ExecuteQueryTemplate(rule.Cate, query, nil); err != nil {
|
||||
logger.Warningf("alert_eval_%d datasource_%d execute query template error: %v", rule.Id, dsId, err)
|
||||
arw.Processor.Stats.CounterRuleEvalErrorTotal.WithLabelValues(fmt.Sprintf("%v", arw.Processor.DatasourceId()), EXEC_TEMPLATE, arw.Processor.BusiGroupCache.GetNameByBusiGroupId(arw.Rule.GroupId), fmt.Sprintf("%v", arw.Rule.Id)).Inc()
|
||||
arw.Processor.Stats.GaugeQuerySeriesCount.WithLabelValues(
|
||||
fmt.Sprintf("%v", arw.Rule.Id),
|
||||
fmt.Sprintf("%v", arw.Processor.DatasourceId()),
|
||||
fmt.Sprintf("%v", i),
|
||||
).Set(-3)
|
||||
return points, recoverPoints, fmt.Errorf("rule_eval:%d datasource:%d not exists", rule.Id, dsId)
|
||||
}
|
||||
|
||||
ctx := context.WithValue(context.Background(), "delay", int64(rule.Delay))
|
||||
series, err := plug.QueryData(ctx, query)
|
||||
arw.Processor.Stats.CounterQueryDataTotal.WithLabelValues(fmt.Sprintf("%d", arw.DatasourceId), fmt.Sprintf("%d", rule.Id)).Inc()
|
||||
if err != nil {
|
||||
logger.Warningf("alert_eval_%d datasource_%d query data error: %v", rule.Id, dsId, err)
|
||||
logger.Warningf("rule_eval rid:%d query data error: %v", rule.Id, err)
|
||||
arw.Processor.Stats.CounterRuleEvalErrorTotal.WithLabelValues(fmt.Sprintf("%v", arw.Processor.DatasourceId()), GET_CLIENT, arw.Processor.BusiGroupCache.GetNameByBusiGroupId(arw.Rule.GroupId), fmt.Sprintf("%v", arw.Rule.Id)).Inc()
|
||||
arw.Processor.Stats.GaugeQuerySeriesCount.WithLabelValues(
|
||||
fmt.Sprintf("%v", arw.Rule.Id),
|
||||
@@ -1508,7 +1611,7 @@ func (arw *AlertRuleWorker) GetAnomalyPoint(rule *models.AlertRule, dsId int64)
|
||||
fmt.Sprintf("%v", i),
|
||||
).Set(-1)
|
||||
|
||||
return points, recoverPoints, fmt.Errorf("alert_eval_%d datasource_%d query data error: %v", rule.Id, dsId, err)
|
||||
return points, recoverPoints, fmt.Errorf("rule_eval:%d query data error: %v", rule.Id, err)
|
||||
}
|
||||
|
||||
arw.Processor.Stats.GaugeQuerySeriesCount.WithLabelValues(
|
||||
@@ -1518,7 +1621,7 @@ func (arw *AlertRuleWorker) GetAnomalyPoint(rule *models.AlertRule, dsId int64)
|
||||
).Set(float64(len(series)))
|
||||
|
||||
// 此条日志很重要,是告警判断的现场值
|
||||
logger.Infof("alert_eval_%d datasource_%d req:%+v resp:%v", rule.Id, dsId, query, series)
|
||||
logger.Infof("rule_eval rid:%d req:%+v resp:%v", rule.Id, query, series)
|
||||
for i := 0; i < len(series); i++ {
|
||||
seriesHash := hash.GetHash(series[i].Metric, series[i].Ref)
|
||||
tagHash := hash.GetTagHash(series[i].Metric)
|
||||
@@ -1532,7 +1635,7 @@ func (arw *AlertRuleWorker) GetAnomalyPoint(rule *models.AlertRule, dsId int64)
|
||||
}
|
||||
ref, err := GetQueryRef(query)
|
||||
if err != nil {
|
||||
logger.Warningf("alert_eval_%d datasource_%d query:%+v get ref error:%s", rule.Id, dsId, query, err.Error())
|
||||
logger.Warningf("rule_eval rid:%d query:%+v get ref error:%s", rule.Id, query, err.Error())
|
||||
continue
|
||||
}
|
||||
seriesTagIndexes[ref] = seriesTagIndex
|
||||
@@ -1542,7 +1645,7 @@ func (arw *AlertRuleWorker) GetAnomalyPoint(rule *models.AlertRule, dsId int64)
|
||||
for _, query := range ruleQuery.Queries {
|
||||
ref, unit, err := GetQueryRefAndUnit(query)
|
||||
if err != nil {
|
||||
logger.Warningf("alert_eval_%d datasource_%d query:%+v get ref and unit error:%s", rule.Id, dsId, query, err.Error())
|
||||
logger.Warningf("rule_eval rid:%d query:%+v get ref and unit error:%s", rule.Id, query, err.Error())
|
||||
continue
|
||||
}
|
||||
unitMap[ref] = unit
|
||||
@@ -1565,12 +1668,12 @@ func (arw *AlertRuleWorker) GetAnomalyPoint(rule *models.AlertRule, dsId int64)
|
||||
for _, seriesHash := range seriesHash {
|
||||
series, exists := seriesStore[seriesHash]
|
||||
if !exists {
|
||||
logger.Warningf("alert_eval_%d datasource_%d series:%+v not found", rule.Id, dsId, series)
|
||||
logger.Warningf("rule_eval rid:%d series:%+v not found", rule.Id, series)
|
||||
continue
|
||||
}
|
||||
t, v, exists := series.Last()
|
||||
if !exists {
|
||||
logger.Warningf("alert_eval_%d datasource_%d series:%+v value not found", rule.Id, dsId, series)
|
||||
logger.Warningf("rule_eval rid:%d series:%+v value not found", rule.Id, series)
|
||||
continue
|
||||
}
|
||||
|
||||
@@ -1601,12 +1704,12 @@ func (arw *AlertRuleWorker) GetAnomalyPoint(rule *models.AlertRule, dsId int64)
|
||||
ts = int64(t)
|
||||
sample = series
|
||||
value = v
|
||||
logger.Infof("alert_eval_%d datasource_%d origin series labels:%+v", rule.Id, dsId, series.Metric)
|
||||
logger.Infof("rule_eval rid:%d origin series labels:%+v", rule.Id, series.Metric)
|
||||
}
|
||||
|
||||
isTriggered := parser.CalcWithRid(trigger.Exp, m, rule.Id)
|
||||
// 此条日志很重要,是告警判断的现场值
|
||||
logger.Infof("alert_eval_%d datasource_%d trigger:%+v exp:%s res:%v m:%v", rule.Id, dsId, trigger, trigger.Exp, isTriggered, m)
|
||||
logger.Infof("rule_eval rid:%d trigger:%+v exp:%s res:%v m:%v", rule.Id, trigger, trigger.Exp, isTriggered, m)
|
||||
|
||||
var values string
|
||||
for k, v := range m {
|
||||
@@ -1614,15 +1717,11 @@ func (arw *AlertRuleWorker) GetAnomalyPoint(rule *models.AlertRule, dsId int64)
|
||||
continue
|
||||
}
|
||||
|
||||
if u, exists := valuesUnitMap[k]; exists { // 配置了单位,优先用配置了单位的值
|
||||
values += fmt.Sprintf("%s:%s ", k, u.Text)
|
||||
} else {
|
||||
switch v.(type) {
|
||||
case float64:
|
||||
values += fmt.Sprintf("%s:%.3f ", k, v)
|
||||
case string:
|
||||
values += fmt.Sprintf("%s:%s ", k, v)
|
||||
}
|
||||
switch v.(type) {
|
||||
case float64:
|
||||
values += fmt.Sprintf("%s:%.3f ", k, v)
|
||||
case string:
|
||||
values += fmt.Sprintf("%s:%s ", k, v)
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1679,7 +1778,7 @@ func (arw *AlertRuleWorker) GetAnomalyPoint(rule *models.AlertRule, dsId int64)
|
||||
|
||||
// 检查是否超过 resolve_after 时间
|
||||
if now-int64(lastTs) > int64(ruleQuery.NodataTrigger.ResolveAfter) {
|
||||
logger.Infof("alert_eval_%d datasource_%d series:%+v resolve after %d seconds now:%d lastTs:%d", rule.Id, dsId, lastSeries, ruleQuery.NodataTrigger.ResolveAfter, now, int64(lastTs))
|
||||
logger.Infof("rule_eval rid:%d series:%+v resolve after %d seconds now:%d lastTs:%d", rule.Id, lastSeries, ruleQuery.NodataTrigger.ResolveAfter, now, int64(lastTs))
|
||||
delete(arw.LastSeriesStore, hash)
|
||||
continue
|
||||
}
|
||||
@@ -1700,7 +1799,7 @@ func (arw *AlertRuleWorker) GetAnomalyPoint(rule *models.AlertRule, dsId int64)
|
||||
TriggerType: models.TriggerTypeNodata,
|
||||
}
|
||||
points = append(points, point)
|
||||
logger.Infof("alert_eval_%d datasource_%d nodata point:%+v", rule.Id, dsId, point)
|
||||
logger.Infof("rule_eval rid:%d nodata point:%+v", rule.Id, point)
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1715,61 +1814,3 @@ func (arw *AlertRuleWorker) GetAnomalyPoint(rule *models.AlertRule, dsId int64)
|
||||
|
||||
return points, recoverPoints, nil
|
||||
}
|
||||
|
||||
// ExecuteQueryTemplate 根据数据源类型对 Query 进行模板渲染处理
|
||||
// cate: 数据源类别,如 "mysql", "pgsql" 等
|
||||
// query: 查询对象,如果是数据库类型的数据源,会处理其中的 sql 字段
|
||||
// data: 模板数据对象,如果为 nil 则使用空结构体(不支持变量渲染),如果不为 nil 则使用传入的数据(支持变量渲染)
|
||||
func ExecuteQueryTemplate(cate string, query interface{}, data interface{}) error {
|
||||
// 检查 query 是否是 map,且包含 sql 字段
|
||||
queryMap, ok := query.(map[string]interface{})
|
||||
if !ok {
|
||||
return nil
|
||||
}
|
||||
|
||||
sqlVal, exists := queryMap["sql"]
|
||||
if !exists {
|
||||
return nil
|
||||
}
|
||||
|
||||
sqlStr, ok := sqlVal.(string)
|
||||
if !ok {
|
||||
return nil
|
||||
}
|
||||
|
||||
// 调用 ExecuteSqlTemplate 处理 sql 字段
|
||||
processedSQL, err := ExecuteSqlTemplate(sqlStr, data)
|
||||
if err != nil {
|
||||
return fmt.Errorf("execute sql template error: %w", err)
|
||||
}
|
||||
|
||||
// 更新 query 中的 sql 字段
|
||||
queryMap["sql"] = processedSQL
|
||||
return nil
|
||||
}
|
||||
|
||||
// ExecuteSqlTemplate 执行 query 中的 golang 模板语法函数
|
||||
// query: 要处理的 query 字符串
|
||||
// data: 模板数据对象,如果为 nil 则使用空结构体(不支持变量渲染),如果不为 nil 则使用传入的数据(支持变量渲染)
|
||||
func ExecuteSqlTemplate(query string, data interface{}) (string, error) {
|
||||
if !strings.Contains(query, "{{") || !strings.Contains(query, "}}") {
|
||||
return query, nil
|
||||
}
|
||||
|
||||
tmpl, err := template.New("query").Funcs(tplx.TemplateFuncMap).Parse(query)
|
||||
if err != nil {
|
||||
return "", fmt.Errorf("query tmpl parse error: %w", err)
|
||||
}
|
||||
|
||||
var buf strings.Builder
|
||||
templateData := data
|
||||
if templateData == nil {
|
||||
templateData = struct{}{}
|
||||
}
|
||||
|
||||
if err := tmpl.Execute(&buf, templateData); err != nil {
|
||||
return "", fmt.Errorf("query tmpl execute error: %w", err)
|
||||
}
|
||||
|
||||
return buf.String(), nil
|
||||
}
|
||||
|
||||
@@ -41,28 +41,8 @@ func IsMuted(rule *models.AlertRule, event *models.AlertCurEvent, targetCache *m
|
||||
|
||||
// TimeSpanMuteStrategy 根据规则配置的告警生效时间段过滤,如果产生的告警不在规则配置的告警生效时间段内,则不告警,即被mute
|
||||
// 时间范围,左闭右开,默认范围:00:00-24:00
|
||||
// 如果规则配置了时区,则在该时区下进行时间判断;如果时区为空,则使用系统时区
|
||||
func TimeSpanMuteStrategy(rule *models.AlertRule, event *models.AlertCurEvent) bool {
|
||||
// 确定使用的时区
|
||||
var targetLoc *time.Location
|
||||
var err error
|
||||
|
||||
timezone := rule.TimeZone
|
||||
if timezone == "" {
|
||||
// 如果时区为空,使用系统时区(保持原有逻辑)
|
||||
targetLoc = time.Local
|
||||
} else {
|
||||
// 加载规则配置的时区
|
||||
targetLoc, err = time.LoadLocation(timezone)
|
||||
if err != nil {
|
||||
// 如果时区加载失败,记录错误并使用系统时区
|
||||
logger.Warningf("Failed to load timezone %s for rule %d, using system timezone: %v", timezone, rule.Id, err)
|
||||
targetLoc = time.Local
|
||||
}
|
||||
}
|
||||
|
||||
// 将触发时间转换到目标时区
|
||||
tm := time.Unix(event.TriggerTime, 0).In(targetLoc)
|
||||
tm := time.Unix(event.TriggerTime, 0)
|
||||
triggerTime := tm.Format("15:04")
|
||||
triggerWeek := strconv.Itoa(int(tm.Weekday()))
|
||||
|
||||
@@ -122,7 +102,7 @@ func IdentNotExistsMuteStrategy(rule *models.AlertRule, event *models.AlertCurEv
|
||||
// 如果是target_up的告警,且ident已经不存在了,直接过滤掉
|
||||
// 这里的判断有点太粗暴了,但是目前没有更好的办法
|
||||
if !exists && strings.Contains(rule.PromQl, "target_up") {
|
||||
logger.Debugf("alert_eval_%d [IdentNotExistsMuteStrategy] mute: cluster:%s ident:%s", rule.Id, event.Cluster, ident)
|
||||
logger.Debugf("[%s] mute: rule_eval:%d cluster:%s ident:%s", "IdentNotExistsMuteStrategy", rule.Id, event.Cluster, ident)
|
||||
return true
|
||||
}
|
||||
return false
|
||||
@@ -144,7 +124,7 @@ func BgNotMatchMuteStrategy(rule *models.AlertRule, event *models.AlertCurEvent,
|
||||
// 对于包含ident的告警事件,check一下ident所属bg和rule所属bg是否相同
|
||||
// 如果告警规则选择了只在本BG生效,那其他BG的机器就不能因此规则产生告警
|
||||
if exists && !target.MatchGroupId(rule.GroupId) {
|
||||
logger.Debugf("alert_eval_%d [BgNotMatchMuteStrategy] mute: cluster:%s", rule.Id, event.Cluster)
|
||||
logger.Debugf("[%s] mute: rule_eval:%d cluster:%s", "BgNotMatchMuteStrategy", rule.Id, event.Cluster)
|
||||
return true
|
||||
}
|
||||
return false
|
||||
|
||||
@@ -1,380 +0,0 @@
|
||||
package engine
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"time"
|
||||
|
||||
"github.com/ccfos/nightingale/v6/models"
|
||||
"github.com/ccfos/nightingale/v6/pkg/ctx"
|
||||
"github.com/google/uuid"
|
||||
"github.com/toolkits/pkg/logger"
|
||||
)
|
||||
|
||||
type WorkflowEngine struct {
|
||||
ctx *ctx.Context
|
||||
}
|
||||
|
||||
func NewWorkflowEngine(c *ctx.Context) *WorkflowEngine {
|
||||
return &WorkflowEngine{ctx: c}
|
||||
}
|
||||
|
||||
func (e *WorkflowEngine) Execute(pipeline *models.EventPipeline, event *models.AlertCurEvent, triggerCtx *models.WorkflowTriggerContext) (*models.AlertCurEvent, *models.WorkflowResult, error) {
|
||||
startTime := time.Now()
|
||||
|
||||
wfCtx := e.initWorkflowContext(pipeline, event, triggerCtx)
|
||||
|
||||
nodes := pipeline.GetWorkflowNodes()
|
||||
connections := pipeline.GetWorkflowConnections()
|
||||
|
||||
if len(nodes) == 0 {
|
||||
return event, &models.WorkflowResult{
|
||||
Event: event,
|
||||
Status: models.ExecutionStatusSuccess,
|
||||
Message: "no nodes to execute",
|
||||
}, nil
|
||||
}
|
||||
|
||||
nodeMap := make(map[string]*models.WorkflowNode)
|
||||
for i := range nodes {
|
||||
if nodes[i].RetryInterval == 0 {
|
||||
nodes[i].RetryInterval = 1
|
||||
}
|
||||
|
||||
if nodes[i].MaxRetries == 0 {
|
||||
nodes[i].MaxRetries = 1
|
||||
}
|
||||
|
||||
nodeMap[nodes[i].ID] = &nodes[i]
|
||||
}
|
||||
|
||||
result := e.executeDAG(nodeMap, connections, wfCtx)
|
||||
result.Event = wfCtx.Event
|
||||
|
||||
duration := time.Since(startTime).Milliseconds()
|
||||
|
||||
if triggerCtx != nil && triggerCtx.Mode != "" {
|
||||
e.saveExecutionRecord(pipeline, wfCtx, result, triggerCtx, startTime.Unix(), duration)
|
||||
}
|
||||
|
||||
return wfCtx.Event, result, nil
|
||||
}
|
||||
|
||||
func (e *WorkflowEngine) initWorkflowContext(pipeline *models.EventPipeline, event *models.AlertCurEvent, triggerCtx *models.WorkflowTriggerContext) *models.WorkflowContext {
|
||||
// 合并输入参数
|
||||
inputs := pipeline.GetInputsMap()
|
||||
if triggerCtx != nil && triggerCtx.InputsOverrides != nil {
|
||||
for k, v := range triggerCtx.InputsOverrides {
|
||||
inputs[k] = v
|
||||
}
|
||||
}
|
||||
|
||||
metadata := map[string]string{
|
||||
"start_time": fmt.Sprintf("%d", time.Now().Unix()),
|
||||
"pipeline_id": fmt.Sprintf("%d", pipeline.ID),
|
||||
}
|
||||
|
||||
// 是否启用流式输出
|
||||
stream := false
|
||||
if triggerCtx != nil {
|
||||
metadata["request_id"] = triggerCtx.RequestID
|
||||
metadata["trigger_mode"] = triggerCtx.Mode
|
||||
metadata["trigger_by"] = triggerCtx.TriggerBy
|
||||
stream = triggerCtx.Stream
|
||||
}
|
||||
|
||||
return &models.WorkflowContext{
|
||||
Event: event,
|
||||
Inputs: inputs,
|
||||
Vars: make(map[string]interface{}), // 初始化空的 Vars,供节点间传递数据
|
||||
Metadata: metadata,
|
||||
Stream: stream,
|
||||
}
|
||||
}
|
||||
|
||||
// executeDAG 使用 Kahn 算法执行 DAG
|
||||
func (e *WorkflowEngine) executeDAG(nodeMap map[string]*models.WorkflowNode, connections models.Connections, wfCtx *models.WorkflowContext) *models.WorkflowResult {
|
||||
result := &models.WorkflowResult{
|
||||
Status: models.ExecutionStatusSuccess,
|
||||
NodeResults: make([]*models.NodeExecutionResult, 0),
|
||||
Stream: wfCtx.Stream, // 从上下文继承流式输出设置
|
||||
}
|
||||
|
||||
// 计算每个节点的入度
|
||||
inDegree := make(map[string]int)
|
||||
for nodeID := range nodeMap {
|
||||
inDegree[nodeID] = 0
|
||||
}
|
||||
|
||||
// 遍历连接,计算入度
|
||||
for _, nodeConns := range connections {
|
||||
for _, targets := range nodeConns.Main {
|
||||
for _, target := range targets {
|
||||
inDegree[target.Node]++
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// 找到所有入度为 0 的节点(起始节点)
|
||||
queue := make([]string, 0)
|
||||
for nodeID, degree := range inDegree {
|
||||
if degree == 0 {
|
||||
queue = append(queue, nodeID)
|
||||
}
|
||||
}
|
||||
|
||||
// 如果没有起始节点,说明存在循环依赖
|
||||
if len(queue) == 0 && len(nodeMap) > 0 {
|
||||
result.Status = models.ExecutionStatusFailed
|
||||
result.Message = "workflow has circular dependency"
|
||||
return result
|
||||
}
|
||||
|
||||
// 记录已执行的节点
|
||||
executed := make(map[string]bool)
|
||||
// 记录节点的分支选择结果
|
||||
branchResults := make(map[string]*int)
|
||||
|
||||
for len(queue) > 0 {
|
||||
// 取出队首节点
|
||||
nodeID := queue[0]
|
||||
queue = queue[1:]
|
||||
|
||||
// 检查是否已执行
|
||||
if executed[nodeID] {
|
||||
continue
|
||||
}
|
||||
|
||||
node, exists := nodeMap[nodeID]
|
||||
if !exists {
|
||||
continue
|
||||
}
|
||||
|
||||
// 执行节点
|
||||
nodeResult, nodeOutput := e.executeNode(node, wfCtx)
|
||||
result.NodeResults = append(result.NodeResults, nodeResult)
|
||||
|
||||
if nodeOutput != nil && nodeOutput.Stream && nodeOutput.StreamChan != nil {
|
||||
// 流式输出节点通常是最后一个节点
|
||||
// 直接传递 StreamChan 给 WorkflowResult,不阻塞等待
|
||||
result.Stream = true
|
||||
result.StreamChan = nodeOutput.StreamChan
|
||||
result.Event = wfCtx.Event
|
||||
result.Status = "streaming"
|
||||
result.Message = fmt.Sprintf("streaming output from node: %s", node.Name)
|
||||
|
||||
// 更新节点状态为 streaming
|
||||
nodeResult.Status = "streaming"
|
||||
nodeResult.Message = "streaming in progress"
|
||||
|
||||
// 立即返回,让 API 层处理流式响应
|
||||
return result
|
||||
}
|
||||
executed[nodeID] = true
|
||||
|
||||
// 保存分支结果
|
||||
if nodeResult.BranchIndex != nil {
|
||||
branchResults[nodeID] = nodeResult.BranchIndex
|
||||
}
|
||||
|
||||
// 检查执行状态
|
||||
if nodeResult.Status == "failed" {
|
||||
if !node.ContinueOnFail {
|
||||
result.Status = models.ExecutionStatusFailed
|
||||
result.ErrorNode = nodeID
|
||||
result.Message = fmt.Sprintf("node %s failed: %s", node.Name, nodeResult.Error)
|
||||
}
|
||||
}
|
||||
|
||||
// 检查是否终止
|
||||
if nodeResult.Status == "terminated" {
|
||||
result.Message = fmt.Sprintf("workflow terminated at node %s", node.Name)
|
||||
return result
|
||||
}
|
||||
|
||||
// 更新后继节点的入度
|
||||
if nodeConns, ok := connections[nodeID]; ok {
|
||||
for outputIndex, targets := range nodeConns.Main {
|
||||
// 检查是否应该走这个分支
|
||||
if !e.shouldFollowBranch(nodeID, outputIndex, branchResults) {
|
||||
continue
|
||||
}
|
||||
|
||||
for _, target := range targets {
|
||||
inDegree[target.Node]--
|
||||
if inDegree[target.Node] == 0 {
|
||||
queue = append(queue, target.Node)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return result
|
||||
}
|
||||
|
||||
// executeNode 执行单个节点
|
||||
// 返回:节点执行结果、节点输出(用于流式输出检测)
|
||||
func (e *WorkflowEngine) executeNode(node *models.WorkflowNode, wfCtx *models.WorkflowContext) (*models.NodeExecutionResult, *models.NodeOutput) {
|
||||
startTime := time.Now()
|
||||
nodeResult := &models.NodeExecutionResult{
|
||||
NodeID: node.ID,
|
||||
NodeName: node.Name,
|
||||
NodeType: node.Type,
|
||||
StartedAt: startTime.Unix(),
|
||||
}
|
||||
|
||||
var nodeOutput *models.NodeOutput
|
||||
|
||||
// 跳过禁用的节点
|
||||
if node.Disabled {
|
||||
nodeResult.Status = "skipped"
|
||||
nodeResult.Message = "node is disabled"
|
||||
nodeResult.FinishedAt = time.Now().Unix()
|
||||
nodeResult.DurationMs = time.Since(startTime).Milliseconds()
|
||||
return nodeResult, nil
|
||||
}
|
||||
|
||||
// 获取处理器
|
||||
processor, err := models.GetProcessorByType(node.Type, node.Config)
|
||||
if err != nil {
|
||||
nodeResult.Status = "failed"
|
||||
nodeResult.Error = fmt.Sprintf("failed to get processor: %v", err)
|
||||
nodeResult.FinishedAt = time.Now().Unix()
|
||||
nodeResult.DurationMs = time.Since(startTime).Milliseconds()
|
||||
return nodeResult, nil
|
||||
}
|
||||
|
||||
// 执行处理器(带重试)
|
||||
var retries int
|
||||
maxRetries := node.MaxRetries
|
||||
if !node.RetryOnFail {
|
||||
maxRetries = 0
|
||||
}
|
||||
|
||||
for retries <= maxRetries {
|
||||
// 检查是否为分支处理器
|
||||
if branchProcessor, ok := processor.(models.BranchProcessor); ok {
|
||||
output, err := branchProcessor.ProcessWithBranch(e.ctx, wfCtx)
|
||||
if err != nil {
|
||||
if retries < maxRetries {
|
||||
retries++
|
||||
time.Sleep(time.Duration(node.RetryInterval) * time.Second)
|
||||
continue
|
||||
}
|
||||
nodeResult.Status = "failed"
|
||||
nodeResult.Error = err.Error()
|
||||
} else {
|
||||
nodeResult.Status = "success"
|
||||
if output != nil {
|
||||
nodeOutput = output
|
||||
if output.WfCtx != nil {
|
||||
wfCtx = output.WfCtx
|
||||
}
|
||||
nodeResult.Message = output.Message
|
||||
nodeResult.BranchIndex = output.BranchIndex
|
||||
if output.Terminate {
|
||||
nodeResult.Status = "terminated"
|
||||
}
|
||||
}
|
||||
}
|
||||
break
|
||||
}
|
||||
|
||||
// 普通处理器
|
||||
newWfCtx, msg, err := processor.Process(e.ctx, wfCtx)
|
||||
if err != nil {
|
||||
if retries < maxRetries {
|
||||
retries++
|
||||
time.Sleep(time.Duration(node.RetryInterval) * time.Second)
|
||||
continue
|
||||
}
|
||||
nodeResult.Status = "failed"
|
||||
nodeResult.Error = err.Error()
|
||||
} else {
|
||||
nodeResult.Status = "success"
|
||||
nodeResult.Message = msg
|
||||
if newWfCtx != nil {
|
||||
wfCtx = newWfCtx
|
||||
|
||||
// 检测流式输出标记
|
||||
if newWfCtx.Stream && newWfCtx.StreamChan != nil {
|
||||
nodeOutput = &models.NodeOutput{
|
||||
WfCtx: newWfCtx,
|
||||
Message: msg,
|
||||
Stream: true,
|
||||
StreamChan: newWfCtx.StreamChan,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// 如果事件被 drop(返回 nil 或 Event 为 nil),标记为终止
|
||||
if newWfCtx == nil || newWfCtx.Event == nil {
|
||||
nodeResult.Status = "terminated"
|
||||
nodeResult.Message = msg
|
||||
}
|
||||
}
|
||||
break
|
||||
}
|
||||
|
||||
nodeResult.FinishedAt = time.Now().Unix()
|
||||
nodeResult.DurationMs = time.Since(startTime).Milliseconds()
|
||||
|
||||
logger.Infof("workflow: executed node %s (type=%s) status=%s msg=%s duration=%dms",
|
||||
node.Name, node.Type, nodeResult.Status, nodeResult.Message, nodeResult.DurationMs)
|
||||
|
||||
return nodeResult, nodeOutput
|
||||
}
|
||||
|
||||
// shouldFollowBranch 判断是否应该走某个分支
|
||||
func (e *WorkflowEngine) shouldFollowBranch(nodeID string, outputIndex int, branchResults map[string]*int) bool {
|
||||
branchIndex, hasBranch := branchResults[nodeID]
|
||||
if !hasBranch {
|
||||
// 没有分支结果,说明不是分支节点,只走第一个输出
|
||||
return outputIndex == 0
|
||||
}
|
||||
|
||||
if branchIndex == nil {
|
||||
// branchIndex 为 nil,走默认分支(通常是最后一个)
|
||||
return true
|
||||
}
|
||||
|
||||
// 只走选中的分支
|
||||
return outputIndex == *branchIndex
|
||||
}
|
||||
|
||||
func (e *WorkflowEngine) saveExecutionRecord(pipeline *models.EventPipeline, wfCtx *models.WorkflowContext, result *models.WorkflowResult, triggerCtx *models.WorkflowTriggerContext, startTime int64, duration int64) {
|
||||
executionID := triggerCtx.RequestID
|
||||
if executionID == "" {
|
||||
executionID = uuid.New().String()
|
||||
}
|
||||
|
||||
execution := &models.EventPipelineExecution{
|
||||
ID: executionID,
|
||||
PipelineID: pipeline.ID,
|
||||
PipelineName: pipeline.Name,
|
||||
Mode: triggerCtx.Mode,
|
||||
Status: result.Status,
|
||||
ErrorMessage: result.Message,
|
||||
ErrorNode: result.ErrorNode,
|
||||
CreatedAt: startTime,
|
||||
FinishedAt: time.Now().Unix(),
|
||||
DurationMs: duration,
|
||||
TriggerBy: triggerCtx.TriggerBy,
|
||||
}
|
||||
|
||||
if wfCtx.Event != nil {
|
||||
execution.EventID = wfCtx.Event.Id
|
||||
}
|
||||
|
||||
if err := execution.SetNodeResults(result.NodeResults); err != nil {
|
||||
logger.Errorf("workflow: failed to set node results: pipeline_id=%d, error=%v", pipeline.ID, err)
|
||||
}
|
||||
|
||||
if err := execution.SetInputsSnapshot(wfCtx.Inputs); err != nil {
|
||||
logger.Errorf("workflow: failed to set inputs snapshot: pipeline_id=%d, error=%v", pipeline.ID, err)
|
||||
}
|
||||
|
||||
if err := models.CreateEventPipelineExecution(e.ctx, execution); err != nil {
|
||||
logger.Errorf("workflow: failed to save execution record: pipeline_id=%d, error=%v", pipeline.ID, err)
|
||||
}
|
||||
}
|
||||
@@ -5,7 +5,6 @@ import (
|
||||
_ "github.com/ccfos/nightingale/v6/alert/pipeline/processor/callback"
|
||||
_ "github.com/ccfos/nightingale/v6/alert/pipeline/processor/eventdrop"
|
||||
_ "github.com/ccfos/nightingale/v6/alert/pipeline/processor/eventupdate"
|
||||
_ "github.com/ccfos/nightingale/v6/alert/pipeline/processor/logic"
|
||||
_ "github.com/ccfos/nightingale/v6/alert/pipeline/processor/relabel"
|
||||
)
|
||||
|
||||
|
||||
@@ -55,24 +55,23 @@ func (c *AISummaryConfig) Init(settings interface{}) (models.Processor, error) {
|
||||
return result, err
|
||||
}
|
||||
|
||||
func (c *AISummaryConfig) Process(ctx *ctx.Context, wfCtx *models.WorkflowContext) (*models.WorkflowContext, string, error) {
|
||||
event := wfCtx.Event
|
||||
func (c *AISummaryConfig) Process(ctx *ctx.Context, event *models.AlertCurEvent) (*models.AlertCurEvent, string, error) {
|
||||
if c.Client == nil {
|
||||
if err := c.initHTTPClient(); err != nil {
|
||||
return wfCtx, "", fmt.Errorf("failed to initialize HTTP client: %v processor: %v", err, c)
|
||||
return event, "", fmt.Errorf("failed to initialize HTTP client: %v processor: %v", err, c)
|
||||
}
|
||||
}
|
||||
|
||||
// 准备告警事件信息
|
||||
eventInfo, err := c.prepareEventInfo(wfCtx)
|
||||
eventInfo, err := c.prepareEventInfo(event)
|
||||
if err != nil {
|
||||
return wfCtx, "", fmt.Errorf("failed to prepare event info: %v processor: %v", err, c)
|
||||
return event, "", fmt.Errorf("failed to prepare event info: %v processor: %v", err, c)
|
||||
}
|
||||
|
||||
// 调用AI模型生成总结
|
||||
summary, err := c.generateAISummary(eventInfo)
|
||||
if err != nil {
|
||||
return wfCtx, "", fmt.Errorf("failed to generate AI summary: %v processor: %v", err, c)
|
||||
return event, "", fmt.Errorf("failed to generate AI summary: %v processor: %v", err, c)
|
||||
}
|
||||
|
||||
// 将总结添加到annotations字段
|
||||
@@ -84,11 +83,11 @@ func (c *AISummaryConfig) Process(ctx *ctx.Context, wfCtx *models.WorkflowContex
|
||||
// 更新Annotations字段
|
||||
b, err := json.Marshal(event.AnnotationsJSON)
|
||||
if err != nil {
|
||||
return wfCtx, "", fmt.Errorf("failed to marshal annotations: %v processor: %v", err, c)
|
||||
return event, "", fmt.Errorf("failed to marshal annotations: %v processor: %v", err, c)
|
||||
}
|
||||
event.Annotations = string(b)
|
||||
|
||||
return wfCtx, "", nil
|
||||
return event, "", nil
|
||||
}
|
||||
|
||||
func (c *AISummaryConfig) initHTTPClient() error {
|
||||
@@ -111,10 +110,9 @@ func (c *AISummaryConfig) initHTTPClient() error {
|
||||
return nil
|
||||
}
|
||||
|
||||
func (c *AISummaryConfig) prepareEventInfo(wfCtx *models.WorkflowContext) (string, error) {
|
||||
func (c *AISummaryConfig) prepareEventInfo(event *models.AlertCurEvent) (string, error) {
|
||||
var defs = []string{
|
||||
"{{$event := .Event}}",
|
||||
"{{$inputs := .Inputs}}",
|
||||
"{{$event := .}}",
|
||||
}
|
||||
|
||||
text := strings.Join(append(defs, c.PromptTemplate), "")
|
||||
@@ -124,7 +122,7 @@ func (c *AISummaryConfig) prepareEventInfo(wfCtx *models.WorkflowContext) (strin
|
||||
}
|
||||
|
||||
var body bytes.Buffer
|
||||
err = t.Execute(&body, wfCtx)
|
||||
err = t.Execute(&body, event)
|
||||
if err != nil {
|
||||
return "", fmt.Errorf("failed to execute prompt template: %v", err)
|
||||
}
|
||||
|
||||
@@ -42,14 +42,8 @@ func TestAISummaryConfig_Process(t *testing.T) {
|
||||
},
|
||||
}
|
||||
|
||||
// 创建 WorkflowContext
|
||||
wfCtx := &models.WorkflowContext{
|
||||
Event: event,
|
||||
Inputs: map[string]string{},
|
||||
}
|
||||
|
||||
// 测试模板处理
|
||||
eventInfo, err := config.prepareEventInfo(wfCtx)
|
||||
eventInfo, err := config.prepareEventInfo(event)
|
||||
assert.NoError(t, err)
|
||||
assert.Contains(t, eventInfo, "Test Rule")
|
||||
assert.Contains(t, eventInfo, "1")
|
||||
@@ -60,18 +54,18 @@ func TestAISummaryConfig_Process(t *testing.T) {
|
||||
assert.NotNil(t, processor)
|
||||
|
||||
// 测试处理函数
|
||||
result, _, err := processor.Process(&ctx.Context{}, wfCtx)
|
||||
result, _, err := processor.Process(&ctx.Context{}, event)
|
||||
assert.NoError(t, err)
|
||||
assert.NotNil(t, result)
|
||||
assert.NotEmpty(t, result.Event.AnnotationsJSON["ai_summary"])
|
||||
assert.NotEmpty(t, result.AnnotationsJSON["ai_summary"])
|
||||
|
||||
// 展示处理结果
|
||||
t.Log("\n=== 处理结果 ===")
|
||||
t.Logf("告警规则: %s", result.Event.RuleName)
|
||||
t.Logf("严重程度: %d", result.Event.Severity)
|
||||
t.Logf("标签: %v", result.Event.TagsMap)
|
||||
t.Logf("原始注释: %v", result.Event.AnnotationsJSON["description"])
|
||||
t.Logf("AI总结: %s", result.Event.AnnotationsJSON["ai_summary"])
|
||||
t.Logf("告警规则: %s", result.RuleName)
|
||||
t.Logf("严重程度: %d", result.Severity)
|
||||
t.Logf("标签: %v", result.TagsMap)
|
||||
t.Logf("原始注释: %v", result.AnnotationsJSON["description"])
|
||||
t.Logf("AI总结: %s", result.AnnotationsJSON["ai_summary"])
|
||||
}
|
||||
|
||||
func TestConvertCustomParam(t *testing.T) {
|
||||
|
||||
@@ -11,7 +11,6 @@ import (
|
||||
"time"
|
||||
|
||||
"github.com/ccfos/nightingale/v6/alert/pipeline/processor/common"
|
||||
"github.com/ccfos/nightingale/v6/alert/pipeline/processor/utils"
|
||||
"github.com/ccfos/nightingale/v6/models"
|
||||
"github.com/ccfos/nightingale/v6/pkg/ctx"
|
||||
"github.com/toolkits/pkg/logger"
|
||||
@@ -44,8 +43,7 @@ func (c *CallbackConfig) Init(settings interface{}) (models.Processor, error) {
|
||||
return result, err
|
||||
}
|
||||
|
||||
func (c *CallbackConfig) Process(ctx *ctx.Context, wfCtx *models.WorkflowContext) (*models.WorkflowContext, string, error) {
|
||||
event := wfCtx.Event
|
||||
func (c *CallbackConfig) Process(ctx *ctx.Context, event *models.AlertCurEvent) (*models.AlertCurEvent, string, error) {
|
||||
if c.Client == nil {
|
||||
transport := &http.Transport{
|
||||
TLSClientConfig: &tls.Config{InsecureSkipVerify: c.SkipSSLVerify},
|
||||
@@ -54,7 +52,7 @@ func (c *CallbackConfig) Process(ctx *ctx.Context, wfCtx *models.WorkflowContext
|
||||
if c.Proxy != "" {
|
||||
proxyURL, err := url.Parse(c.Proxy)
|
||||
if err != nil {
|
||||
return wfCtx, "", fmt.Errorf("failed to parse proxy url: %v processor: %v", err, c)
|
||||
return event, "", fmt.Errorf("failed to parse proxy url: %v processor: %v", err, c)
|
||||
} else {
|
||||
transport.Proxy = http.ProxyURL(proxyURL)
|
||||
}
|
||||
@@ -72,19 +70,14 @@ func (c *CallbackConfig) Process(ctx *ctx.Context, wfCtx *models.WorkflowContext
|
||||
headers[k] = v
|
||||
}
|
||||
|
||||
url, err := utils.TplRender(wfCtx, c.URL)
|
||||
if err != nil {
|
||||
return wfCtx, "", fmt.Errorf("failed to render url template: %v processor: %v", err, c)
|
||||
}
|
||||
|
||||
body, err := json.Marshal(event)
|
||||
if err != nil {
|
||||
return wfCtx, "", fmt.Errorf("failed to marshal event: %v processor: %v", err, c)
|
||||
return event, "", fmt.Errorf("failed to marshal event: %v processor: %v", err, c)
|
||||
}
|
||||
|
||||
req, err := http.NewRequest("POST", url, strings.NewReader(string(body)))
|
||||
req, err := http.NewRequest("POST", c.URL, strings.NewReader(string(body)))
|
||||
if err != nil {
|
||||
return wfCtx, "", fmt.Errorf("failed to create request: %v processor: %v", err, c)
|
||||
return event, "", fmt.Errorf("failed to create request: %v processor: %v", err, c)
|
||||
}
|
||||
|
||||
for k, v := range headers {
|
||||
@@ -97,14 +90,14 @@ func (c *CallbackConfig) Process(ctx *ctx.Context, wfCtx *models.WorkflowContext
|
||||
|
||||
resp, err := c.Client.Do(req)
|
||||
if err != nil {
|
||||
return wfCtx, "", fmt.Errorf("failed to send request: %v processor: %v", err, c)
|
||||
return event, "", fmt.Errorf("failed to send request: %v processor: %v", err, c)
|
||||
}
|
||||
|
||||
b, err := io.ReadAll(resp.Body)
|
||||
if err != nil {
|
||||
return wfCtx, "", fmt.Errorf("failed to read response body: %v processor: %v", err, c)
|
||||
return event, "", fmt.Errorf("failed to read response body: %v processor: %v", err, c)
|
||||
}
|
||||
|
||||
logger.Debugf("callback processor response body: %s", string(b))
|
||||
return wfCtx, "callback success", nil
|
||||
return event, "callback success", nil
|
||||
}
|
||||
|
||||
@@ -26,38 +26,35 @@ func (c *EventDropConfig) Init(settings interface{}) (models.Processor, error) {
|
||||
return result, err
|
||||
}
|
||||
|
||||
func (c *EventDropConfig) Process(ctx *ctx.Context, wfCtx *models.WorkflowContext) (*models.WorkflowContext, string, error) {
|
||||
func (c *EventDropConfig) Process(ctx *ctx.Context, event *models.AlertCurEvent) (*models.AlertCurEvent, string, error) {
|
||||
// 使用背景是可以根据此处理器,实现对事件进行更加灵活的过滤的逻辑
|
||||
// 在标签过滤和属性过滤都不满足需求时可以使用
|
||||
// 如果模板执行结果为 true,则删除该事件
|
||||
event := wfCtx.Event
|
||||
|
||||
var defs = []string{
|
||||
"{{ $event := .Event }}",
|
||||
"{{ $labels := .Event.TagsMap }}",
|
||||
"{{ $value := .Event.TriggerValue }}",
|
||||
"{{ $inputs := .Inputs }}",
|
||||
"{{ $event := . }}",
|
||||
"{{ $labels := .TagsMap }}",
|
||||
"{{ $value := .TriggerValue }}",
|
||||
}
|
||||
|
||||
text := strings.Join(append(defs, c.Content), "")
|
||||
|
||||
tpl, err := texttemplate.New("eventdrop").Funcs(tplx.TemplateFuncMap).Parse(text)
|
||||
if err != nil {
|
||||
return wfCtx, "", fmt.Errorf("processor failed to parse template: %v processor: %v", err, c)
|
||||
return event, "", fmt.Errorf("processor failed to parse template: %v processor: %v", err, c)
|
||||
}
|
||||
|
||||
var body bytes.Buffer
|
||||
if err = tpl.Execute(&body, wfCtx); err != nil {
|
||||
return wfCtx, "", fmt.Errorf("processor failed to execute template: %v processor: %v", err, c)
|
||||
if err = tpl.Execute(&body, event); err != nil {
|
||||
return event, "", fmt.Errorf("processor failed to execute template: %v processor: %v", err, c)
|
||||
}
|
||||
|
||||
result := strings.TrimSpace(body.String())
|
||||
logger.Infof("processor eventdrop result: %v", result)
|
||||
if result == "true" {
|
||||
wfCtx.Event = nil
|
||||
logger.Infof("processor eventdrop drop event: %s", event.Hash)
|
||||
return wfCtx, "drop event success", nil
|
||||
logger.Infof("processor eventdrop drop event: %v", event)
|
||||
return nil, "drop event success", nil
|
||||
}
|
||||
|
||||
return wfCtx, "drop event failed", nil
|
||||
return event, "drop event failed", nil
|
||||
}
|
||||
|
||||
@@ -31,8 +31,7 @@ func (c *EventUpdateConfig) Init(settings interface{}) (models.Processor, error)
|
||||
return result, err
|
||||
}
|
||||
|
||||
func (c *EventUpdateConfig) Process(ctx *ctx.Context, wfCtx *models.WorkflowContext) (*models.WorkflowContext, string, error) {
|
||||
event := wfCtx.Event
|
||||
func (c *EventUpdateConfig) Process(ctx *ctx.Context, event *models.AlertCurEvent) (*models.AlertCurEvent, string, error) {
|
||||
if c.Client == nil {
|
||||
transport := &http.Transport{
|
||||
TLSClientConfig: &tls.Config{InsecureSkipVerify: c.SkipSSLVerify},
|
||||
@@ -41,7 +40,7 @@ func (c *EventUpdateConfig) Process(ctx *ctx.Context, wfCtx *models.WorkflowCont
|
||||
if c.Proxy != "" {
|
||||
proxyURL, err := url.Parse(c.Proxy)
|
||||
if err != nil {
|
||||
return wfCtx, "", fmt.Errorf("failed to parse proxy url: %v processor: %v", err, c)
|
||||
return event, "", fmt.Errorf("failed to parse proxy url: %v processor: %v", err, c)
|
||||
} else {
|
||||
transport.Proxy = http.ProxyURL(proxyURL)
|
||||
}
|
||||
@@ -61,12 +60,12 @@ func (c *EventUpdateConfig) Process(ctx *ctx.Context, wfCtx *models.WorkflowCont
|
||||
|
||||
body, err := json.Marshal(event)
|
||||
if err != nil {
|
||||
return wfCtx, "", fmt.Errorf("failed to marshal event: %v processor: %v", err, c)
|
||||
return event, "", fmt.Errorf("failed to marshal event: %v processor: %v", err, c)
|
||||
}
|
||||
|
||||
req, err := http.NewRequest("POST", c.URL, strings.NewReader(string(body)))
|
||||
if err != nil {
|
||||
return wfCtx, "", fmt.Errorf("failed to create request: %v processor: %v", err, c)
|
||||
return event, "", fmt.Errorf("failed to create request: %v processor: %v", err, c)
|
||||
}
|
||||
|
||||
for k, v := range headers {
|
||||
@@ -79,7 +78,7 @@ func (c *EventUpdateConfig) Process(ctx *ctx.Context, wfCtx *models.WorkflowCont
|
||||
|
||||
resp, err := c.Client.Do(req)
|
||||
if err != nil {
|
||||
return wfCtx, "", fmt.Errorf("failed to send request: %v processor: %v", err, c)
|
||||
return event, "", fmt.Errorf("failed to send request: %v processor: %v", err, c)
|
||||
}
|
||||
|
||||
b, err := io.ReadAll(resp.Body)
|
||||
@@ -90,8 +89,8 @@ func (c *EventUpdateConfig) Process(ctx *ctx.Context, wfCtx *models.WorkflowCont
|
||||
|
||||
err = json.Unmarshal(b, &event)
|
||||
if err != nil {
|
||||
return wfCtx, "", fmt.Errorf("failed to unmarshal response body: %v processor: %v", err, c)
|
||||
return event, "", fmt.Errorf("failed to unmarshal response body: %v processor: %v", err, c)
|
||||
}
|
||||
|
||||
return wfCtx, "", nil
|
||||
return event, "", nil
|
||||
}
|
||||
|
||||
@@ -1,197 +0,0 @@
|
||||
package logic
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"fmt"
|
||||
"strings"
|
||||
"text/template"
|
||||
|
||||
alertCommon "github.com/ccfos/nightingale/v6/alert/common"
|
||||
"github.com/ccfos/nightingale/v6/alert/pipeline/processor/common"
|
||||
"github.com/ccfos/nightingale/v6/models"
|
||||
"github.com/ccfos/nightingale/v6/pkg/ctx"
|
||||
"github.com/ccfos/nightingale/v6/pkg/tplx"
|
||||
)
|
||||
|
||||
// 判断模式常量
|
||||
const (
|
||||
ConditionModeExpression = "expression" // 表达式模式(默认)
|
||||
ConditionModeTags = "tags" // 标签/属性模式
|
||||
)
|
||||
|
||||
// IfConfig If 条件处理器配置
|
||||
type IfConfig struct {
|
||||
// 判断模式:expression(表达式)或 tags(标签/属性)
|
||||
Mode string `json:"mode,omitempty"`
|
||||
|
||||
// 表达式模式配置
|
||||
// 条件表达式(支持 Go 模板语法)
|
||||
// 例如:{{ if eq .Severity 1 }}true{{ end }}
|
||||
Condition string `json:"condition,omitempty"`
|
||||
|
||||
// 标签/属性模式配置
|
||||
LabelKeys []models.TagFilter `json:"label_keys,omitempty"` // 适用标签
|
||||
Attributes []models.TagFilter `json:"attributes,omitempty"` // 适用属性
|
||||
|
||||
// 内部使用,解析后的过滤器
|
||||
parsedLabelKeys []models.TagFilter `json:"-"`
|
||||
parsedAttributes []models.TagFilter `json:"-"`
|
||||
}
|
||||
|
||||
func init() {
|
||||
models.RegisterProcessor("logic.if", &IfConfig{})
|
||||
}
|
||||
|
||||
func (c *IfConfig) Init(settings interface{}) (models.Processor, error) {
|
||||
result, err := common.InitProcessor[*IfConfig](settings)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
// 解析标签过滤器
|
||||
if len(result.LabelKeys) > 0 {
|
||||
// Deep copy to avoid concurrent map writes on cached objects
|
||||
labelKeysCopy := make([]models.TagFilter, len(result.LabelKeys))
|
||||
copy(labelKeysCopy, result.LabelKeys)
|
||||
for i := range labelKeysCopy {
|
||||
if labelKeysCopy[i].Func == "" {
|
||||
labelKeysCopy[i].Func = labelKeysCopy[i].Op
|
||||
}
|
||||
}
|
||||
result.parsedLabelKeys, err = models.ParseTagFilter(labelKeysCopy)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to parse label_keys: %v", err)
|
||||
}
|
||||
}
|
||||
|
||||
// 解析属性过滤器
|
||||
if len(result.Attributes) > 0 {
|
||||
// Deep copy to avoid concurrent map writes on cached objects
|
||||
attributesCopy := make([]models.TagFilter, len(result.Attributes))
|
||||
copy(attributesCopy, result.Attributes)
|
||||
for i := range attributesCopy {
|
||||
if attributesCopy[i].Func == "" {
|
||||
attributesCopy[i].Func = attributesCopy[i].Op
|
||||
}
|
||||
}
|
||||
result.parsedAttributes, err = models.ParseTagFilter(attributesCopy)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to parse attributes: %v", err)
|
||||
}
|
||||
}
|
||||
|
||||
return result, nil
|
||||
}
|
||||
|
||||
// Process 实现 Processor 接口(兼容旧模式)
|
||||
func (c *IfConfig) Process(ctx *ctx.Context, wfCtx *models.WorkflowContext) (*models.WorkflowContext, string, error) {
|
||||
result, err := c.evaluateCondition(wfCtx)
|
||||
if err != nil {
|
||||
return wfCtx, "", fmt.Errorf("if processor: failed to evaluate condition: %v", err)
|
||||
}
|
||||
|
||||
if result {
|
||||
return wfCtx, "condition matched (true branch)", nil
|
||||
}
|
||||
return wfCtx, "condition not matched (false branch)", nil
|
||||
}
|
||||
|
||||
// ProcessWithBranch 实现 BranchProcessor 接口
|
||||
func (c *IfConfig) ProcessWithBranch(ctx *ctx.Context, wfCtx *models.WorkflowContext) (*models.NodeOutput, error) {
|
||||
result, err := c.evaluateCondition(wfCtx)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("if processor: failed to evaluate condition: %v", err)
|
||||
}
|
||||
|
||||
output := &models.NodeOutput{
|
||||
WfCtx: wfCtx,
|
||||
}
|
||||
|
||||
if result {
|
||||
// 条件为 true,走输出 0(true 分支)
|
||||
branchIndex := 0
|
||||
output.BranchIndex = &branchIndex
|
||||
output.Message = "condition matched (true branch)"
|
||||
} else {
|
||||
// 条件为 false,走输出 1(false 分支)
|
||||
branchIndex := 1
|
||||
output.BranchIndex = &branchIndex
|
||||
output.Message = "condition not matched (false branch)"
|
||||
}
|
||||
|
||||
return output, nil
|
||||
}
|
||||
|
||||
// evaluateCondition 评估条件
|
||||
func (c *IfConfig) evaluateCondition(wfCtx *models.WorkflowContext) (bool, error) {
|
||||
mode := c.Mode
|
||||
if mode == "" {
|
||||
mode = ConditionModeExpression // 默认表达式模式
|
||||
}
|
||||
|
||||
switch mode {
|
||||
case ConditionModeTags:
|
||||
return c.evaluateTagsCondition(wfCtx.Event)
|
||||
default:
|
||||
return c.evaluateExpressionCondition(wfCtx)
|
||||
}
|
||||
}
|
||||
|
||||
// evaluateExpressionCondition 评估表达式条件
|
||||
func (c *IfConfig) evaluateExpressionCondition(wfCtx *models.WorkflowContext) (bool, error) {
|
||||
if c.Condition == "" {
|
||||
return true, nil
|
||||
}
|
||||
|
||||
// 构建模板数据
|
||||
var defs = []string{
|
||||
"{{ $event := .Event }}",
|
||||
"{{ $labels := .Event.TagsMap }}",
|
||||
"{{ $value := .Event.TriggerValue }}",
|
||||
"{{ $inputs := .Inputs }}",
|
||||
}
|
||||
|
||||
text := strings.Join(append(defs, c.Condition), "")
|
||||
|
||||
tpl, err := template.New("if_condition").Funcs(tplx.TemplateFuncMap).Parse(text)
|
||||
if err != nil {
|
||||
return false, err
|
||||
}
|
||||
|
||||
var buf bytes.Buffer
|
||||
if err = tpl.Execute(&buf, wfCtx); err != nil {
|
||||
return false, err
|
||||
}
|
||||
|
||||
result := strings.TrimSpace(strings.ToLower(buf.String()))
|
||||
return result == "true" || result == "1", nil
|
||||
}
|
||||
|
||||
// evaluateTagsCondition 评估标签/属性条件
|
||||
func (c *IfConfig) evaluateTagsCondition(event *models.AlertCurEvent) (bool, error) {
|
||||
// 如果没有配置任何过滤条件,默认返回 true
|
||||
if len(c.parsedLabelKeys) == 0 && len(c.parsedAttributes) == 0 {
|
||||
return true, nil
|
||||
}
|
||||
|
||||
// 匹配标签 (TagsMap)
|
||||
if len(c.parsedLabelKeys) > 0 {
|
||||
tagsMap := event.TagsMap
|
||||
if tagsMap == nil {
|
||||
tagsMap = make(map[string]string)
|
||||
}
|
||||
if !alertCommon.MatchTags(tagsMap, c.parsedLabelKeys) {
|
||||
return false, nil
|
||||
}
|
||||
}
|
||||
|
||||
// 匹配属性 (JsonTagsAndValue - 所有 JSON 字段)
|
||||
if len(c.parsedAttributes) > 0 {
|
||||
attributesMap := event.JsonTagsAndValue()
|
||||
if !alertCommon.MatchTags(attributesMap, c.parsedAttributes) {
|
||||
return false, nil
|
||||
}
|
||||
}
|
||||
|
||||
return true, nil
|
||||
}
|
||||
@@ -1,224 +0,0 @@
|
||||
package logic
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"fmt"
|
||||
"strings"
|
||||
"text/template"
|
||||
|
||||
alertCommon "github.com/ccfos/nightingale/v6/alert/common"
|
||||
"github.com/ccfos/nightingale/v6/alert/pipeline/processor/common"
|
||||
"github.com/ccfos/nightingale/v6/models"
|
||||
"github.com/ccfos/nightingale/v6/pkg/ctx"
|
||||
"github.com/ccfos/nightingale/v6/pkg/tplx"
|
||||
)
|
||||
|
||||
// SwitchCase Switch 分支定义
|
||||
type SwitchCase struct {
|
||||
// 判断模式:expression(表达式)或 tags(标签/属性)
|
||||
Mode string `json:"mode,omitempty"`
|
||||
|
||||
// 表达式模式配置
|
||||
// 条件表达式(支持 Go 模板语法)
|
||||
Condition string `json:"condition,omitempty"`
|
||||
|
||||
// 标签/属性模式配置
|
||||
LabelKeys []models.TagFilter `json:"label_keys,omitempty"` // 适用标签
|
||||
Attributes []models.TagFilter `json:"attributes,omitempty"` // 适用属性
|
||||
|
||||
// 分支名称(可选,用于日志)
|
||||
Name string `json:"name,omitempty"`
|
||||
|
||||
// 内部使用,解析后的过滤器
|
||||
parsedLabelKeys []models.TagFilter `json:"-"`
|
||||
parsedAttributes []models.TagFilter `json:"-"`
|
||||
}
|
||||
|
||||
// SwitchConfig Switch 多分支处理器配置
|
||||
type SwitchConfig struct {
|
||||
// 分支条件列表
|
||||
// 按顺序匹配,第一个为 true 的分支将被选中
|
||||
Cases []SwitchCase `json:"cases"`
|
||||
// 是否允许多个分支同时匹配(默认 false,只走第一个匹配的)
|
||||
AllowMultiple bool `json:"allow_multiple,omitempty"`
|
||||
}
|
||||
|
||||
func init() {
|
||||
models.RegisterProcessor("logic.switch", &SwitchConfig{})
|
||||
}
|
||||
|
||||
func (c *SwitchConfig) Init(settings interface{}) (models.Processor, error) {
|
||||
result, err := common.InitProcessor[*SwitchConfig](settings)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
// 解析每个 case 的标签和属性过滤器
|
||||
for i := range result.Cases {
|
||||
if len(result.Cases[i].LabelKeys) > 0 {
|
||||
// Deep copy to avoid concurrent map writes on cached objects
|
||||
labelKeysCopy := make([]models.TagFilter, len(result.Cases[i].LabelKeys))
|
||||
copy(labelKeysCopy, result.Cases[i].LabelKeys)
|
||||
for j := range labelKeysCopy {
|
||||
if labelKeysCopy[j].Func == "" {
|
||||
labelKeysCopy[j].Func = labelKeysCopy[j].Op
|
||||
}
|
||||
}
|
||||
result.Cases[i].parsedLabelKeys, err = models.ParseTagFilter(labelKeysCopy)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to parse label_keys for case[%d]: %v", i, err)
|
||||
}
|
||||
}
|
||||
|
||||
if len(result.Cases[i].Attributes) > 0 {
|
||||
// Deep copy to avoid concurrent map writes on cached objects
|
||||
attributesCopy := make([]models.TagFilter, len(result.Cases[i].Attributes))
|
||||
copy(attributesCopy, result.Cases[i].Attributes)
|
||||
for j := range attributesCopy {
|
||||
if attributesCopy[j].Func == "" {
|
||||
attributesCopy[j].Func = attributesCopy[j].Op
|
||||
}
|
||||
}
|
||||
result.Cases[i].parsedAttributes, err = models.ParseTagFilter(attributesCopy)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to parse attributes for case[%d]: %v", i, err)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return result, nil
|
||||
}
|
||||
|
||||
// Process 实现 Processor 接口(兼容旧模式)
|
||||
func (c *SwitchConfig) Process(ctx *ctx.Context, wfCtx *models.WorkflowContext) (*models.WorkflowContext, string, error) {
|
||||
index, caseName, err := c.evaluateCases(wfCtx)
|
||||
if err != nil {
|
||||
return wfCtx, "", fmt.Errorf("switch processor: failed to evaluate cases: %v", err)
|
||||
}
|
||||
|
||||
if index >= 0 {
|
||||
if caseName != "" {
|
||||
return wfCtx, fmt.Sprintf("matched case[%d]: %s", index, caseName), nil
|
||||
}
|
||||
return wfCtx, fmt.Sprintf("matched case[%d]", index), nil
|
||||
}
|
||||
|
||||
// 走默认分支(最后一个输出)
|
||||
return wfCtx, "no case matched, using default branch", nil
|
||||
}
|
||||
|
||||
// ProcessWithBranch 实现 BranchProcessor 接口
|
||||
func (c *SwitchConfig) ProcessWithBranch(ctx *ctx.Context, wfCtx *models.WorkflowContext) (*models.NodeOutput, error) {
|
||||
index, caseName, err := c.evaluateCases(wfCtx)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("switch processor: failed to evaluate cases: %v", err)
|
||||
}
|
||||
|
||||
output := &models.NodeOutput{
|
||||
WfCtx: wfCtx,
|
||||
}
|
||||
|
||||
if index >= 0 {
|
||||
output.BranchIndex = &index
|
||||
if caseName != "" {
|
||||
output.Message = fmt.Sprintf("matched case[%d]: %s", index, caseName)
|
||||
} else {
|
||||
output.Message = fmt.Sprintf("matched case[%d]", index)
|
||||
}
|
||||
} else {
|
||||
// 默认分支的索引是 cases 数量(即最后一个输出端口)
|
||||
defaultIndex := len(c.Cases)
|
||||
output.BranchIndex = &defaultIndex
|
||||
output.Message = "no case matched, using default branch"
|
||||
}
|
||||
|
||||
return output, nil
|
||||
}
|
||||
|
||||
// evaluateCases 评估所有分支条件
|
||||
// 返回匹配的分支索引和分支名称,如果没有匹配返回 -1
|
||||
func (c *SwitchConfig) evaluateCases(wfCtx *models.WorkflowContext) (int, string, error) {
|
||||
for i := range c.Cases {
|
||||
matched, err := c.evaluateCaseCondition(&c.Cases[i], wfCtx)
|
||||
if err != nil {
|
||||
return -1, "", fmt.Errorf("case[%d] evaluation error: %v", i, err)
|
||||
}
|
||||
if matched {
|
||||
return i, c.Cases[i].Name, nil
|
||||
}
|
||||
}
|
||||
return -1, "", nil
|
||||
}
|
||||
|
||||
// evaluateCaseCondition 评估单个分支条件
|
||||
func (c *SwitchConfig) evaluateCaseCondition(caseItem *SwitchCase, wfCtx *models.WorkflowContext) (bool, error) {
|
||||
mode := caseItem.Mode
|
||||
if mode == "" {
|
||||
mode = ConditionModeExpression // 默认表达式模式
|
||||
}
|
||||
|
||||
switch mode {
|
||||
case ConditionModeTags:
|
||||
return c.evaluateTagsCondition(caseItem, wfCtx.Event)
|
||||
default:
|
||||
return c.evaluateExpressionCondition(caseItem.Condition, wfCtx)
|
||||
}
|
||||
}
|
||||
|
||||
// evaluateExpressionCondition 评估表达式条件
|
||||
func (c *SwitchConfig) evaluateExpressionCondition(condition string, wfCtx *models.WorkflowContext) (bool, error) {
|
||||
if condition == "" {
|
||||
return false, nil
|
||||
}
|
||||
|
||||
var defs = []string{
|
||||
"{{ $event := .Event }}",
|
||||
"{{ $labels := .Event.TagsMap }}",
|
||||
"{{ $value := .Event.TriggerValue }}",
|
||||
"{{ $inputs := .Inputs }}",
|
||||
}
|
||||
|
||||
text := strings.Join(append(defs, condition), "")
|
||||
|
||||
tpl, err := template.New("switch_condition").Funcs(tplx.TemplateFuncMap).Parse(text)
|
||||
if err != nil {
|
||||
return false, err
|
||||
}
|
||||
|
||||
var buf bytes.Buffer
|
||||
if err = tpl.Execute(&buf, wfCtx); err != nil {
|
||||
return false, err
|
||||
}
|
||||
|
||||
result := strings.TrimSpace(strings.ToLower(buf.String()))
|
||||
return result == "true" || result == "1", nil
|
||||
}
|
||||
|
||||
// evaluateTagsCondition 评估标签/属性条件
|
||||
func (c *SwitchConfig) evaluateTagsCondition(caseItem *SwitchCase, event *models.AlertCurEvent) (bool, error) {
|
||||
// 如果没有配置任何过滤条件,默认返回 false(不匹配)
|
||||
if len(caseItem.parsedLabelKeys) == 0 && len(caseItem.parsedAttributes) == 0 {
|
||||
return false, nil
|
||||
}
|
||||
|
||||
// 匹配标签 (TagsMap)
|
||||
if len(caseItem.parsedLabelKeys) > 0 {
|
||||
tagsMap := event.TagsMap
|
||||
if tagsMap == nil {
|
||||
tagsMap = make(map[string]string)
|
||||
}
|
||||
if !alertCommon.MatchTags(tagsMap, caseItem.parsedLabelKeys) {
|
||||
return false, nil
|
||||
}
|
||||
}
|
||||
|
||||
// 匹配属性 (JsonTagsAndValue - 所有 JSON 字段)
|
||||
if len(caseItem.parsedAttributes) > 0 {
|
||||
attributesMap := event.JsonTagsAndValue()
|
||||
if !alertCommon.MatchTags(attributesMap, caseItem.parsedAttributes) {
|
||||
return false, nil
|
||||
}
|
||||
}
|
||||
|
||||
return true, nil
|
||||
}
|
||||
@@ -42,7 +42,7 @@ func (r *RelabelConfig) Init(settings interface{}) (models.Processor, error) {
|
||||
return result, err
|
||||
}
|
||||
|
||||
func (r *RelabelConfig) Process(ctx *ctx.Context, wfCtx *models.WorkflowContext) (*models.WorkflowContext, string, error) {
|
||||
func (r *RelabelConfig) Process(ctx *ctx.Context, event *models.AlertCurEvent) (*models.AlertCurEvent, string, error) {
|
||||
sourceLabels := make([]model.LabelName, len(r.SourceLabels))
|
||||
for i := range r.SourceLabels {
|
||||
sourceLabels[i] = model.LabelName(strings.ReplaceAll(r.SourceLabels[i], ".", REPLACE_DOT))
|
||||
@@ -63,8 +63,8 @@ func (r *RelabelConfig) Process(ctx *ctx.Context, wfCtx *models.WorkflowContext)
|
||||
},
|
||||
}
|
||||
|
||||
EventRelabel(wfCtx.Event, relabelConfigs)
|
||||
return wfCtx, "", nil
|
||||
EventRelabel(event, relabelConfigs)
|
||||
return event, "", nil
|
||||
}
|
||||
|
||||
func EventRelabel(event *models.AlertCurEvent, relabelConfigs []*pconf.RelabelConfig) {
|
||||
|
||||
@@ -1,32 +0,0 @@
|
||||
package utils
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"fmt"
|
||||
"strings"
|
||||
"text/template"
|
||||
|
||||
"github.com/ccfos/nightingale/v6/models"
|
||||
"github.com/ccfos/nightingale/v6/pkg/tplx"
|
||||
)
|
||||
|
||||
func TplRender(wfCtx *models.WorkflowContext, content string) (string, error) {
|
||||
var defs = []string{
|
||||
"{{ $event := .Event }}",
|
||||
"{{ $labels := .Event.TagsMap }}",
|
||||
"{{ $value := .Event.TriggerValue }}",
|
||||
"{{ $inputs := .Inputs }}",
|
||||
}
|
||||
text := strings.Join(append(defs, content), "")
|
||||
tpl, err := template.New("tpl").Funcs(tplx.TemplateFuncMap).Parse(text)
|
||||
if err != nil {
|
||||
return "", fmt.Errorf("failed to parse template: %v", err)
|
||||
}
|
||||
|
||||
var body bytes.Buffer
|
||||
if err = tpl.Execute(&body, wfCtx); err != nil {
|
||||
return "", fmt.Errorf("failed to execute template: %v", err)
|
||||
}
|
||||
|
||||
return strings.TrimSpace(body.String()), nil
|
||||
}
|
||||
@@ -131,7 +131,7 @@ func (p *Processor) Handle(anomalyPoints []models.AnomalyPoint, from string, inh
|
||||
p.inhibit = inhibit
|
||||
cachedRule := p.alertRuleCache.Get(p.rule.Id)
|
||||
if cachedRule == nil {
|
||||
logger.Warningf("alert_eval_%d datasource_%d handle error: rule not found, maybe rule has been deleted, anomalyPoints:%+v", p.rule.Id, p.datasourceId, anomalyPoints)
|
||||
logger.Warningf("process handle error: rule not found %+v rule_id:%d maybe rule has been deleted", anomalyPoints, p.rule.Id)
|
||||
p.Stats.CounterRuleEvalErrorTotal.WithLabelValues(fmt.Sprintf("%v", p.DatasourceId()), "handle_event", p.BusiGroupCache.GetNameByBusiGroupId(p.rule.GroupId), fmt.Sprintf("%v", p.rule.Id)).Inc()
|
||||
return
|
||||
}
|
||||
@@ -156,14 +156,14 @@ func (p *Processor) Handle(anomalyPoints []models.AnomalyPoint, from string, inh
|
||||
eventCopy := event.DeepCopy()
|
||||
event = dispatch.HandleEventPipeline(cachedRule.PipelineConfigs, eventCopy, event, dispatch.EventProcessorCache, p.ctx, cachedRule.Id, "alert_rule")
|
||||
if event == nil {
|
||||
logger.Infof("alert_eval_%d datasource_%d is muted drop by pipeline event:%s", p.rule.Id, p.datasourceId, eventCopy.Hash)
|
||||
logger.Infof("rule_eval:%s is muted drop by pipeline event:%v", p.Key(), eventCopy)
|
||||
continue
|
||||
}
|
||||
|
||||
// event mute
|
||||
isMuted, detail, muteId := mute.IsMuted(cachedRule, event, p.TargetCache, p.alertMuteCache)
|
||||
if isMuted {
|
||||
logger.Infof("alert_eval_%d datasource_%d is muted, detail:%s event:%s", p.rule.Id, p.datasourceId, detail, event.Hash)
|
||||
logger.Infof("rule_eval:%s is muted, detail:%s event:%v", p.Key(), detail, event)
|
||||
p.Stats.CounterMuteTotal.WithLabelValues(
|
||||
fmt.Sprintf("%v", event.GroupName),
|
||||
fmt.Sprintf("%v", p.rule.Id),
|
||||
@@ -174,7 +174,7 @@ func (p *Processor) Handle(anomalyPoints []models.AnomalyPoint, from string, inh
|
||||
}
|
||||
|
||||
if dispatch.EventMuteHook(event) {
|
||||
logger.Infof("alert_eval_%d datasource_%d is muted by hook event:%s", p.rule.Id, p.datasourceId, event.Hash)
|
||||
logger.Infof("rule_eval:%s is muted by hook event:%v", p.Key(), event)
|
||||
p.Stats.CounterMuteTotal.WithLabelValues(
|
||||
fmt.Sprintf("%v", event.GroupName),
|
||||
fmt.Sprintf("%v", p.rule.Id),
|
||||
@@ -247,7 +247,7 @@ func (p *Processor) BuildEvent(anomalyPoint models.AnomalyPoint, from string, no
|
||||
|
||||
if err := json.Unmarshal([]byte(p.rule.Annotations), &event.AnnotationsJSON); err != nil {
|
||||
event.AnnotationsJSON = make(map[string]string) // 解析失败时使用空 map
|
||||
logger.Warningf("alert_eval_%d datasource_%d unmarshal annotations json failed: %v", p.rule.Id, p.datasourceId, err)
|
||||
logger.Warningf("unmarshal annotations json failed: %v, rule: %d", err, p.rule.Id)
|
||||
}
|
||||
|
||||
if event.TriggerValues != "" && strings.Count(event.TriggerValues, "$") > 1 {
|
||||
@@ -272,7 +272,7 @@ func (p *Processor) BuildEvent(anomalyPoint models.AnomalyPoint, from string, no
|
||||
pt.GroupNames = p.BusiGroupCache.GetNamesByBusiGroupIds(pt.GroupIds)
|
||||
event.Target = pt
|
||||
} else {
|
||||
logger.Infof("alert_eval_%d datasource_%d fill event target error, ident: %s doesn't exist in cache.", p.rule.Id, p.datasourceId, event.TargetIdent)
|
||||
logger.Infof("fill event target error, ident: %s doesn't exist in cache.", event.TargetIdent)
|
||||
}
|
||||
}
|
||||
|
||||
@@ -371,19 +371,19 @@ func (p *Processor) RecoverSingle(byRecover bool, hash string, now int64, value
|
||||
lastPendingEvent, has := p.pendingsUseByRecover.Get(hash)
|
||||
if !has {
|
||||
// 说明没有产生过异常点,就不需要恢复了
|
||||
logger.Debugf("alert_eval_%d datasource_%d event:%s do not has pending event, not recover", p.rule.Id, p.datasourceId, event.Hash)
|
||||
logger.Debugf("rule_eval:%s event:%v do not has pending event, not recover", p.Key(), event)
|
||||
return
|
||||
}
|
||||
|
||||
if now-lastPendingEvent.LastEvalTime < cachedRule.RecoverDuration {
|
||||
logger.Debugf("alert_eval_%d datasource_%d event:%s not recover", p.rule.Id, p.datasourceId, event.Hash)
|
||||
logger.Debugf("rule_eval:%s event:%v not recover", p.Key(), event)
|
||||
return
|
||||
}
|
||||
}
|
||||
|
||||
// 如果设置了恢复条件,则不能在此处恢复,必须依靠 recoverPoint 来恢复
|
||||
if event.RecoverConfig.JudgeType != models.Origin && !byRecover {
|
||||
logger.Debugf("alert_eval_%d datasource_%d event:%s not recover", p.rule.Id, p.datasourceId, event.Hash)
|
||||
logger.Debugf("rule_eval:%s event:%v not recover", p.Key(), event)
|
||||
return
|
||||
}
|
||||
|
||||
@@ -460,7 +460,7 @@ func (p *Processor) handleEvent(events []*models.AlertCurEvent) {
|
||||
func (p *Processor) inhibitEvent(events []*models.AlertCurEvent, highSeverity int) {
|
||||
for _, event := range events {
|
||||
if p.inhibit && event.Severity > highSeverity {
|
||||
logger.Debugf("alert_eval_%d datasource_%d event:%s inhibit highSeverity:%d", p.rule.Id, p.datasourceId, event.Hash, highSeverity)
|
||||
logger.Debugf("rule_eval:%s event:%+v inhibit highSeverity:%d", p.Key(), event, highSeverity)
|
||||
continue
|
||||
}
|
||||
p.fireEvent(event)
|
||||
@@ -476,7 +476,7 @@ func (p *Processor) fireEvent(event *models.AlertCurEvent) {
|
||||
|
||||
message := "unknown"
|
||||
defer func() {
|
||||
logger.Infof("alert_eval_%d datasource_%d event-hash-%s %s", p.rule.Id, p.datasourceId, event.Hash, message)
|
||||
logger.Infof("rule_eval:%s event-hash-%s %s", p.Key(), event.Hash, message)
|
||||
}()
|
||||
|
||||
if fired, has := p.fires.Get(event.Hash); has {
|
||||
@@ -527,7 +527,7 @@ func (p *Processor) pushEventToQueue(e *models.AlertCurEvent) {
|
||||
|
||||
dispatch.LogEvent(e, "push_queue")
|
||||
if !queue.EventQueue.PushFront(e) {
|
||||
logger.Warningf("alert_eval_%d datasource_%d event_push_queue: queue is full, event:%s", p.rule.Id, p.datasourceId, e.Hash)
|
||||
logger.Warningf("event_push_queue: queue is full, event:%+v", e)
|
||||
p.Stats.CounterRuleEvalErrorTotal.WithLabelValues(fmt.Sprintf("%v", p.DatasourceId()), "push_event_queue", p.BusiGroupCache.GetNameByBusiGroupId(p.rule.GroupId), fmt.Sprintf("%v", p.rule.Id)).Inc()
|
||||
}
|
||||
}
|
||||
@@ -538,7 +538,7 @@ func (p *Processor) RecoverAlertCurEventFromDb() {
|
||||
|
||||
curEvents, err := models.AlertCurEventGetByRuleIdAndDsId(p.ctx, p.rule.Id, p.datasourceId)
|
||||
if err != nil {
|
||||
logger.Errorf("alert_eval_%d datasource_%d recover event from db failed, err:%s", p.rule.Id, p.datasourceId, err)
|
||||
logger.Errorf("recover event from db for rule:%s failed, err:%s", p.Key(), err)
|
||||
p.Stats.CounterRuleEvalErrorTotal.WithLabelValues(fmt.Sprintf("%v", p.DatasourceId()), "get_recover_event", p.BusiGroupCache.GetNameByBusiGroupId(p.rule.GroupId), fmt.Sprintf("%v", p.rule.Id)).Inc()
|
||||
p.fires = NewAlertCurEventMap(nil)
|
||||
return
|
||||
|
||||
@@ -22,11 +22,10 @@ type Router struct {
|
||||
AlertStats *astats.Stats
|
||||
Ctx *ctx.Context
|
||||
ExternalProcessors *process.ExternalProcessorsType
|
||||
LogDir string
|
||||
}
|
||||
|
||||
func New(httpConfig httpx.Config, alert aconf.Alert, amc *memsto.AlertMuteCacheType, tc *memsto.TargetCacheType, bgc *memsto.BusiGroupCacheType,
|
||||
astats *astats.Stats, ctx *ctx.Context, externalProcessors *process.ExternalProcessorsType, logDir string) *Router {
|
||||
astats *astats.Stats, ctx *ctx.Context, externalProcessors *process.ExternalProcessorsType) *Router {
|
||||
return &Router{
|
||||
HTTP: httpConfig,
|
||||
Alert: alert,
|
||||
@@ -36,7 +35,6 @@ func New(httpConfig httpx.Config, alert aconf.Alert, amc *memsto.AlertMuteCacheT
|
||||
AlertStats: astats,
|
||||
Ctx: ctx,
|
||||
ExternalProcessors: externalProcessors,
|
||||
LogDir: logDir,
|
||||
}
|
||||
}
|
||||
|
||||
@@ -52,9 +50,6 @@ func (rt *Router) Config(r *gin.Engine) {
|
||||
service.POST("/event", rt.pushEventToQueue)
|
||||
service.POST("/event-persist", rt.eventPersist)
|
||||
service.POST("/make-event", rt.makeEvent)
|
||||
service.GET("/event-detail/:hash", rt.eventDetail)
|
||||
service.GET("/alert-eval-detail/:id", rt.alertEvalDetail)
|
||||
service.GET("/trace-logs/:traceid", rt.traceLogs)
|
||||
}
|
||||
|
||||
func Render(c *gin.Context, data, msg interface{}) {
|
||||
|
||||
@@ -1,28 +0,0 @@
|
||||
package router
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
|
||||
"github.com/ccfos/nightingale/v6/pkg/loggrep"
|
||||
"github.com/ccfos/nightingale/v6/pkg/ginx"
|
||||
|
||||
"github.com/gin-gonic/gin"
|
||||
)
|
||||
|
||||
func (rt *Router) alertEvalDetail(c *gin.Context) {
|
||||
id := ginx.UrlParamStr(c, "id")
|
||||
if !loggrep.IsValidRuleID(id) {
|
||||
ginx.Bomb(200, "invalid rule id format")
|
||||
}
|
||||
|
||||
instance := fmt.Sprintf("%s:%d", rt.Alert.Heartbeat.IP, rt.HTTP.Port)
|
||||
|
||||
keyword := fmt.Sprintf("alert_eval_%s", id)
|
||||
logs, err := loggrep.GrepLogDir(rt.LogDir, keyword)
|
||||
ginx.Dangerous(err)
|
||||
|
||||
ginx.NewRender(c).Data(loggrep.EventDetailResp{
|
||||
Logs: logs,
|
||||
Instance: instance,
|
||||
}, nil)
|
||||
}
|
||||
@@ -13,9 +13,9 @@ import (
|
||||
"github.com/ccfos/nightingale/v6/alert/queue"
|
||||
"github.com/ccfos/nightingale/v6/models"
|
||||
"github.com/ccfos/nightingale/v6/pkg/poster"
|
||||
"github.com/ccfos/nightingale/v6/pkg/ginx"
|
||||
|
||||
"github.com/gin-gonic/gin"
|
||||
"github.com/toolkits/pkg/ginx"
|
||||
"github.com/toolkits/pkg/logger"
|
||||
)
|
||||
|
||||
@@ -75,7 +75,7 @@ func (rt *Router) pushEventToQueue(c *gin.Context) {
|
||||
|
||||
dispatch.LogEvent(event, "http_push_queue")
|
||||
if !queue.EventQueue.PushFront(event) {
|
||||
msg := fmt.Sprintf("event:%s push_queue err: queue is full", event.Hash)
|
||||
msg := fmt.Sprintf("event:%+v push_queue err: queue is full", event)
|
||||
ginx.Bomb(200, msg)
|
||||
logger.Warningf(msg)
|
||||
}
|
||||
@@ -105,21 +105,21 @@ func (rt *Router) makeEvent(c *gin.Context) {
|
||||
for i := 0; i < len(events); i++ {
|
||||
node, err := naming.DatasourceHashRing.GetNode(strconv.FormatInt(events[i].DatasourceId, 10), fmt.Sprintf("%d", events[i].RuleId))
|
||||
if err != nil {
|
||||
logger.Warningf("event(rule_id=%d ds_id=%d) get node err:%v", events[i].RuleId, events[i].DatasourceId, err)
|
||||
logger.Warningf("event:%+v get node err:%v", events[i], err)
|
||||
ginx.Bomb(200, "event node not exists")
|
||||
}
|
||||
|
||||
if node != rt.Alert.Heartbeat.Endpoint {
|
||||
err := forwardEvent(events[i], node)
|
||||
if err != nil {
|
||||
logger.Warningf("event(rule_id=%d ds_id=%d) forward err:%v", events[i].RuleId, events[i].DatasourceId, err)
|
||||
logger.Warningf("event:%+v forward err:%v", events[i], err)
|
||||
ginx.Bomb(200, "event forward error")
|
||||
}
|
||||
continue
|
||||
}
|
||||
|
||||
ruleWorker, exists := rt.ExternalProcessors.GetExternalAlertRule(events[i].DatasourceId, events[i].RuleId)
|
||||
logger.Debugf("handle event(rule_id=%d ds_id=%d) exists:%v", events[i].RuleId, events[i].DatasourceId, exists)
|
||||
logger.Debugf("handle event:%+v exists:%v", events[i], exists)
|
||||
if !exists {
|
||||
ginx.Bomb(200, "rule not exists")
|
||||
}
|
||||
@@ -143,6 +143,6 @@ func forwardEvent(event *eventForm, instance string) error {
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
logger.Infof("forward event: result=succ url=%s code=%d rule_id=%d response=%s", ur, code, event.RuleId, string(res))
|
||||
logger.Infof("forward event: result=succ url=%s code=%d event:%v response=%s", ur, code, event, string(res))
|
||||
return nil
|
||||
}
|
||||
|
||||
@@ -1,27 +0,0 @@
|
||||
package router
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
|
||||
"github.com/ccfos/nightingale/v6/pkg/loggrep"
|
||||
"github.com/ccfos/nightingale/v6/pkg/ginx"
|
||||
|
||||
"github.com/gin-gonic/gin"
|
||||
)
|
||||
|
||||
func (rt *Router) eventDetail(c *gin.Context) {
|
||||
hash := ginx.UrlParamStr(c, "hash")
|
||||
if !loggrep.IsValidHash(hash) {
|
||||
ginx.Bomb(200, "invalid hash format")
|
||||
}
|
||||
|
||||
instance := fmt.Sprintf("%s:%d", rt.Alert.Heartbeat.IP, rt.HTTP.Port)
|
||||
|
||||
logs, err := loggrep.GrepLogDir(rt.LogDir, hash)
|
||||
ginx.Dangerous(err)
|
||||
|
||||
ginx.NewRender(c).Data(loggrep.EventDetailResp{
|
||||
Logs: logs,
|
||||
Instance: instance,
|
||||
}, nil)
|
||||
}
|
||||
@@ -1,28 +0,0 @@
|
||||
package router
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
|
||||
"github.com/ccfos/nightingale/v6/pkg/ginx"
|
||||
"github.com/ccfos/nightingale/v6/pkg/loggrep"
|
||||
|
||||
"github.com/gin-gonic/gin"
|
||||
)
|
||||
|
||||
func (rt *Router) traceLogs(c *gin.Context) {
|
||||
traceId := ginx.UrlParamStr(c, "traceid")
|
||||
if !loggrep.IsValidTraceID(traceId) {
|
||||
ginx.Bomb(200, "invalid trace id format")
|
||||
}
|
||||
|
||||
instance := fmt.Sprintf("%s:%d", rt.Alert.Heartbeat.IP, rt.HTTP.Port)
|
||||
|
||||
keyword := "trace_id=" + traceId
|
||||
logs, err := loggrep.GrepLatestLogFiles(rt.LogDir, keyword)
|
||||
ginx.Dangerous(err)
|
||||
|
||||
ginx.NewRender(c).Data(loggrep.EventDetailResp{
|
||||
Logs: logs,
|
||||
Instance: instance,
|
||||
}, nil)
|
||||
}
|
||||
@@ -135,7 +135,9 @@ func (c *DefaultCallBacker) CallBack(ctx CallBackContext) {
|
||||
|
||||
func doSendAndRecord(ctx *ctx.Context, url, token string, body interface{}, channel string,
|
||||
stats *astats.Stats, events []*models.AlertCurEvent) {
|
||||
start := time.Now()
|
||||
res, err := doSend(url, body, channel, stats)
|
||||
res = fmt.Sprintf("duration: %d ms %s", time.Since(start).Milliseconds(), res)
|
||||
NotifyRecord(ctx, events, 0, channel, token, res, err)
|
||||
}
|
||||
|
||||
@@ -169,11 +171,11 @@ func doSend(url string, body interface{}, channel string, stats *astats.Stats) (
|
||||
|
||||
start := time.Now()
|
||||
res, code, err := poster.PostJSON(url, time.Second*5, body, 3)
|
||||
res = []byte(fmt.Sprintf("duration: %d ms status_code:%d, response:%s", time.Since(start).Milliseconds(), code, string(res)))
|
||||
res = []byte(fmt.Sprintf("duration: %d ms %s", time.Since(start).Milliseconds(), res))
|
||||
if err != nil {
|
||||
logger.Errorf("%s_sender: result=fail url=%s code=%d error=%v req:%v response=%s", channel, url, code, err, body, string(res))
|
||||
stats.AlertNotifyErrorTotal.WithLabelValues(channel).Inc()
|
||||
return string(res), err
|
||||
return "", err
|
||||
}
|
||||
|
||||
logger.Infof("%s_sender: result=succ url=%s code=%d req:%v response=%s", channel, url, code, body, string(res))
|
||||
@@ -205,6 +207,6 @@ func PushCallbackEvent(ctx *ctx.Context, webhook *models.Webhook, event *models.
|
||||
|
||||
succ := queue.eventQueue.Push(event)
|
||||
if !succ {
|
||||
logger.Warningf("Write channel(%s) full, current channel size: %d event:%s", webhook.Url, queue.eventQueue.Len(), event.Hash)
|
||||
logger.Warningf("Write channel(%s) full, current channel size: %d event:%v", webhook.Url, queue.eventQueue.Len(), event)
|
||||
}
|
||||
}
|
||||
|
||||
@@ -30,14 +30,14 @@ type IbexCallBacker struct {
|
||||
|
||||
func (c *IbexCallBacker) CallBack(ctx CallBackContext) {
|
||||
if len(ctx.CallBackURL) == 0 || len(ctx.Events) == 0 {
|
||||
logger.Warningf("event_callback_ibex: url or events is empty, url: %s", ctx.CallBackURL)
|
||||
logger.Warningf("event_callback_ibex: url or events is empty, url: %s, events: %+v", ctx.CallBackURL, ctx.Events)
|
||||
return
|
||||
}
|
||||
|
||||
event := ctx.Events[0]
|
||||
|
||||
if event.IsRecovered {
|
||||
logger.Infof("event_callback_ibex: event is recovered, event: %s", event.Hash)
|
||||
logger.Infof("event_callback_ibex: event is recovered, event: %+v", event)
|
||||
return
|
||||
}
|
||||
|
||||
@@ -45,9 +45,9 @@ func (c *IbexCallBacker) CallBack(ctx CallBackContext) {
|
||||
}
|
||||
|
||||
func (c *IbexCallBacker) handleIbex(ctx *ctx.Context, url string, event *models.AlertCurEvent) {
|
||||
logger.Infof("event_callback_ibex: url: %s, event: %s", url, event.Hash)
|
||||
logger.Infof("event_callback_ibex: url: %s, event: %+v", url, event)
|
||||
if imodels.DB() == nil && ctx.IsCenter {
|
||||
logger.Warningf("event_callback_ibex: db is nil, event: %s", event.Hash)
|
||||
logger.Warningf("event_callback_ibex: db is nil, event: %+v", event)
|
||||
return
|
||||
}
|
||||
|
||||
@@ -66,7 +66,7 @@ func (c *IbexCallBacker) handleIbex(ctx *ctx.Context, url string, event *models.
|
||||
|
||||
id, err := strconv.ParseInt(idstr, 10, 64)
|
||||
if err != nil {
|
||||
logger.Errorf("event_callback_ibex: failed to parse url: %s event: %s", url, event.Hash)
|
||||
logger.Errorf("event_callback_ibex: failed to parse url: %s event: %+v", url, event)
|
||||
return
|
||||
}
|
||||
|
||||
@@ -82,37 +82,34 @@ func (c *IbexCallBacker) handleIbex(ctx *ctx.Context, url string, event *models.
|
||||
}
|
||||
|
||||
if host == "" {
|
||||
logger.Errorf("event_callback_ibex: failed to get host, id: %d, event: %s", id, event.Hash)
|
||||
logger.Errorf("event_callback_ibex: failed to get host, id: %d, event: %+v", id, event)
|
||||
return
|
||||
}
|
||||
|
||||
CallIbex(ctx, id, host, c.taskTplCache, c.targetCache, c.userCache, event, "")
|
||||
CallIbex(ctx, id, host, c.taskTplCache, c.targetCache, c.userCache, event)
|
||||
}
|
||||
|
||||
func CallIbex(ctx *ctx.Context, id int64, host string,
|
||||
taskTplCache *memsto.TaskTplCache, targetCache *memsto.TargetCacheType,
|
||||
userCache *memsto.UserCacheType, event *models.AlertCurEvent, args string) (int64, error) {
|
||||
logger.Infof("event_callback_ibex: id: %d, host: %s, args: %s, event: %s", id, host, args, event.Hash)
|
||||
userCache *memsto.UserCacheType, event *models.AlertCurEvent) {
|
||||
logger.Infof("event_callback_ibex: id: %d, host: %s, event: %+v", id, host, event)
|
||||
|
||||
tpl := taskTplCache.Get(id)
|
||||
if tpl == nil {
|
||||
err := fmt.Errorf("event_callback_ibex: no such tpl(%d), event: %s", id, event.Hash)
|
||||
logger.Errorf("%s", err)
|
||||
return 0, err
|
||||
logger.Errorf("event_callback_ibex: no such tpl(%d), event: %+v", id, event)
|
||||
return
|
||||
}
|
||||
// check perm
|
||||
// tpl.GroupId - host - account 三元组校验权限
|
||||
can, err := CanDoIbex(tpl.UpdateBy, tpl, host, targetCache, userCache)
|
||||
can, err := canDoIbex(tpl.UpdateBy, tpl, host, targetCache, userCache)
|
||||
if err != nil {
|
||||
err = fmt.Errorf("event_callback_ibex: check perm fail: %v, event: %s", err, event.Hash)
|
||||
logger.Errorf("%s", err)
|
||||
return 0, err
|
||||
logger.Errorf("event_callback_ibex: check perm fail: %v, event: %+v", err, event)
|
||||
return
|
||||
}
|
||||
|
||||
if !can {
|
||||
err = fmt.Errorf("event_callback_ibex: user(%s) no permission, event: %s", tpl.UpdateBy, event.Hash)
|
||||
logger.Errorf("%s", err)
|
||||
return 0, err
|
||||
logger.Errorf("event_callback_ibex: user(%s) no permission, event: %+v", tpl.UpdateBy, event)
|
||||
return
|
||||
}
|
||||
|
||||
tagsMap := make(map[string]string)
|
||||
@@ -136,16 +133,11 @@ func CallIbex(ctx *ctx.Context, id int64, host string,
|
||||
|
||||
tags, err := json.Marshal(tagsMap)
|
||||
if err != nil {
|
||||
err = fmt.Errorf("event_callback_ibex: failed to marshal tags to json: %v, event: %s", tagsMap, event.Hash)
|
||||
logger.Errorf("%s", err)
|
||||
return 0, err
|
||||
logger.Errorf("event_callback_ibex: failed to marshal tags to json: %v, event: %+v", tagsMap, event)
|
||||
return
|
||||
}
|
||||
|
||||
// call ibex
|
||||
taskArgs := tpl.Args
|
||||
if args != "" {
|
||||
taskArgs = args
|
||||
}
|
||||
in := models.TaskForm{
|
||||
Title: tpl.Title + " FH: " + host,
|
||||
Account: tpl.Account,
|
||||
@@ -154,7 +146,7 @@ func CallIbex(ctx *ctx.Context, id int64, host string,
|
||||
Timeout: tpl.Timeout,
|
||||
Pause: tpl.Pause,
|
||||
Script: tpl.Script,
|
||||
Args: taskArgs,
|
||||
Args: tpl.Args,
|
||||
Stdin: string(tags),
|
||||
Action: "start",
|
||||
Creator: tpl.UpdateBy,
|
||||
@@ -164,9 +156,8 @@ func CallIbex(ctx *ctx.Context, id int64, host string,
|
||||
|
||||
id, err = TaskAdd(in, tpl.UpdateBy, ctx.IsCenter)
|
||||
if err != nil {
|
||||
err = fmt.Errorf("event_callback_ibex: call ibex fail: %v, event: %s", err, event.Hash)
|
||||
logger.Errorf("%s", err)
|
||||
return 0, err
|
||||
logger.Errorf("event_callback_ibex: call ibex fail: %v, event: %+v", err, event)
|
||||
return
|
||||
}
|
||||
|
||||
// write db
|
||||
@@ -187,14 +178,11 @@ func CallIbex(ctx *ctx.Context, id int64, host string,
|
||||
}
|
||||
|
||||
if err = record.Add(ctx); err != nil {
|
||||
err = fmt.Errorf("event_callback_ibex: persist task_record fail: %v, event: %s", err, event.Hash)
|
||||
logger.Errorf("%s", err)
|
||||
return id, err
|
||||
logger.Errorf("event_callback_ibex: persist task_record fail: %v, event: %+v", err, event)
|
||||
}
|
||||
return id, nil
|
||||
}
|
||||
|
||||
func CanDoIbex(username string, tpl *models.TaskTpl, host string, targetCache *memsto.TargetCacheType, userCache *memsto.UserCacheType) (bool, error) {
|
||||
func canDoIbex(username string, tpl *models.TaskTpl, host string, targetCache *memsto.TargetCacheType, userCache *memsto.UserCacheType) (bool, error) {
|
||||
user := userCache.GetByUsername(username)
|
||||
if user != nil && user.IsAdmin() {
|
||||
return true, nil
|
||||
|
||||
@@ -89,7 +89,7 @@ func alertingCallScript(ctx *ctx.Context, stdinBytes []byte, notifyScript models
|
||||
err, isTimeout := sys.WrapTimeout(cmd, time.Duration(config.Timeout)*time.Second)
|
||||
|
||||
res := buf.String()
|
||||
res = fmt.Sprintf("send_time: %s duration: %d ms %s", time.Now().Format("2006-01-02 15:04:05"), time.Since(start).Milliseconds(), res)
|
||||
res = fmt.Sprintf("duration: %d ms %s", time.Since(start).Milliseconds(), res)
|
||||
|
||||
// 截断超出长度的输出
|
||||
if len(res) > 512 {
|
||||
|
||||
@@ -13,53 +13,10 @@ import (
|
||||
"github.com/ccfos/nightingale/v6/alert/astats"
|
||||
"github.com/ccfos/nightingale/v6/models"
|
||||
"github.com/ccfos/nightingale/v6/pkg/ctx"
|
||||
"github.com/ccfos/nightingale/v6/pkg/poster"
|
||||
|
||||
"github.com/toolkits/pkg/logger"
|
||||
)
|
||||
|
||||
// webhookClientCache 缓存 http.Client,避免每次请求都创建新的 Client 导致连接泄露
|
||||
var webhookClientCache sync.Map // key: clientKey (string), value: *http.Client
|
||||
|
||||
// 相同配置的 webhook 会复用同一个 Client
|
||||
func getWebhookClient(webhook *models.Webhook) *http.Client {
|
||||
clientKey := webhook.Hash()
|
||||
|
||||
if client, ok := webhookClientCache.Load(clientKey); ok {
|
||||
return client.(*http.Client)
|
||||
}
|
||||
|
||||
// 创建新的 Client
|
||||
transport := &http.Transport{
|
||||
TLSClientConfig: &tls.Config{InsecureSkipVerify: webhook.SkipVerify},
|
||||
MaxIdleConns: 100,
|
||||
MaxIdleConnsPerHost: 10,
|
||||
IdleConnTimeout: 90 * time.Second,
|
||||
}
|
||||
|
||||
if poster.UseProxy(webhook.Url) {
|
||||
transport.Proxy = http.ProxyFromEnvironment
|
||||
}
|
||||
|
||||
timeout := webhook.Timeout
|
||||
if timeout <= 0 {
|
||||
timeout = 10
|
||||
}
|
||||
|
||||
newClient := &http.Client{
|
||||
Timeout: time.Duration(timeout) * time.Second,
|
||||
Transport: transport,
|
||||
}
|
||||
|
||||
// 使用 LoadOrStore 确保并发安全,避免重复创建
|
||||
actual, loaded := webhookClientCache.LoadOrStore(clientKey, newClient)
|
||||
if loaded {
|
||||
return actual.(*http.Client)
|
||||
}
|
||||
|
||||
return newClient
|
||||
}
|
||||
|
||||
func sendWebhook(webhook *models.Webhook, event interface{}, stats *astats.Stats) (bool, string, error) {
|
||||
channel := "webhook"
|
||||
if webhook.Type == models.RuleCallback {
|
||||
@@ -72,7 +29,7 @@ func sendWebhook(webhook *models.Webhook, event interface{}, stats *astats.Stats
|
||||
}
|
||||
bs, err := json.Marshal(event)
|
||||
if err != nil {
|
||||
logger.Errorf("%s alertingWebhook failed to marshal event err:%v", channel, err)
|
||||
logger.Errorf("%s alertingWebhook failed to marshal event:%+v err:%v", channel, event, err)
|
||||
return false, "", err
|
||||
}
|
||||
|
||||
@@ -98,13 +55,25 @@ func sendWebhook(webhook *models.Webhook, event interface{}, stats *astats.Stats
|
||||
req.Header.Set(conf.Headers[i], conf.Headers[i+1])
|
||||
}
|
||||
}
|
||||
// 使用全局 Client 缓存,避免每次请求都创建新的 Client 导致连接泄露
|
||||
client := getWebhookClient(conf)
|
||||
insecureSkipVerify := false
|
||||
if webhook != nil {
|
||||
insecureSkipVerify = webhook.SkipVerify
|
||||
}
|
||||
|
||||
if conf.Client == nil {
|
||||
logger.Warningf("event_%s, event:%s, url: [%s], error: [%s]", channel, string(bs), conf.Url, "client is nil")
|
||||
conf.Client = &http.Client{
|
||||
Timeout: time.Duration(conf.Timeout) * time.Second,
|
||||
Transport: &http.Transport{
|
||||
TLSClientConfig: &tls.Config{InsecureSkipVerify: insecureSkipVerify},
|
||||
},
|
||||
}
|
||||
}
|
||||
|
||||
stats.AlertNotifyTotal.WithLabelValues(channel).Inc()
|
||||
var resp *http.Response
|
||||
var body []byte
|
||||
resp, err = client.Do(req)
|
||||
resp, err = conf.Client.Do(req)
|
||||
|
||||
if err != nil {
|
||||
stats.AlertNotifyErrorTotal.WithLabelValues(channel).Inc()
|
||||
@@ -119,11 +88,11 @@ func sendWebhook(webhook *models.Webhook, event interface{}, stats *astats.Stats
|
||||
|
||||
if resp.StatusCode == 429 {
|
||||
logger.Errorf("event_%s_fail, url: %s, response code: %d, body: %s event:%s", channel, conf.Url, resp.StatusCode, string(body), string(bs))
|
||||
return true, fmt.Sprintf("status_code:%d, response:%s", resp.StatusCode, string(body)), fmt.Errorf("status code is 429")
|
||||
return true, string(body), fmt.Errorf("status code is 429")
|
||||
}
|
||||
|
||||
logger.Debugf("event_%s_succ, url: %s, response code: %d, body: %s event:%s", channel, conf.Url, resp.StatusCode, string(body), string(bs))
|
||||
return false, fmt.Sprintf("status_code:%d, response:%s", resp.StatusCode, string(body)), nil
|
||||
return false, string(body), nil
|
||||
}
|
||||
|
||||
func SingleSendWebhooks(ctx *ctx.Context, webhooks map[string]*models.Webhook, event *models.AlertCurEvent, stats *astats.Stats) {
|
||||
@@ -132,7 +101,7 @@ func SingleSendWebhooks(ctx *ctx.Context, webhooks map[string]*models.Webhook, e
|
||||
for retryCount < 3 {
|
||||
start := time.Now()
|
||||
needRetry, res, err := sendWebhook(conf, event, stats)
|
||||
res = fmt.Sprintf("send_time: %s duration: %d ms %s", time.Now().Format("2006-01-02 15:04:05"), time.Since(start).Milliseconds(), res)
|
||||
res = fmt.Sprintf("duration: %d ms %s", time.Since(start).Milliseconds(), res)
|
||||
NotifyRecord(ctx, []*models.AlertCurEvent{event}, 0, "webhook", conf.Url, res, err)
|
||||
if !needRetry {
|
||||
break
|
||||
@@ -145,7 +114,7 @@ func SingleSendWebhooks(ctx *ctx.Context, webhooks map[string]*models.Webhook, e
|
||||
|
||||
func BatchSendWebhooks(ctx *ctx.Context, webhooks map[string]*models.Webhook, event *models.AlertCurEvent, stats *astats.Stats) {
|
||||
for _, conf := range webhooks {
|
||||
logger.Infof("push event:%s to queue:%v", event.Hash, conf)
|
||||
logger.Infof("push event:%+v to queue:%v", event, conf)
|
||||
PushEvent(ctx, conf, event, stats)
|
||||
}
|
||||
}
|
||||
@@ -183,7 +152,7 @@ func PushEvent(ctx *ctx.Context, webhook *models.Webhook, event *models.AlertCur
|
||||
succ := queue.eventQueue.Push(event)
|
||||
if !succ {
|
||||
stats.AlertNotifyErrorTotal.WithLabelValues("push_event_queue").Inc()
|
||||
logger.Warningf("Write channel(%s) full, current channel size: %d event:%s", webhook.Url, queue.eventQueue.Len(), event.Hash)
|
||||
logger.Warningf("Write channel(%s) full, current channel size: %d event:%v", webhook.Url, queue.eventQueue.Len(), event)
|
||||
}
|
||||
}
|
||||
|
||||
@@ -204,7 +173,7 @@ func StartConsumer(ctx *ctx.Context, queue *WebhookQueue, popSize int, webhook *
|
||||
for retryCount < webhook.RetryCount {
|
||||
start := time.Now()
|
||||
needRetry, res, err := sendWebhook(webhook, events, stats)
|
||||
res = fmt.Sprintf("send_time: %s duration: %d ms %s", time.Now().Format("2006-01-02 15:04:05"), time.Since(start).Milliseconds(), res)
|
||||
res = fmt.Sprintf("duration: %d ms %s", time.Since(start).Milliseconds(), res)
|
||||
go NotifyRecord(ctx, events, 0, "webhook", webhook.Url, res, err)
|
||||
if !needRetry {
|
||||
break
|
||||
|
||||
@@ -1,26 +1,20 @@
|
||||
package cconf
|
||||
|
||||
import (
|
||||
"time"
|
||||
|
||||
"github.com/ccfos/nightingale/v6/pkg/httpx"
|
||||
)
|
||||
import "time"
|
||||
|
||||
type Center struct {
|
||||
Plugins []Plugin
|
||||
MetricsYamlFile string
|
||||
OpsYamlFile string
|
||||
BuiltinIntegrationsDir string
|
||||
I18NHeaderKey string
|
||||
MetricDesc MetricDescType
|
||||
AnonymousAccess AnonymousAccess
|
||||
UseFileAssets bool
|
||||
FlashDuty FlashDuty
|
||||
EventHistoryGroupView bool
|
||||
CleanNotifyRecordDay int
|
||||
CleanPipelineExecutionDay int
|
||||
MigrateBusiGroupLabel bool
|
||||
RSA httpx.RSAConfig
|
||||
Plugins []Plugin
|
||||
MetricsYamlFile string
|
||||
OpsYamlFile string
|
||||
BuiltinIntegrationsDir string
|
||||
I18NHeaderKey string
|
||||
MetricDesc MetricDescType
|
||||
AnonymousAccess AnonymousAccess
|
||||
UseFileAssets bool
|
||||
FlashDuty FlashDuty
|
||||
EventHistoryGroupView bool
|
||||
CleanNotifyRecordDay int
|
||||
MigrateBusiGroupLabel bool
|
||||
}
|
||||
|
||||
type Plugin struct {
|
||||
|
||||
@@ -55,10 +55,4 @@ var Plugins = []Plugin{
|
||||
Type: "opensearch",
|
||||
TypeName: "OpenSearch",
|
||||
},
|
||||
{
|
||||
Id: 10,
|
||||
Category: "logging",
|
||||
Type: "victorialogs",
|
||||
TypeName: "VictoriaLogs",
|
||||
},
|
||||
}
|
||||
|
||||
@@ -134,12 +134,11 @@ func Initialize(configDir string, cryptoKey string) (func(), error) {
|
||||
go version.GetGithubVersion()
|
||||
|
||||
go cron.CleanNotifyRecord(ctx, config.Center.CleanNotifyRecordDay)
|
||||
go cron.CleanPipelineExecution(ctx, config.Center.CleanPipelineExecutionDay)
|
||||
|
||||
alertrtRouter := alertrt.New(config.HTTP, config.Alert, alertMuteCache, targetCache, busiGroupCache, alertStats, ctx, externalProcessors, config.Log.Dir)
|
||||
alertrtRouter := alertrt.New(config.HTTP, config.Alert, alertMuteCache, targetCache, busiGroupCache, alertStats, ctx, externalProcessors)
|
||||
centerRouter := centerrt.New(config.HTTP, config.Center, config.Alert, config.Ibex,
|
||||
cconf.Operations, dsCache, notifyConfigCache, promClients,
|
||||
redis, sso, ctx, metas, idents, targetCache, userCache, userGroupCache, userTokenCache, config.Log.Dir)
|
||||
redis, sso, ctx, metas, idents, targetCache, userCache, userGroupCache, userTokenCache)
|
||||
pushgwRouter := pushgwrt.New(config.HTTP, config.Pushgw, config.Alert, targetCache, busiGroupCache, idents, metas, writers, ctx)
|
||||
|
||||
r := httpx.GinEngine(config.Global.RunMode, config.HTTP, configCvalCache.PrintBodyPaths, configCvalCache.PrintAccessLog)
|
||||
|
||||
@@ -271,8 +271,10 @@ func Init(ctx *ctx.Context, builtinIntegrationsDir string) {
|
||||
}
|
||||
|
||||
for _, metric := range metrics {
|
||||
time.Sleep(time.Microsecond)
|
||||
metric.UUID = time.Now().UnixMicro()
|
||||
if metric.UUID == 0 {
|
||||
time.Sleep(time.Microsecond)
|
||||
metric.UUID = time.Now().UnixMicro()
|
||||
}
|
||||
metric.ID = metric.UUID
|
||||
metric.CreatedBy = SYSTEM
|
||||
metric.UpdatedBy = SYSTEM
|
||||
|
||||
@@ -118,7 +118,7 @@ func (s *Set) updateTargets(m map[string]models.HostMeta) error {
|
||||
}
|
||||
|
||||
start := time.Now()
|
||||
err := storage.MSet(context.Background(), s.redis, newMap, 7*24*time.Hour)
|
||||
err := storage.MSet(context.Background(), s.redis, newMap)
|
||||
if err != nil {
|
||||
cstats.RedisOperationLatency.WithLabelValues("mset_target_meta", "fail").Observe(time.Since(start).Seconds())
|
||||
return err
|
||||
@@ -127,7 +127,7 @@ func (s *Set) updateTargets(m map[string]models.HostMeta) error {
|
||||
}
|
||||
|
||||
if len(extendMap) > 0 {
|
||||
err = storage.MSet(context.Background(), s.redis, extendMap, 7*24*time.Hour)
|
||||
err = storage.MSet(context.Background(), s.redis, extendMap)
|
||||
if err != nil {
|
||||
cstats.RedisOperationLatency.WithLabelValues("mset_target_extend", "fail").Observe(time.Since(start).Seconds())
|
||||
return err
|
||||
|
||||
@@ -24,11 +24,11 @@ import (
|
||||
"github.com/ccfos/nightingale/v6/prom"
|
||||
"github.com/ccfos/nightingale/v6/pushgw/idents"
|
||||
"github.com/ccfos/nightingale/v6/storage"
|
||||
"github.com/ccfos/nightingale/v6/pkg/ginx"
|
||||
"gorm.io/gorm"
|
||||
|
||||
"github.com/gin-gonic/gin"
|
||||
"github.com/rakyll/statik/fs"
|
||||
"github.com/toolkits/pkg/ginx"
|
||||
"github.com/toolkits/pkg/logger"
|
||||
"github.com/toolkits/pkg/runner"
|
||||
)
|
||||
@@ -51,7 +51,6 @@ type Router struct {
|
||||
UserGroupCache *memsto.UserGroupCacheType
|
||||
UserTokenCache *memsto.UserTokenCacheType
|
||||
Ctx *ctx.Context
|
||||
LogDir string
|
||||
|
||||
HeartbeatHook HeartbeatHookFunc
|
||||
TargetDeleteHook models.TargetDeleteHookFunc
|
||||
@@ -62,7 +61,7 @@ func New(httpConfig httpx.Config, center cconf.Center, alert aconf.Alert, ibex c
|
||||
operations cconf.Operation, ds *memsto.DatasourceCacheType, ncc *memsto.NotifyConfigCacheType,
|
||||
pc *prom.PromClientMap, redis storage.Redis,
|
||||
sso *sso.SsoClient, ctx *ctx.Context, metaSet *metas.Set, idents *idents.Set,
|
||||
tc *memsto.TargetCacheType, uc *memsto.UserCacheType, ugc *memsto.UserGroupCacheType, utc *memsto.UserTokenCacheType, logDir string) *Router {
|
||||
tc *memsto.TargetCacheType, uc *memsto.UserCacheType, ugc *memsto.UserGroupCacheType, utc *memsto.UserTokenCacheType) *Router {
|
||||
return &Router{
|
||||
HTTP: httpConfig,
|
||||
Center: center,
|
||||
@@ -81,7 +80,6 @@ func New(httpConfig httpx.Config, center cconf.Center, alert aconf.Alert, ibex c
|
||||
UserGroupCache: ugc,
|
||||
UserTokenCache: utc,
|
||||
Ctx: ctx,
|
||||
LogDir: logDir,
|
||||
HeartbeatHook: func(ident string) map[string]interface{} { return nil },
|
||||
TargetDeleteHook: func(tx *gorm.DB, idents []string) error { return nil },
|
||||
AlertRuleModifyHook: func(ar *models.AlertRule) {},
|
||||
@@ -213,8 +211,8 @@ func (rt *Router) Config(r *gin.Engine) {
|
||||
pages.GET("/datasource/brief", rt.auth(), rt.user(), rt.datasourceBriefs)
|
||||
pages.POST("/datasource/query", rt.auth(), rt.user(), rt.datasourceQuery)
|
||||
|
||||
pages.POST("/ds-query", rt.auth(), rt.user(), rt.QueryData)
|
||||
pages.POST("/logs-query", rt.auth(), rt.user(), rt.QueryLogV2)
|
||||
pages.POST("/ds-query", rt.auth(), rt.QueryData)
|
||||
pages.POST("/logs-query", rt.auth(), rt.QueryLogV2)
|
||||
|
||||
pages.POST("/tdengine-databases", rt.auth(), rt.tdengineDatabases)
|
||||
pages.POST("/tdengine-tables", rt.auth(), rt.tdengineTables)
|
||||
@@ -253,12 +251,10 @@ func (rt *Router) Config(r *gin.Engine) {
|
||||
pages.GET("/auth/redirect/cas", rt.loginRedirectCas)
|
||||
pages.GET("/auth/redirect/oauth", rt.loginRedirectOAuth)
|
||||
pages.GET("/auth/redirect/dingtalk", rt.loginRedirectDingTalk)
|
||||
pages.GET("/auth/redirect/feishu", rt.loginRedirectFeiShu)
|
||||
pages.GET("/auth/callback", rt.loginCallback)
|
||||
pages.GET("/auth/callback/cas", rt.loginCallbackCas)
|
||||
pages.GET("/auth/callback/oauth", rt.loginCallbackOAuth)
|
||||
pages.GET("/auth/callback/dingtalk", rt.loginCallbackDingTalk)
|
||||
pages.GET("/auth/callback/feishu", rt.loginCallbackFeiShu)
|
||||
pages.GET("/auth/perms", rt.allPerms)
|
||||
|
||||
pages.GET("/metrics/desc", rt.metricsDescGetFile)
|
||||
@@ -370,7 +366,6 @@ func (rt *Router) Config(r *gin.Engine) {
|
||||
// pages.GET("/alert-rules/builtin/alerts-cates", rt.auth(), rt.user(), rt.builtinAlertCateGets)
|
||||
// pages.GET("/alert-rules/builtin/list", rt.auth(), rt.user(), rt.builtinAlertRules)
|
||||
pages.GET("/alert-rules/callbacks", rt.auth(), rt.user(), rt.alertRuleCallbacks)
|
||||
pages.GET("/timezones", rt.auth(), rt.user(), rt.timezonesGet)
|
||||
|
||||
pages.GET("/busi-groups/alert-rules", rt.auth(), rt.user(), rt.perm("/alert-rules"), rt.alertRuleGetsByGids)
|
||||
pages.GET("/busi-group/:id/alert-rules", rt.auth(), rt.user(), rt.perm("/alert-rules"), rt.alertRuleGets)
|
||||
@@ -394,8 +389,8 @@ func (rt *Router) Config(r *gin.Engine) {
|
||||
pages.GET("/busi-group/:id/recording-rules", rt.auth(), rt.user(), rt.perm("/recording-rules"), rt.recordingRuleGets)
|
||||
pages.POST("/busi-group/:id/recording-rules", rt.auth(), rt.user(), rt.perm("/recording-rules/add"), rt.bgrw(), rt.recordingRuleAddByFE)
|
||||
pages.DELETE("/busi-group/:id/recording-rules", rt.auth(), rt.user(), rt.perm("/recording-rules/del"), rt.bgrw(), rt.recordingRuleDel)
|
||||
pages.PUT("/busi-group/:id/recording-rule/:rrid", rt.auth(), rt.user(), rt.perm("/recording-rules/put"), rt.bgrw(), rt.recordingRulePutByFE)
|
||||
pages.GET("/recording-rule/:rrid", rt.auth(), rt.user(), rt.perm("/recording-rules"), rt.recordingRuleGet)
|
||||
pages.PUT("/recording-rule/:rrid", rt.auth(), rt.user(), rt.perm("/recording-rules"), rt.recordingRulePutByFE)
|
||||
pages.PUT("/busi-group/:id/recording-rules/fields", rt.auth(), rt.user(), rt.perm("/recording-rules/put"), rt.recordingRulePutFields)
|
||||
|
||||
pages.GET("/busi-groups/alert-mutes", rt.auth(), rt.user(), rt.perm("/alert-mutes"), rt.alertMuteGetsByGids)
|
||||
@@ -419,9 +414,6 @@ func (rt *Router) Config(r *gin.Engine) {
|
||||
pages.GET("/alert-cur-event/:eid", rt.alertCurEventGet)
|
||||
pages.GET("/alert-his-event/:eid", rt.alertHisEventGet)
|
||||
pages.GET("/event-notify-records/:eid", rt.notificationRecordList)
|
||||
pages.GET("/event-detail/:hash", rt.eventDetailPage)
|
||||
pages.GET("/alert-eval-detail/:id", rt.alertEvalDetailPage)
|
||||
pages.GET("/trace-logs/:traceid", rt.traceLogsPage)
|
||||
|
||||
// card logic
|
||||
pages.GET("/alert-cur-events/list", rt.auth(), rt.user(), rt.alertCurEventsList)
|
||||
@@ -566,19 +558,6 @@ func (rt *Router) Config(r *gin.Engine) {
|
||||
pages.POST("/event-pipeline-tryrun", rt.auth(), rt.user(), rt.perm("/event-pipelines"), rt.tryRunEventPipeline)
|
||||
pages.POST("/event-processor-tryrun", rt.auth(), rt.user(), rt.perm("/event-pipelines"), rt.tryRunEventProcessor)
|
||||
|
||||
// API 触发工作流
|
||||
pages.POST("/event-pipeline/:id/trigger", rt.auth(), rt.user(), rt.perm("/event-pipelines"), rt.triggerEventPipelineByAPI)
|
||||
// SSE 流式执行工作流
|
||||
pages.POST("/event-pipeline/:id/stream", rt.auth(), rt.user(), rt.perm("/event-pipelines"), rt.streamEventPipeline)
|
||||
|
||||
// 事件Pipeline执行记录路由
|
||||
pages.GET("/event-pipeline-executions", rt.auth(), rt.user(), rt.perm("/event-pipelines"), rt.listAllEventPipelineExecutions)
|
||||
pages.GET("/event-pipeline/:id/executions", rt.auth(), rt.user(), rt.perm("/event-pipelines"), rt.listEventPipelineExecutions)
|
||||
pages.GET("/event-pipeline/:id/execution/:exec_id", rt.auth(), rt.user(), rt.perm("/event-pipelines"), rt.getEventPipelineExecution)
|
||||
pages.GET("/event-pipeline-execution/:exec_id", rt.auth(), rt.user(), rt.perm("/event-pipelines"), rt.getEventPipelineExecution)
|
||||
pages.GET("/event-pipeline/:id/execution-stats", rt.auth(), rt.user(), rt.perm("/event-pipelines"), rt.getEventPipelineExecutionStats)
|
||||
pages.POST("/event-pipeline-executions/clean", rt.auth(), rt.user(), rt.admin(), rt.cleanEventPipelineExecutions)
|
||||
|
||||
pages.POST("/notify-channel-configs", rt.auth(), rt.user(), rt.perm("/notification-channels/add"), rt.notifyChannelsAdd)
|
||||
pages.DELETE("/notify-channel-configs", rt.auth(), rt.user(), rt.perm("/notification-channels/del"), rt.notifyChannelsDel)
|
||||
pages.PUT("/notify-channel-config/:id", rt.auth(), rt.user(), rt.perm("/notification-channels/put"), rt.notifyChannelPut)
|
||||
@@ -590,14 +569,6 @@ func (rt *Router) Config(r *gin.Engine) {
|
||||
pages.GET("/pagerduty-service-list/:id", rt.auth(), rt.user(), rt.pagerDutyNotifyServicesGet)
|
||||
pages.GET("/notify-channel-config", rt.auth(), rt.user(), rt.notifyChannelGetBy)
|
||||
pages.GET("/notify-channel-config/idents", rt.notifyChannelIdentsGet)
|
||||
|
||||
// saved view 查询条件保存相关路由
|
||||
pages.GET("/saved-views", rt.auth(), rt.user(), rt.savedViewGets)
|
||||
pages.POST("/saved-views", rt.auth(), rt.user(), rt.savedViewAdd)
|
||||
pages.PUT("/saved-view/:id", rt.auth(), rt.user(), rt.savedViewPut)
|
||||
pages.DELETE("/saved-view/:id", rt.auth(), rt.user(), rt.savedViewDel)
|
||||
pages.POST("/saved-view/:id/favorite", rt.auth(), rt.user(), rt.savedViewFavoriteAdd)
|
||||
pages.DELETE("/saved-view/:id/favorite", rt.auth(), rt.user(), rt.savedViewFavoriteDel)
|
||||
}
|
||||
|
||||
r.GET("/api/n9e/versions", func(c *gin.Context) {
|
||||
@@ -654,7 +625,6 @@ func (rt *Router) Config(r *gin.Engine) {
|
||||
service.GET("/busi-groups", rt.busiGroupGetsByService)
|
||||
|
||||
service.GET("/datasources", rt.datasourceGetsByService)
|
||||
service.GET("/datasource-rsa-config", rt.datasourceRsaConfigGet)
|
||||
service.GET("/datasource-ids", rt.getDatasourceIds)
|
||||
service.POST("/server-heartbeat", rt.serverHeartbeat)
|
||||
service.GET("/servers-active", rt.serversActive)
|
||||
@@ -711,9 +681,6 @@ func (rt *Router) Config(r *gin.Engine) {
|
||||
service.GET("/message-templates", rt.messageTemplateGets)
|
||||
|
||||
service.GET("/event-pipelines", rt.eventPipelinesListByService)
|
||||
service.POST("/event-pipeline/:id/trigger", rt.triggerEventPipelineByService)
|
||||
service.POST("/event-pipeline/:id/stream", rt.streamEventPipelineByService)
|
||||
service.POST("/event-pipeline-execution", rt.eventPipelineExecutionAdd)
|
||||
|
||||
// 手机号加密存储配置接口
|
||||
service.POST("/users/phone/encrypt", rt.usersPhoneEncrypt)
|
||||
|
||||
@@ -4,9 +4,9 @@ import (
|
||||
"net/http"
|
||||
|
||||
"github.com/ccfos/nightingale/v6/models"
|
||||
"github.com/ccfos/nightingale/v6/pkg/ginx"
|
||||
|
||||
"github.com/gin-gonic/gin"
|
||||
"github.com/toolkits/pkg/ginx"
|
||||
)
|
||||
|
||||
// no param
|
||||
|
||||
@@ -10,9 +10,9 @@ import (
|
||||
"github.com/ccfos/nightingale/v6/models"
|
||||
"github.com/ccfos/nightingale/v6/pkg/ctx"
|
||||
"github.com/ccfos/nightingale/v6/pkg/strx"
|
||||
"github.com/ccfos/nightingale/v6/pkg/ginx"
|
||||
|
||||
"github.com/gin-gonic/gin"
|
||||
"github.com/toolkits/pkg/ginx"
|
||||
"github.com/toolkits/pkg/logger"
|
||||
)
|
||||
|
||||
|
||||
@@ -1,168 +0,0 @@
|
||||
package router
|
||||
|
||||
import (
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"io"
|
||||
"net/http"
|
||||
"sort"
|
||||
"strconv"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
"github.com/ccfos/nightingale/v6/models"
|
||||
"github.com/ccfos/nightingale/v6/pkg/loggrep"
|
||||
"github.com/ccfos/nightingale/v6/pkg/ginx"
|
||||
|
||||
"github.com/gin-gonic/gin"
|
||||
)
|
||||
|
||||
// alertEvalDetailPage renders an HTML log viewer page for alert rule evaluation logs.
|
||||
func (rt *Router) alertEvalDetailPage(c *gin.Context) {
|
||||
id := ginx.UrlParamStr(c, "id")
|
||||
if !loggrep.IsValidRuleID(id) {
|
||||
c.String(http.StatusBadRequest, "invalid rule id format")
|
||||
return
|
||||
}
|
||||
|
||||
logs, instance, err := rt.getAlertEvalLogs(id)
|
||||
if err != nil {
|
||||
c.String(http.StatusInternalServerError, "Error: %v", err)
|
||||
return
|
||||
}
|
||||
|
||||
c.Header("Content-Type", "text/html; charset=utf-8")
|
||||
err = loggrep.RenderAlertEvalHTML(c.Writer, loggrep.AlertEvalPageData{
|
||||
RuleID: id,
|
||||
Instance: instance,
|
||||
Logs: logs,
|
||||
Total: len(logs),
|
||||
})
|
||||
if err != nil {
|
||||
c.String(http.StatusInternalServerError, "render error: %v", err)
|
||||
}
|
||||
}
|
||||
|
||||
// alertEvalDetailJSON returns JSON for alert rule evaluation logs.
|
||||
func (rt *Router) alertEvalDetailJSON(c *gin.Context) {
|
||||
id := ginx.UrlParamStr(c, "id")
|
||||
if !loggrep.IsValidRuleID(id) {
|
||||
ginx.Bomb(200, "invalid rule id format")
|
||||
}
|
||||
|
||||
logs, instance, err := rt.getAlertEvalLogs(id)
|
||||
ginx.Dangerous(err)
|
||||
|
||||
ginx.NewRender(c).Data(loggrep.EventDetailResp{
|
||||
Logs: logs,
|
||||
Instance: instance,
|
||||
}, nil)
|
||||
}
|
||||
|
||||
// getAlertEvalLogs resolves the target instance(s) and retrieves alert eval logs.
|
||||
func (rt *Router) getAlertEvalLogs(id string) ([]string, string, error) {
|
||||
ruleId, _ := strconv.ParseInt(id, 10, 64)
|
||||
rule, err := models.AlertRuleGetById(rt.Ctx, ruleId)
|
||||
if err != nil {
|
||||
return nil, "", err
|
||||
}
|
||||
if rule == nil {
|
||||
return nil, "", fmt.Errorf("no such alert rule")
|
||||
}
|
||||
|
||||
instance := fmt.Sprintf("%s:%d", rt.Alert.Heartbeat.IP, rt.HTTP.Port)
|
||||
keyword := fmt.Sprintf("alert_eval_%s", id)
|
||||
|
||||
// Get datasource IDs for this rule
|
||||
dsIds := rt.DatasourceCache.GetIDsByDsCateAndQueries(rule.Cate, rule.DatasourceQueries)
|
||||
if len(dsIds) == 0 {
|
||||
// No datasources found (e.g. host rule), try local grep
|
||||
logs, err := loggrep.GrepLogDir(rt.LogDir, keyword)
|
||||
return logs, instance, err
|
||||
}
|
||||
|
||||
// Find unique target nodes via hash ring, with DB fallback
|
||||
nodeSet := make(map[string]struct{})
|
||||
for _, dsId := range dsIds {
|
||||
node, err := rt.getNodeForDatasource(dsId, id)
|
||||
if err != nil {
|
||||
continue
|
||||
}
|
||||
nodeSet[node] = struct{}{}
|
||||
}
|
||||
|
||||
if len(nodeSet) == 0 {
|
||||
// Hash ring not ready, grep locally
|
||||
logs, err := loggrep.GrepLogDir(rt.LogDir, keyword)
|
||||
return logs, instance, err
|
||||
}
|
||||
|
||||
// Collect logs from all target nodes
|
||||
var allLogs []string
|
||||
var instances []string
|
||||
|
||||
for node := range nodeSet {
|
||||
if node == instance {
|
||||
logs, err := loggrep.GrepLogDir(rt.LogDir, keyword)
|
||||
if err == nil {
|
||||
allLogs = append(allLogs, logs...)
|
||||
instances = append(instances, node)
|
||||
}
|
||||
} else {
|
||||
logs, nodeAddr, err := rt.forwardAlertEvalDetail(node, id)
|
||||
if err == nil {
|
||||
allLogs = append(allLogs, logs...)
|
||||
instances = append(instances, nodeAddr)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Sort logs by timestamp descending
|
||||
sort.Slice(allLogs, func(i, j int) bool {
|
||||
return allLogs[i] > allLogs[j]
|
||||
})
|
||||
|
||||
if len(allLogs) > loggrep.MaxLogLines {
|
||||
allLogs = allLogs[:loggrep.MaxLogLines]
|
||||
}
|
||||
|
||||
return allLogs, strings.Join(instances, ", "), nil
|
||||
}
|
||||
|
||||
func (rt *Router) forwardAlertEvalDetail(node, id string) ([]string, string, error) {
|
||||
url := fmt.Sprintf("http://%s/v1/n9e/alert-eval-detail/%s", node, id)
|
||||
req, err := http.NewRequest("GET", url, nil)
|
||||
if err != nil {
|
||||
return nil, node, err
|
||||
}
|
||||
|
||||
for user, pass := range rt.HTTP.APIForService.BasicAuth {
|
||||
req.SetBasicAuth(user, pass)
|
||||
break
|
||||
}
|
||||
|
||||
client := &http.Client{Timeout: 15 * time.Second}
|
||||
resp, err := client.Do(req)
|
||||
if err != nil {
|
||||
return nil, node, fmt.Errorf("forward to %s failed: %v", node, err)
|
||||
}
|
||||
defer resp.Body.Close()
|
||||
|
||||
body, err := io.ReadAll(io.LimitReader(resp.Body, 10*1024*1024)) // 10MB limit
|
||||
if err != nil {
|
||||
return nil, node, err
|
||||
}
|
||||
|
||||
var result struct {
|
||||
Dat loggrep.EventDetailResp `json:"dat"`
|
||||
Err string `json:"err"`
|
||||
}
|
||||
if err := json.Unmarshal(body, &result); err != nil {
|
||||
return nil, node, err
|
||||
}
|
||||
if result.Err != "" {
|
||||
return nil, node, fmt.Errorf("%s", result.Err)
|
||||
}
|
||||
|
||||
return result.Dat.Logs, result.Dat.Instance, nil
|
||||
}
|
||||
@@ -8,9 +8,9 @@ import (
|
||||
|
||||
"github.com/ccfos/nightingale/v6/models"
|
||||
"github.com/ccfos/nightingale/v6/pkg/ctx"
|
||||
"github.com/ccfos/nightingale/v6/pkg/ginx"
|
||||
|
||||
"github.com/gin-gonic/gin"
|
||||
"github.com/toolkits/pkg/ginx"
|
||||
"github.com/toolkits/pkg/logger"
|
||||
"golang.org/x/exp/slices"
|
||||
)
|
||||
|
||||
@@ -16,12 +16,12 @@ import (
|
||||
"github.com/ccfos/nightingale/v6/pkg/strx"
|
||||
"github.com/ccfos/nightingale/v6/pushgw/pconf"
|
||||
"github.com/ccfos/nightingale/v6/pushgw/writer"
|
||||
"github.com/ccfos/nightingale/v6/pkg/ginx"
|
||||
|
||||
"github.com/gin-gonic/gin"
|
||||
"github.com/jinzhu/copier"
|
||||
"github.com/pkg/errors"
|
||||
"github.com/prometheus/prometheus/prompb"
|
||||
"github.com/toolkits/pkg/ginx"
|
||||
"github.com/toolkits/pkg/i18n"
|
||||
)
|
||||
|
||||
@@ -36,7 +36,6 @@ func (rt *Router) alertRuleGets(c *gin.Context) {
|
||||
for i := 0; i < len(ars); i++ {
|
||||
ars[i].FillNotifyGroups(rt.Ctx, cache)
|
||||
}
|
||||
models.FillUpdateByNicknames(rt.Ctx, ars)
|
||||
}
|
||||
ginx.NewRender(c).Data(ars, err)
|
||||
}
|
||||
@@ -77,6 +76,7 @@ func (rt *Router) alertRuleGetsByGids(c *gin.Context) {
|
||||
if err == nil {
|
||||
cache := make(map[int64]*models.UserGroup)
|
||||
rids := make([]int64, 0, len(ars))
|
||||
names := make([]string, 0, len(ars))
|
||||
for i := 0; i < len(ars); i++ {
|
||||
ars[i].FillNotifyGroups(rt.Ctx, cache)
|
||||
|
||||
@@ -85,6 +85,7 @@ func (rt *Router) alertRuleGetsByGids(c *gin.Context) {
|
||||
}
|
||||
|
||||
rids = append(rids, ars[i].Id)
|
||||
names = append(names, ars[i].UpdateBy)
|
||||
}
|
||||
|
||||
stime, etime := GetAlertCueEventTimeRange(c)
|
||||
@@ -95,7 +96,14 @@ func (rt *Router) alertRuleGetsByGids(c *gin.Context) {
|
||||
}
|
||||
}
|
||||
|
||||
models.FillUpdateByNicknames(rt.Ctx, ars)
|
||||
users := models.UserMapGet(rt.Ctx, "username in (?)", names)
|
||||
if users != nil {
|
||||
for i := 0; i < len(ars); i++ {
|
||||
if user, exist := users[ars[i].UpdateBy]; exist {
|
||||
ars[i].UpdateByNickname = user.Nickname
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
ginx.NewRender(c).Data(ars, err)
|
||||
}
|
||||
@@ -127,7 +135,6 @@ func (rt *Router) alertRulesGetByService(c *gin.Context) {
|
||||
ars[i].DatasourceIdsJson = rt.DatasourceCache.GetIDsByDsCateAndQueries(ars[i].Cate, ars[i].DatasourceQueries)
|
||||
}
|
||||
}
|
||||
models.FillUpdateByNicknames(rt.Ctx, ars)
|
||||
}
|
||||
ginx.NewRender(c).Data(ars, err)
|
||||
}
|
||||
@@ -882,27 +889,3 @@ func (rt *Router) batchAlertRuleClone(c *gin.Context) {
|
||||
|
||||
ginx.NewRender(c).Data(reterr, nil)
|
||||
}
|
||||
|
||||
func (rt *Router) timezonesGet(c *gin.Context) {
|
||||
// 返回常用时区列表(按时差去重,每个时差只保留一个代表性时区)
|
||||
timezones := []string{
|
||||
"UTC",
|
||||
"Asia/Shanghai", // UTC+8 (代表 Asia/Hong_Kong, Asia/Singapore 等)
|
||||
"Asia/Tokyo", // UTC+9 (代表 Asia/Seoul 等)
|
||||
"Asia/Dubai", // UTC+4
|
||||
"Asia/Kolkata", // UTC+5:30
|
||||
"Asia/Bangkok", // UTC+7 (代表 Asia/Jakarta 等)
|
||||
"Europe/London", // UTC+0 (代表 UTC)
|
||||
"Europe/Paris", // UTC+1 (代表 Europe/Berlin, Europe/Rome, Europe/Madrid 等)
|
||||
"Europe/Moscow", // UTC+3
|
||||
"America/New_York", // UTC-5 (代表 America/Toronto 等)
|
||||
"America/Chicago", // UTC-6 (代表 America/Mexico_City 等)
|
||||
"America/Denver", // UTC-7
|
||||
"America/Los_Angeles", // UTC-8
|
||||
"America/Sao_Paulo", // UTC-3
|
||||
"Australia/Sydney", // UTC+10 (代表 Australia/Melbourne 等)
|
||||
"Pacific/Auckland", // UTC+12
|
||||
}
|
||||
|
||||
ginx.NewRender(c).Data(timezones, nil)
|
||||
}
|
||||
|
||||
@@ -9,9 +9,9 @@ import (
|
||||
"github.com/ccfos/nightingale/v6/alert/common"
|
||||
"github.com/ccfos/nightingale/v6/models"
|
||||
"github.com/ccfos/nightingale/v6/pkg/strx"
|
||||
"github.com/ccfos/nightingale/v6/pkg/ginx"
|
||||
|
||||
"github.com/gin-gonic/gin"
|
||||
"github.com/toolkits/pkg/ginx"
|
||||
"github.com/toolkits/pkg/i18n"
|
||||
)
|
||||
|
||||
@@ -30,7 +30,6 @@ func (rt *Router) alertSubscribeGets(c *gin.Context) {
|
||||
ginx.Dangerous(lst[i].FillDatasourceIds(rt.Ctx))
|
||||
ginx.Dangerous(lst[i].DB2FE())
|
||||
}
|
||||
models.FillUpdateByNicknames(rt.Ctx, lst)
|
||||
|
||||
ginx.NewRender(c).Data(lst, err)
|
||||
}
|
||||
@@ -67,7 +66,6 @@ func (rt *Router) alertSubscribeGetsByGids(c *gin.Context) {
|
||||
ginx.Dangerous(lst[i].FillDatasourceIds(rt.Ctx))
|
||||
ginx.Dangerous(lst[i].DB2FE())
|
||||
}
|
||||
models.FillUpdateByNicknames(rt.Ctx, lst)
|
||||
|
||||
ginx.NewRender(c).Data(lst, err)
|
||||
}
|
||||
|
||||
@@ -7,9 +7,9 @@ import (
|
||||
|
||||
"github.com/ccfos/nightingale/v6/models"
|
||||
"github.com/ccfos/nightingale/v6/pkg/strx"
|
||||
"github.com/ccfos/nightingale/v6/pkg/ginx"
|
||||
|
||||
"github.com/gin-gonic/gin"
|
||||
"github.com/toolkits/pkg/ginx"
|
||||
"github.com/toolkits/pkg/i18n"
|
||||
)
|
||||
|
||||
@@ -260,9 +260,6 @@ func (rt *Router) boardGets(c *gin.Context) {
|
||||
query := ginx.QueryStr(c, "query", "")
|
||||
|
||||
boards, err := models.BoardGetsByGroupId(rt.Ctx, bgid, query)
|
||||
if err == nil {
|
||||
models.FillUpdateByNicknames(rt.Ctx, boards)
|
||||
}
|
||||
ginx.NewRender(c).Data(boards, err)
|
||||
}
|
||||
|
||||
@@ -276,9 +273,6 @@ func (rt *Router) publicBoardGets(c *gin.Context) {
|
||||
ginx.Dangerous(err)
|
||||
|
||||
boards, err := models.BoardGets(rt.Ctx, "", "public=1 and (public_cate in (?) or id in (?))", []int64{0, 1}, boardIds)
|
||||
if err == nil {
|
||||
models.FillUpdateByNicknames(rt.Ctx, boards)
|
||||
}
|
||||
ginx.NewRender(c).Data(boards, err)
|
||||
}
|
||||
|
||||
@@ -318,7 +312,6 @@ func (rt *Router) boardGetsByGids(c *gin.Context) {
|
||||
boards[i].Bgids = ids
|
||||
}
|
||||
}
|
||||
models.FillUpdateByNicknames(rt.Ctx, boards)
|
||||
|
||||
ginx.NewRender(c).Data(boards, err)
|
||||
}
|
||||
|
||||
@@ -8,10 +8,10 @@ import (
|
||||
"strings"
|
||||
|
||||
"github.com/ccfos/nightingale/v6/models"
|
||||
"github.com/ccfos/nightingale/v6/pkg/ginx"
|
||||
|
||||
"github.com/gin-gonic/gin"
|
||||
"github.com/toolkits/pkg/file"
|
||||
"github.com/toolkits/pkg/ginx"
|
||||
"github.com/toolkits/pkg/logger"
|
||||
"github.com/toolkits/pkg/runner"
|
||||
)
|
||||
|
||||
@@ -5,9 +5,9 @@ import (
|
||||
|
||||
"github.com/ccfos/nightingale/v6/models"
|
||||
"github.com/ccfos/nightingale/v6/pkg/ctx"
|
||||
"github.com/ccfos/nightingale/v6/pkg/ginx"
|
||||
|
||||
"github.com/gin-gonic/gin"
|
||||
"github.com/toolkits/pkg/ginx"
|
||||
"gorm.io/gorm"
|
||||
)
|
||||
|
||||
|
||||
@@ -3,8 +3,8 @@ package router
|
||||
import (
|
||||
"github.com/ccfos/nightingale/v6/models"
|
||||
"github.com/ccfos/nightingale/v6/pkg/prom"
|
||||
"github.com/ccfos/nightingale/v6/pkg/ginx"
|
||||
"github.com/gin-gonic/gin"
|
||||
"github.com/toolkits/pkg/ginx"
|
||||
)
|
||||
|
||||
func (rt *Router) metricFilterGets(c *gin.Context) {
|
||||
@@ -27,8 +27,6 @@ func (rt *Router) metricFilterGets(c *gin.Context) {
|
||||
}
|
||||
}
|
||||
|
||||
models.FillUpdateByNicknames(rt.Ctx, arr)
|
||||
|
||||
ginx.NewRender(c).Data(arr, err)
|
||||
}
|
||||
|
||||
|
||||
@@ -7,9 +7,9 @@ import (
|
||||
|
||||
"github.com/ccfos/nightingale/v6/center/integration"
|
||||
"github.com/ccfos/nightingale/v6/models"
|
||||
"github.com/ccfos/nightingale/v6/pkg/ginx"
|
||||
|
||||
"github.com/gin-gonic/gin"
|
||||
"github.com/toolkits/pkg/ginx"
|
||||
"github.com/toolkits/pkg/i18n"
|
||||
)
|
||||
|
||||
|
||||
@@ -9,8 +9,8 @@ import (
|
||||
"github.com/BurntSushi/toml"
|
||||
"github.com/ccfos/nightingale/v6/center/integration"
|
||||
"github.com/ccfos/nightingale/v6/models"
|
||||
"github.com/ccfos/nightingale/v6/pkg/ginx"
|
||||
"github.com/gin-gonic/gin"
|
||||
"github.com/toolkits/pkg/ginx"
|
||||
"github.com/toolkits/pkg/i18n"
|
||||
)
|
||||
|
||||
|
||||
@@ -5,9 +5,9 @@ import (
|
||||
|
||||
"github.com/ccfos/nightingale/v6/models"
|
||||
"github.com/ccfos/nightingale/v6/pkg/strx"
|
||||
"github.com/ccfos/nightingale/v6/pkg/ginx"
|
||||
|
||||
"github.com/gin-gonic/gin"
|
||||
"github.com/toolkits/pkg/ginx"
|
||||
"github.com/toolkits/pkg/logger"
|
||||
)
|
||||
|
||||
@@ -119,9 +119,6 @@ func (rt *Router) busiGroupGets(c *gin.Context) {
|
||||
if len(lst) == 0 {
|
||||
lst = []models.BusiGroup{}
|
||||
}
|
||||
if err == nil {
|
||||
models.FillUpdateByNicknames(rt.Ctx, lst)
|
||||
}
|
||||
|
||||
ginx.NewRender(c).Data(lst, err)
|
||||
}
|
||||
|
||||
@@ -5,9 +5,9 @@ import (
|
||||
"time"
|
||||
|
||||
"github.com/ccfos/nightingale/v6/storage"
|
||||
"github.com/ccfos/nightingale/v6/pkg/ginx"
|
||||
"github.com/gin-gonic/gin"
|
||||
captcha "github.com/mojocn/base64Captcha"
|
||||
"github.com/toolkits/pkg/ginx"
|
||||
"github.com/toolkits/pkg/logger"
|
||||
)
|
||||
|
||||
|
||||
@@ -5,9 +5,9 @@ import (
|
||||
|
||||
"github.com/ccfos/nightingale/v6/models"
|
||||
"github.com/ccfos/nightingale/v6/pkg/strx"
|
||||
"github.com/ccfos/nightingale/v6/pkg/ginx"
|
||||
|
||||
"github.com/gin-gonic/gin"
|
||||
"github.com/toolkits/pkg/ginx"
|
||||
)
|
||||
|
||||
func (rt *Router) chartShareGets(c *gin.Context) {
|
||||
|
||||
@@ -4,9 +4,9 @@ import (
|
||||
"encoding/json"
|
||||
|
||||
"github.com/ccfos/nightingale/v6/models"
|
||||
"github.com/ccfos/nightingale/v6/pkg/ginx"
|
||||
|
||||
"github.com/gin-gonic/gin"
|
||||
"github.com/toolkits/pkg/ginx"
|
||||
)
|
||||
|
||||
func (rt *Router) notifyChannelsGets(c *gin.Context) {
|
||||
|
||||
@@ -4,9 +4,9 @@ import (
|
||||
"time"
|
||||
|
||||
"github.com/ccfos/nightingale/v6/models"
|
||||
"github.com/ccfos/nightingale/v6/pkg/ginx"
|
||||
|
||||
"github.com/gin-gonic/gin"
|
||||
"github.com/toolkits/pkg/ginx"
|
||||
)
|
||||
|
||||
const EMBEDDEDDASHBOARD = "embedded-dashboards"
|
||||
@@ -15,9 +15,6 @@ func (rt *Router) configsGet(c *gin.Context) {
|
||||
prefix := ginx.QueryStr(c, "prefix", "")
|
||||
limit := ginx.QueryInt(c, "limit", 10)
|
||||
configs, err := models.ConfigsGets(rt.Ctx, prefix, limit, ginx.Offset(c, limit))
|
||||
if err == nil {
|
||||
models.FillUpdateByNicknames(rt.Ctx, configs)
|
||||
}
|
||||
ginx.NewRender(c).Data(configs, err)
|
||||
}
|
||||
|
||||
|
||||
@@ -2,9 +2,9 @@ package router
|
||||
|
||||
import (
|
||||
"github.com/ccfos/nightingale/v6/pkg/secu"
|
||||
"github.com/ccfos/nightingale/v6/pkg/ginx"
|
||||
|
||||
"github.com/gin-gonic/gin"
|
||||
"github.com/toolkits/pkg/ginx"
|
||||
)
|
||||
|
||||
type confPropCrypto struct {
|
||||
|
||||
@@ -7,9 +7,9 @@ import (
|
||||
|
||||
"github.com/ccfos/nightingale/v6/models"
|
||||
"github.com/ccfos/nightingale/v6/pkg/ctx"
|
||||
"github.com/ccfos/nightingale/v6/pkg/ginx"
|
||||
|
||||
"github.com/gin-gonic/gin"
|
||||
"github.com/toolkits/pkg/ginx"
|
||||
)
|
||||
|
||||
func checkAnnotationPermission(c *gin.Context, ctx *ctx.Context, dashboardId int64) {
|
||||
|
||||
@@ -3,20 +3,19 @@ package router
|
||||
import (
|
||||
"context"
|
||||
"crypto/tls"
|
||||
"encoding/base64"
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"io"
|
||||
"net/http"
|
||||
"net/url"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
"github.com/ccfos/nightingale/v6/datasource/opensearch"
|
||||
"github.com/ccfos/nightingale/v6/dskit/clickhouse"
|
||||
"github.com/ccfos/nightingale/v6/models"
|
||||
"github.com/ccfos/nightingale/v6/pkg/ginx"
|
||||
|
||||
"github.com/gin-gonic/gin"
|
||||
"github.com/toolkits/pkg/ginx"
|
||||
"github.com/toolkits/pkg/i18n"
|
||||
"github.com/toolkits/pkg/logger"
|
||||
)
|
||||
@@ -53,41 +52,9 @@ func (rt *Router) datasourceList(c *gin.Context) {
|
||||
func (rt *Router) datasourceGetsByService(c *gin.Context) {
|
||||
typ := ginx.QueryStr(c, "typ", "")
|
||||
lst, err := models.GetDatasourcesGetsBy(rt.Ctx, typ, "", "", "")
|
||||
|
||||
openRsa := rt.Center.RSA.OpenRSA
|
||||
for _, item := range lst {
|
||||
if err := item.Encrypt(openRsa, rt.HTTP.RSA.RSAPublicKey); err != nil {
|
||||
logger.Errorf("datasource %+v encrypt failed: %v", item, err)
|
||||
continue
|
||||
}
|
||||
}
|
||||
ginx.NewRender(c).Data(lst, err)
|
||||
}
|
||||
|
||||
func (rt *Router) datasourceRsaConfigGet(c *gin.Context) {
|
||||
if rt.Center.RSA.OpenRSA {
|
||||
publicKey := ""
|
||||
privateKey := ""
|
||||
if len(rt.HTTP.RSA.RSAPublicKey) > 0 {
|
||||
publicKey = base64.StdEncoding.EncodeToString(rt.HTTP.RSA.RSAPublicKey)
|
||||
}
|
||||
if len(rt.HTTP.RSA.RSAPrivateKey) > 0 {
|
||||
privateKey = base64.StdEncoding.EncodeToString(rt.HTTP.RSA.RSAPrivateKey)
|
||||
}
|
||||
logger.Debugf("OpenRSA=%v", rt.Center.RSA.OpenRSA)
|
||||
ginx.NewRender(c).Data(models.RsaConfig{
|
||||
OpenRSA: rt.Center.RSA.OpenRSA,
|
||||
RSAPublicKey: publicKey,
|
||||
RSAPrivateKey: privateKey,
|
||||
RSAPassWord: rt.HTTP.RSA.RSAPassWord,
|
||||
}, nil)
|
||||
} else {
|
||||
ginx.NewRender(c).Data(models.RsaConfig{
|
||||
OpenRSA: rt.Center.RSA.OpenRSA,
|
||||
}, nil)
|
||||
}
|
||||
}
|
||||
|
||||
func (rt *Router) datasourceBriefs(c *gin.Context) {
|
||||
var dss []*models.Datasource
|
||||
list, err := models.GetDatasourcesGetsBy(rt.Ctx, "", "", "", "")
|
||||
@@ -230,37 +197,6 @@ func (rt *Router) datasourceUpsert(c *gin.Context) {
|
||||
}
|
||||
}
|
||||
|
||||
if req.PluginType == models.ELASTICSEARCH {
|
||||
skipAuto := false
|
||||
// 若用户输入了version(version字符串存在且不为空),则不自动获取
|
||||
if req.SettingsJson != nil {
|
||||
if v, ok := req.SettingsJson["version"]; ok {
|
||||
switch vv := v.(type) {
|
||||
case string:
|
||||
if strings.TrimSpace(vv) != "" {
|
||||
skipAuto = true
|
||||
}
|
||||
default:
|
||||
if strings.TrimSpace(fmt.Sprint(vv)) != "" {
|
||||
skipAuto = true
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if !skipAuto {
|
||||
version, err := getElasticsearchVersion(req, 10*time.Second)
|
||||
if err != nil {
|
||||
logger.Warningf("failed to get elasticsearch version: %v", err)
|
||||
} else {
|
||||
if req.SettingsJson == nil {
|
||||
req.SettingsJson = make(map[string]interface{})
|
||||
}
|
||||
req.SettingsJson["version"] = version
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if req.Id == 0 {
|
||||
req.CreatedBy = username
|
||||
req.Status = "enabled"
|
||||
@@ -276,7 +212,7 @@ func (rt *Router) datasourceUpsert(c *gin.Context) {
|
||||
}
|
||||
err = req.Add(rt.Ctx)
|
||||
} else {
|
||||
err = req.Update(rt.Ctx, "name", "identifier", "description", "cluster_name", "settings", "http", "auth", "updated_by", "updated_at", "is_default", "weight")
|
||||
err = req.Update(rt.Ctx, "name", "identifier", "description", "cluster_name", "settings", "http", "auth", "updated_by", "updated_at", "is_default")
|
||||
}
|
||||
|
||||
Render(c, nil, err)
|
||||
@@ -293,15 +229,11 @@ func DatasourceCheck(c *gin.Context, ds models.Datasource) error {
|
||||
}
|
||||
}
|
||||
|
||||
// 使用 TLS 配置(支持 mTLS)
|
||||
tlsConfig, err := ds.HTTPJson.TLS.TLSConfig()
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to create TLS config: %v", err)
|
||||
}
|
||||
|
||||
client := &http.Client{
|
||||
Transport: &http.Transport{
|
||||
TLSClientConfig: tlsConfig,
|
||||
TLSClientConfig: &tls.Config{
|
||||
InsecureSkipVerify: ds.HTTPJson.TLS.SkipTlsVerify,
|
||||
},
|
||||
},
|
||||
}
|
||||
|
||||
@@ -459,82 +391,3 @@ func (rt *Router) datasourceQuery(c *gin.Context) {
|
||||
}
|
||||
ginx.NewRender(c).Data(req, err)
|
||||
}
|
||||
|
||||
// getElasticsearchVersion 该函数尝试从提供的Elasticsearch数据源中获取版本号,遍历所有URL,
|
||||
// 直到成功获取版本号或所有URL均尝试失败为止。
|
||||
func getElasticsearchVersion(ds models.Datasource, timeout time.Duration) (string, error) {
|
||||
client := &http.Client{
|
||||
Timeout: timeout,
|
||||
Transport: &http.Transport{
|
||||
TLSClientConfig: &tls.Config{
|
||||
InsecureSkipVerify: ds.HTTPJson.TLS.SkipTlsVerify,
|
||||
},
|
||||
},
|
||||
}
|
||||
|
||||
urls := make([]string, 0)
|
||||
if len(ds.HTTPJson.Urls) > 0 {
|
||||
urls = append(urls, ds.HTTPJson.Urls...)
|
||||
}
|
||||
if ds.HTTPJson.Url != "" {
|
||||
urls = append(urls, ds.HTTPJson.Url)
|
||||
}
|
||||
if len(urls) == 0 {
|
||||
return "", fmt.Errorf("no url provided")
|
||||
}
|
||||
|
||||
var lastErr error
|
||||
for _, raw := range urls {
|
||||
baseURL := strings.TrimRight(raw, "/") + "/"
|
||||
req, err := http.NewRequest("GET", baseURL, nil)
|
||||
if err != nil {
|
||||
lastErr = err
|
||||
continue
|
||||
}
|
||||
|
||||
if ds.AuthJson.BasicAuthUser != "" {
|
||||
req.SetBasicAuth(ds.AuthJson.BasicAuthUser, ds.AuthJson.BasicAuthPassword)
|
||||
}
|
||||
|
||||
for k, v := range ds.HTTPJson.Headers {
|
||||
req.Header.Set(k, v)
|
||||
}
|
||||
|
||||
resp, err := client.Do(req)
|
||||
if err != nil {
|
||||
lastErr = err
|
||||
continue
|
||||
}
|
||||
|
||||
body, err := io.ReadAll(resp.Body)
|
||||
resp.Body.Close()
|
||||
if err != nil {
|
||||
lastErr = err
|
||||
continue
|
||||
}
|
||||
|
||||
if resp.StatusCode != 200 {
|
||||
lastErr = fmt.Errorf("request to %s failed with status: %d body:%s", baseURL, resp.StatusCode, string(body))
|
||||
continue
|
||||
}
|
||||
|
||||
var result map[string]interface{}
|
||||
if err := json.Unmarshal(body, &result); err != nil {
|
||||
lastErr = err
|
||||
continue
|
||||
}
|
||||
|
||||
if version, ok := result["version"].(map[string]interface{}); ok {
|
||||
if number, ok := version["number"].(string); ok && number != "" {
|
||||
return number, nil
|
||||
}
|
||||
}
|
||||
|
||||
lastErr = fmt.Errorf("version not found in response from %s", baseURL)
|
||||
}
|
||||
|
||||
if lastErr != nil {
|
||||
return "", lastErr
|
||||
}
|
||||
return "", fmt.Errorf("failed to get elasticsearch version")
|
||||
}
|
||||
|
||||
@@ -6,10 +6,10 @@ import (
|
||||
"github.com/ccfos/nightingale/v6/dscache"
|
||||
"github.com/ccfos/nightingale/v6/dskit/types"
|
||||
"github.com/ccfos/nightingale/v6/models"
|
||||
"github.com/ccfos/nightingale/v6/pkg/ginx"
|
||||
"github.com/ccfos/nightingale/v6/pkg/logx"
|
||||
|
||||
"github.com/gin-gonic/gin"
|
||||
"github.com/toolkits/pkg/ginx"
|
||||
"github.com/toolkits/pkg/logger"
|
||||
)
|
||||
|
||||
func (rt *Router) ShowDatabases(c *gin.Context) {
|
||||
@@ -18,7 +18,7 @@ func (rt *Router) ShowDatabases(c *gin.Context) {
|
||||
|
||||
plug, exists := dscache.DsCache.Get(f.Cate, f.DatasourceId)
|
||||
if !exists {
|
||||
logx.Warningf(c.Request.Context(), "cluster:%d not exists", f.DatasourceId)
|
||||
logger.Warningf("cluster:%d not exists", f.DatasourceId)
|
||||
ginx.Bomb(200, "cluster not exists")
|
||||
}
|
||||
|
||||
@@ -48,7 +48,7 @@ func (rt *Router) ShowTables(c *gin.Context) {
|
||||
|
||||
plug, exists := dscache.DsCache.Get(f.Cate, f.DatasourceId)
|
||||
if !exists {
|
||||
logx.Warningf(c.Request.Context(), "cluster:%d not exists", f.DatasourceId)
|
||||
logger.Warningf("cluster:%d not exists", f.DatasourceId)
|
||||
ginx.Bomb(200, "cluster not exists")
|
||||
}
|
||||
|
||||
@@ -78,7 +78,7 @@ func (rt *Router) DescribeTable(c *gin.Context) {
|
||||
|
||||
plug, exists := dscache.DsCache.Get(f.Cate, f.DatasourceId)
|
||||
if !exists {
|
||||
logx.Warningf(c.Request.Context(), "cluster:%d not exists", f.DatasourceId)
|
||||
logger.Warningf("cluster:%d not exists", f.DatasourceId)
|
||||
ginx.Bomb(200, "cluster not exists")
|
||||
}
|
||||
// 只接受一个入参
|
||||
|
||||
@@ -5,15 +5,14 @@ import (
|
||||
|
||||
"github.com/ccfos/nightingale/v6/models"
|
||||
"github.com/ccfos/nightingale/v6/pkg/ctx"
|
||||
"github.com/ccfos/nightingale/v6/pkg/ginx"
|
||||
|
||||
"github.com/gin-gonic/gin"
|
||||
"github.com/toolkits/pkg/ginx"
|
||||
)
|
||||
|
||||
func (rt *Router) embeddedProductGets(c *gin.Context) {
|
||||
products, err := models.EmbeddedProductGets(rt.Ctx)
|
||||
ginx.Dangerous(err)
|
||||
models.FillUpdateByNicknames(rt.Ctx, products)
|
||||
// 获取当前用户可访问的Group ID 列表
|
||||
me := c.MustGet("user").(*models.User)
|
||||
|
||||
|
||||
@@ -3,10 +3,10 @@ package router
|
||||
import (
|
||||
"github.com/ccfos/nightingale/v6/datasource/es"
|
||||
"github.com/ccfos/nightingale/v6/dscache"
|
||||
"github.com/ccfos/nightingale/v6/pkg/ginx"
|
||||
"github.com/ccfos/nightingale/v6/pkg/logx"
|
||||
|
||||
"github.com/gin-gonic/gin"
|
||||
"github.com/toolkits/pkg/ginx"
|
||||
"github.com/toolkits/pkg/logger"
|
||||
)
|
||||
|
||||
type IndexReq struct {
|
||||
@@ -34,7 +34,7 @@ func (rt *Router) QueryIndices(c *gin.Context) {
|
||||
|
||||
plug, exists := dscache.DsCache.Get(f.Cate, f.DatasourceId)
|
||||
if !exists {
|
||||
logx.Warningf(c.Request.Context(), "cluster:%d not exists", f.DatasourceId)
|
||||
logger.Warningf("cluster:%d not exists", f.DatasourceId)
|
||||
ginx.Bomb(200, "cluster not exists")
|
||||
}
|
||||
|
||||
@@ -50,7 +50,7 @@ func (rt *Router) QueryFields(c *gin.Context) {
|
||||
|
||||
plug, exists := dscache.DsCache.Get(f.Cate, f.DatasourceId)
|
||||
if !exists {
|
||||
logx.Warningf(c.Request.Context(), "cluster:%d not exists", f.DatasourceId)
|
||||
logger.Warningf("cluster:%d not exists", f.DatasourceId)
|
||||
ginx.Bomb(200, "cluster not exists")
|
||||
}
|
||||
|
||||
@@ -66,7 +66,7 @@ func (rt *Router) QueryESVariable(c *gin.Context) {
|
||||
|
||||
plug, exists := dscache.DsCache.Get(f.Cate, f.DatasourceId)
|
||||
if !exists {
|
||||
logx.Warningf(c.Request.Context(), "cluster:%d not exists", f.DatasourceId)
|
||||
logger.Warningf("cluster:%d not exists", f.DatasourceId)
|
||||
ginx.Bomb(200, "cluster not exists")
|
||||
}
|
||||
|
||||
|
||||
@@ -5,8 +5,8 @@ import (
|
||||
"time"
|
||||
|
||||
"github.com/ccfos/nightingale/v6/models"
|
||||
"github.com/ccfos/nightingale/v6/pkg/ginx"
|
||||
"github.com/gin-gonic/gin"
|
||||
"github.com/toolkits/pkg/ginx"
|
||||
)
|
||||
|
||||
// 创建 ES Index Pattern
|
||||
@@ -69,10 +69,6 @@ func (rt *Router) esIndexPatternGetList(c *gin.Context) {
|
||||
lst, err = models.EsIndexPatternGets(rt.Ctx, "")
|
||||
}
|
||||
|
||||
if err == nil {
|
||||
models.FillUpdateByNicknames(rt.Ctx, lst)
|
||||
}
|
||||
|
||||
ginx.NewRender(c).Data(lst, err)
|
||||
}
|
||||
|
||||
|
||||
@@ -1,149 +0,0 @@
|
||||
package router
|
||||
|
||||
import (
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"io"
|
||||
"net/http"
|
||||
"strconv"
|
||||
"time"
|
||||
|
||||
"github.com/ccfos/nightingale/v6/alert/naming"
|
||||
"github.com/ccfos/nightingale/v6/models"
|
||||
"github.com/ccfos/nightingale/v6/pkg/loggrep"
|
||||
"github.com/ccfos/nightingale/v6/pkg/ginx"
|
||||
|
||||
"github.com/gin-gonic/gin"
|
||||
)
|
||||
|
||||
// eventDetailPage renders an HTML log viewer page (for pages group).
|
||||
func (rt *Router) eventDetailPage(c *gin.Context) {
|
||||
hash := ginx.UrlParamStr(c, "hash")
|
||||
if !loggrep.IsValidHash(hash) {
|
||||
c.String(http.StatusBadRequest, "invalid hash format")
|
||||
return
|
||||
}
|
||||
|
||||
logs, instance, err := rt.getEventLogs(hash)
|
||||
if err != nil {
|
||||
c.String(http.StatusInternalServerError, "Error: %v", err)
|
||||
return
|
||||
}
|
||||
|
||||
c.Header("Content-Type", "text/html; charset=utf-8")
|
||||
err = loggrep.RenderHTML(c.Writer, loggrep.PageData{
|
||||
Hash: hash,
|
||||
Instance: instance,
|
||||
Logs: logs,
|
||||
Total: len(logs),
|
||||
})
|
||||
if err != nil {
|
||||
c.String(http.StatusInternalServerError, "render error: %v", err)
|
||||
}
|
||||
}
|
||||
|
||||
// eventDetailJSON returns JSON (for service group).
|
||||
func (rt *Router) eventDetailJSON(c *gin.Context) {
|
||||
hash := ginx.UrlParamStr(c, "hash")
|
||||
if !loggrep.IsValidHash(hash) {
|
||||
ginx.Bomb(200, "invalid hash format")
|
||||
}
|
||||
|
||||
logs, instance, err := rt.getEventLogs(hash)
|
||||
ginx.Dangerous(err)
|
||||
|
||||
ginx.NewRender(c).Data(loggrep.EventDetailResp{
|
||||
Logs: logs,
|
||||
Instance: instance,
|
||||
}, nil)
|
||||
}
|
||||
|
||||
// getNodeForDatasource returns the alert engine instance responsible for the given
|
||||
// datasource and primary key. It first checks the local hashring, and falls back
|
||||
// to querying the database for active instances if the hashring is empty
|
||||
// (e.g. when the datasource belongs to another engine cluster).
|
||||
func (rt *Router) getNodeForDatasource(datasourceId int64, pk string) (string, error) {
|
||||
dsIdStr := strconv.FormatInt(datasourceId, 10)
|
||||
node, err := naming.DatasourceHashRing.GetNode(dsIdStr, pk)
|
||||
if err == nil {
|
||||
return node, nil
|
||||
}
|
||||
|
||||
// Hashring is empty for this datasource (likely belongs to another engine cluster).
|
||||
// Query the DB for active instances.
|
||||
servers, dbErr := models.AlertingEngineGetsInstances(rt.Ctx,
|
||||
"datasource_id = ? and clock > ?",
|
||||
datasourceId, time.Now().Unix()-30)
|
||||
if dbErr != nil {
|
||||
return "", dbErr
|
||||
}
|
||||
if len(servers) == 0 {
|
||||
return "", fmt.Errorf("no active instances for datasource %d", datasourceId)
|
||||
}
|
||||
|
||||
ring := naming.NewConsistentHashRing(int32(naming.NodeReplicas), servers)
|
||||
return ring.Get(pk)
|
||||
}
|
||||
|
||||
// getEventLogs resolves the target instance and retrieves logs.
|
||||
func (rt *Router) getEventLogs(hash string) ([]string, string, error) {
|
||||
event, err := models.AlertHisEventGetByHash(rt.Ctx, hash)
|
||||
if err != nil {
|
||||
return nil, "", err
|
||||
}
|
||||
if event == nil {
|
||||
return nil, "", fmt.Errorf("no such alert event")
|
||||
}
|
||||
|
||||
ruleId := strconv.FormatInt(event.RuleId, 10)
|
||||
|
||||
instance := fmt.Sprintf("%s:%d", rt.Alert.Heartbeat.IP, rt.HTTP.Port)
|
||||
|
||||
node, err := rt.getNodeForDatasource(event.DatasourceId, ruleId)
|
||||
if err != nil || node == instance {
|
||||
// hashring not ready or target is self, handle locally
|
||||
logs, err := loggrep.GrepLogDir(rt.LogDir, hash)
|
||||
return logs, instance, err
|
||||
}
|
||||
|
||||
// forward to the target alert instance
|
||||
return rt.forwardEventDetail(node, hash)
|
||||
}
|
||||
|
||||
func (rt *Router) forwardEventDetail(node, hash string) ([]string, string, error) {
|
||||
url := fmt.Sprintf("http://%s/v1/n9e/event-detail/%s", node, hash)
|
||||
req, err := http.NewRequest("GET", url, nil)
|
||||
if err != nil {
|
||||
return nil, node, err
|
||||
}
|
||||
|
||||
for user, pass := range rt.HTTP.APIForService.BasicAuth {
|
||||
req.SetBasicAuth(user, pass)
|
||||
break
|
||||
}
|
||||
|
||||
client := &http.Client{Timeout: 15 * time.Second}
|
||||
resp, err := client.Do(req)
|
||||
if err != nil {
|
||||
return nil, node, fmt.Errorf("forward to %s failed: %v", node, err)
|
||||
}
|
||||
defer resp.Body.Close()
|
||||
|
||||
body, err := io.ReadAll(io.LimitReader(resp.Body, 10*1024*1024)) // 10MB limit
|
||||
if err != nil {
|
||||
return nil, node, err
|
||||
}
|
||||
|
||||
var result struct {
|
||||
Dat loggrep.EventDetailResp `json:"dat"`
|
||||
Err string `json:"err"`
|
||||
}
|
||||
if err := json.Unmarshal(body, &result); err != nil {
|
||||
return nil, node, err
|
||||
}
|
||||
if result.Err != "" {
|
||||
return nil, node, fmt.Errorf("%s", result.Err)
|
||||
}
|
||||
|
||||
return result.Dat.Logs, result.Dat.Instance, nil
|
||||
}
|
||||
@@ -1,19 +1,14 @@
|
||||
package router
|
||||
|
||||
import (
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"net/http"
|
||||
"time"
|
||||
|
||||
"github.com/ccfos/nightingale/v6/alert/pipeline/engine"
|
||||
"github.com/ccfos/nightingale/v6/models"
|
||||
"github.com/ccfos/nightingale/v6/pkg/ginx"
|
||||
|
||||
"github.com/gin-gonic/gin"
|
||||
"github.com/google/uuid"
|
||||
"github.com/toolkits/pkg/ginx"
|
||||
"github.com/toolkits/pkg/i18n"
|
||||
"github.com/toolkits/pkg/logger"
|
||||
)
|
||||
|
||||
// 获取事件Pipeline列表
|
||||
@@ -32,38 +27,18 @@ func (rt *Router) eventPipelinesList(c *gin.Context) {
|
||||
for _, tid := range pipeline.TeamIds {
|
||||
pipeline.TeamNames = append(pipeline.TeamNames, ugMap[tid])
|
||||
}
|
||||
// 兼容处理:自动填充工作流字段
|
||||
pipeline.FillWorkflowFields()
|
||||
}
|
||||
models.FillUpdateByNicknames(rt.Ctx, pipelines)
|
||||
|
||||
gids, err := models.MyGroupIdsMap(rt.Ctx, me.Id)
|
||||
ginx.Dangerous(err)
|
||||
|
||||
if me.IsAdmin() {
|
||||
for _, pipeline := range pipelines {
|
||||
if pipeline.TriggerMode == "" {
|
||||
pipeline.TriggerMode = models.TriggerModeEvent
|
||||
}
|
||||
|
||||
if pipeline.UseCase == "" {
|
||||
pipeline.UseCase = models.UseCaseEventPipeline
|
||||
}
|
||||
}
|
||||
ginx.NewRender(c).Data(pipelines, nil)
|
||||
return
|
||||
}
|
||||
|
||||
res := make([]*models.EventPipeline, 0)
|
||||
for _, pipeline := range pipelines {
|
||||
if pipeline.TriggerMode == "" {
|
||||
pipeline.TriggerMode = models.TriggerModeEvent
|
||||
}
|
||||
|
||||
if pipeline.UseCase == "" {
|
||||
pipeline.UseCase = models.UseCaseEventPipeline
|
||||
}
|
||||
|
||||
for _, tid := range pipeline.TeamIds {
|
||||
if _, ok := gids[tid]; ok {
|
||||
res = append(res, pipeline)
|
||||
@@ -86,15 +61,6 @@ func (rt *Router) getEventPipeline(c *gin.Context) {
|
||||
err = pipeline.FillTeamNames(rt.Ctx)
|
||||
ginx.Dangerous(err)
|
||||
|
||||
// 兼容处理:自动填充工作流字段
|
||||
pipeline.FillWorkflowFields()
|
||||
if pipeline.TriggerMode == "" {
|
||||
pipeline.TriggerMode = models.TriggerModeEvent
|
||||
}
|
||||
if pipeline.UseCase == "" {
|
||||
pipeline.UseCase = models.UseCaseEventPipeline
|
||||
}
|
||||
|
||||
ginx.NewRender(c).Data(pipeline, nil)
|
||||
}
|
||||
|
||||
@@ -165,9 +131,7 @@ func (rt *Router) tryRunEventPipeline(c *gin.Context) {
|
||||
var f struct {
|
||||
EventId int64 `json:"event_id"`
|
||||
PipelineConfig models.EventPipeline `json:"pipeline_config"`
|
||||
InputVariables map[string]string `json:"input_variables,omitempty"`
|
||||
}
|
||||
|
||||
ginx.BindJSON(c, &f)
|
||||
|
||||
hisEvent, err := models.AlertHisEventGetById(rt.Ctx, f.EventId)
|
||||
@@ -177,33 +141,30 @@ func (rt *Router) tryRunEventPipeline(c *gin.Context) {
|
||||
event := hisEvent.ToCur()
|
||||
|
||||
lang := c.GetHeader("X-Language")
|
||||
me := c.MustGet("user").(*models.User)
|
||||
var result string
|
||||
for _, p := range f.PipelineConfig.ProcessorConfigs {
|
||||
processor, err := models.GetProcessorByType(p.Typ, p.Config)
|
||||
if err != nil {
|
||||
ginx.Bomb(http.StatusBadRequest, "get processor: %+v err: %+v", p, err)
|
||||
}
|
||||
event, result, err = processor.Process(rt.Ctx, event)
|
||||
if err != nil {
|
||||
ginx.Bomb(http.StatusBadRequest, "processor: %+v err: %+v", p, err)
|
||||
}
|
||||
|
||||
// 统一使用工作流引擎执行(兼容线性模式和工作流模式)
|
||||
workflowEngine := engine.NewWorkflowEngine(rt.Ctx)
|
||||
|
||||
triggerCtx := &models.WorkflowTriggerContext{
|
||||
Mode: models.TriggerModeAPI,
|
||||
TriggerBy: me.Username,
|
||||
InputsOverrides: f.InputVariables,
|
||||
}
|
||||
|
||||
resultEvent, result, err := workflowEngine.Execute(&f.PipelineConfig, event, triggerCtx)
|
||||
if err != nil {
|
||||
ginx.Bomb(http.StatusBadRequest, "pipeline execute error: %v", err)
|
||||
if event == nil {
|
||||
ginx.NewRender(c).Data(map[string]interface{}{
|
||||
"event": event,
|
||||
"result": i18n.Sprintf(lang, "event is dropped"),
|
||||
}, nil)
|
||||
return
|
||||
}
|
||||
}
|
||||
|
||||
m := map[string]interface{}{
|
||||
"event": resultEvent,
|
||||
"result": i18n.Sprintf(lang, result.Message),
|
||||
"status": result.Status,
|
||||
"node_results": result.NodeResults,
|
||||
"event": event,
|
||||
"result": i18n.Sprintf(lang, result),
|
||||
}
|
||||
|
||||
if resultEvent == nil {
|
||||
m["result"] = i18n.Sprintf(lang, "event is dropped")
|
||||
}
|
||||
|
||||
ginx.NewRender(c).Data(m, nil)
|
||||
}
|
||||
|
||||
@@ -225,18 +186,14 @@ func (rt *Router) tryRunEventProcessor(c *gin.Context) {
|
||||
if err != nil {
|
||||
ginx.Bomb(200, "get processor err: %+v", err)
|
||||
}
|
||||
wfCtx := &models.WorkflowContext{
|
||||
Event: event,
|
||||
Vars: make(map[string]interface{}),
|
||||
}
|
||||
wfCtx, res, err := processor.Process(rt.Ctx, wfCtx)
|
||||
event, res, err := processor.Process(rt.Ctx, event)
|
||||
if err != nil {
|
||||
ginx.Bomb(200, "processor err: %+v", err)
|
||||
}
|
||||
|
||||
lang := c.GetHeader("X-Language")
|
||||
ginx.NewRender(c).Data(map[string]interface{}{
|
||||
"event": wfCtx.Event,
|
||||
"event": event,
|
||||
"result": i18n.Sprintf(lang, res),
|
||||
}, nil)
|
||||
}
|
||||
@@ -266,10 +223,6 @@ func (rt *Router) tryRunEventProcessorByNotifyRule(c *gin.Context) {
|
||||
ginx.Bomb(http.StatusBadRequest, "processors not found")
|
||||
}
|
||||
|
||||
wfCtx := &models.WorkflowContext{
|
||||
Event: event,
|
||||
Vars: make(map[string]interface{}),
|
||||
}
|
||||
for _, pl := range pipelines {
|
||||
for _, p := range pl.ProcessorConfigs {
|
||||
processor, err := models.GetProcessorByType(p.Typ, p.Config)
|
||||
@@ -277,14 +230,14 @@ func (rt *Router) tryRunEventProcessorByNotifyRule(c *gin.Context) {
|
||||
ginx.Bomb(http.StatusBadRequest, "get processor: %+v err: %+v", p, err)
|
||||
}
|
||||
|
||||
wfCtx, _, err = processor.Process(rt.Ctx, wfCtx)
|
||||
event, _, err := processor.Process(rt.Ctx, event)
|
||||
if err != nil {
|
||||
ginx.Bomb(http.StatusBadRequest, "processor: %+v err: %+v", p, err)
|
||||
}
|
||||
if wfCtx == nil || wfCtx.Event == nil {
|
||||
if event == nil {
|
||||
lang := c.GetHeader("X-Language")
|
||||
ginx.NewRender(c).Data(map[string]interface{}{
|
||||
"event": nil,
|
||||
"event": event,
|
||||
"result": i18n.Sprintf(lang, "event is dropped"),
|
||||
}, nil)
|
||||
return
|
||||
@@ -292,348 +245,10 @@ func (rt *Router) tryRunEventProcessorByNotifyRule(c *gin.Context) {
|
||||
}
|
||||
}
|
||||
|
||||
ginx.NewRender(c).Data(wfCtx.Event, nil)
|
||||
ginx.NewRender(c).Data(event, nil)
|
||||
}
|
||||
|
||||
func (rt *Router) eventPipelinesListByService(c *gin.Context) {
|
||||
pipelines, err := models.ListEventPipelines(rt.Ctx)
|
||||
ginx.NewRender(c).Data(pipelines, err)
|
||||
}
|
||||
|
||||
type EventPipelineRequest struct {
|
||||
// 事件数据(可选,如果不传则使用空事件)
|
||||
Event *models.AlertCurEvent `json:"event,omitempty"`
|
||||
// 输入参数覆盖
|
||||
InputsOverrides map[string]string `json:"inputs_overrides,omitempty"`
|
||||
|
||||
Username string `json:"username,omitempty"`
|
||||
}
|
||||
|
||||
// executePipelineTrigger 执行 Pipeline 触发的公共逻辑
|
||||
func (rt *Router) executePipelineTrigger(pipeline *models.EventPipeline, req *EventPipelineRequest, triggerBy string) (string, error) {
|
||||
// 准备事件数据
|
||||
var event *models.AlertCurEvent
|
||||
if req.Event != nil {
|
||||
event = req.Event
|
||||
} else {
|
||||
// 创建空事件
|
||||
event = &models.AlertCurEvent{
|
||||
TriggerTime: time.Now().Unix(),
|
||||
}
|
||||
}
|
||||
|
||||
// 生成执行ID
|
||||
executionID := uuid.New().String()
|
||||
|
||||
// 创建触发上下文
|
||||
triggerCtx := &models.WorkflowTriggerContext{
|
||||
Mode: models.TriggerModeAPI,
|
||||
TriggerBy: triggerBy,
|
||||
InputsOverrides: req.InputsOverrides,
|
||||
RequestID: executionID,
|
||||
}
|
||||
|
||||
// 异步执行工作流
|
||||
go func() {
|
||||
workflowEngine := engine.NewWorkflowEngine(rt.Ctx)
|
||||
_, _, err := workflowEngine.Execute(pipeline, event, triggerCtx)
|
||||
if err != nil {
|
||||
logger.Errorf("async workflow execute error: pipeline_id=%d execution_id=%s err=%v",
|
||||
pipeline.ID, executionID, err)
|
||||
}
|
||||
}()
|
||||
|
||||
return executionID, nil
|
||||
}
|
||||
|
||||
// triggerEventPipelineByService Service 调用触发工作流执行
|
||||
func (rt *Router) triggerEventPipelineByService(c *gin.Context) {
|
||||
pipelineID := ginx.UrlParamInt64(c, "id")
|
||||
var f EventPipelineRequest
|
||||
ginx.BindJSON(c, &f)
|
||||
|
||||
// 获取 Pipeline
|
||||
pipeline, err := models.GetEventPipeline(rt.Ctx, pipelineID)
|
||||
if err != nil {
|
||||
ginx.Bomb(http.StatusNotFound, "pipeline not found: %v", err)
|
||||
}
|
||||
|
||||
executionID, err := rt.executePipelineTrigger(pipeline, &f, f.Username)
|
||||
if err != nil {
|
||||
ginx.Bomb(http.StatusBadRequest, "%v", err)
|
||||
}
|
||||
|
||||
ginx.NewRender(c).Data(gin.H{
|
||||
"execution_id": executionID,
|
||||
"message": "workflow execution started",
|
||||
}, nil)
|
||||
}
|
||||
|
||||
// triggerEventPipelineByAPI API 触发工作流执行
|
||||
func (rt *Router) triggerEventPipelineByAPI(c *gin.Context) {
|
||||
pipelineID := ginx.UrlParamInt64(c, "id")
|
||||
var f EventPipelineRequest
|
||||
ginx.BindJSON(c, &f)
|
||||
|
||||
// 获取 Pipeline
|
||||
pipeline, err := models.GetEventPipeline(rt.Ctx, pipelineID)
|
||||
if err != nil {
|
||||
ginx.Bomb(http.StatusNotFound, "pipeline not found: %v", err)
|
||||
}
|
||||
|
||||
// 检查权限
|
||||
me := c.MustGet("user").(*models.User)
|
||||
ginx.Dangerous(me.CheckGroupPermission(rt.Ctx, pipeline.TeamIds))
|
||||
|
||||
executionID, err := rt.executePipelineTrigger(pipeline, &f, me.Username)
|
||||
if err != nil {
|
||||
ginx.Bomb(http.StatusBadRequest, err.Error())
|
||||
}
|
||||
|
||||
ginx.NewRender(c).Data(gin.H{
|
||||
"execution_id": executionID,
|
||||
"message": "workflow execution started",
|
||||
}, nil)
|
||||
}
|
||||
|
||||
func (rt *Router) listAllEventPipelineExecutions(c *gin.Context) {
|
||||
pipelineId := ginx.QueryInt64(c, "pipeline_id", 0)
|
||||
pipelineName := ginx.QueryStr(c, "pipeline_name", "")
|
||||
mode := ginx.QueryStr(c, "mode", "")
|
||||
status := ginx.QueryStr(c, "status", "")
|
||||
limit := ginx.QueryInt(c, "limit", 20)
|
||||
offset := ginx.QueryInt(c, "p", 1)
|
||||
|
||||
if limit <= 0 || limit > 1000 {
|
||||
limit = 20
|
||||
}
|
||||
if offset <= 0 {
|
||||
offset = 1
|
||||
}
|
||||
|
||||
executions, total, err := models.ListAllEventPipelineExecutions(rt.Ctx, pipelineId, pipelineName, mode, status, limit, (offset-1)*limit)
|
||||
ginx.Dangerous(err)
|
||||
|
||||
ginx.NewRender(c).Data(gin.H{
|
||||
"list": executions,
|
||||
"total": total,
|
||||
}, nil)
|
||||
}
|
||||
|
||||
func (rt *Router) listEventPipelineExecutions(c *gin.Context) {
|
||||
pipelineID := ginx.UrlParamInt64(c, "id")
|
||||
mode := ginx.QueryStr(c, "mode", "")
|
||||
status := ginx.QueryStr(c, "status", "")
|
||||
limit := ginx.QueryInt(c, "limit", 20)
|
||||
offset := ginx.QueryInt(c, "p", 1)
|
||||
|
||||
if limit <= 0 || limit > 1000 {
|
||||
limit = 20
|
||||
}
|
||||
if offset <= 0 {
|
||||
offset = 1
|
||||
}
|
||||
|
||||
executions, total, err := models.ListEventPipelineExecutions(rt.Ctx, pipelineID, mode, status, limit, (offset-1)*limit)
|
||||
ginx.Dangerous(err)
|
||||
|
||||
ginx.NewRender(c).Data(gin.H{
|
||||
"list": executions,
|
||||
"total": total,
|
||||
}, nil)
|
||||
}
|
||||
|
||||
func (rt *Router) getEventPipelineExecution(c *gin.Context) {
|
||||
execID := ginx.UrlParamStr(c, "exec_id")
|
||||
|
||||
detail, err := models.GetEventPipelineExecutionDetail(rt.Ctx, execID)
|
||||
if err != nil {
|
||||
ginx.Bomb(http.StatusNotFound, "execution not found: %v", err)
|
||||
}
|
||||
|
||||
ginx.NewRender(c).Data(detail, nil)
|
||||
}
|
||||
|
||||
func (rt *Router) getEventPipelineExecutionStats(c *gin.Context) {
|
||||
pipelineID := ginx.UrlParamInt64(c, "id")
|
||||
|
||||
stats, err := models.GetEventPipelineExecutionStatistics(rt.Ctx, pipelineID)
|
||||
ginx.Dangerous(err)
|
||||
|
||||
ginx.NewRender(c).Data(stats, nil)
|
||||
}
|
||||
|
||||
func (rt *Router) cleanEventPipelineExecutions(c *gin.Context) {
|
||||
var f struct {
|
||||
BeforeDays int `json:"before_days"`
|
||||
}
|
||||
ginx.BindJSON(c, &f)
|
||||
|
||||
if f.BeforeDays <= 0 {
|
||||
f.BeforeDays = 30
|
||||
}
|
||||
|
||||
beforeTime := time.Now().AddDate(0, 0, -f.BeforeDays).Unix()
|
||||
affected, err := models.DeleteEventPipelineExecutions(rt.Ctx, beforeTime)
|
||||
ginx.Dangerous(err)
|
||||
|
||||
ginx.NewRender(c).Data(gin.H{
|
||||
"deleted": affected,
|
||||
}, nil)
|
||||
}
|
||||
|
||||
func (rt *Router) streamEventPipeline(c *gin.Context) {
|
||||
pipelineID := ginx.UrlParamInt64(c, "id")
|
||||
|
||||
var f EventPipelineRequest
|
||||
ginx.BindJSON(c, &f)
|
||||
|
||||
pipeline, err := models.GetEventPipeline(rt.Ctx, pipelineID)
|
||||
if err != nil {
|
||||
ginx.Bomb(http.StatusNotFound, "pipeline not found: %v", err)
|
||||
}
|
||||
|
||||
me := c.MustGet("user").(*models.User)
|
||||
ginx.Dangerous(me.CheckGroupPermission(rt.Ctx, pipeline.TeamIds))
|
||||
|
||||
var event *models.AlertCurEvent
|
||||
if f.Event != nil {
|
||||
event = f.Event
|
||||
} else {
|
||||
event = &models.AlertCurEvent{
|
||||
TriggerTime: time.Now().Unix(),
|
||||
}
|
||||
}
|
||||
|
||||
triggerCtx := &models.WorkflowTriggerContext{
|
||||
Mode: models.TriggerModeAPI,
|
||||
TriggerBy: me.Username,
|
||||
InputsOverrides: f.InputsOverrides,
|
||||
RequestID: uuid.New().String(),
|
||||
Stream: true, // 流式端点强制启用流式输出
|
||||
}
|
||||
|
||||
workflowEngine := engine.NewWorkflowEngine(rt.Ctx)
|
||||
_, result, err := workflowEngine.Execute(pipeline, event, triggerCtx)
|
||||
if err != nil {
|
||||
ginx.Bomb(http.StatusInternalServerError, "execute failed: %v", err)
|
||||
}
|
||||
|
||||
if result.Stream && result.StreamChan != nil {
|
||||
rt.handleStreamResponse(c, result, triggerCtx.RequestID)
|
||||
return
|
||||
}
|
||||
|
||||
ginx.NewRender(c).Data(result, nil)
|
||||
}
|
||||
|
||||
func (rt *Router) handleStreamResponse(c *gin.Context, result *models.WorkflowResult, requestID string) {
|
||||
// 设置 SSE 响应头
|
||||
c.Header("Content-Type", "text/event-stream")
|
||||
c.Header("Cache-Control", "no-cache")
|
||||
c.Header("Connection", "keep-alive")
|
||||
c.Header("X-Accel-Buffering", "no") // 禁用 nginx 缓冲
|
||||
c.Header("X-Request-ID", requestID)
|
||||
|
||||
flusher, ok := c.Writer.(http.Flusher)
|
||||
if !ok {
|
||||
ginx.Bomb(http.StatusInternalServerError, "streaming not supported")
|
||||
return
|
||||
}
|
||||
|
||||
// 发送初始连接成功消息
|
||||
initData := fmt.Sprintf(`{"type":"connected","request_id":"%s","timestamp":%d}`, requestID, time.Now().UnixMilli())
|
||||
fmt.Fprintf(c.Writer, "data: %s\n\n", initData)
|
||||
flusher.Flush()
|
||||
|
||||
// 从 channel 读取并发送 SSE
|
||||
timeout := time.After(30 * time.Minute) // 最长流式输出时间
|
||||
for {
|
||||
select {
|
||||
case chunk, ok := <-result.StreamChan:
|
||||
if !ok {
|
||||
// channel 关闭,发送结束标记
|
||||
return
|
||||
}
|
||||
|
||||
data, err := json.Marshal(chunk)
|
||||
if err != nil {
|
||||
logger.Errorf("stream: failed to marshal chunk: %v", err)
|
||||
continue
|
||||
}
|
||||
|
||||
fmt.Fprintf(c.Writer, "data: %s\n\n", data)
|
||||
flusher.Flush()
|
||||
|
||||
if chunk.Done {
|
||||
return
|
||||
}
|
||||
|
||||
case <-c.Request.Context().Done():
|
||||
// 客户端断开连接
|
||||
logger.Infof("stream: client disconnected, request_id=%s", requestID)
|
||||
return
|
||||
case <-timeout:
|
||||
logger.Errorf("stream: timeout, request_id=%s", requestID)
|
||||
return
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func (rt *Router) streamEventPipelineByService(c *gin.Context) {
|
||||
pipelineID := ginx.UrlParamInt64(c, "id")
|
||||
|
||||
var f EventPipelineRequest
|
||||
ginx.BindJSON(c, &f)
|
||||
|
||||
pipeline, err := models.GetEventPipeline(rt.Ctx, pipelineID)
|
||||
if err != nil {
|
||||
ginx.Bomb(http.StatusNotFound, "pipeline not found: %v", err)
|
||||
}
|
||||
|
||||
var event *models.AlertCurEvent
|
||||
if f.Event != nil {
|
||||
event = f.Event
|
||||
} else {
|
||||
event = &models.AlertCurEvent{
|
||||
TriggerTime: time.Now().Unix(),
|
||||
}
|
||||
}
|
||||
|
||||
triggerCtx := &models.WorkflowTriggerContext{
|
||||
Mode: models.TriggerModeAPI,
|
||||
TriggerBy: f.Username,
|
||||
InputsOverrides: f.InputsOverrides,
|
||||
RequestID: uuid.New().String(),
|
||||
Stream: true, // 流式端点强制启用流式输出
|
||||
}
|
||||
|
||||
workflowEngine := engine.NewWorkflowEngine(rt.Ctx)
|
||||
_, result, err := workflowEngine.Execute(pipeline, event, triggerCtx)
|
||||
if err != nil {
|
||||
ginx.Bomb(http.StatusInternalServerError, "execute failed: %v", err)
|
||||
}
|
||||
|
||||
// 检查是否是流式输出
|
||||
if result.Stream && result.StreamChan != nil {
|
||||
rt.handleStreamResponse(c, result, triggerCtx.RequestID)
|
||||
return
|
||||
}
|
||||
|
||||
ginx.NewRender(c).Data(result, nil)
|
||||
}
|
||||
|
||||
// eventPipelineExecutionAdd 接收 edge 节点同步的 Pipeline 执行记录
|
||||
func (rt *Router) eventPipelineExecutionAdd(c *gin.Context) {
|
||||
var execution models.EventPipelineExecution
|
||||
ginx.BindJSON(c, &execution)
|
||||
|
||||
if execution.ID == "" {
|
||||
ginx.Bomb(http.StatusBadRequest, "id is required")
|
||||
}
|
||||
if execution.PipelineID <= 0 {
|
||||
ginx.Bomb(http.StatusBadRequest, "pipeline_id is required")
|
||||
}
|
||||
|
||||
ginx.NewRender(c).Message(models.DB(rt.Ctx).Create(&execution).Error)
|
||||
}
|
||||
|
||||
@@ -7,9 +7,9 @@ import (
|
||||
|
||||
"github.com/ccfos/nightingale/v6/models"
|
||||
"github.com/ccfos/nightingale/v6/pkg/ctx"
|
||||
"github.com/ccfos/nightingale/v6/pkg/ginx"
|
||||
|
||||
"github.com/gin-gonic/gin"
|
||||
"github.com/toolkits/pkg/ginx"
|
||||
)
|
||||
|
||||
const defaultLimit = 300
|
||||
|
||||
@@ -15,9 +15,9 @@ import (
|
||||
"github.com/ccfos/nightingale/v6/models"
|
||||
"github.com/ccfos/nightingale/v6/pkg/ctx"
|
||||
"github.com/ccfos/nightingale/v6/pushgw/idents"
|
||||
"github.com/ccfos/nightingale/v6/pkg/ginx"
|
||||
|
||||
"github.com/gin-gonic/gin"
|
||||
"github.com/toolkits/pkg/ginx"
|
||||
"github.com/toolkits/pkg/logger"
|
||||
)
|
||||
|
||||
|
||||
@@ -12,18 +12,17 @@ import (
|
||||
"github.com/ccfos/nightingale/v6/models"
|
||||
"github.com/ccfos/nightingale/v6/pkg/cas"
|
||||
"github.com/ccfos/nightingale/v6/pkg/dingtalk"
|
||||
"github.com/ccfos/nightingale/v6/pkg/feishu"
|
||||
"github.com/ccfos/nightingale/v6/pkg/ldapx"
|
||||
"github.com/ccfos/nightingale/v6/pkg/logx"
|
||||
"github.com/ccfos/nightingale/v6/pkg/oauth2x"
|
||||
"github.com/ccfos/nightingale/v6/pkg/oidcx"
|
||||
"github.com/ccfos/nightingale/v6/pkg/secu"
|
||||
"github.com/ccfos/nightingale/v6/pkg/ginx"
|
||||
|
||||
"github.com/dgrijalva/jwt-go"
|
||||
"github.com/gin-gonic/gin"
|
||||
"github.com/pelletier/go-toml/v2"
|
||||
"github.com/pkg/errors"
|
||||
"github.com/toolkits/pkg/ginx"
|
||||
"github.com/toolkits/pkg/logger"
|
||||
"gorm.io/gorm"
|
||||
)
|
||||
|
||||
@@ -37,9 +36,7 @@ type loginForm struct {
|
||||
func (rt *Router) loginPost(c *gin.Context) {
|
||||
var f loginForm
|
||||
ginx.BindJSON(c, &f)
|
||||
|
||||
rctx := c.Request.Context()
|
||||
logx.Infof(rctx, "username:%s login from:%s", f.Username, c.ClientIP())
|
||||
logger.Infof("username:%s login from:%s", f.Username, c.ClientIP())
|
||||
|
||||
if rt.HTTP.ShowCaptcha.Enable {
|
||||
if !CaptchaVerify(f.Captchaid, f.Verifyvalue) {
|
||||
@@ -52,25 +49,23 @@ func (rt *Router) loginPost(c *gin.Context) {
|
||||
if rt.HTTP.RSA.OpenRSA {
|
||||
decPassWord, err := secu.Decrypt(f.Password, rt.HTTP.RSA.RSAPrivateKey, rt.HTTP.RSA.RSAPassWord)
|
||||
if err != nil {
|
||||
logx.Errorf(rctx, "RSA Decrypt failed: %v username: %s", err, f.Username)
|
||||
logger.Errorf("RSA Decrypt failed: %v username: %s", err, f.Username)
|
||||
ginx.NewRender(c).Message(err)
|
||||
return
|
||||
}
|
||||
authPassWord = decPassWord
|
||||
}
|
||||
|
||||
reqCtx := rt.Ctx.WithContext(rctx)
|
||||
|
||||
var user *models.User
|
||||
var err error
|
||||
lc := rt.Sso.LDAP.Copy()
|
||||
if lc.Enable {
|
||||
user, err = ldapx.LdapLogin(reqCtx, f.Username, authPassWord, lc.DefaultRoles, lc.DefaultTeams, lc)
|
||||
user, err = ldapx.LdapLogin(rt.Ctx, f.Username, authPassWord, lc.DefaultRoles, lc.DefaultTeams, lc)
|
||||
if err != nil {
|
||||
logx.Debugf(rctx, "ldap login failed: %v username: %s", err, f.Username)
|
||||
logger.Debugf("ldap login failed: %v username: %s", err, f.Username)
|
||||
var errLoginInN9e error
|
||||
// to use n9e as the minimum guarantee for login
|
||||
if user, errLoginInN9e = models.PassLogin(reqCtx, rt.Redis, f.Username, authPassWord); errLoginInN9e != nil {
|
||||
if user, errLoginInN9e = models.PassLogin(rt.Ctx, rt.Redis, f.Username, authPassWord); errLoginInN9e != nil {
|
||||
ginx.NewRender(c).Message("ldap login failed: %v; n9e login failed: %v", err, errLoginInN9e)
|
||||
return
|
||||
}
|
||||
@@ -78,7 +73,7 @@ func (rt *Router) loginPost(c *gin.Context) {
|
||||
user.RolesLst = strings.Fields(user.Roles)
|
||||
}
|
||||
} else {
|
||||
user, err = models.PassLogin(reqCtx, rt.Redis, f.Username, authPassWord)
|
||||
user, err = models.PassLogin(rt.Ctx, rt.Redis, f.Username, authPassWord)
|
||||
ginx.Dangerous(err)
|
||||
}
|
||||
|
||||
@@ -102,8 +97,7 @@ func (rt *Router) loginPost(c *gin.Context) {
|
||||
}
|
||||
|
||||
func (rt *Router) logoutPost(c *gin.Context) {
|
||||
rctx := c.Request.Context()
|
||||
logx.Infof(rctx, "username:%s logout from:%s", c.GetString("username"), c.ClientIP())
|
||||
logger.Infof("username:%s logout from:%s", c.GetString("username"), c.ClientIP())
|
||||
metadata, err := rt.extractTokenMetadata(c.Request)
|
||||
if err != nil {
|
||||
ginx.NewRender(c, http.StatusBadRequest).Message("failed to parse jwt token")
|
||||
@@ -122,7 +116,7 @@ func (rt *Router) logoutPost(c *gin.Context) {
|
||||
// 获取用户的 id_token
|
||||
idToken, err := rt.fetchIdToken(c.Request.Context(), user.Id)
|
||||
if err != nil {
|
||||
logx.Debugf(rctx, "fetch id_token failed: %v, user_id: %d", err, user.Id)
|
||||
logger.Debugf("fetch id_token failed: %v, user_id: %d", err, user.Id)
|
||||
idToken = "" // 如果获取失败,使用空字符串
|
||||
}
|
||||
|
||||
@@ -225,7 +219,7 @@ func (rt *Router) refreshPost(c *gin.Context) {
|
||||
// 注意:这里不会获取新的 id_token,只是延长 Redis 中现有 id_token 的 TTL
|
||||
if idToken, err := rt.fetchIdToken(c.Request.Context(), userid); err == nil && idToken != "" {
|
||||
if err := rt.saveIdToken(c.Request.Context(), userid, idToken); err != nil {
|
||||
logx.Debugf(c.Request.Context(), "refresh id_token ttl failed: %v, user_id: %d", err, userid)
|
||||
logger.Debugf("refresh id_token ttl failed: %v, user_id: %d", err, userid)
|
||||
}
|
||||
}
|
||||
|
||||
@@ -276,13 +270,12 @@ type CallbackOutput struct {
|
||||
}
|
||||
|
||||
func (rt *Router) loginCallback(c *gin.Context) {
|
||||
rctx := c.Request.Context()
|
||||
code := ginx.QueryStr(c, "code", "")
|
||||
state := ginx.QueryStr(c, "state", "")
|
||||
|
||||
ret, err := rt.Sso.OIDC.Callback(rt.Redis, rctx, code, state)
|
||||
ret, err := rt.Sso.OIDC.Callback(rt.Redis, c.Request.Context(), code, state)
|
||||
if err != nil {
|
||||
logx.Errorf(rctx, "sso_callback fail. code:%s, state:%s, get ret: %+v. error: %v", code, state, ret, err)
|
||||
logger.Errorf("sso_callback fail. code:%s, state:%s, get ret: %+v. error: %v", code, state, ret, err)
|
||||
ginx.NewRender(c).Data(CallbackOutput{}, err)
|
||||
return
|
||||
}
|
||||
@@ -305,7 +298,7 @@ func (rt *Router) loginCallback(c *gin.Context) {
|
||||
for _, gid := range rt.Sso.OIDC.DefaultTeams {
|
||||
err = models.UserGroupMemberAdd(rt.Ctx, gid, user.Id)
|
||||
if err != nil {
|
||||
logx.Errorf(rctx, "user:%v UserGroupMemberAdd: %s", user, err)
|
||||
logger.Errorf("user:%v UserGroupMemberAdd: %s", user, err)
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -315,12 +308,12 @@ func (rt *Router) loginCallback(c *gin.Context) {
|
||||
userIdentity := fmt.Sprintf("%d-%s", user.Id, user.Username)
|
||||
ts, err := rt.createTokens(rt.HTTP.JWTAuth.SigningKey, userIdentity)
|
||||
ginx.Dangerous(err)
|
||||
ginx.Dangerous(rt.createAuth(rctx, userIdentity, ts))
|
||||
ginx.Dangerous(rt.createAuth(c.Request.Context(), userIdentity, ts))
|
||||
|
||||
// 保存 id_token 到 Redis,用于登出时使用
|
||||
if ret.IdToken != "" {
|
||||
if err := rt.saveIdToken(rctx, user.Id, ret.IdToken); err != nil {
|
||||
logx.Errorf(rctx, "save id_token failed: %v, user_id: %d", err, user.Id)
|
||||
if err := rt.saveIdToken(c.Request.Context(), user.Id, ret.IdToken); err != nil {
|
||||
logger.Errorf("save id_token failed: %v, user_id: %d", err, user.Id)
|
||||
}
|
||||
}
|
||||
|
||||
@@ -361,7 +354,7 @@ func (rt *Router) loginRedirectCas(c *gin.Context) {
|
||||
}
|
||||
|
||||
if !rt.Sso.CAS.Enable {
|
||||
logx.Errorf(c.Request.Context(), "cas is not enable")
|
||||
logger.Error("cas is not enable")
|
||||
ginx.NewRender(c).Data("", nil)
|
||||
return
|
||||
}
|
||||
@@ -376,18 +369,17 @@ func (rt *Router) loginRedirectCas(c *gin.Context) {
|
||||
}
|
||||
|
||||
func (rt *Router) loginCallbackCas(c *gin.Context) {
|
||||
rctx := c.Request.Context()
|
||||
ticket := ginx.QueryStr(c, "ticket", "")
|
||||
state := ginx.QueryStr(c, "state", "")
|
||||
ret, err := rt.Sso.CAS.ValidateServiceTicket(rctx, ticket, state, rt.Redis)
|
||||
ret, err := rt.Sso.CAS.ValidateServiceTicket(c.Request.Context(), ticket, state, rt.Redis)
|
||||
if err != nil {
|
||||
logx.Errorf(rctx, "ValidateServiceTicket: %s", err)
|
||||
logger.Errorf("ValidateServiceTicket: %s", err)
|
||||
ginx.NewRender(c).Data("", err)
|
||||
return
|
||||
}
|
||||
user, err := models.UserGet(rt.Ctx, "username=?", ret.Username)
|
||||
if err != nil {
|
||||
logx.Errorf(rctx, "UserGet: %s", err)
|
||||
logger.Errorf("UserGet: %s", err)
|
||||
}
|
||||
ginx.Dangerous(err)
|
||||
if user != nil {
|
||||
@@ -406,10 +398,10 @@ func (rt *Router) loginCallbackCas(c *gin.Context) {
|
||||
userIdentity := fmt.Sprintf("%d-%s", user.Id, user.Username)
|
||||
ts, err := rt.createTokens(rt.HTTP.JWTAuth.SigningKey, userIdentity)
|
||||
if err != nil {
|
||||
logx.Errorf(rctx, "createTokens: %s", err)
|
||||
logger.Errorf("createTokens: %s", err)
|
||||
}
|
||||
ginx.Dangerous(err)
|
||||
ginx.Dangerous(rt.createAuth(rctx, userIdentity, ts))
|
||||
ginx.Dangerous(rt.createAuth(c.Request.Context(), userIdentity, ts))
|
||||
|
||||
redirect := "/"
|
||||
if ret.Redirect != "/login" {
|
||||
@@ -482,13 +474,12 @@ func (rt *Router) loginRedirectDingTalk(c *gin.Context) {
|
||||
}
|
||||
|
||||
func (rt *Router) loginCallbackDingTalk(c *gin.Context) {
|
||||
rctx := c.Request.Context()
|
||||
code := ginx.QueryStr(c, "code", "")
|
||||
state := ginx.QueryStr(c, "state", "")
|
||||
|
||||
ret, err := rt.Sso.DingTalk.Callback(rt.Redis, rctx, code, state)
|
||||
ret, err := rt.Sso.DingTalk.Callback(rt.Redis, c.Request.Context(), code, state)
|
||||
if err != nil {
|
||||
logx.Errorf(rctx, "sso_callback DingTalk fail. code:%s, state:%s, get ret: %+v. error: %v", code, state, ret, err)
|
||||
logger.Errorf("sso_callback DingTalk fail. code:%s, state:%s, get ret: %+v. error: %v", code, state, ret, err)
|
||||
ginx.NewRender(c).Data(CallbackOutput{}, err)
|
||||
return
|
||||
}
|
||||
@@ -528,104 +519,13 @@ func (rt *Router) loginCallbackDingTalk(c *gin.Context) {
|
||||
|
||||
}
|
||||
|
||||
func (rt *Router) loginRedirectFeiShu(c *gin.Context) {
|
||||
redirect := ginx.QueryStr(c, "redirect", "/")
|
||||
|
||||
v, exists := c.Get("userid")
|
||||
if exists {
|
||||
userid := v.(int64)
|
||||
user, err := models.UserGetById(rt.Ctx, userid)
|
||||
ginx.Dangerous(err)
|
||||
if user == nil {
|
||||
ginx.Bomb(200, "user not found")
|
||||
}
|
||||
|
||||
if user.Username != "" { // already login
|
||||
ginx.NewRender(c).Data(redirect, nil)
|
||||
return
|
||||
}
|
||||
}
|
||||
|
||||
if rt.Sso.FeiShu == nil || !rt.Sso.FeiShu.Enable {
|
||||
ginx.NewRender(c).Data("", nil)
|
||||
return
|
||||
}
|
||||
|
||||
redirect, err := rt.Sso.FeiShu.Authorize(rt.Redis, redirect)
|
||||
ginx.Dangerous(err)
|
||||
|
||||
ginx.NewRender(c).Data(redirect, err)
|
||||
}
|
||||
|
||||
func (rt *Router) loginCallbackFeiShu(c *gin.Context) {
|
||||
rctx := c.Request.Context()
|
||||
code := ginx.QueryStr(c, "code", "")
|
||||
state := ginx.QueryStr(c, "state", "")
|
||||
|
||||
ret, err := rt.Sso.FeiShu.Callback(rt.Redis, rctx, code, state)
|
||||
if err != nil {
|
||||
logx.Errorf(rctx, "sso_callback FeiShu fail. code:%s, state:%s, get ret: %+v. error: %v", code, state, ret, err)
|
||||
ginx.NewRender(c).Data(CallbackOutput{}, err)
|
||||
return
|
||||
}
|
||||
|
||||
user, err := models.UserGet(rt.Ctx, "username=?", ret.Username)
|
||||
ginx.Dangerous(err)
|
||||
|
||||
if user != nil {
|
||||
if rt.Sso.FeiShu != nil && rt.Sso.FeiShu.FeiShuConfig != nil && rt.Sso.FeiShu.FeiShuConfig.CoverAttributes {
|
||||
updatedFields := user.UpdateSsoFields(feishu.SsoTypeName, ret.Nickname, ret.Phone, ret.Email)
|
||||
ginx.Dangerous(user.Update(rt.Ctx, "update_at", updatedFields...))
|
||||
}
|
||||
} else {
|
||||
user = new(models.User)
|
||||
defaultRoles := []string{}
|
||||
defaultUserGroups := []int64{}
|
||||
if rt.Sso.FeiShu != nil && rt.Sso.FeiShu.FeiShuConfig != nil {
|
||||
defaultRoles = rt.Sso.FeiShu.FeiShuConfig.DefaultRoles
|
||||
defaultUserGroups = rt.Sso.FeiShu.FeiShuConfig.DefaultUserGroups
|
||||
}
|
||||
|
||||
user.FullSsoFields(feishu.SsoTypeName, ret.Username, ret.Nickname, ret.Phone, ret.Email, defaultRoles)
|
||||
ginx.Dangerous(user.Add(rt.Ctx))
|
||||
|
||||
if len(defaultUserGroups) > 0 {
|
||||
err = user.AddToUserGroups(rt.Ctx, defaultUserGroups)
|
||||
if err != nil {
|
||||
logx.Errorf(rctx, "sso feishu add user group error %v %v", ret, err)
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
// set user login state
|
||||
userIdentity := fmt.Sprintf("%d-%s", user.Id, user.Username)
|
||||
ts, err := rt.createTokens(rt.HTTP.JWTAuth.SigningKey, userIdentity)
|
||||
ginx.Dangerous(err)
|
||||
ginx.Dangerous(rt.createAuth(c.Request.Context(), userIdentity, ts))
|
||||
|
||||
redirect := "/"
|
||||
if ret.Redirect != "/login" {
|
||||
redirect = ret.Redirect
|
||||
}
|
||||
|
||||
ginx.NewRender(c).Data(CallbackOutput{
|
||||
Redirect: redirect,
|
||||
User: user,
|
||||
AccessToken: ts.AccessToken,
|
||||
RefreshToken: ts.RefreshToken,
|
||||
}, nil)
|
||||
|
||||
}
|
||||
|
||||
func (rt *Router) loginCallbackOAuth(c *gin.Context) {
|
||||
rctx := c.Request.Context()
|
||||
code := ginx.QueryStr(c, "code", "")
|
||||
state := ginx.QueryStr(c, "state", "")
|
||||
|
||||
ret, err := rt.Sso.OAuth2.Callback(rt.Redis, rctx, code, state)
|
||||
ret, err := rt.Sso.OAuth2.Callback(rt.Redis, c.Request.Context(), code, state)
|
||||
if err != nil {
|
||||
logx.Debugf(rctx, "sso.callback() get ret %+v error %v", ret, err)
|
||||
logger.Debugf("sso.callback() get ret %+v error %v", ret, err)
|
||||
ginx.NewRender(c).Data(CallbackOutput{}, err)
|
||||
return
|
||||
}
|
||||
@@ -669,11 +569,10 @@ type SsoConfigOutput struct {
|
||||
CasDisplayName string `json:"casDisplayName"`
|
||||
OauthDisplayName string `json:"oauthDisplayName"`
|
||||
DingTalkDisplayName string `json:"dingTalkDisplayName"`
|
||||
FeiShuDisplayName string `json:"feishuDisplayName"`
|
||||
}
|
||||
|
||||
func (rt *Router) ssoConfigNameGet(c *gin.Context) {
|
||||
var oidcDisplayName, casDisplayName, oauthDisplayName, dingTalkDisplayName, feiShuDisplayName string
|
||||
var oidcDisplayName, casDisplayName, oauthDisplayName, dingTalkDisplayName string
|
||||
if rt.Sso.OIDC != nil {
|
||||
oidcDisplayName = rt.Sso.OIDC.GetDisplayName()
|
||||
}
|
||||
@@ -690,16 +589,11 @@ func (rt *Router) ssoConfigNameGet(c *gin.Context) {
|
||||
dingTalkDisplayName = rt.Sso.DingTalk.GetDisplayName()
|
||||
}
|
||||
|
||||
if rt.Sso.FeiShu != nil {
|
||||
feiShuDisplayName = rt.Sso.FeiShu.GetDisplayName()
|
||||
}
|
||||
|
||||
ginx.NewRender(c).Data(SsoConfigOutput{
|
||||
OidcDisplayName: oidcDisplayName,
|
||||
CasDisplayName: casDisplayName,
|
||||
OauthDisplayName: oauthDisplayName,
|
||||
DingTalkDisplayName: dingTalkDisplayName,
|
||||
FeiShuDisplayName: feiShuDisplayName,
|
||||
}, nil)
|
||||
}
|
||||
|
||||
@@ -714,7 +608,6 @@ func (rt *Router) ssoConfigGets(c *gin.Context) {
|
||||
|
||||
// TODO: dingTalkExist 为了兼容当前前端配置, 后期单点登陆统一调整后不在预先设置默认内容
|
||||
dingTalkExist := false
|
||||
feiShuExist := false
|
||||
for _, config := range lst {
|
||||
var ssoReqConfig models.SsoConfig
|
||||
ssoReqConfig.Id = config.Id
|
||||
@@ -725,10 +618,6 @@ func (rt *Router) ssoConfigGets(c *gin.Context) {
|
||||
dingTalkExist = true
|
||||
err := json.Unmarshal([]byte(config.Content), &ssoReqConfig.SettingJson)
|
||||
ginx.Dangerous(err)
|
||||
case feishu.SsoTypeName:
|
||||
feiShuExist = true
|
||||
err := json.Unmarshal([]byte(config.Content), &ssoReqConfig.SettingJson)
|
||||
ginx.Dangerous(err)
|
||||
default:
|
||||
ssoReqConfig.Content = config.Content
|
||||
}
|
||||
@@ -741,11 +630,6 @@ func (rt *Router) ssoConfigGets(c *gin.Context) {
|
||||
ssoConfig.Name = dingtalk.SsoTypeName
|
||||
ssoConfigs = append(ssoConfigs, ssoConfig)
|
||||
}
|
||||
if !feiShuExist {
|
||||
var ssoConfig models.SsoConfig
|
||||
ssoConfig.Name = feishu.SsoTypeName
|
||||
ssoConfigs = append(ssoConfigs, ssoConfig)
|
||||
}
|
||||
|
||||
ginx.NewRender(c).Data(ssoConfigs, nil)
|
||||
}
|
||||
@@ -773,23 +657,6 @@ func (rt *Router) ssoConfigUpdate(c *gin.Context) {
|
||||
err = f.Update(rt.Ctx)
|
||||
}
|
||||
ginx.Dangerous(err)
|
||||
case feishu.SsoTypeName:
|
||||
f.Name = ssoConfig.Name
|
||||
setting, err := json.Marshal(ssoConfig.SettingJson)
|
||||
ginx.Dangerous(err)
|
||||
f.Content = string(setting)
|
||||
f.UpdateAt = time.Now().Unix()
|
||||
sso, err := f.Query(rt.Ctx)
|
||||
if !errors.Is(err, gorm.ErrRecordNotFound) {
|
||||
ginx.Dangerous(err)
|
||||
}
|
||||
if errors.Is(err, gorm.ErrRecordNotFound) {
|
||||
err = f.Create(rt.Ctx)
|
||||
} else {
|
||||
f.Id = sso.Id
|
||||
err = f.Update(rt.Ctx)
|
||||
}
|
||||
ginx.Dangerous(err)
|
||||
default:
|
||||
f.Id = ssoConfig.Id
|
||||
f.Name = ssoConfig.Name
|
||||
@@ -828,14 +695,6 @@ func (rt *Router) ssoConfigUpdate(c *gin.Context) {
|
||||
rt.Sso.DingTalk = dingtalk.New(config)
|
||||
}
|
||||
rt.Sso.DingTalk.Reload(config)
|
||||
case feishu.SsoTypeName:
|
||||
var config feishu.Config
|
||||
err := json.Unmarshal([]byte(f.Content), &config)
|
||||
ginx.Dangerous(err)
|
||||
if rt.Sso.FeiShu == nil {
|
||||
rt.Sso.FeiShu = feishu.New(config)
|
||||
}
|
||||
rt.Sso.FeiShu.Reload(config)
|
||||
}
|
||||
|
||||
ginx.NewRender(c).Message(nil)
|
||||
|
||||
@@ -12,10 +12,10 @@ import (
|
||||
"github.com/ccfos/nightingale/v6/pkg/slice"
|
||||
"github.com/ccfos/nightingale/v6/pkg/strx"
|
||||
"github.com/ccfos/nightingale/v6/pkg/tplx"
|
||||
"github.com/ccfos/nightingale/v6/pkg/ginx"
|
||||
|
||||
"github.com/gin-gonic/gin"
|
||||
"github.com/google/uuid"
|
||||
"github.com/toolkits/pkg/ginx"
|
||||
)
|
||||
|
||||
func (rt *Router) messageTemplatesAdd(c *gin.Context) {
|
||||
@@ -154,7 +154,6 @@ func (rt *Router) messageTemplatesGet(c *gin.Context) {
|
||||
|
||||
lst, err := models.MessageTemplatesGetBy(rt.Ctx, notifyChannelIdents)
|
||||
ginx.Dangerous(err)
|
||||
models.FillUpdateByNicknames(rt.Ctx, lst)
|
||||
|
||||
if me.IsAdmin() {
|
||||
ginx.NewRender(c).Data(lst, nil)
|
||||
|
||||
@@ -2,9 +2,9 @@ package router
|
||||
|
||||
import (
|
||||
"github.com/ccfos/nightingale/v6/center/cconf"
|
||||
"github.com/ccfos/nightingale/v6/pkg/ginx"
|
||||
|
||||
"github.com/gin-gonic/gin"
|
||||
"github.com/toolkits/pkg/ginx"
|
||||
)
|
||||
|
||||
func (rt *Router) metricsDescGetFile(c *gin.Context) {
|
||||
|
||||
@@ -4,9 +4,9 @@ import (
|
||||
"net/http"
|
||||
|
||||
"github.com/ccfos/nightingale/v6/models"
|
||||
"github.com/ccfos/nightingale/v6/pkg/ginx"
|
||||
|
||||
"github.com/gin-gonic/gin"
|
||||
"github.com/toolkits/pkg/ginx"
|
||||
)
|
||||
|
||||
// no param
|
||||
|
||||
@@ -9,9 +9,9 @@ import (
|
||||
"github.com/ccfos/nightingale/v6/alert/mute"
|
||||
"github.com/ccfos/nightingale/v6/models"
|
||||
"github.com/ccfos/nightingale/v6/pkg/strx"
|
||||
"github.com/ccfos/nightingale/v6/pkg/ginx"
|
||||
|
||||
"github.com/gin-gonic/gin"
|
||||
"github.com/toolkits/pkg/ginx"
|
||||
"github.com/toolkits/pkg/i18n"
|
||||
)
|
||||
|
||||
@@ -22,9 +22,6 @@ func (rt *Router) alertMuteGetsByBG(c *gin.Context) {
|
||||
query := ginx.QueryStr(c, "query", "")
|
||||
expired := ginx.QueryInt(c, "expired", -1)
|
||||
lst, err := models.AlertMuteGets(rt.Ctx, prods, bgid, -1, expired, query)
|
||||
if err == nil {
|
||||
models.FillUpdateByNicknames(rt.Ctx, lst)
|
||||
}
|
||||
|
||||
ginx.NewRender(c).Data(lst, err)
|
||||
}
|
||||
@@ -50,9 +47,6 @@ func (rt *Router) alertMuteGetsByGids(c *gin.Context) {
|
||||
}
|
||||
|
||||
lst, err := models.AlertMuteGetsByBGIds(rt.Ctx, gids)
|
||||
if err == nil {
|
||||
models.FillUpdateByNicknames(rt.Ctx, lst)
|
||||
}
|
||||
|
||||
ginx.NewRender(c).Data(lst, err)
|
||||
}
|
||||
@@ -64,9 +58,6 @@ func (rt *Router) alertMuteGets(c *gin.Context) {
|
||||
disabled := ginx.QueryInt(c, "disabled", -1)
|
||||
expired := ginx.QueryInt(c, "expired", -1)
|
||||
lst, err := models.AlertMuteGets(rt.Ctx, prods, bgid, disabled, expired, query)
|
||||
if err == nil {
|
||||
models.FillUpdateByNicknames(rt.Ctx, lst)
|
||||
}
|
||||
|
||||
ginx.NewRender(c).Data(lst, err)
|
||||
}
|
||||
|
||||
@@ -11,11 +11,11 @@ import (
|
||||
|
||||
"github.com/ccfos/nightingale/v6/center/cstats"
|
||||
"github.com/ccfos/nightingale/v6/models"
|
||||
"github.com/ccfos/nightingale/v6/pkg/ginx"
|
||||
|
||||
"github.com/gin-gonic/gin"
|
||||
"github.com/golang-jwt/jwt"
|
||||
"github.com/google/uuid"
|
||||
"github.com/toolkits/pkg/ginx"
|
||||
)
|
||||
|
||||
const (
|
||||
|
||||
@@ -6,9 +6,9 @@ import (
|
||||
"github.com/ccfos/nightingale/v6/alert/sender"
|
||||
"github.com/ccfos/nightingale/v6/models"
|
||||
"github.com/ccfos/nightingale/v6/pkg/ctx"
|
||||
"github.com/ccfos/nightingale/v6/pkg/ginx"
|
||||
|
||||
"github.com/gin-gonic/gin"
|
||||
"github.com/toolkits/pkg/ginx"
|
||||
"github.com/toolkits/pkg/logger"
|
||||
)
|
||||
|
||||
|
||||
@@ -11,8 +11,8 @@ import (
|
||||
"time"
|
||||
|
||||
"github.com/ccfos/nightingale/v6/models"
|
||||
"github.com/ccfos/nightingale/v6/pkg/ginx"
|
||||
"github.com/gin-gonic/gin"
|
||||
"github.com/toolkits/pkg/ginx"
|
||||
)
|
||||
|
||||
func (rt *Router) notifyChannelsAdd(c *gin.Context) {
|
||||
@@ -118,9 +118,6 @@ func (rt *Router) notifyChannelGetBy(c *gin.Context) {
|
||||
|
||||
func (rt *Router) notifyChannelsGet(c *gin.Context) {
|
||||
lst, err := models.NotifyChannelsGet(rt.Ctx, "", nil)
|
||||
if err == nil {
|
||||
models.FillUpdateByNicknames(rt.Ctx, lst)
|
||||
}
|
||||
ginx.NewRender(c).Data(lst, err)
|
||||
}
|
||||
|
||||
|
||||
@@ -10,10 +10,10 @@ import (
|
||||
"github.com/ccfos/nightingale/v6/memsto"
|
||||
"github.com/ccfos/nightingale/v6/models"
|
||||
"github.com/ccfos/nightingale/v6/pkg/tplx"
|
||||
"github.com/ccfos/nightingale/v6/pkg/ginx"
|
||||
|
||||
"github.com/gin-gonic/gin"
|
||||
"github.com/pelletier/go-toml/v2"
|
||||
"github.com/toolkits/pkg/ginx"
|
||||
"github.com/toolkits/pkg/str"
|
||||
)
|
||||
|
||||
|
||||
@@ -10,9 +10,9 @@ import (
|
||||
"github.com/ccfos/nightingale/v6/models"
|
||||
"github.com/ccfos/nightingale/v6/pkg/ctx"
|
||||
"github.com/ccfos/nightingale/v6/pkg/slice"
|
||||
"github.com/ccfos/nightingale/v6/pkg/ginx"
|
||||
|
||||
"github.com/gin-gonic/gin"
|
||||
"github.com/toolkits/pkg/ginx"
|
||||
"github.com/toolkits/pkg/logger"
|
||||
)
|
||||
|
||||
@@ -118,7 +118,6 @@ func (rt *Router) notifyRulesGet(c *gin.Context) {
|
||||
|
||||
lst, err := models.NotifyRulesGet(rt.Ctx, "", nil)
|
||||
ginx.Dangerous(err)
|
||||
models.FillUpdateByNicknames(rt.Ctx, lst)
|
||||
if me.IsAdmin() {
|
||||
ginx.NewRender(c).Data(lst, nil)
|
||||
return
|
||||
@@ -222,7 +221,7 @@ func SendNotifyChannelMessage(ctx *ctx.Context, userCache *memsto.UserCacheType,
|
||||
return "", fmt.Errorf("failed to send flashduty notify: %v", err)
|
||||
}
|
||||
}
|
||||
logger.Infof("channel_name: %v, event:%s, tplContent:%s, customParams:%v, respBody: %v, err: %v", notifyChannel.Name, events[0].Hash, tplContent, customParams, resp, err)
|
||||
logger.Infof("channel_name: %v, event:%+v, tplContent:%s, customParams:%v, respBody: %v, err: %v", notifyChannel.Name, events[0], tplContent, customParams, resp, err)
|
||||
return resp, nil
|
||||
case "pagerduty":
|
||||
client, err := models.GetHTTPClient(notifyChannel)
|
||||
@@ -236,7 +235,7 @@ func SendNotifyChannelMessage(ctx *ctx.Context, userCache *memsto.UserCacheType,
|
||||
return "", fmt.Errorf("failed to send pagerduty notify: %v", err)
|
||||
}
|
||||
}
|
||||
logger.Infof("channel_name: %v, event:%s, tplContent:%s, customParams:%v, respBody: %v, err: %v", notifyChannel.Name, events[0].Hash, tplContent, customParams, resp, err)
|
||||
logger.Infof("channel_name: %v, event:%+v, tplContent:%s, customParams:%v, respBody: %v, err: %v", notifyChannel.Name, events[0], tplContent, customParams, resp, err)
|
||||
return resp, nil
|
||||
case "http":
|
||||
client, err := models.GetHTTPClient(notifyChannel)
|
||||
@@ -254,7 +253,7 @@ func SendNotifyChannelMessage(ctx *ctx.Context, userCache *memsto.UserCacheType,
|
||||
|
||||
if dispatch.NeedBatchContacts(notifyChannel.RequestConfig.HTTPRequestConfig) || len(sendtos) == 0 {
|
||||
resp, err = notifyChannel.SendHTTP(events, tplContent, customParams, sendtos, client)
|
||||
logger.Infof("channel_name: %v, event:%s, sendtos:%+v, tplContent:%s, customParams:%v, respBody: %v, err: %v", notifyChannel.Name, events[0].Hash, sendtos, tplContent, customParams, resp, err)
|
||||
logger.Infof("channel_name: %v, event:%+v, sendtos:%+v, tplContent:%s, customParams:%v, respBody: %v, err: %v", notifyChannel.Name, events[0], sendtos, tplContent, customParams, resp, err)
|
||||
if err != nil {
|
||||
return "", fmt.Errorf("failed to send http notify: %v", err)
|
||||
}
|
||||
@@ -262,7 +261,7 @@ func SendNotifyChannelMessage(ctx *ctx.Context, userCache *memsto.UserCacheType,
|
||||
} else {
|
||||
for i := range sendtos {
|
||||
resp, err = notifyChannel.SendHTTP(events, tplContent, customParams, []string{sendtos[i]}, client)
|
||||
logger.Infof("channel_name: %v, event:%s, tplContent:%s, customParams:%v, sendto:%+v, respBody: %v, err: %v", notifyChannel.Name, events[0].Hash, tplContent, customParams, sendtos[i], resp, err)
|
||||
logger.Infof("channel_name: %v, event:%+v, tplContent:%s, customParams:%v, sendto:%+v, respBody: %v, err: %v", notifyChannel.Name, events[0], tplContent, customParams, sendtos[i], resp, err)
|
||||
if err != nil {
|
||||
return "", fmt.Errorf("failed to send http notify: %v", err)
|
||||
}
|
||||
@@ -281,7 +280,7 @@ func SendNotifyChannelMessage(ctx *ctx.Context, userCache *memsto.UserCacheType,
|
||||
return resp, nil
|
||||
case "script":
|
||||
resp, _, err := notifyChannel.SendScript(events, tplContent, customParams, sendtos)
|
||||
logger.Infof("channel_name: %v, event:%s, tplContent:%s, customParams:%v, respBody: %v, err: %v", notifyChannel.Name, events[0].Hash, tplContent, customParams, resp, err)
|
||||
logger.Infof("channel_name: %v, event:%+v, tplContent:%s, customParams:%v, respBody: %v, err: %v", notifyChannel.Name, events[0], tplContent, customParams, resp, err)
|
||||
return resp, err
|
||||
default:
|
||||
logger.Errorf("unsupported request type: %v", notifyChannel.RequestType)
|
||||
|
||||
@@ -11,9 +11,9 @@ import (
|
||||
"github.com/ccfos/nightingale/v6/center/cconf"
|
||||
"github.com/ccfos/nightingale/v6/models"
|
||||
"github.com/ccfos/nightingale/v6/pkg/tplx"
|
||||
"github.com/ccfos/nightingale/v6/pkg/ginx"
|
||||
|
||||
"github.com/gin-gonic/gin"
|
||||
"github.com/toolkits/pkg/ginx"
|
||||
"github.com/toolkits/pkg/str"
|
||||
)
|
||||
|
||||
@@ -25,14 +25,11 @@ func (rt *Router) notifyTplGets(c *gin.Context) {
|
||||
m[models.EmailSubject] = struct{}{}
|
||||
|
||||
lst, err := models.NotifyTplGets(rt.Ctx)
|
||||
ginx.Dangerous(err)
|
||||
|
||||
for i := 0; i < len(lst); i++ {
|
||||
if _, exists := m[lst[i].Channel]; exists {
|
||||
lst[i].BuiltIn = true
|
||||
}
|
||||
}
|
||||
models.FillUpdateByNicknames(rt.Ctx, lst)
|
||||
|
||||
ginx.NewRender(c).Data(lst, err)
|
||||
}
|
||||
@@ -203,9 +200,6 @@ func (rt *Router) messageTemplateGets(c *gin.Context) {
|
||||
ident := ginx.QueryStr(c, "ident", "")
|
||||
|
||||
tpls, err := models.MessageTemplateGets(rt.Ctx, id, name, ident)
|
||||
if err == nil {
|
||||
models.FillUpdateByNicknames(rt.Ctx, tpls)
|
||||
}
|
||||
|
||||
ginx.NewRender(c).Data(tpls, err)
|
||||
}
|
||||
|
||||
@@ -3,9 +3,9 @@ package router
|
||||
import (
|
||||
"github.com/ccfos/nightingale/v6/datasource/opensearch"
|
||||
"github.com/ccfos/nightingale/v6/dscache"
|
||||
"github.com/ccfos/nightingale/v6/pkg/ginx"
|
||||
|
||||
"github.com/gin-gonic/gin"
|
||||
"github.com/toolkits/pkg/ginx"
|
||||
"github.com/toolkits/pkg/logger"
|
||||
)
|
||||
|
||||
|
||||
@@ -2,6 +2,7 @@ package router
|
||||
|
||||
import (
|
||||
"context"
|
||||
"crypto/tls"
|
||||
"fmt"
|
||||
"net"
|
||||
"net/http"
|
||||
@@ -12,13 +13,12 @@ import (
|
||||
"sync"
|
||||
"time"
|
||||
|
||||
"github.com/ccfos/nightingale/v6/pkg/logx"
|
||||
"github.com/ccfos/nightingale/v6/pkg/poster"
|
||||
pkgprom "github.com/ccfos/nightingale/v6/pkg/prom"
|
||||
"github.com/ccfos/nightingale/v6/prom"
|
||||
"github.com/ccfos/nightingale/v6/pkg/ginx"
|
||||
"github.com/gin-gonic/gin"
|
||||
"github.com/prometheus/common/model"
|
||||
"github.com/toolkits/pkg/ginx"
|
||||
"github.com/toolkits/pkg/logger"
|
||||
"github.com/toolkits/pkg/net/httplib"
|
||||
)
|
||||
@@ -39,16 +39,15 @@ func (rt *Router) promBatchQueryRange(c *gin.Context) {
|
||||
var f BatchQueryForm
|
||||
ginx.Dangerous(c.BindJSON(&f))
|
||||
|
||||
lst, err := PromBatchQueryRange(c.Request.Context(), rt.PromClients, f)
|
||||
lst, err := PromBatchQueryRange(rt.PromClients, f)
|
||||
ginx.NewRender(c).Data(lst, err)
|
||||
}
|
||||
|
||||
func PromBatchQueryRange(ctx context.Context, pc *prom.PromClientMap, f BatchQueryForm) ([]model.Value, error) {
|
||||
func PromBatchQueryRange(pc *prom.PromClientMap, f BatchQueryForm) ([]model.Value, error) {
|
||||
var lst []model.Value
|
||||
|
||||
cli := pc.GetCli(f.DatasourceId)
|
||||
if cli == nil {
|
||||
logx.Warningf(ctx, "no such datasource id: %d", f.DatasourceId)
|
||||
return lst, fmt.Errorf("no such datasource id: %d", f.DatasourceId)
|
||||
}
|
||||
|
||||
@@ -59,9 +58,8 @@ func PromBatchQueryRange(ctx context.Context, pc *prom.PromClientMap, f BatchQue
|
||||
Step: time.Duration(item.Step) * time.Second,
|
||||
}
|
||||
|
||||
resp, _, err := cli.QueryRange(ctx, item.Query, r)
|
||||
resp, _, err := cli.QueryRange(context.Background(), item.Query, r)
|
||||
if err != nil {
|
||||
logx.Warningf(ctx, "query range error: query:%s err:%v", item.Query, err)
|
||||
return lst, err
|
||||
}
|
||||
|
||||
@@ -84,23 +82,22 @@ func (rt *Router) promBatchQueryInstant(c *gin.Context) {
|
||||
var f BatchInstantForm
|
||||
ginx.Dangerous(c.BindJSON(&f))
|
||||
|
||||
lst, err := PromBatchQueryInstant(c.Request.Context(), rt.PromClients, f)
|
||||
lst, err := PromBatchQueryInstant(rt.PromClients, f)
|
||||
ginx.NewRender(c).Data(lst, err)
|
||||
}
|
||||
|
||||
func PromBatchQueryInstant(ctx context.Context, pc *prom.PromClientMap, f BatchInstantForm) ([]model.Value, error) {
|
||||
func PromBatchQueryInstant(pc *prom.PromClientMap, f BatchInstantForm) ([]model.Value, error) {
|
||||
var lst []model.Value
|
||||
|
||||
cli := pc.GetCli(f.DatasourceId)
|
||||
if cli == nil {
|
||||
logx.Warningf(ctx, "no such datasource id: %d", f.DatasourceId)
|
||||
logger.Warningf("no such datasource id: %d", f.DatasourceId)
|
||||
return lst, fmt.Errorf("no such datasource id: %d", f.DatasourceId)
|
||||
}
|
||||
|
||||
for _, item := range f.Queries {
|
||||
resp, _, err := cli.Query(ctx, item.Query, time.Unix(item.Time, 0))
|
||||
resp, _, err := cli.Query(context.Background(), item.Query, time.Unix(item.Time, 0))
|
||||
if err != nil {
|
||||
logx.Warningf(ctx, "query instant error: query:%s err:%v", item.Query, err)
|
||||
return lst, err
|
||||
}
|
||||
|
||||
@@ -172,15 +169,8 @@ func (rt *Router) dsProxy(c *gin.Context) {
|
||||
|
||||
transport, has := transportGet(dsId, ds.UpdatedAt)
|
||||
if !has {
|
||||
// 使用 TLS 配置(支持 mTLS)
|
||||
tlsConfig, err := ds.HTTPJson.TLS.TLSConfig()
|
||||
if err != nil {
|
||||
c.String(http.StatusInternalServerError, "failed to create TLS config: %s", err.Error())
|
||||
return
|
||||
}
|
||||
|
||||
transport = &http.Transport{
|
||||
TLSClientConfig: tlsConfig,
|
||||
TLSClientConfig: &tls.Config{InsecureSkipVerify: ds.HTTPJson.TLS.SkipTlsVerify},
|
||||
Proxy: http.ProxyFromEnvironment,
|
||||
DialContext: (&net.Dialer{
|
||||
Timeout: time.Duration(ds.HTTPJson.DialTimeout) * time.Millisecond,
|
||||
@@ -193,7 +183,7 @@ func (rt *Router) dsProxy(c *gin.Context) {
|
||||
|
||||
modifyResponse := func(r *http.Response) error {
|
||||
if r.StatusCode == http.StatusUnauthorized {
|
||||
logx.Warningf(c.Request.Context(), "proxy path:%s unauthorized access ", c.Request.URL.Path)
|
||||
logger.Warningf("proxy path:%s unauthorized access ", c.Request.URL.Path)
|
||||
return fmt.Errorf("unauthorized access")
|
||||
}
|
||||
|
||||
|
||||
@@ -5,17 +5,14 @@ import (
|
||||
"sort"
|
||||
"sync"
|
||||
|
||||
"github.com/ccfos/nightingale/v6/alert/eval"
|
||||
"github.com/ccfos/nightingale/v6/dscache"
|
||||
"github.com/ccfos/nightingale/v6/models"
|
||||
"github.com/ccfos/nightingale/v6/pkg/logx"
|
||||
"github.com/ccfos/nightingale/v6/pkg/ginx"
|
||||
"github.com/gin-gonic/gin"
|
||||
"github.com/toolkits/pkg/ginx"
|
||||
"github.com/toolkits/pkg/logger"
|
||||
)
|
||||
|
||||
type CheckDsPermFunc func(c *gin.Context, dsId int64, cate string, q interface{}) bool
|
||||
|
||||
var CheckDsPerm CheckDsPermFunc = func(c *gin.Context, dsId int64, cate string, q interface{}) bool {
|
||||
func CheckDsPerm(c *gin.Context, dsId int64, cate string, q interface{}) bool {
|
||||
// todo: 后续需要根据 cate 判断是否需要权限
|
||||
return true
|
||||
}
|
||||
@@ -47,7 +44,6 @@ func QueryLogBatchConcurrently(anonymousAccess bool, ctx *gin.Context, f QueryFr
|
||||
var mu sync.Mutex
|
||||
var wg sync.WaitGroup
|
||||
var errs []error
|
||||
rctx := ctx.Request.Context()
|
||||
|
||||
for _, q := range f.Queries {
|
||||
if !anonymousAccess && !CheckDsPerm(ctx, q.Did, q.DsCate, q) {
|
||||
@@ -56,27 +52,20 @@ func QueryLogBatchConcurrently(anonymousAccess bool, ctx *gin.Context, f QueryFr
|
||||
|
||||
plug, exists := dscache.DsCache.Get(q.DsCate, q.Did)
|
||||
if !exists {
|
||||
logx.Warningf(rctx, "cluster:%d not exists query:%+v", q.Did, q)
|
||||
logger.Warningf("cluster:%d not exists query:%+v", q.Did, q)
|
||||
return LogResp{}, fmt.Errorf("cluster not exists")
|
||||
}
|
||||
|
||||
// 根据数据源类型对 Query 进行模板渲染处理
|
||||
err := eval.ExecuteQueryTemplate(q.DsCate, q.Query, nil)
|
||||
if err != nil {
|
||||
logx.Warningf(rctx, "query template execute error: %v", err)
|
||||
return LogResp{}, fmt.Errorf("query template execute error: %v", err)
|
||||
}
|
||||
|
||||
wg.Add(1)
|
||||
go func(query Query) {
|
||||
defer wg.Done()
|
||||
|
||||
data, total, err := plug.QueryLog(rctx, query.Query)
|
||||
data, total, err := plug.QueryLog(ctx.Request.Context(), query.Query)
|
||||
mu.Lock()
|
||||
defer mu.Unlock()
|
||||
if err != nil {
|
||||
errMsg := fmt.Sprintf("query data error: %v query:%v\n ", err, query)
|
||||
logx.Warningf(rctx, "%s", errMsg)
|
||||
logger.Warningf(errMsg)
|
||||
errs = append(errs, err)
|
||||
return
|
||||
}
|
||||
@@ -122,7 +111,6 @@ func QueryDataConcurrently(anonymousAccess bool, ctx *gin.Context, f models.Quer
|
||||
var mu sync.Mutex
|
||||
var wg sync.WaitGroup
|
||||
var errs []error
|
||||
rctx := ctx.Request.Context()
|
||||
|
||||
for _, q := range f.Queries {
|
||||
if !anonymousAccess && !CheckDsPerm(ctx, f.DatasourceId, f.Cate, q) {
|
||||
@@ -131,7 +119,7 @@ func QueryDataConcurrently(anonymousAccess bool, ctx *gin.Context, f models.Quer
|
||||
|
||||
plug, exists := dscache.DsCache.Get(f.Cate, f.DatasourceId)
|
||||
if !exists {
|
||||
logx.Warningf(rctx, "cluster:%d not exists", f.DatasourceId)
|
||||
logger.Warningf("cluster:%d not exists", f.DatasourceId)
|
||||
return nil, fmt.Errorf("cluster not exists")
|
||||
}
|
||||
|
||||
@@ -139,16 +127,16 @@ func QueryDataConcurrently(anonymousAccess bool, ctx *gin.Context, f models.Quer
|
||||
go func(query interface{}) {
|
||||
defer wg.Done()
|
||||
|
||||
data, err := plug.QueryData(rctx, query)
|
||||
data, err := plug.QueryData(ctx.Request.Context(), query)
|
||||
if err != nil {
|
||||
logx.Warningf(rctx, "query data error: req:%+v err:%v", query, err)
|
||||
logger.Warningf("query data error: req:%+v err:%v", query, err)
|
||||
mu.Lock()
|
||||
errs = append(errs, err)
|
||||
mu.Unlock()
|
||||
return
|
||||
}
|
||||
|
||||
logx.Debugf(rctx, "query data: req:%+v resp:%+v", query, data)
|
||||
logger.Debugf("query data: req:%+v resp:%+v", query, data)
|
||||
mu.Lock()
|
||||
resp = append(resp, data...)
|
||||
mu.Unlock()
|
||||
@@ -194,7 +182,6 @@ func QueryLogConcurrently(anonymousAccess bool, ctx *gin.Context, f models.Query
|
||||
var mu sync.Mutex
|
||||
var wg sync.WaitGroup
|
||||
var errs []error
|
||||
rctx := ctx.Request.Context()
|
||||
|
||||
for _, q := range f.Queries {
|
||||
if !anonymousAccess && !CheckDsPerm(ctx, f.DatasourceId, f.Cate, q) {
|
||||
@@ -203,7 +190,7 @@ func QueryLogConcurrently(anonymousAccess bool, ctx *gin.Context, f models.Query
|
||||
|
||||
plug, exists := dscache.DsCache.Get(f.Cate, f.DatasourceId)
|
||||
if !exists {
|
||||
logx.Warningf(rctx, "cluster:%d not exists query:%+v", f.DatasourceId, f)
|
||||
logger.Warningf("cluster:%d not exists query:%+v", f.DatasourceId, f)
|
||||
return LogResp{}, fmt.Errorf("cluster not exists")
|
||||
}
|
||||
|
||||
@@ -211,11 +198,11 @@ func QueryLogConcurrently(anonymousAccess bool, ctx *gin.Context, f models.Query
|
||||
go func(query interface{}) {
|
||||
defer wg.Done()
|
||||
|
||||
data, total, err := plug.QueryLog(rctx, query)
|
||||
logx.Debugf(rctx, "query log: req:%+v resp:%+v", query, data)
|
||||
data, total, err := plug.QueryLog(ctx.Request.Context(), query)
|
||||
logger.Debugf("query log: req:%+v resp:%+v", query, data)
|
||||
if err != nil {
|
||||
errMsg := fmt.Sprintf("query data error: %v query:%v\n ", err, query)
|
||||
logx.Warningf(rctx, "%s", errMsg)
|
||||
logger.Warningf(errMsg)
|
||||
mu.Lock()
|
||||
errs = append(errs, err)
|
||||
mu.Unlock()
|
||||
@@ -253,7 +240,6 @@ func (rt *Router) QueryLogV2(c *gin.Context) {
|
||||
func (rt *Router) QueryLog(c *gin.Context) {
|
||||
var f models.QueryParam
|
||||
ginx.BindJSON(c, &f)
|
||||
rctx := c.Request.Context()
|
||||
|
||||
var resp []interface{}
|
||||
for _, q := range f.Queries {
|
||||
@@ -263,13 +249,13 @@ func (rt *Router) QueryLog(c *gin.Context) {
|
||||
|
||||
plug, exists := dscache.DsCache.Get("elasticsearch", f.DatasourceId)
|
||||
if !exists {
|
||||
logx.Warningf(rctx, "cluster:%d not exists", f.DatasourceId)
|
||||
logger.Warningf("cluster:%d not exists", f.DatasourceId)
|
||||
ginx.Bomb(200, "cluster not exists")
|
||||
}
|
||||
|
||||
data, _, err := plug.QueryLog(rctx, q)
|
||||
data, _, err := plug.QueryLog(c.Request.Context(), q)
|
||||
if err != nil {
|
||||
logx.Warningf(rctx, "query data error: %v", err)
|
||||
logger.Warningf("query data error: %v", err)
|
||||
ginx.Bomb(200, "err:%v", err)
|
||||
continue
|
||||
}
|
||||
|
||||
@@ -7,17 +7,14 @@ import (
|
||||
|
||||
"github.com/ccfos/nightingale/v6/models"
|
||||
"github.com/ccfos/nightingale/v6/pkg/strx"
|
||||
"github.com/ccfos/nightingale/v6/pkg/ginx"
|
||||
|
||||
"github.com/gin-gonic/gin"
|
||||
"github.com/toolkits/pkg/ginx"
|
||||
)
|
||||
|
||||
func (rt *Router) recordingRuleGets(c *gin.Context) {
|
||||
busiGroupId := ginx.UrlParamInt64(c, "id")
|
||||
ars, err := models.RecordingRuleGets(rt.Ctx, busiGroupId)
|
||||
if err == nil {
|
||||
models.FillUpdateByNicknames(rt.Ctx, ars)
|
||||
}
|
||||
ginx.NewRender(c).Data(ars, err)
|
||||
}
|
||||
|
||||
@@ -42,9 +39,6 @@ func (rt *Router) recordingRuleGetsByGids(c *gin.Context) {
|
||||
}
|
||||
|
||||
ars, err := models.RecordingRuleGetsByBGIds(rt.Ctx, gids)
|
||||
if err == nil {
|
||||
models.FillUpdateByNicknames(rt.Ctx, ars)
|
||||
}
|
||||
ginx.NewRender(c).Data(ars, err)
|
||||
}
|
||||
|
||||
@@ -118,7 +112,6 @@ func (rt *Router) recordingRulePutByFE(c *gin.Context) {
|
||||
}
|
||||
|
||||
rt.bgrwCheck(c, ar.GroupId)
|
||||
rt.bgroCheck(c, f.GroupId)
|
||||
|
||||
f.UpdateBy = c.MustGet("username").(string)
|
||||
ginx.NewRender(c).Message(ar.Update(rt.Ctx, f))
|
||||
|
||||
@@ -6,9 +6,9 @@ import (
|
||||
|
||||
"github.com/ccfos/nightingale/v6/center/cconf"
|
||||
"github.com/ccfos/nightingale/v6/models"
|
||||
"github.com/ccfos/nightingale/v6/pkg/ginx"
|
||||
|
||||
"github.com/gin-gonic/gin"
|
||||
"github.com/toolkits/pkg/ginx"
|
||||
)
|
||||
|
||||
func (rt *Router) rolesGets(c *gin.Context) {
|
||||
|
||||
@@ -5,8 +5,8 @@ import (
|
||||
|
||||
"github.com/ccfos/nightingale/v6/center/cconf"
|
||||
"github.com/ccfos/nightingale/v6/models"
|
||||
"github.com/ccfos/nightingale/v6/pkg/ginx"
|
||||
"github.com/gin-gonic/gin"
|
||||
"github.com/toolkits/pkg/ginx"
|
||||
"github.com/toolkits/pkg/i18n"
|
||||
)
|
||||
|
||||
|
||||
@@ -1,145 +0,0 @@
|
||||
package router
|
||||
|
||||
import (
|
||||
"net/http"
|
||||
|
||||
"github.com/ccfos/nightingale/v6/models"
|
||||
"github.com/ccfos/nightingale/v6/pkg/slice"
|
||||
"github.com/ccfos/nightingale/v6/pkg/ginx"
|
||||
|
||||
"github.com/gin-gonic/gin"
|
||||
)
|
||||
|
||||
func (rt *Router) savedViewGets(c *gin.Context) {
|
||||
page := ginx.QueryStr(c, "page", "")
|
||||
|
||||
me := c.MustGet("user").(*models.User)
|
||||
|
||||
lst, err := models.SavedViewGets(rt.Ctx, page)
|
||||
if err != nil {
|
||||
ginx.NewRender(c).Data(nil, err)
|
||||
return
|
||||
}
|
||||
models.FillUpdateByNicknames(rt.Ctx, lst)
|
||||
|
||||
userGids, err := models.MyGroupIds(rt.Ctx, me.Id)
|
||||
if err != nil {
|
||||
ginx.NewRender(c).Data(nil, err)
|
||||
return
|
||||
}
|
||||
|
||||
favoriteMap, err := models.SavedViewFavoriteGetByUserId(rt.Ctx, me.Id)
|
||||
if err != nil {
|
||||
ginx.NewRender(c).Data(nil, err)
|
||||
return
|
||||
}
|
||||
|
||||
favoriteViews := make([]models.SavedView, 0)
|
||||
normalViews := make([]models.SavedView, 0)
|
||||
|
||||
for _, view := range lst {
|
||||
visible := view.CreateBy == me.Username ||
|
||||
view.PublicCate == 2 ||
|
||||
(view.PublicCate == 1 && slice.HaveIntersection[int64](userGids, view.Gids))
|
||||
|
||||
if !visible {
|
||||
continue
|
||||
}
|
||||
|
||||
view.IsFavorite = favoriteMap[view.Id]
|
||||
|
||||
// 收藏的排前面
|
||||
if view.IsFavorite {
|
||||
favoriteViews = append(favoriteViews, view)
|
||||
} else {
|
||||
normalViews = append(normalViews, view)
|
||||
}
|
||||
}
|
||||
|
||||
ginx.NewRender(c).Data(append(favoriteViews, normalViews...), nil)
|
||||
}
|
||||
|
||||
func (rt *Router) savedViewAdd(c *gin.Context) {
|
||||
var f models.SavedView
|
||||
ginx.BindJSON(c, &f)
|
||||
|
||||
me := c.MustGet("user").(*models.User)
|
||||
f.Id = 0
|
||||
f.CreateBy = me.Username
|
||||
f.UpdateBy = me.Username
|
||||
|
||||
err := models.SavedViewAdd(rt.Ctx, &f)
|
||||
ginx.NewRender(c).Data(f.Id, err)
|
||||
}
|
||||
|
||||
func (rt *Router) savedViewPut(c *gin.Context) {
|
||||
id := ginx.UrlParamInt64(c, "id")
|
||||
|
||||
view, err := models.SavedViewGetById(rt.Ctx, id)
|
||||
if err != nil {
|
||||
ginx.NewRender(c).Data(nil, err)
|
||||
return
|
||||
}
|
||||
if view == nil {
|
||||
ginx.NewRender(c, http.StatusNotFound).Message("saved view not found")
|
||||
return
|
||||
}
|
||||
|
||||
me := c.MustGet("user").(*models.User)
|
||||
// 只有创建者可以更新
|
||||
if view.CreateBy != me.Username && !me.IsAdmin() {
|
||||
ginx.NewRender(c, http.StatusForbidden).Message("forbidden")
|
||||
return
|
||||
}
|
||||
|
||||
var f models.SavedView
|
||||
ginx.BindJSON(c, &f)
|
||||
|
||||
view.Name = f.Name
|
||||
view.Filter = f.Filter
|
||||
view.PublicCate = f.PublicCate
|
||||
view.Gids = f.Gids
|
||||
|
||||
err = models.SavedViewUpdate(rt.Ctx, view, me.Username)
|
||||
ginx.NewRender(c).Message(err)
|
||||
}
|
||||
|
||||
func (rt *Router) savedViewDel(c *gin.Context) {
|
||||
id := ginx.UrlParamInt64(c, "id")
|
||||
|
||||
view, err := models.SavedViewGetById(rt.Ctx, id)
|
||||
if err != nil {
|
||||
ginx.NewRender(c).Data(nil, err)
|
||||
return
|
||||
}
|
||||
if view == nil {
|
||||
ginx.NewRender(c, http.StatusNotFound).Message("saved view not found")
|
||||
return
|
||||
}
|
||||
|
||||
me := c.MustGet("user").(*models.User)
|
||||
// 只有创建者或管理员可以删除
|
||||
if view.CreateBy != me.Username && !me.IsAdmin() {
|
||||
ginx.NewRender(c, http.StatusForbidden).Message("forbidden")
|
||||
return
|
||||
}
|
||||
|
||||
err = models.SavedViewDel(rt.Ctx, id)
|
||||
ginx.NewRender(c).Message(err)
|
||||
}
|
||||
|
||||
func (rt *Router) savedViewFavoriteAdd(c *gin.Context) {
|
||||
id := ginx.UrlParamInt64(c, "id")
|
||||
me := c.MustGet("user").(*models.User)
|
||||
|
||||
err := models.UserViewFavoriteAdd(rt.Ctx, id, me.Id)
|
||||
ginx.NewRender(c).Message(err)
|
||||
}
|
||||
|
||||
func (rt *Router) savedViewFavoriteDel(c *gin.Context) {
|
||||
id := ginx.UrlParamInt64(c, "id")
|
||||
me := c.MustGet("user").(*models.User)
|
||||
|
||||
err := models.UserViewFavoriteDel(rt.Ctx, id, me.Id)
|
||||
ginx.NewRender(c).Message(err)
|
||||
}
|
||||
@@ -5,10 +5,10 @@ import (
|
||||
"github.com/ccfos/nightingale/v6/pkg/flashduty"
|
||||
"github.com/ccfos/nightingale/v6/pkg/ormx"
|
||||
"github.com/ccfos/nightingale/v6/pkg/secu"
|
||||
"github.com/ccfos/nightingale/v6/pkg/ginx"
|
||||
"github.com/google/uuid"
|
||||
|
||||
"github.com/gin-gonic/gin"
|
||||
"github.com/toolkits/pkg/ginx"
|
||||
"github.com/toolkits/pkg/logger"
|
||||
)
|
||||
|
||||
|
||||
@@ -4,9 +4,9 @@ import (
|
||||
"time"
|
||||
|
||||
"github.com/ccfos/nightingale/v6/models"
|
||||
"github.com/ccfos/nightingale/v6/pkg/ginx"
|
||||
|
||||
"github.com/gin-gonic/gin"
|
||||
"github.com/toolkits/pkg/ginx"
|
||||
)
|
||||
|
||||
func (rt *Router) serversGet(c *gin.Context) {
|
||||
|
||||
@@ -5,10 +5,10 @@ import (
|
||||
"time"
|
||||
|
||||
"github.com/ccfos/nightingale/v6/models"
|
||||
"github.com/ccfos/nightingale/v6/pkg/ginx"
|
||||
"github.com/google/uuid"
|
||||
|
||||
"github.com/gin-gonic/gin"
|
||||
"github.com/toolkits/pkg/ginx"
|
||||
)
|
||||
|
||||
// sourceTokenAdd 生成新的源令牌
|
||||
|
||||
@@ -13,10 +13,10 @@ import (
|
||||
"github.com/ccfos/nightingale/v6/pkg/strx"
|
||||
"github.com/ccfos/nightingale/v6/pushgw/idents"
|
||||
"github.com/ccfos/nightingale/v6/storage"
|
||||
"github.com/ccfos/nightingale/v6/pkg/ginx"
|
||||
|
||||
"github.com/gin-gonic/gin"
|
||||
"github.com/prometheus/common/model"
|
||||
"github.com/toolkits/pkg/ginx"
|
||||
"github.com/toolkits/pkg/logger"
|
||||
)
|
||||
|
||||
@@ -38,16 +38,6 @@ func (rt *Router) targetGetsByHostFilter(c *gin.Context) {
|
||||
total, err := models.TargetCountByFilter(rt.Ctx, query)
|
||||
ginx.Dangerous(err)
|
||||
|
||||
models.FillTargetsBeatTime(rt.Redis, hosts)
|
||||
now := time.Now().Unix()
|
||||
for i := 0; i < len(hosts); i++ {
|
||||
if now-hosts[i].BeatTime < 60 {
|
||||
hosts[i].TargetUp = 2
|
||||
} else if now-hosts[i].BeatTime < 180 {
|
||||
hosts[i].TargetUp = 1
|
||||
}
|
||||
}
|
||||
|
||||
ginx.NewRender(c).Data(gin.H{
|
||||
"list": hosts,
|
||||
"total": total,
|
||||
@@ -91,24 +81,9 @@ func (rt *Router) targetGets(c *gin.Context) {
|
||||
models.BuildTargetWhereWithBgids(bgids),
|
||||
models.BuildTargetWhereWithDsIds(dsIds),
|
||||
models.BuildTargetWhereWithQuery(query),
|
||||
models.BuildTargetWhereWithDowntime(downtime),
|
||||
models.BuildTargetWhereWithHosts(hosts),
|
||||
}
|
||||
|
||||
// downtime 筛选:从缓存获取心跳时间,选择较小的集合用 IN 或 NOT IN 过滤
|
||||
if downtime != 0 {
|
||||
downtimeOpt, hasMatch := rt.downtimeFilter(downtime)
|
||||
if !hasMatch {
|
||||
ginx.NewRender(c).Data(gin.H{
|
||||
"list": []*models.Target{},
|
||||
"total": 0,
|
||||
}, nil)
|
||||
return
|
||||
}
|
||||
if downtimeOpt != nil {
|
||||
options = append(options, downtimeOpt)
|
||||
}
|
||||
}
|
||||
|
||||
total, err := models.TargetTotal(rt.Ctx, options...)
|
||||
ginx.Dangerous(err)
|
||||
|
||||
@@ -127,17 +102,14 @@ func (rt *Router) targetGets(c *gin.Context) {
|
||||
now := time.Now()
|
||||
cache := make(map[int64]*models.BusiGroup)
|
||||
|
||||
// 从 Redis 补全 BeatTime
|
||||
models.FillTargetsBeatTime(rt.Redis, list)
|
||||
|
||||
var keys []string
|
||||
for i := 0; i < len(list); i++ {
|
||||
ginx.Dangerous(list[i].FillGroup(rt.Ctx, cache))
|
||||
keys = append(keys, models.WrapIdent(list[i].Ident))
|
||||
|
||||
if now.Unix()-list[i].BeatTime < 60 {
|
||||
if now.Unix()-list[i].UpdateAt < 60 {
|
||||
list[i].TargetUp = 2
|
||||
} else if now.Unix()-list[i].BeatTime < 180 {
|
||||
} else if now.Unix()-list[i].UpdateAt < 180 {
|
||||
list[i].TargetUp = 1
|
||||
}
|
||||
}
|
||||
@@ -176,43 +148,6 @@ func (rt *Router) targetGets(c *gin.Context) {
|
||||
}, nil)
|
||||
}
|
||||
|
||||
// downtimeFilter 从缓存获取心跳时间,生成 downtime 筛选条件
|
||||
// 选择匹配集和非匹配集中较小的一方,用 IN 或 NOT IN 来减少 SQL 参数量
|
||||
// 返回值:
|
||||
// - option: 筛选条件,nil 表示所有 target 都符合条件(无需过滤)
|
||||
// - hasMatch: 是否有符合条件的 target,false 表示无匹配应返回空结果
|
||||
func (rt *Router) downtimeFilter(downtime int64) (option models.BuildTargetWhereOption, hasMatch bool) {
|
||||
now := time.Now().Unix()
|
||||
targets := rt.TargetCache.GetAll()
|
||||
var matchIdents, nonMatchIdents []string
|
||||
for _, target := range targets {
|
||||
matched := false
|
||||
if downtime > 0 {
|
||||
matched = target.BeatTime < now-downtime
|
||||
} else if downtime < 0 {
|
||||
matched = target.BeatTime > now+downtime
|
||||
}
|
||||
if matched {
|
||||
matchIdents = append(matchIdents, target.Ident)
|
||||
} else {
|
||||
nonMatchIdents = append(nonMatchIdents, target.Ident)
|
||||
}
|
||||
}
|
||||
|
||||
if len(matchIdents) == 0 {
|
||||
return nil, false
|
||||
}
|
||||
|
||||
if len(nonMatchIdents) == 0 {
|
||||
return nil, true
|
||||
}
|
||||
|
||||
if len(matchIdents) <= len(nonMatchIdents) {
|
||||
return models.BuildTargetWhereWithIdents(matchIdents), true
|
||||
}
|
||||
return models.BuildTargetWhereExcludeIdents(nonMatchIdents), true
|
||||
}
|
||||
|
||||
func (rt *Router) targetExtendInfoByIdent(c *gin.Context) {
|
||||
ident := ginx.QueryStr(c, "ident", "")
|
||||
key := models.WrapExtendIdent(ident)
|
||||
|
||||
@@ -7,9 +7,9 @@ import (
|
||||
"github.com/ccfos/nightingale/v6/alert/sender"
|
||||
"github.com/ccfos/nightingale/v6/models"
|
||||
"github.com/ccfos/nightingale/v6/pkg/strx"
|
||||
"github.com/ccfos/nightingale/v6/pkg/ginx"
|
||||
|
||||
"github.com/gin-gonic/gin"
|
||||
"github.com/toolkits/pkg/ginx"
|
||||
"github.com/toolkits/pkg/i18n"
|
||||
)
|
||||
|
||||
|
||||
@@ -8,9 +8,9 @@ import (
|
||||
|
||||
"github.com/ccfos/nightingale/v6/models"
|
||||
"github.com/ccfos/nightingale/v6/pkg/strx"
|
||||
"github.com/ccfos/nightingale/v6/pkg/ginx"
|
||||
|
||||
"github.com/gin-gonic/gin"
|
||||
"github.com/toolkits/pkg/ginx"
|
||||
"github.com/toolkits/pkg/i18n"
|
||||
"github.com/toolkits/pkg/str"
|
||||
)
|
||||
@@ -25,7 +25,6 @@ func (rt *Router) taskTplGets(c *gin.Context) {
|
||||
|
||||
list, err := models.TaskTplGets(rt.Ctx, []int64{groupId}, query, limit, ginx.Offset(c, limit))
|
||||
ginx.Dangerous(err)
|
||||
models.FillUpdateByNicknames(rt.Ctx, list)
|
||||
|
||||
ginx.NewRender(c).Data(gin.H{
|
||||
"total": total,
|
||||
@@ -61,7 +60,6 @@ func (rt *Router) taskTplGetsByGids(c *gin.Context) {
|
||||
|
||||
list, err := models.TaskTplGets(rt.Ctx, gids, query, limit, ginx.Offset(c, limit))
|
||||
ginx.Dangerous(err)
|
||||
models.FillUpdateByNicknames(rt.Ctx, list)
|
||||
|
||||
ginx.NewRender(c).Data(gin.H{
|
||||
"total": total,
|
||||
|
||||
@@ -2,14 +2,13 @@ package router
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"net/http"
|
||||
|
||||
"github.com/ccfos/nightingale/v6/center/cconf"
|
||||
"github.com/ccfos/nightingale/v6/datasource/tdengine"
|
||||
"github.com/ccfos/nightingale/v6/dscache"
|
||||
"github.com/ccfos/nightingale/v6/models"
|
||||
"github.com/ccfos/nightingale/v6/pkg/ginx"
|
||||
"github.com/gin-gonic/gin"
|
||||
"github.com/toolkits/pkg/ginx"
|
||||
"net/http"
|
||||
)
|
||||
|
||||
type databasesQueryForm struct {
|
||||
|
||||
@@ -1,136 +0,0 @@
|
||||
package router
|
||||
|
||||
import (
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"io"
|
||||
"net/http"
|
||||
"time"
|
||||
|
||||
"github.com/ccfos/nightingale/v6/models"
|
||||
"github.com/ccfos/nightingale/v6/pkg/ginx"
|
||||
"github.com/ccfos/nightingale/v6/pkg/loggrep"
|
||||
"github.com/toolkits/pkg/logger"
|
||||
|
||||
"github.com/gin-gonic/gin"
|
||||
)
|
||||
|
||||
// traceLogsPage renders an HTML log viewer page for trace logs.
|
||||
func (rt *Router) traceLogsPage(c *gin.Context) {
|
||||
traceId := ginx.UrlParamStr(c, "traceid")
|
||||
if !loggrep.IsValidTraceID(traceId) {
|
||||
c.String(http.StatusBadRequest, "invalid trace id format")
|
||||
return
|
||||
}
|
||||
|
||||
logs, instance, err := rt.getTraceLogs(traceId)
|
||||
if err != nil {
|
||||
c.String(http.StatusInternalServerError, "Error: %v", err)
|
||||
return
|
||||
}
|
||||
|
||||
c.Header("Content-Type", "text/html; charset=utf-8")
|
||||
err = loggrep.RenderTraceLogsHTML(c.Writer, loggrep.TraceLogsPageData{
|
||||
TraceID: traceId,
|
||||
Instance: instance,
|
||||
Logs: logs,
|
||||
Total: len(logs),
|
||||
})
|
||||
if err != nil {
|
||||
c.String(http.StatusInternalServerError, "render error: %v", err)
|
||||
}
|
||||
}
|
||||
|
||||
// traceLogsJSON returns JSON for trace logs.
|
||||
func (rt *Router) traceLogsJSON(c *gin.Context) {
|
||||
traceId := ginx.UrlParamStr(c, "traceid")
|
||||
if !loggrep.IsValidTraceID(traceId) {
|
||||
ginx.Bomb(200, "invalid trace id format")
|
||||
}
|
||||
|
||||
logs, instance, err := rt.getTraceLogs(traceId)
|
||||
ginx.Dangerous(err)
|
||||
|
||||
ginx.NewRender(c).Data(loggrep.EventDetailResp{
|
||||
Logs: logs,
|
||||
Instance: instance,
|
||||
}, nil)
|
||||
}
|
||||
|
||||
// getTraceLogs finds the same-engine instances and queries each one
|
||||
// until trace logs are found. Trace logs belong to a single instance.
|
||||
func (rt *Router) getTraceLogs(traceId string) ([]string, string, error) {
|
||||
keyword := "trace_id=" + traceId
|
||||
instance := fmt.Sprintf("%s:%d", rt.Alert.Heartbeat.IP, rt.HTTP.Port)
|
||||
engineName := rt.Alert.Heartbeat.EngineName
|
||||
|
||||
// try local first
|
||||
logs, err := loggrep.GrepLatestLogFiles(rt.LogDir, keyword)
|
||||
if err == nil && len(logs) > 0 {
|
||||
return logs, instance, nil
|
||||
}
|
||||
|
||||
// find all instances with the same engineName
|
||||
servers, err := models.AlertingEngineGetsInstances(rt.Ctx,
|
||||
"engine_cluster = ? and clock > ?",
|
||||
engineName, time.Now().Unix()-30)
|
||||
if err != nil {
|
||||
return nil, "", err
|
||||
}
|
||||
|
||||
// loop through remote instances until we find logs
|
||||
for _, node := range servers {
|
||||
if node == instance {
|
||||
continue // already tried local
|
||||
}
|
||||
|
||||
logs, nodeAddr, err := rt.forwardTraceLogs(node, traceId)
|
||||
if err != nil {
|
||||
logger.Errorf("forwardTraceLogs failed: %v", err)
|
||||
continue
|
||||
}
|
||||
if len(logs) > 0 {
|
||||
return logs, nodeAddr, nil
|
||||
}
|
||||
}
|
||||
|
||||
return nil, instance, nil
|
||||
}
|
||||
|
||||
func (rt *Router) forwardTraceLogs(node, traceId string) ([]string, string, error) {
|
||||
url := fmt.Sprintf("http://%s/v1/n9e/trace-logs/%s", node, traceId)
|
||||
req, err := http.NewRequest("GET", url, nil)
|
||||
if err != nil {
|
||||
return nil, node, err
|
||||
}
|
||||
|
||||
for user, pass := range rt.HTTP.APIForService.BasicAuth {
|
||||
req.SetBasicAuth(user, pass)
|
||||
break
|
||||
}
|
||||
|
||||
client := &http.Client{Timeout: 15 * time.Second}
|
||||
resp, err := client.Do(req)
|
||||
if err != nil {
|
||||
return nil, node, fmt.Errorf("forward to %s failed: %v", node, err)
|
||||
}
|
||||
defer resp.Body.Close()
|
||||
|
||||
body, err := io.ReadAll(io.LimitReader(resp.Body, 10*1024*1024))
|
||||
if err != nil {
|
||||
return nil, node, err
|
||||
}
|
||||
|
||||
var result struct {
|
||||
Dat loggrep.EventDetailResp `json:"dat"`
|
||||
Err string `json:"err"`
|
||||
}
|
||||
if err := json.Unmarshal(body, &result); err != nil {
|
||||
return nil, node, err
|
||||
}
|
||||
if result.Err != "" {
|
||||
return nil, node, fmt.Errorf("%s", result.Err)
|
||||
}
|
||||
|
||||
return result.Dat.Logs, result.Dat.Instance, nil
|
||||
}
|
||||
@@ -9,9 +9,9 @@ import (
|
||||
"github.com/ccfos/nightingale/v6/pkg/flashduty"
|
||||
"github.com/ccfos/nightingale/v6/pkg/ormx"
|
||||
"github.com/ccfos/nightingale/v6/pkg/secu"
|
||||
"github.com/ccfos/nightingale/v6/pkg/ginx"
|
||||
|
||||
"github.com/gin-gonic/gin"
|
||||
"github.com/toolkits/pkg/ginx"
|
||||
"github.com/toolkits/pkg/logger"
|
||||
"gorm.io/gorm"
|
||||
)
|
||||
|
||||
@@ -7,9 +7,9 @@ import (
|
||||
"github.com/ccfos/nightingale/v6/models"
|
||||
"github.com/ccfos/nightingale/v6/pkg/flashduty"
|
||||
"github.com/ccfos/nightingale/v6/pkg/strx"
|
||||
"github.com/ccfos/nightingale/v6/pkg/ginx"
|
||||
|
||||
"github.com/gin-gonic/gin"
|
||||
"github.com/toolkits/pkg/ginx"
|
||||
"github.com/toolkits/pkg/logger"
|
||||
)
|
||||
|
||||
@@ -27,9 +27,6 @@ func (rt *Router) userGroupGets(c *gin.Context) {
|
||||
|
||||
me := c.MustGet("user").(*models.User)
|
||||
lst, err := me.UserGroups(rt.Ctx, limit, query)
|
||||
if err == nil {
|
||||
models.FillUpdateByNicknames(rt.Ctx, lst)
|
||||
}
|
||||
|
||||
ginx.NewRender(c).Data(lst, err)
|
||||
}
|
||||
|
||||
@@ -5,9 +5,9 @@ import (
|
||||
"time"
|
||||
|
||||
"github.com/ccfos/nightingale/v6/models"
|
||||
"github.com/ccfos/nightingale/v6/pkg/ginx"
|
||||
|
||||
"github.com/gin-gonic/gin"
|
||||
"github.com/toolkits/pkg/ginx"
|
||||
)
|
||||
|
||||
func (rt *Router) userVariableConfigGets(context *gin.Context) {
|
||||
|
||||
@@ -12,7 +12,6 @@ import (
|
||||
"github.com/ccfos/nightingale/v6/pkg/cas"
|
||||
"github.com/ccfos/nightingale/v6/pkg/ctx"
|
||||
"github.com/ccfos/nightingale/v6/pkg/dingtalk"
|
||||
"github.com/ccfos/nightingale/v6/pkg/feishu"
|
||||
"github.com/ccfos/nightingale/v6/pkg/ldapx"
|
||||
"github.com/ccfos/nightingale/v6/pkg/oauth2x"
|
||||
"github.com/ccfos/nightingale/v6/pkg/oidcx"
|
||||
@@ -28,7 +27,6 @@ type SsoClient struct {
|
||||
CAS *cas.SsoClient
|
||||
OAuth2 *oauth2x.SsoClient
|
||||
DingTalk *dingtalk.SsoClient
|
||||
FeiShu *feishu.SsoClient
|
||||
LastUpdateTime int64
|
||||
configCache *memsto.ConfigCache
|
||||
configLastUpdateTime int64
|
||||
@@ -205,13 +203,6 @@ func Init(center cconf.Center, ctx *ctx.Context, configCache *memsto.ConfigCache
|
||||
log.Fatalf("init %s failed: %s", dingtalk.SsoTypeName, err)
|
||||
}
|
||||
ssoClient.DingTalk = dingtalk.New(config)
|
||||
case feishu.SsoTypeName:
|
||||
var config feishu.Config
|
||||
err := json.Unmarshal([]byte(cfg.Content), &config)
|
||||
if err != nil {
|
||||
log.Fatalf("init %s failed: %s", feishu.SsoTypeName, err)
|
||||
}
|
||||
ssoClient.FeiShu = feishu.New(config)
|
||||
}
|
||||
}
|
||||
|
||||
@@ -300,22 +291,6 @@ func (s *SsoClient) reload(ctx *ctx.Context) error {
|
||||
s.DingTalk = nil
|
||||
}
|
||||
|
||||
if feiShuConfig, ok := ssoConfigMap[feishu.SsoTypeName]; ok {
|
||||
var config feishu.Config
|
||||
err := json.Unmarshal([]byte(feiShuConfig.Content), &config)
|
||||
if err != nil {
|
||||
logger.Warningf("reload %s failed: %s", feishu.SsoTypeName, err)
|
||||
} else {
|
||||
if s.FeiShu != nil {
|
||||
s.FeiShu.Reload(config)
|
||||
} else {
|
||||
s.FeiShu = feishu.New(config)
|
||||
}
|
||||
}
|
||||
} else {
|
||||
s.FeiShu = nil
|
||||
}
|
||||
|
||||
s.LastUpdateTime = lastUpdateTime
|
||||
s.configLastUpdateTime = lastCacheUpdateTime
|
||||
return nil
|
||||
|
||||
@@ -71,10 +71,7 @@ CREATE TABLE `datasource`
|
||||
`updated_at` bigint not null default 0,
|
||||
`updated_by` varchar(64) not null default '',
|
||||
PRIMARY KEY (`id`)
|
||||
) ENGINE = InnoDB DEFAULT CHARSET = utf8mb4;
|
||||
|
||||
-- datasource add weight field
|
||||
alter table `datasource` add `weight` int not null default 0;
|
||||
) ENGINE = InnoDB DEFAULT CHARSET = utf8mb4;
|
||||
|
||||
CREATE TABLE `builtin_cate` (
|
||||
`id` bigint unsigned not null auto_increment,
|
||||
|
||||
@@ -87,7 +87,7 @@ func Initialize(configDir string, cryptoKey string) (func(), error) {
|
||||
alert.Start(config.Alert, config.Pushgw, syncStats, alertStats, externalProcessors, targetCache, busiGroupCache, alertMuteCache,
|
||||
alertRuleCache, notifyConfigCache, taskTplsCache, dsCache, ctx, promClients, userCache, userGroupCache, notifyRuleCache, notifyChannelCache, messageTemplateCache, configCvalCache)
|
||||
|
||||
alertrtRouter := alertrt.New(config.HTTP, config.Alert, alertMuteCache, targetCache, busiGroupCache, alertStats, ctx, externalProcessors, config.Log.Dir)
|
||||
alertrtRouter := alertrt.New(config.HTTP, config.Alert, alertMuteCache, targetCache, busiGroupCache, alertStats, ctx, externalProcessors)
|
||||
|
||||
alertrtRouter.Config(r)
|
||||
|
||||
|
||||
@@ -1,67 +0,0 @@
|
||||
package cron
|
||||
|
||||
import (
|
||||
"time"
|
||||
|
||||
"github.com/ccfos/nightingale/v6/models"
|
||||
"github.com/ccfos/nightingale/v6/pkg/ctx"
|
||||
|
||||
"github.com/robfig/cron/v3"
|
||||
"github.com/toolkits/pkg/logger"
|
||||
)
|
||||
|
||||
// Tuning knobs for batched deletion of pipeline execution records.
const (
	defaultBatchSize = 100 // rows removed per delete batch
	defaultSleepMs   = 10  // pause between batches, in milliseconds
)
|
||||
|
||||
// cleanPipelineExecutionInBatches 分批删除执行记录,避免大批量删除影响数据库性能
|
||||
func cleanPipelineExecutionInBatches(ctx *ctx.Context, day int) {
|
||||
threshold := time.Now().Unix() - 86400*int64(day)
|
||||
var totalDeleted int64
|
||||
|
||||
for {
|
||||
deleted, err := models.DeleteEventPipelineExecutionsInBatches(ctx, threshold, defaultBatchSize)
|
||||
if err != nil {
|
||||
logger.Errorf("Failed to clean pipeline execution records in batch: %v", err)
|
||||
return
|
||||
}
|
||||
|
||||
totalDeleted += deleted
|
||||
|
||||
// 如果本批删除数量小于 batchSize,说明已删除完毕
|
||||
if deleted < int64(defaultBatchSize) {
|
||||
break
|
||||
}
|
||||
|
||||
// 休眠一段时间,降低数据库压力
|
||||
time.Sleep(time.Duration(defaultSleepMs) * time.Millisecond)
|
||||
}
|
||||
|
||||
if totalDeleted > 0 {
|
||||
logger.Infof("Cleaned %d pipeline execution records older than %d days", totalDeleted, day)
|
||||
}
|
||||
}
|
||||
|
||||
// CleanPipelineExecution starts a cron job to clean old pipeline execution records in batches
|
||||
// Runs daily at 6:00 AM
|
||||
// day: 数据保留天数,默认 7 天
|
||||
// 使用分批删除方式,每批 100 条,间隔 10ms,避免大批量删除影响数据库性能
|
||||
func CleanPipelineExecution(ctx *ctx.Context, day int) {
|
||||
c := cron.New()
|
||||
if day < 1 {
|
||||
day = 7 // default retention: 7 days
|
||||
}
|
||||
|
||||
_, err := c.AddFunc("0 6 * * *", func() {
|
||||
cleanPipelineExecutionInBatches(ctx, day)
|
||||
})
|
||||
|
||||
if err != nil {
|
||||
logger.Errorf("Failed to add clean pipeline execution cron job: %v", err)
|
||||
return
|
||||
}
|
||||
|
||||
c.Start()
|
||||
logger.Infof("Pipeline execution cleanup cron started, retention: %d days, batch_size: %d, sleep_ms: %d", day, defaultBatchSize, defaultSleepMs)
|
||||
}
|
||||
@@ -12,8 +12,6 @@ import (
|
||||
|
||||
"github.com/mitchellh/mapstructure"
|
||||
"github.com/toolkits/pkg/logger"
|
||||
|
||||
"github.com/ccfos/nightingale/v6/pkg/logx"
|
||||
)
|
||||
|
||||
const (
|
||||
@@ -180,9 +178,14 @@ func (c *Clickhouse) QueryData(ctx context.Context, query interface{}) ([]models
|
||||
|
||||
rows, err := c.QueryTimeseries(ctx, ckQueryParam)
|
||||
if err != nil {
|
||||
logx.Warningf(ctx, "query:%+v get data err:%v", ckQueryParam, err)
|
||||
logger.Warningf("query:%+v get data err:%v", ckQueryParam, err)
|
||||
return nil, err
|
||||
}
|
||||
|
||||
if err != nil {
|
||||
logger.Warningf("query:%+v get data err:%v", ckQueryParam, err)
|
||||
return []models.DataResp{}, err
|
||||
}
|
||||
data := make([]models.DataResp, 0)
|
||||
for i := range rows {
|
||||
data = append(data, models.DataResp{
|
||||
@@ -211,7 +214,7 @@ func (c *Clickhouse) QueryLog(ctx context.Context, query interface{}) ([]interfa
|
||||
|
||||
rows, err := c.Query(ctx, ckQueryParam)
|
||||
if err != nil {
|
||||
logx.Warningf(ctx, "query:%+v get data err:%v", ckQueryParam, err)
|
||||
logger.Warningf("query:%+v get data err:%v", ckQueryParam, err)
|
||||
return nil, 0, err
|
||||
}
|
||||
|
||||
|
||||
@@ -17,7 +17,6 @@ import (
|
||||
|
||||
"github.com/ccfos/nightingale/v6/memsto"
|
||||
"github.com/ccfos/nightingale/v6/models"
|
||||
"github.com/ccfos/nightingale/v6/pkg/logx"
|
||||
)
|
||||
|
||||
type FixedField string
|
||||
@@ -27,10 +26,6 @@ const (
|
||||
FieldId FixedField = "_id"
|
||||
)
|
||||
|
||||
// LabelSeparator 用于分隔多个标签的分隔符
|
||||
// 使用 ASCII 控制字符 Record Separator (0x1E),避免与用户数据中的 "--" 冲突
|
||||
const LabelSeparator = "\x1e"
|
||||
|
||||
type Query struct {
|
||||
Ref string `json:"ref" mapstructure:"ref"`
|
||||
IndexType string `json:"index_type" mapstructure:"index_type"` // 普通索引:index 索引模式:index_pattern
|
||||
@@ -133,7 +128,7 @@ func TransferData(metric, ref string, m map[string][][]float64) []models.DataRes
|
||||
}
|
||||
|
||||
data.Metric["__name__"] = model.LabelValue(metric)
|
||||
labels := strings.Split(k, LabelSeparator)
|
||||
labels := strings.Split(k, "--")
|
||||
for _, label := range labels {
|
||||
arr := strings.SplitN(label, "=", 2)
|
||||
if len(arr) == 2 {
|
||||
@@ -202,7 +197,7 @@ func GetBuckets(labelKey string, keys []string, arr []interface{}, metrics *Metr
|
||||
case json.Number, string:
|
||||
if !getTs {
|
||||
if labels != "" {
|
||||
newlabels = fmt.Sprintf("%s%s%s=%v", labels, LabelSeparator, labelKey, keyValue)
|
||||
newlabels = fmt.Sprintf("%s--%s=%v", labels, labelKey, keyValue)
|
||||
} else {
|
||||
newlabels = fmt.Sprintf("%s=%v", labelKey, keyValue)
|
||||
}
|
||||
@@ -391,8 +386,8 @@ func QueryData(ctx context.Context, queryParam interface{}, cliTimeout int64, ve
|
||||
now := time.Now().Unix()
|
||||
var start, end int64
|
||||
if param.End != 0 && param.Start != 0 {
|
||||
end = param.End
|
||||
start = param.Start
|
||||
end = param.End - param.End%param.Interval
|
||||
start = param.Start - param.Start%param.Interval
|
||||
} else {
|
||||
end = now
|
||||
start = end - param.Interval
|
||||
@@ -544,7 +539,7 @@ func QueryData(ctx context.Context, queryParam interface{}, cliTimeout int64, ve
|
||||
|
||||
source, _ := queryString.Source()
|
||||
b, _ := json.Marshal(source)
|
||||
logx.Debugf(ctx, "query_data q:%+v indexArr:%+v tsAggr:%+v query_string:%s", param, indexArr, tsAggr, string(b))
|
||||
logger.Debugf("query_data q:%+v indexArr:%+v tsAggr:%+v query_string:%s", param, indexArr, tsAggr, string(b))
|
||||
|
||||
searchSource := elastic.NewSearchSource().
|
||||
Query(queryString).
|
||||
@@ -552,29 +547,21 @@ func QueryData(ctx context.Context, queryParam interface{}, cliTimeout int64, ve
|
||||
|
||||
searchSourceString, err := searchSource.Source()
|
||||
if err != nil {
|
||||
logx.Warningf(ctx, "query_data searchSource:%s to string error:%v", searchSourceString, err)
|
||||
logger.Warningf("query_data searchSource:%s to string error:%v", searchSourceString, err)
|
||||
}
|
||||
|
||||
jsonSearchSource, err := json.Marshal(searchSourceString)
|
||||
if err != nil {
|
||||
logx.Warningf(ctx, "query_data searchSource:%s to json error:%v", searchSourceString, err)
|
||||
logger.Warningf("query_data searchSource:%s to json error:%v", searchSourceString, err)
|
||||
}
|
||||
|
||||
result, err := search(ctx, indexArr, searchSource, param.Timeout, param.MaxShard)
|
||||
if err != nil {
|
||||
logx.Warningf(ctx, "query_data searchSource:%s query_data error:%v", searchSourceString, err)
|
||||
logger.Warningf("query_data searchSource:%s query_data error:%v", searchSourceString, err)
|
||||
return nil, err
|
||||
}
|
||||
|
||||
// 检查是否有 shard failures,有部分数据时仅记录警告继续处理
|
||||
if shardErr := checkShardFailures(ctx, result.Shards, "query_data", searchSourceString); shardErr != nil {
|
||||
if len(result.Aggregations["ts"]) == 0 {
|
||||
return nil, shardErr
|
||||
}
|
||||
// 有部分数据,checkShardFailures 已记录警告,继续处理
|
||||
}
|
||||
|
||||
logx.Infof(ctx, "query_data searchSource:%s resp:%s", string(jsonSearchSource), string(result.Aggregations["ts"]))
|
||||
logger.Debugf("query_data searchSource:%s resp:%s", string(jsonSearchSource), string(result.Aggregations["ts"]))
|
||||
|
||||
js, err := simplejson.NewJson(result.Aggregations["ts"])
|
||||
if err != nil {
|
||||
@@ -611,40 +598,6 @@ func QueryData(ctx context.Context, queryParam interface{}, cliTimeout int64, ve
|
||||
return items, nil
|
||||
}
|
||||
|
||||
// checkShardFailures 检查 ES 查询结果中的 shard failures,返回格式化的错误信息
|
||||
func checkShardFailures(ctx context.Context, shards *elastic.ShardsInfo, logPrefix string, queryContext interface{}) error {
|
||||
if shards == nil || shards.Failed == 0 || len(shards.Failures) == 0 {
|
||||
return nil
|
||||
}
|
||||
|
||||
var failureReasons []string
|
||||
for _, failure := range shards.Failures {
|
||||
reason := ""
|
||||
if failure.Reason != nil {
|
||||
if reasonType, ok := failure.Reason["type"].(string); ok {
|
||||
reason = reasonType
|
||||
}
|
||||
if reasonMsg, ok := failure.Reason["reason"].(string); ok {
|
||||
if reason != "" {
|
||||
reason += ": " + reasonMsg
|
||||
} else {
|
||||
reason = reasonMsg
|
||||
}
|
||||
}
|
||||
}
|
||||
if reason != "" {
|
||||
failureReasons = append(failureReasons, fmt.Sprintf("index=%s shard=%d: %s", failure.Index, failure.Shard, reason))
|
||||
}
|
||||
}
|
||||
|
||||
if len(failureReasons) > 0 {
|
||||
errMsg := fmt.Sprintf("elasticsearch shard failures (%d/%d failed): %s", shards.Failed, shards.Total, strings.Join(failureReasons, "; "))
|
||||
logx.Warningf(ctx, "%s query:%v %s", logPrefix, queryContext, errMsg)
|
||||
return fmt.Errorf("%s", errMsg)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func HitFilter(typ string) bool {
|
||||
switch typ {
|
||||
case "keyword", "date", "long", "integer", "short", "byte", "double", "float", "half_float", "scaled_float", "unsigned_long":
|
||||
@@ -721,34 +674,28 @@ func QueryLog(ctx context.Context, queryParam interface{}, timeout int64, versio
|
||||
} else {
|
||||
source = source.From(param.P).Sort(param.DateField, param.Ascending)
|
||||
}
|
||||
sourceBytes, _ := json.Marshal(source)
|
||||
result, err := search(ctx, indexArr, source, param.Timeout, param.MaxShard)
|
||||
if err != nil {
|
||||
logx.Warningf(ctx, "query_log source:%s error:%v", string(sourceBytes), err)
|
||||
logger.Warningf("query data error:%v", err)
|
||||
return nil, 0, err
|
||||
}
|
||||
|
||||
// 检查是否有 shard failures,有部分数据时仅记录警告继续处理
|
||||
if shardErr := checkShardFailures(ctx, result.Shards, "query_log", string(sourceBytes)); shardErr != nil {
|
||||
if len(result.Hits.Hits) == 0 {
|
||||
return nil, 0, shardErr
|
||||
}
|
||||
// 有部分数据,checkShardFailures 已记录警告,继续处理
|
||||
}
|
||||
|
||||
total := result.TotalHits()
|
||||
|
||||
var ret []interface{}
|
||||
logx.Debugf(ctx, "query_log source:%s len:%d total:%d", string(sourceBytes), len(result.Hits.Hits), total)
|
||||
|
||||
b, _ := json.Marshal(source)
|
||||
logger.Debugf("query data result query source:%s len:%d total:%d", string(b), len(result.Hits.Hits), total)
|
||||
|
||||
resultBytes, _ := json.Marshal(result)
|
||||
logx.Debugf(ctx, "query_log source:%s result:%s", string(sourceBytes), string(resultBytes))
|
||||
logger.Debugf("query data result query source:%s result:%s", string(b), string(resultBytes))
|
||||
|
||||
if strings.HasPrefix(version, "6") {
|
||||
for i := 0; i < len(result.Hits.Hits); i++ {
|
||||
var x map[string]interface{}
|
||||
err := json.Unmarshal(result.Hits.Hits[i].Source, &x)
|
||||
if err != nil {
|
||||
logx.Warningf(ctx, "Unmarshal source error:%v", err)
|
||||
logger.Warningf("Unmarshal source error:%v", err)
|
||||
continue
|
||||
}
|
||||
|
||||
|
||||
@@ -67,13 +67,6 @@ func init() {
|
||||
PluginType: "pgsql",
|
||||
PluginTypeName: "PostgreSQL",
|
||||
}
|
||||
|
||||
DatasourceTypes[7] = DatasourceType{
|
||||
Id: 7,
|
||||
Category: "logging",
|
||||
PluginType: "victorialogs",
|
||||
PluginTypeName: "VictoriaLogs",
|
||||
}
|
||||
}
|
||||
|
||||
type NewDatasourceFn func(settings map[string]interface{}) (Datasource, error)
|
||||
@@ -133,5 +126,4 @@ type DatasourceInfo struct {
|
||||
CreatedAt int64 `json:"created_at"`
|
||||
UpdatedAt int64 `json:"updated_at"`
|
||||
IsDefault bool `json:"is_default"`
|
||||
Weight int `json:"weight"`
|
||||
}
|
||||
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user