mirror of
https://github.com/ccfos/nightingale.git
synced 2026-03-02 22:19:10 +00:00
Compare commits
82 Commits
es-sql-ale
...
main
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
b49ab44818 | ||
|
|
4d37594a0a | ||
|
|
5b941d2ce5 | ||
|
|
932199fde1 | ||
|
|
5d1636d1a5 | ||
|
|
6167eb3b13 | ||
|
|
5beee98cde | ||
|
|
c34c008080 | ||
|
|
75218f9d5a | ||
|
|
341f82ecde | ||
|
|
a6056a5fab | ||
|
|
01e8370882 | ||
|
|
8b11e18754 | ||
|
|
aa749065da | ||
|
|
f5811bc5f7 | ||
|
|
5de63d7307 | ||
|
|
6a44da4dda | ||
|
|
0a65616fbb | ||
|
|
a0e8c5f764 | ||
|
|
d64dbb6909 | ||
|
|
656b91e976 | ||
|
|
fe6dce403f | ||
|
|
faa348a086 | ||
|
|
635b781ae1 | ||
|
|
f60771ad9c | ||
|
|
6bd2f9a89f | ||
|
|
a76049822c | ||
|
|
97746f7469 | ||
|
|
903d75e4b8 | ||
|
|
42637e546d | ||
|
|
7bf000932d | ||
|
|
3202cd1410 | ||
|
|
e28dd079f9 | ||
|
|
72cb35a4ed | ||
|
|
80d0193ac0 | ||
|
|
54a8e2590e | ||
|
|
b296d5bcc3 | ||
|
|
996c9812bd | ||
|
|
0f8bb8b2af | ||
|
|
8c54a97292 | ||
|
|
47cab69088 | ||
|
|
c432636d8d | ||
|
|
959b0389c6 | ||
|
|
3d8f1b3ef5 | ||
|
|
ce838036ad | ||
|
|
578ac096e5 | ||
|
|
48ee6117e9 | ||
|
|
5afd6a60e9 | ||
|
|
37372ae9ea | ||
|
|
48e7c34ebf | ||
|
|
acd0ec4bef | ||
|
|
c1ad946bc5 | ||
|
|
4c2affc7da | ||
|
|
273d282beb | ||
|
|
3e86656381 | ||
|
|
f942772d2b | ||
|
|
fbc0c22d7a | ||
|
|
abd452a6df | ||
|
|
47f05627d9 | ||
|
|
edd8e2a3db | ||
|
|
c4ca2920ef | ||
|
|
afc8d7d21c | ||
|
|
c0e13e2870 | ||
|
|
4f186a71ba | ||
|
|
104c275f2d | ||
|
|
2ba7a970e8 | ||
|
|
c98241b3fd | ||
|
|
b30caf625b | ||
|
|
32e8b961c2 | ||
|
|
2ff0a8fdbb | ||
|
|
7ff74d0948 | ||
|
|
da58d825c0 | ||
|
|
0014b77c4d | ||
|
|
fc7fdde2d5 | ||
|
|
61b63fc75c | ||
|
|
80f564ec63 | ||
|
|
203c2a885b | ||
|
|
9bee3e1379 | ||
|
|
c214580e87 | ||
|
|
f6faed0659 | ||
|
|
990819d6c1 | ||
|
|
5fff517cce |
@@ -31,7 +31,9 @@
|
||||
|
||||
Nightingale is an open-source monitoring project that focuses on alerting. Similar to Grafana, Nightingale also connects with various existing data sources. However, while Grafana emphasizes visualization, Nightingale places greater emphasis on the alerting engine, as well as the processing and distribution of alarms.
|
||||
|
||||
> The Nightingale project was initially developed and open-sourced by DiDi.inc. On May 11, 2022, it was donated to the Open Source Development Committee of the China Computer Federation (CCF ODC).
|
||||
> 💡 Nightingale has now officially launched the [MCP-Server](https://github.com/n9e/n9e-mcp-server/). This MCP Server enables AI assistants to interact with the Nightingale API using natural language, facilitating alert management, monitoring, and observability tasks.
|
||||
>
|
||||
> The Nightingale project was initially developed and open-sourced by DiDi.inc. On May 11, 2022, it was donated to the Open Source Development Committee of the China Computer Federation (CCF ODTC).
|
||||
|
||||

|
||||
|
||||
|
||||
@@ -3,7 +3,7 @@
|
||||
<img src="doc/img/Nightingale_L_V.png" alt="nightingale - cloud native monitoring" width="100" /></a>
|
||||
</p>
|
||||
<p align="center">
|
||||
<b>开源告警管理专家</b>
|
||||
<b>开源监控告警管理专家</b>
|
||||
</p>
|
||||
|
||||
<p align="center">
|
||||
@@ -33,7 +33,8 @@
|
||||
|
||||
夜莺侧重于监控告警,类似于 Grafana 的数据源集成方式,夜莺也是对接多种既有的数据源,不过 Grafana 侧重于可视化,夜莺则是侧重于告警引擎、告警事件的处理和分发。
|
||||
|
||||
> 夜莺监控项目,最初由滴滴开发和开源,并于 2022 年 5 月 11 日,捐赠予中国计算机学会开源发展技术委员会(CCF ODTC),为 CCF ODTC 成立后接受捐赠的第一个开源项目。
|
||||
> - 💡夜莺正式推出了 [MCP-Server](https://github.com/n9e/n9e-mcp-server/),此 MCP Server 允许 AI 助手通过自然语言与夜莺 API 交互,实现告警管理、监控和可观测性任务。
|
||||
> - 夜莺监控项目,最初由滴滴开发和开源,并于 2022 年 5 月 11 日,捐赠予中国计算机学会开源发展技术委员会(CCF ODTC),为 CCF ODTC 成立后接受捐赠的第一个开源项目。
|
||||
|
||||

|
||||
|
||||
|
||||
@@ -79,7 +79,7 @@ func Initialize(configDir string, cryptoKey string) (func(), error) {
|
||||
|
||||
r := httpx.GinEngine(config.Global.RunMode, config.HTTP,
|
||||
configCvalCache.PrintBodyPaths, configCvalCache.PrintAccessLog)
|
||||
rt := router.New(config.HTTP, config.Alert, alertMuteCache, targetCache, busiGroupCache, alertStats, ctx, externalProcessors)
|
||||
rt := router.New(config.HTTP, config.Alert, alertMuteCache, targetCache, busiGroupCache, alertStats, ctx, externalProcessors, config.Log.Dir)
|
||||
|
||||
if config.Ibex.Enable {
|
||||
ibex.ServerStart(false, nil, redis, config.HTTP.APIForService.BasicAuth, config.Alert.Heartbeat, &config.CenterApi, r, nil, config.Ibex, config.HTTP.Port)
|
||||
|
||||
@@ -8,7 +8,6 @@ import (
|
||||
"time"
|
||||
|
||||
"github.com/ccfos/nightingale/v6/alert/aconf"
|
||||
"github.com/ccfos/nightingale/v6/alert/common"
|
||||
"github.com/ccfos/nightingale/v6/alert/queue"
|
||||
"github.com/ccfos/nightingale/v6/memsto"
|
||||
"github.com/ccfos/nightingale/v6/models"
|
||||
@@ -99,12 +98,12 @@ func (e *Consumer) consumeOne(event *models.AlertCurEvent) {
|
||||
e.dispatch.Astats.CounterAlertsTotal.WithLabelValues(event.Cluster, eventType, event.GroupName).Inc()
|
||||
|
||||
if err := event.ParseRule("rule_name"); err != nil {
|
||||
logger.Warningf("ruleid:%d failed to parse rule name: %v", event.RuleId, err)
|
||||
logger.Warningf("alert_eval_%d datasource_%d failed to parse rule name: %v", event.RuleId, event.DatasourceId, err)
|
||||
event.RuleName = fmt.Sprintf("failed to parse rule name: %v", err)
|
||||
}
|
||||
|
||||
if err := event.ParseRule("annotations"); err != nil {
|
||||
logger.Warningf("ruleid:%d failed to parse annotations: %v", event.RuleId, err)
|
||||
logger.Warningf("alert_eval_%d datasource_%d failed to parse annotations: %v", event.RuleId, event.DatasourceId, err)
|
||||
event.Annotations = fmt.Sprintf("failed to parse annotations: %v", err)
|
||||
event.AnnotationsJSON["error"] = event.Annotations
|
||||
}
|
||||
@@ -112,7 +111,7 @@ func (e *Consumer) consumeOne(event *models.AlertCurEvent) {
|
||||
e.queryRecoveryVal(event)
|
||||
|
||||
if err := event.ParseRule("rule_note"); err != nil {
|
||||
logger.Warningf("ruleid:%d failed to parse rule note: %v", event.RuleId, err)
|
||||
logger.Warningf("alert_eval_%d datasource_%d failed to parse rule note: %v", event.RuleId, event.DatasourceId, err)
|
||||
event.RuleNote = fmt.Sprintf("failed to parse rule note: %v", err)
|
||||
}
|
||||
|
||||
@@ -131,7 +130,7 @@ func (e *Consumer) persist(event *models.AlertCurEvent) {
|
||||
var err error
|
||||
event.Id, err = poster.PostByUrlsWithResp[int64](e.ctx, "/v1/n9e/event-persist", event)
|
||||
if err != nil {
|
||||
logger.Errorf("event:%+v persist err:%v", event, err)
|
||||
logger.Errorf("event:%s persist err:%v", event.Hash, err)
|
||||
e.dispatch.Astats.CounterRuleEvalErrorTotal.WithLabelValues(fmt.Sprintf("%v", event.DatasourceId), "persist_event", event.GroupName, fmt.Sprintf("%v", event.RuleId)).Inc()
|
||||
}
|
||||
return
|
||||
@@ -139,7 +138,7 @@ func (e *Consumer) persist(event *models.AlertCurEvent) {
|
||||
|
||||
err := models.EventPersist(e.ctx, event)
|
||||
if err != nil {
|
||||
logger.Errorf("event%+v persist err:%v", event, err)
|
||||
logger.Errorf("event:%s persist err:%v", event.Hash, err)
|
||||
e.dispatch.Astats.CounterRuleEvalErrorTotal.WithLabelValues(fmt.Sprintf("%v", event.DatasourceId), "persist_event", event.GroupName, fmt.Sprintf("%v", event.RuleId)).Inc()
|
||||
}
|
||||
}
|
||||
@@ -157,12 +156,12 @@ func (e *Consumer) queryRecoveryVal(event *models.AlertCurEvent) {
|
||||
|
||||
promql = strings.TrimSpace(promql)
|
||||
if promql == "" {
|
||||
logger.Warningf("rule_eval:%s promql is blank", getKey(event))
|
||||
logger.Warningf("alert_eval_%d datasource_%d promql is blank", event.RuleId, event.DatasourceId)
|
||||
return
|
||||
}
|
||||
|
||||
if e.promClients.IsNil(event.DatasourceId) {
|
||||
logger.Warningf("rule_eval:%s error reader client is nil", getKey(event))
|
||||
logger.Warningf("alert_eval_%d datasource_%d error reader client is nil", event.RuleId, event.DatasourceId)
|
||||
return
|
||||
}
|
||||
|
||||
@@ -171,7 +170,7 @@ func (e *Consumer) queryRecoveryVal(event *models.AlertCurEvent) {
|
||||
var warnings promsdk.Warnings
|
||||
value, warnings, err := readerClient.Query(e.ctx.Ctx, promql, time.Now())
|
||||
if err != nil {
|
||||
logger.Errorf("rule_eval:%s promql:%s, error:%v", getKey(event), promql, err)
|
||||
logger.Errorf("alert_eval_%d datasource_%d promql:%s, error:%v", event.RuleId, event.DatasourceId, promql, err)
|
||||
event.AnnotationsJSON["recovery_promql_error"] = fmt.Sprintf("promql:%s error:%v", promql, err)
|
||||
|
||||
b, err := json.Marshal(event.AnnotationsJSON)
|
||||
@@ -185,12 +184,12 @@ func (e *Consumer) queryRecoveryVal(event *models.AlertCurEvent) {
|
||||
}
|
||||
|
||||
if len(warnings) > 0 {
|
||||
logger.Errorf("rule_eval:%s promql:%s, warnings:%v", getKey(event), promql, warnings)
|
||||
logger.Errorf("alert_eval_%d datasource_%d promql:%s, warnings:%v", event.RuleId, event.DatasourceId, promql, warnings)
|
||||
}
|
||||
|
||||
anomalyPoints := models.ConvertAnomalyPoints(value)
|
||||
if len(anomalyPoints) == 0 {
|
||||
logger.Warningf("rule_eval:%s promql:%s, result is empty", getKey(event), promql)
|
||||
logger.Warningf("alert_eval_%d datasource_%d promql:%s, result is empty", event.RuleId, event.DatasourceId, promql)
|
||||
event.AnnotationsJSON["recovery_promql_error"] = fmt.Sprintf("promql:%s error:%s", promql, "result is empty")
|
||||
} else {
|
||||
event.AnnotationsJSON["recovery_value"] = fmt.Sprintf("%v", anomalyPoints[0].Value)
|
||||
@@ -205,6 +204,3 @@ func (e *Consumer) queryRecoveryVal(event *models.AlertCurEvent) {
|
||||
}
|
||||
}
|
||||
|
||||
func getKey(event *models.AlertCurEvent) string {
|
||||
return common.RuleKey(event.DatasourceId, event.RuleId)
|
||||
}
|
||||
|
||||
@@ -16,6 +16,7 @@ import (
|
||||
"github.com/ccfos/nightingale/v6/alert/astats"
|
||||
"github.com/ccfos/nightingale/v6/alert/common"
|
||||
"github.com/ccfos/nightingale/v6/alert/pipeline"
|
||||
"github.com/ccfos/nightingale/v6/alert/pipeline/engine"
|
||||
"github.com/ccfos/nightingale/v6/alert/sender"
|
||||
"github.com/ccfos/nightingale/v6/memsto"
|
||||
"github.com/ccfos/nightingale/v6/models"
|
||||
@@ -170,7 +171,7 @@ func (e *Dispatch) HandleEventWithNotifyRule(eventOrigin *models.AlertCurEvent)
|
||||
// 深拷贝新的 event,避免并发修改 event 冲突
|
||||
eventCopy := eventOrigin.DeepCopy()
|
||||
|
||||
logger.Infof("notify rule ids: %v, event: %+v", notifyRuleId, eventCopy)
|
||||
logger.Infof("notify rule ids: %v, event: %s", notifyRuleId, eventCopy.Hash)
|
||||
notifyRule := e.notifyRuleCache.Get(notifyRuleId)
|
||||
if notifyRule == nil {
|
||||
continue
|
||||
@@ -184,7 +185,7 @@ func (e *Dispatch) HandleEventWithNotifyRule(eventOrigin *models.AlertCurEvent)
|
||||
|
||||
eventCopy = HandleEventPipeline(notifyRule.PipelineConfigs, eventOrigin, eventCopy, e.eventProcessorCache, e.ctx, notifyRuleId, "notify_rule")
|
||||
if ShouldSkipNotify(e.ctx, eventCopy, notifyRuleId) {
|
||||
logger.Infof("notify_id: %d, event:%+v, should skip notify", notifyRuleId, eventCopy)
|
||||
logger.Infof("notify_id: %d, event:%s, should skip notify", notifyRuleId, eventCopy.Hash)
|
||||
continue
|
||||
}
|
||||
|
||||
@@ -192,7 +193,7 @@ func (e *Dispatch) HandleEventWithNotifyRule(eventOrigin *models.AlertCurEvent)
|
||||
for i := range notifyRule.NotifyConfigs {
|
||||
err := NotifyRuleMatchCheck(¬ifyRule.NotifyConfigs[i], eventCopy)
|
||||
if err != nil {
|
||||
logger.Errorf("notify_id: %d, event:%+v, channel_id:%d, template_id: %d, notify_config:%+v, err:%v", notifyRuleId, eventCopy, notifyRule.NotifyConfigs[i].ChannelID, notifyRule.NotifyConfigs[i].TemplateID, notifyRule.NotifyConfigs[i], err)
|
||||
logger.Errorf("notify_id: %d, event:%s, channel_id:%d, template_id: %d, notify_config:%+v, err:%v", notifyRuleId, eventCopy.Hash, notifyRule.NotifyConfigs[i].ChannelID, notifyRule.NotifyConfigs[i].TemplateID, notifyRule.NotifyConfigs[i], err)
|
||||
continue
|
||||
}
|
||||
|
||||
@@ -200,12 +201,12 @@ func (e *Dispatch) HandleEventWithNotifyRule(eventOrigin *models.AlertCurEvent)
|
||||
messageTemplate := e.messageTemplateCache.Get(notifyRule.NotifyConfigs[i].TemplateID)
|
||||
if notifyChannel == nil {
|
||||
sender.NotifyRecord(e.ctx, []*models.AlertCurEvent{eventCopy}, notifyRuleId, fmt.Sprintf("notify_channel_id:%d", notifyRule.NotifyConfigs[i].ChannelID), "", "", errors.New("notify_channel not found"))
|
||||
logger.Warningf("notify_id: %d, event:%+v, channel_id:%d, template_id: %d, notify_channel not found", notifyRuleId, eventCopy, notifyRule.NotifyConfigs[i].ChannelID, notifyRule.NotifyConfigs[i].TemplateID)
|
||||
logger.Warningf("notify_id: %d, event:%s, channel_id:%d, template_id: %d, notify_channel not found", notifyRuleId, eventCopy.Hash, notifyRule.NotifyConfigs[i].ChannelID, notifyRule.NotifyConfigs[i].TemplateID)
|
||||
continue
|
||||
}
|
||||
|
||||
if notifyChannel.RequestType != "flashduty" && notifyChannel.RequestType != "pagerduty" && messageTemplate == nil {
|
||||
logger.Warningf("notify_id: %d, channel_name: %v, event:%+v, template_id: %d, message_template not found", notifyRuleId, notifyChannel.Ident, eventCopy, notifyRule.NotifyConfigs[i].TemplateID)
|
||||
logger.Warningf("notify_id: %d, channel_name: %v, event:%s, template_id: %d, message_template not found", notifyRuleId, notifyChannel.Ident, eventCopy.Hash, notifyRule.NotifyConfigs[i].TemplateID)
|
||||
sender.NotifyRecord(e.ctx, []*models.AlertCurEvent{eventCopy}, notifyRuleId, notifyChannel.Name, "", "", errors.New("message_template not found"))
|
||||
|
||||
continue
|
||||
@@ -231,6 +232,8 @@ func shouldSkipNotify(ctx *ctx.Context, event *models.AlertCurEvent, notifyRuleI
|
||||
}
|
||||
|
||||
func HandleEventPipeline(pipelineConfigs []models.PipelineConfig, eventOrigin, event *models.AlertCurEvent, eventProcessorCache *memsto.EventProcessorCacheType, ctx *ctx.Context, id int64, from string) *models.AlertCurEvent {
|
||||
workflowEngine := engine.NewWorkflowEngine(ctx)
|
||||
|
||||
for _, pipelineConfig := range pipelineConfigs {
|
||||
if !pipelineConfig.Enable {
|
||||
continue
|
||||
@@ -238,32 +241,37 @@ func HandleEventPipeline(pipelineConfigs []models.PipelineConfig, eventOrigin, e
|
||||
|
||||
eventPipeline := eventProcessorCache.Get(pipelineConfig.PipelineId)
|
||||
if eventPipeline == nil {
|
||||
logger.Warningf("processor_by_%s_id:%d pipeline_id:%d, event pipeline not found, event: %+v", from, id, pipelineConfig.PipelineId, event)
|
||||
logger.Warningf("processor_by_%s_id:%d pipeline_id:%d, event pipeline not found, event: %s", from, id, pipelineConfig.PipelineId, event.Hash)
|
||||
continue
|
||||
}
|
||||
|
||||
if !PipelineApplicable(eventPipeline, event) {
|
||||
logger.Debugf("processor_by_%s_id:%d pipeline_id:%d, event pipeline not applicable, event: %+v", from, id, pipelineConfig.PipelineId, event)
|
||||
logger.Debugf("processor_by_%s_id:%d pipeline_id:%d, event pipeline not applicable, event: %s", from, id, pipelineConfig.PipelineId, event.Hash)
|
||||
continue
|
||||
}
|
||||
|
||||
processors := eventProcessorCache.GetProcessorsById(pipelineConfig.PipelineId)
|
||||
for _, processor := range processors {
|
||||
var res string
|
||||
var err error
|
||||
logger.Infof("processor_by_%s_id:%d pipeline_id:%d, before processor:%+v, event: %+v", from, id, pipelineConfig.PipelineId, processor, event)
|
||||
event, res, err = processor.Process(ctx, event)
|
||||
if event == nil {
|
||||
logger.Infof("processor_by_%s_id:%d pipeline_id:%d, event dropped, after processor:%+v, event: %+v", from, id, pipelineConfig.PipelineId, processor, eventOrigin)
|
||||
|
||||
if from == "notify_rule" {
|
||||
// alert_rule 获取不到 eventId 记录没有意义
|
||||
sender.NotifyRecord(ctx, []*models.AlertCurEvent{eventOrigin}, id, "", "", res, fmt.Errorf("processor_by_%s_id:%d pipeline_id:%d, drop by processor", from, id, pipelineConfig.PipelineId))
|
||||
}
|
||||
return nil
|
||||
}
|
||||
logger.Infof("processor_by_%s_id:%d pipeline_id:%d, after processor:%+v, event: %+v, res:%v, err:%v", from, id, pipelineConfig.PipelineId, processor, event, res, err)
|
||||
// 统一使用工作流引擎执行(兼容线性模式和工作流模式)
|
||||
triggerCtx := &models.WorkflowTriggerContext{
|
||||
Mode: models.TriggerModeEvent,
|
||||
TriggerBy: from + "_" + strconv.FormatInt(id, 10),
|
||||
}
|
||||
|
||||
resultEvent, result, err := workflowEngine.Execute(eventPipeline, event, triggerCtx)
|
||||
if err != nil {
|
||||
logger.Errorf("processor_by_%s_id:%d pipeline_id:%d, pipeline execute error: %v", from, id, pipelineConfig.PipelineId, err)
|
||||
continue
|
||||
}
|
||||
|
||||
if resultEvent == nil {
|
||||
logger.Infof("processor_by_%s_id:%d pipeline_id:%d, event dropped, event: %s", from, id, pipelineConfig.PipelineId, eventOrigin.Hash)
|
||||
if from == "notify_rule" {
|
||||
sender.NotifyRecord(ctx, []*models.AlertCurEvent{eventOrigin}, id, "", "", result.Message, fmt.Errorf("processor_by_%s_id:%d pipeline_id:%d, drop by pipeline", from, id, pipelineConfig.PipelineId))
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
event = resultEvent
|
||||
logger.Infof("processor_by_%s_id:%d pipeline_id:%d, pipeline executed, status:%s, message:%s", from, id, pipelineConfig.PipelineId, result.Status, result.Message)
|
||||
}
|
||||
|
||||
event.FE2DB()
|
||||
@@ -293,7 +301,7 @@ func PipelineApplicable(pipeline *models.EventPipeline, event *models.AlertCurEv
|
||||
|
||||
tagFilters, err := models.ParseTagFilter(labelFiltersCopy)
|
||||
if err != nil {
|
||||
logger.Errorf("pipeline applicable failed to parse tag filter: %v event:%+v pipeline:%+v", err, event, pipeline)
|
||||
logger.Errorf("pipeline applicable failed to parse tag filter: %v event:%s pipeline:%+v", err, event.Hash, pipeline)
|
||||
return false
|
||||
}
|
||||
tagMatch = common.MatchTags(event.TagsMap, tagFilters)
|
||||
@@ -307,7 +315,7 @@ func PipelineApplicable(pipeline *models.EventPipeline, event *models.AlertCurEv
|
||||
|
||||
tagFilters, err := models.ParseTagFilter(attrFiltersCopy)
|
||||
if err != nil {
|
||||
logger.Errorf("pipeline applicable failed to parse tag filter: %v event:%+v pipeline:%+v err:%v", tagFilters, event, pipeline, err)
|
||||
logger.Errorf("pipeline applicable failed to parse tag filter: %v event:%s pipeline:%+v err:%v", tagFilters, event.Hash, pipeline, err)
|
||||
return false
|
||||
}
|
||||
|
||||
@@ -397,7 +405,7 @@ func NotifyRuleMatchCheck(notifyConfig *models.NotifyConfig, event *models.Alert
|
||||
|
||||
tagFilters, err := models.ParseTagFilter(labelKeysCopy)
|
||||
if err != nil {
|
||||
logger.Errorf("notify send failed to parse tag filter: %v event:%+v notify_config:%+v", err, event, notifyConfig)
|
||||
logger.Errorf("notify send failed to parse tag filter: %v event:%s notify_config:%+v", err, event.Hash, notifyConfig)
|
||||
return fmt.Errorf("failed to parse tag filter: %v", err)
|
||||
}
|
||||
tagMatch = common.MatchTags(event.TagsMap, tagFilters)
|
||||
@@ -415,7 +423,7 @@ func NotifyRuleMatchCheck(notifyConfig *models.NotifyConfig, event *models.Alert
|
||||
|
||||
tagFilters, err := models.ParseTagFilter(attributesCopy)
|
||||
if err != nil {
|
||||
logger.Errorf("notify send failed to parse tag filter: %v event:%+v notify_config:%+v err:%v", tagFilters, event, notifyConfig, err)
|
||||
logger.Errorf("notify send failed to parse tag filter: %v event:%s notify_config:%+v err:%v", tagFilters, event.Hash, notifyConfig, err)
|
||||
return fmt.Errorf("failed to parse tag filter: %v", err)
|
||||
}
|
||||
|
||||
@@ -426,7 +434,7 @@ func NotifyRuleMatchCheck(notifyConfig *models.NotifyConfig, event *models.Alert
|
||||
return fmt.Errorf("event attributes not match attributes filter")
|
||||
}
|
||||
|
||||
logger.Infof("notify send timeMatch:%v severityMatch:%v tagMatch:%v attributesMatch:%v event:%+v notify_config:%+v", timeMatch, severityMatch, tagMatch, attributesMatch, event, notifyConfig)
|
||||
logger.Infof("notify send timeMatch:%v severityMatch:%v tagMatch:%v attributesMatch:%v event:%s notify_config:%+v", timeMatch, severityMatch, tagMatch, attributesMatch, event.Hash, notifyConfig)
|
||||
return nil
|
||||
}
|
||||
|
||||
@@ -538,8 +546,8 @@ func SendNotifyRuleMessage(ctx *ctx.Context, userCache *memsto.UserCacheType, us
|
||||
for i := range flashDutyChannelIDs {
|
||||
start := time.Now()
|
||||
respBody, err := notifyChannel.SendFlashDuty(events, flashDutyChannelIDs[i], notifyChannelCache.GetHttpClient(notifyChannel.ID))
|
||||
respBody = fmt.Sprintf("duration: %d ms %s", time.Since(start).Milliseconds(), respBody)
|
||||
logger.Infof("duty_sender notify_id: %d, channel_name: %v, event:%+v, IntegrationUrl: %v dutychannel_id: %v, respBody: %v, err: %v", notifyRuleId, notifyChannel.Name, events[0], notifyChannel.RequestConfig.FlashDutyRequestConfig.IntegrationUrl, flashDutyChannelIDs[i], respBody, err)
|
||||
respBody = fmt.Sprintf("send_time: %s duration: %d ms %s", time.Now().Format("2006-01-02 15:04:05"), time.Since(start).Milliseconds(), respBody)
|
||||
logger.Infof("duty_sender notify_id: %d, channel_name: %v, event:%s, IntegrationUrl: %v dutychannel_id: %v, respBody: %v, err: %v", notifyRuleId, notifyChannel.Name, events[0].Hash, notifyChannel.RequestConfig.FlashDutyRequestConfig.IntegrationUrl, flashDutyChannelIDs[i], respBody, err)
|
||||
sender.NotifyRecord(ctx, events, notifyRuleId, notifyChannel.Name, strconv.FormatInt(flashDutyChannelIDs[i], 10), respBody, err)
|
||||
}
|
||||
|
||||
@@ -547,8 +555,8 @@ func SendNotifyRuleMessage(ctx *ctx.Context, userCache *memsto.UserCacheType, us
|
||||
for _, routingKey := range pagerdutyRoutingKeys {
|
||||
start := time.Now()
|
||||
respBody, err := notifyChannel.SendPagerDuty(events, routingKey, siteInfo.SiteUrl, notifyChannelCache.GetHttpClient(notifyChannel.ID))
|
||||
respBody = fmt.Sprintf("duration: %d ms %s", time.Since(start).Milliseconds(), respBody)
|
||||
logger.Infof("pagerduty_sender notify_id: %d, channel_name: %v, event:%+v, respBody: %v, err: %v", notifyRuleId, notifyChannel.Name, events[0], respBody, err)
|
||||
respBody = fmt.Sprintf("send_time: %s duration: %d ms %s", time.Now().Format("2006-01-02 15:04:05"), time.Since(start).Milliseconds(), respBody)
|
||||
logger.Infof("pagerduty_sender notify_id: %d, channel_name: %v, event:%s, respBody: %v, err: %v", notifyRuleId, notifyChannel.Name, events[0].Hash, respBody, err)
|
||||
sender.NotifyRecord(ctx, events, notifyRuleId, notifyChannel.Name, "", respBody, err)
|
||||
}
|
||||
|
||||
@@ -578,11 +586,11 @@ func SendNotifyRuleMessage(ctx *ctx.Context, userCache *memsto.UserCacheType, us
|
||||
case "script":
|
||||
start := time.Now()
|
||||
target, res, err := notifyChannel.SendScript(events, tplContent, customParams, sendtos)
|
||||
res = fmt.Sprintf("duration: %d ms %s", time.Since(start).Milliseconds(), res)
|
||||
logger.Infof("script_sender notify_id: %d, channel_name: %v, event:%+v, tplContent:%s, customParams:%v, target:%s, res:%s, err:%v", notifyRuleId, notifyChannel.Name, events[0], tplContent, customParams, target, res, err)
|
||||
res = fmt.Sprintf("send_time: %s duration: %d ms %s", time.Now().Format("2006-01-02 15:04:05"), time.Since(start).Milliseconds(), res)
|
||||
logger.Infof("script_sender notify_id: %d, channel_name: %v, event:%s, tplContent:%s, customParams:%v, target:%s, res:%s, err:%v", notifyRuleId, notifyChannel.Name, events[0].Hash, tplContent, customParams, target, res, err)
|
||||
sender.NotifyRecord(ctx, events, notifyRuleId, notifyChannel.Name, target, res, err)
|
||||
default:
|
||||
logger.Warningf("notify_id: %d, channel_name: %v, event:%+v send type not found", notifyRuleId, notifyChannel.Name, events[0])
|
||||
logger.Warningf("notify_id: %d, channel_name: %v, event:%s send type not found", notifyRuleId, notifyChannel.Name, events[0].Hash)
|
||||
}
|
||||
}
|
||||
|
||||
@@ -726,7 +734,7 @@ func (e *Dispatch) Send(rule *models.AlertRule, event *models.AlertCurEvent, not
|
||||
event = msgCtx.Events[0]
|
||||
}
|
||||
|
||||
logger.Debugf("send to channel:%s event:%+v users:%+v", channel, event, msgCtx.Users)
|
||||
logger.Debugf("send to channel:%s event:%s users:%+v", channel, event.Hash, msgCtx.Users)
|
||||
s.Send(msgCtx)
|
||||
}
|
||||
}
|
||||
@@ -825,12 +833,12 @@ func (e *Dispatch) HandleIbex(rule *models.AlertRule, event *models.AlertCurEven
|
||||
|
||||
if len(t.Host) == 0 {
|
||||
sender.CallIbex(e.ctx, t.TplId, event.TargetIdent,
|
||||
e.taskTplsCache, e.targetCache, e.userCache, event)
|
||||
e.taskTplsCache, e.targetCache, e.userCache, event, "")
|
||||
continue
|
||||
}
|
||||
for _, host := range t.Host {
|
||||
sender.CallIbex(e.ctx, t.TplId, host,
|
||||
e.taskTplsCache, e.targetCache, e.userCache, event)
|
||||
e.taskTplsCache, e.targetCache, e.userCache, event, "")
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -18,11 +18,11 @@ func LogEvent(event *models.AlertCurEvent, location string, err ...error) {
|
||||
}
|
||||
|
||||
logger.Infof(
|
||||
"event(%s %s) %s: rule_id=%d sub_id:%d notify_rule_ids:%v cluster:%s %v%s@%d last_eval_time:%d %s",
|
||||
"alert_eval_%d event(%s %s) %s: sub_id:%d notify_rule_ids:%v cluster:%s %v%s@%d last_eval_time:%d %s",
|
||||
event.RuleId,
|
||||
event.Hash,
|
||||
status,
|
||||
location,
|
||||
event.RuleId,
|
||||
event.SubRuleId,
|
||||
event.NotifyRuleIds,
|
||||
event.Cluster,
|
||||
|
||||
@@ -101,17 +101,17 @@ func (s *Scheduler) syncAlertRules() {
|
||||
}
|
||||
ds := s.datasourceCache.GetById(dsId)
|
||||
if ds == nil {
|
||||
logger.Debugf("datasource %d not found", dsId)
|
||||
logger.Debugf("alert_eval_%d datasource %d not found", rule.Id, dsId)
|
||||
continue
|
||||
}
|
||||
|
||||
if ds.PluginType != ruleType {
|
||||
logger.Debugf("datasource %d category is %s not %s", dsId, ds.PluginType, ruleType)
|
||||
logger.Debugf("alert_eval_%d datasource %d category is %s not %s", rule.Id, dsId, ds.PluginType, ruleType)
|
||||
continue
|
||||
}
|
||||
|
||||
if ds.Status != "enabled" {
|
||||
logger.Debugf("datasource %d status is %s", dsId, ds.Status)
|
||||
logger.Debugf("alert_eval_%d datasource %d status is %s", rule.Id, dsId, ds.Status)
|
||||
continue
|
||||
}
|
||||
processor := process.NewProcessor(s.aconf.Heartbeat.EngineName, rule, dsId, s.alertRuleCache, s.targetCache, s.targetsOfAlertRuleCache, s.busiGroupCache, s.alertMuteCache, s.datasourceCache, s.ctx, s.stats)
|
||||
@@ -134,12 +134,12 @@ func (s *Scheduler) syncAlertRules() {
|
||||
for _, dsId := range dsIds {
|
||||
ds := s.datasourceCache.GetById(dsId)
|
||||
if ds == nil {
|
||||
logger.Debugf("datasource %d not found", dsId)
|
||||
logger.Debugf("alert_eval_%d datasource %d not found", rule.Id, dsId)
|
||||
continue
|
||||
}
|
||||
|
||||
if ds.Status != "enabled" {
|
||||
logger.Debugf("datasource %d status is %s", dsId, ds.Status)
|
||||
logger.Debugf("alert_eval_%d datasource %d status is %s", rule.Id, dsId, ds.Status)
|
||||
continue
|
||||
}
|
||||
processor := process.NewProcessor(s.aconf.Heartbeat.EngineName, rule, dsId, s.alertRuleCache, s.targetCache, s.targetsOfAlertRuleCache, s.busiGroupCache, s.alertMuteCache, s.datasourceCache, s.ctx, s.stats)
|
||||
|
||||
@@ -11,6 +11,7 @@ import (
|
||||
"strconv"
|
||||
"strings"
|
||||
"sync"
|
||||
"text/template"
|
||||
"time"
|
||||
|
||||
"github.com/ccfos/nightingale/v6/alert/astats"
|
||||
@@ -24,6 +25,7 @@ import (
|
||||
"github.com/ccfos/nightingale/v6/pkg/poster"
|
||||
promsdk "github.com/ccfos/nightingale/v6/pkg/prom"
|
||||
promql2 "github.com/ccfos/nightingale/v6/pkg/promql"
|
||||
"github.com/ccfos/nightingale/v6/pkg/tplx"
|
||||
"github.com/ccfos/nightingale/v6/pkg/unit"
|
||||
"github.com/ccfos/nightingale/v6/prom"
|
||||
"github.com/prometheus/common/model"
|
||||
@@ -60,6 +62,7 @@ const (
|
||||
CHECK_QUERY = "check_query_config"
|
||||
GET_CLIENT = "get_client"
|
||||
QUERY_DATA = "query_data"
|
||||
EXEC_TEMPLATE = "exec_template"
|
||||
)
|
||||
|
||||
const (
|
||||
@@ -106,7 +109,7 @@ func NewAlertRuleWorker(rule *models.AlertRule, datasourceId int64, Processor *p
|
||||
})
|
||||
|
||||
if err != nil {
|
||||
logger.Errorf("alert rule %s add cron pattern error: %v", arw.Key(), err)
|
||||
logger.Errorf("alert_eval_%d datasource_%d add cron pattern error: %v", arw.Rule.Id, arw.DatasourceId, err)
|
||||
}
|
||||
|
||||
Processor.ScheduleEntry = arw.Scheduler.Entry(entryID)
|
||||
@@ -149,9 +152,9 @@ func (arw *AlertRuleWorker) Eval() {
|
||||
|
||||
defer func() {
|
||||
if len(message) == 0 {
|
||||
logger.Infof("rule_eval:%s finished, duration:%v", arw.Key(), time.Since(begin))
|
||||
logger.Infof("alert_eval_%d datasource_%d finished, duration:%v", arw.Rule.Id, arw.DatasourceId, time.Since(begin))
|
||||
} else {
|
||||
logger.Infof("rule_eval:%s finished, duration:%v, message:%s", arw.Key(), time.Since(begin), message)
|
||||
logger.Warningf("alert_eval_%d datasource_%d finished, duration:%v, message:%s", arw.Rule.Id, arw.DatasourceId, time.Since(begin), message)
|
||||
}
|
||||
}()
|
||||
|
||||
@@ -186,8 +189,7 @@ func (arw *AlertRuleWorker) Eval() {
|
||||
}
|
||||
|
||||
if err != nil {
|
||||
logger.Errorf("rule_eval:%s get anomaly point err:%s", arw.Key(), err.Error())
|
||||
message = "failed to get anomaly points"
|
||||
message = fmt.Sprintf("failed to get anomaly points: %v", err)
|
||||
return
|
||||
}
|
||||
|
||||
@@ -234,7 +236,7 @@ func (arw *AlertRuleWorker) Eval() {
|
||||
}
|
||||
|
||||
func (arw *AlertRuleWorker) Stop() {
|
||||
logger.Infof("rule_eval:%s stopped", arw.Key())
|
||||
logger.Infof("alert_eval_%d datasource_%d stopped", arw.Rule.Id, arw.DatasourceId)
|
||||
close(arw.Quit)
|
||||
c := arw.Scheduler.Stop()
|
||||
<-c.Done()
|
||||
@@ -250,7 +252,7 @@ func (arw *AlertRuleWorker) GetPromAnomalyPoint(ruleConfig string) ([]models.Ano
|
||||
|
||||
var rule *models.PromRuleConfig
|
||||
if err := json.Unmarshal([]byte(ruleConfig), &rule); err != nil {
|
||||
logger.Errorf("rule_eval:%s rule_config:%s, error:%v", arw.Key(), ruleConfig, err)
|
||||
logger.Errorf("alert_eval_%d datasource_%d rule_config:%s, error:%v", arw.Rule.Id, arw.DatasourceId, ruleConfig, err)
|
||||
arw.Processor.Stats.CounterRuleEvalErrorTotal.WithLabelValues(fmt.Sprintf("%v", arw.Processor.DatasourceId()), GET_RULE_CONFIG, arw.Processor.BusiGroupCache.GetNameByBusiGroupId(arw.Rule.GroupId), fmt.Sprintf("%v", arw.Rule.Id)).Inc()
|
||||
arw.Processor.Stats.GaugeQuerySeriesCount.WithLabelValues(
|
||||
fmt.Sprintf("%v", arw.Rule.Id),
|
||||
@@ -261,7 +263,7 @@ func (arw *AlertRuleWorker) GetPromAnomalyPoint(ruleConfig string) ([]models.Ano
|
||||
}
|
||||
|
||||
if rule == nil {
|
||||
logger.Errorf("rule_eval:%s rule_config:%s, error:rule is nil", arw.Key(), ruleConfig)
|
||||
logger.Errorf("alert_eval_%d datasource_%d rule_config:%s, error:rule is nil", arw.Rule.Id, arw.DatasourceId, ruleConfig)
|
||||
arw.Processor.Stats.CounterRuleEvalErrorTotal.WithLabelValues(fmt.Sprintf("%v", arw.Processor.DatasourceId()), GET_RULE_CONFIG, arw.Processor.BusiGroupCache.GetNameByBusiGroupId(arw.Rule.GroupId), fmt.Sprintf("%v", arw.Rule.Id)).Inc()
|
||||
arw.Processor.Stats.GaugeQuerySeriesCount.WithLabelValues(
|
||||
fmt.Sprintf("%v", arw.Rule.Id),
|
||||
@@ -276,7 +278,7 @@ func (arw *AlertRuleWorker) GetPromAnomalyPoint(ruleConfig string) ([]models.Ano
|
||||
readerClient := arw.PromClients.GetCli(arw.DatasourceId)
|
||||
|
||||
if readerClient == nil {
|
||||
logger.Warningf("rule_eval:%s error reader client is nil", arw.Key())
|
||||
logger.Warningf("alert_eval_%d datasource_%d error reader client is nil", arw.Rule.Id, arw.DatasourceId)
|
||||
arw.Processor.Stats.CounterRuleEvalErrorTotal.WithLabelValues(fmt.Sprintf("%v", arw.Processor.DatasourceId()), GET_CLIENT, arw.Processor.BusiGroupCache.GetNameByBusiGroupId(arw.Rule.GroupId), fmt.Sprintf("%v", arw.Rule.Id)).Inc()
|
||||
arw.Processor.Stats.GaugeQuerySeriesCount.WithLabelValues(
|
||||
fmt.Sprintf("%v", arw.Rule.Id),
|
||||
@@ -312,13 +314,13 @@ func (arw *AlertRuleWorker) GetPromAnomalyPoint(ruleConfig string) ([]models.Ano
|
||||
// 无变量
|
||||
promql := strings.TrimSpace(query.PromQl)
|
||||
if promql == "" {
|
||||
logger.Warningf("rule_eval:%s promql is blank", arw.Key())
|
||||
logger.Warningf("alert_eval_%d datasource_%d promql is blank", arw.Rule.Id, arw.DatasourceId)
|
||||
arw.Processor.Stats.CounterRuleEvalErrorTotal.WithLabelValues(fmt.Sprintf("%v", arw.Processor.DatasourceId()), CHECK_QUERY, arw.Processor.BusiGroupCache.GetNameByBusiGroupId(arw.Rule.GroupId), fmt.Sprintf("%v", arw.Rule.Id)).Inc()
|
||||
continue
|
||||
}
|
||||
|
||||
if arw.PromClients.IsNil(arw.DatasourceId) {
|
||||
logger.Warningf("rule_eval:%s error reader client is nil", arw.Key())
|
||||
logger.Warningf("alert_eval_%d datasource_%d error reader client is nil", arw.Rule.Id, arw.DatasourceId)
|
||||
arw.Processor.Stats.CounterRuleEvalErrorTotal.WithLabelValues(fmt.Sprintf("%v", arw.Processor.DatasourceId()), GET_CLIENT, arw.Processor.BusiGroupCache.GetNameByBusiGroupId(arw.Rule.GroupId), fmt.Sprintf("%v", arw.Rule.Id)).Inc()
|
||||
continue
|
||||
}
|
||||
@@ -327,7 +329,7 @@ func (arw *AlertRuleWorker) GetPromAnomalyPoint(ruleConfig string) ([]models.Ano
|
||||
arw.Processor.Stats.CounterQueryDataTotal.WithLabelValues(fmt.Sprintf("%d", arw.DatasourceId), fmt.Sprintf("%d", arw.Rule.Id)).Inc()
|
||||
value, warnings, err := readerClient.Query(context.Background(), promql, time.Now())
|
||||
if err != nil {
|
||||
logger.Errorf("rule_eval:%s promql:%s, error:%v", arw.Key(), promql, err)
|
||||
logger.Errorf("alert_eval_%d datasource_%d promql:%s, error:%v", arw.Rule.Id, arw.DatasourceId, promql, err)
|
||||
arw.Processor.Stats.CounterQueryDataErrorTotal.WithLabelValues(fmt.Sprintf("%d", arw.DatasourceId)).Inc()
|
||||
arw.Processor.Stats.CounterRuleEvalErrorTotal.WithLabelValues(fmt.Sprintf("%v", arw.Processor.DatasourceId()), QUERY_DATA, arw.Processor.BusiGroupCache.GetNameByBusiGroupId(arw.Rule.GroupId), fmt.Sprintf("%v", arw.Rule.Id)).Inc()
|
||||
arw.Processor.Stats.GaugeQuerySeriesCount.WithLabelValues(
|
||||
@@ -339,12 +341,12 @@ func (arw *AlertRuleWorker) GetPromAnomalyPoint(ruleConfig string) ([]models.Ano
|
||||
}
|
||||
|
||||
if len(warnings) > 0 {
|
||||
logger.Errorf("rule_eval:%s promql:%s, warnings:%v", arw.Key(), promql, warnings)
|
||||
logger.Errorf("alert_eval_%d datasource_%d promql:%s, warnings:%v", arw.Rule.Id, arw.DatasourceId, promql, warnings)
|
||||
arw.Processor.Stats.CounterQueryDataErrorTotal.WithLabelValues(fmt.Sprintf("%d", arw.DatasourceId)).Inc()
|
||||
arw.Processor.Stats.CounterRuleEvalErrorTotal.WithLabelValues(fmt.Sprintf("%v", arw.Processor.DatasourceId()), QUERY_DATA, arw.Processor.BusiGroupCache.GetNameByBusiGroupId(arw.Rule.GroupId), fmt.Sprintf("%v", arw.Rule.Id)).Inc()
|
||||
}
|
||||
|
||||
logger.Infof("rule_eval:%s query:%+v, value:%v", arw.Key(), query, value)
|
||||
logger.Infof("alert_eval_%d datasource_%d query:%+v, value:%v", arw.Rule.Id, arw.DatasourceId, query, value)
|
||||
points := models.ConvertAnomalyPoints(value)
|
||||
arw.Processor.Stats.GaugeQuerySeriesCount.WithLabelValues(
|
||||
fmt.Sprintf("%v", arw.Rule.Id),
|
||||
@@ -438,14 +440,14 @@ func (arw *AlertRuleWorker) VarFillingAfterQuery(query models.PromQuery, readerC
|
||||
arw.Processor.Stats.CounterQueryDataTotal.WithLabelValues(fmt.Sprintf("%d", arw.DatasourceId), fmt.Sprintf("%d", arw.Rule.Id)).Inc()
|
||||
value, _, err := readerClient.Query(context.Background(), curQuery, time.Now())
|
||||
if err != nil {
|
||||
logger.Errorf("rule_eval:%s, promql:%s, error:%v", arw.Key(), curQuery, err)
|
||||
logger.Errorf("alert_eval_%d datasource_%d promql:%s, error:%v", arw.Rule.Id, arw.DatasourceId, curQuery, err)
|
||||
continue
|
||||
}
|
||||
seqVals := getSamples(value)
|
||||
// 得到参数变量的所有组合
|
||||
paramPermutation, err := arw.getParamPermutation(param, ParamKeys, varToLabel, query.PromQl, readerClient)
|
||||
if err != nil {
|
||||
logger.Errorf("rule_eval:%s, paramPermutation error:%v", arw.Key(), err)
|
||||
logger.Errorf("alert_eval_%d datasource_%d paramPermutation error:%v", arw.Rule.Id, arw.DatasourceId, err)
|
||||
continue
|
||||
}
|
||||
// 判断哪些参数值符合条件
|
||||
@@ -578,14 +580,14 @@ func (arw *AlertRuleWorker) getParamPermutation(paramVal map[string]models.Param
|
||||
case "host":
|
||||
hostIdents, err := arw.getHostIdents(paramQuery)
|
||||
if err != nil {
|
||||
logger.Errorf("rule_eval:%s, fail to get host idents, error:%v", arw.Key(), err)
|
||||
logger.Errorf("alert_eval_%d datasource_%d fail to get host idents, error:%v", arw.Rule.Id, arw.DatasourceId, err)
|
||||
break
|
||||
}
|
||||
params = hostIdents
|
||||
case "device":
|
||||
deviceIdents, err := arw.getDeviceIdents(paramQuery)
|
||||
if err != nil {
|
||||
logger.Errorf("rule_eval:%s, fail to get device idents, error:%v", arw.Key(), err)
|
||||
logger.Errorf("alert_eval_%d datasource_%d fail to get device idents, error:%v", arw.Rule.Id, arw.DatasourceId, err)
|
||||
break
|
||||
}
|
||||
params = deviceIdents
|
||||
@@ -594,12 +596,12 @@ func (arw *AlertRuleWorker) getParamPermutation(paramVal map[string]models.Param
|
||||
var query []string
|
||||
err := json.Unmarshal(q, &query)
|
||||
if err != nil {
|
||||
logger.Errorf("query:%s fail to unmarshalling into string slice, error:%v", paramQuery.Query, err)
|
||||
logger.Errorf("alert_eval_%d datasource_%d query:%s fail to unmarshalling into string slice, error:%v", arw.Rule.Id, arw.DatasourceId, paramQuery.Query, err)
|
||||
}
|
||||
if len(query) == 0 {
|
||||
paramsKeyAllLabel, err := getParamKeyAllLabel(varToLabel[paramKey], originPromql, readerClient, arw.DatasourceId, arw.Rule.Id, arw.Processor.Stats)
|
||||
if err != nil {
|
||||
logger.Errorf("rule_eval:%s, fail to getParamKeyAllLabel, error:%v query:%s", arw.Key(), err, paramQuery.Query)
|
||||
logger.Errorf("alert_eval_%d datasource_%d fail to getParamKeyAllLabel, error:%v query:%s", arw.Rule.Id, arw.DatasourceId, err, paramQuery.Query)
|
||||
}
|
||||
params = paramsKeyAllLabel
|
||||
} else {
|
||||
@@ -613,7 +615,7 @@ func (arw *AlertRuleWorker) getParamPermutation(paramVal map[string]models.Param
|
||||
return nil, fmt.Errorf("param key: %s, params is empty", paramKey)
|
||||
}
|
||||
|
||||
logger.Infof("rule_eval:%s paramKey: %s, params: %v", arw.Key(), paramKey, params)
|
||||
logger.Infof("alert_eval_%d datasource_%d paramKey: %s, params: %v", arw.Rule.Id, arw.DatasourceId, paramKey, params)
|
||||
paramMap[paramKey] = params
|
||||
}
|
||||
|
||||
@@ -764,7 +766,7 @@ func (arw *AlertRuleWorker) GetHostAnomalyPoint(ruleConfig string) ([]models.Ano
|
||||
|
||||
var rule *models.HostRuleConfig
|
||||
if err := json.Unmarshal([]byte(ruleConfig), &rule); err != nil {
|
||||
logger.Errorf("rule_eval:%s rule_config:%s, error:%v", arw.Key(), ruleConfig, err)
|
||||
logger.Errorf("alert_eval_%d datasource_%d rule_config:%s, error:%v", arw.Rule.Id, arw.DatasourceId, ruleConfig, err)
|
||||
arw.Processor.Stats.CounterRuleEvalErrorTotal.WithLabelValues(fmt.Sprintf("%v", arw.Processor.DatasourceId()), GET_RULE_CONFIG, arw.Processor.BusiGroupCache.GetNameByBusiGroupId(arw.Rule.GroupId), fmt.Sprintf("%v", arw.Rule.Id)).Inc()
|
||||
arw.Processor.Stats.GaugeQuerySeriesCount.WithLabelValues(
|
||||
fmt.Sprintf("%v", arw.Rule.Id),
|
||||
@@ -775,7 +777,7 @@ func (arw *AlertRuleWorker) GetHostAnomalyPoint(ruleConfig string) ([]models.Ano
|
||||
}
|
||||
|
||||
if rule == nil {
|
||||
logger.Errorf("rule_eval:%s rule_config:%s, error:rule is nil", arw.Key(), ruleConfig)
|
||||
logger.Errorf("alert_eval_%d datasource_%d rule_config:%s, error:rule is nil", arw.Rule.Id, arw.DatasourceId, ruleConfig)
|
||||
arw.Processor.Stats.CounterRuleEvalErrorTotal.WithLabelValues(fmt.Sprintf("%v", arw.Processor.DatasourceId()), GET_RULE_CONFIG, arw.Processor.BusiGroupCache.GetNameByBusiGroupId(arw.Rule.GroupId), fmt.Sprintf("%v", arw.Rule.Id)).Inc()
|
||||
arw.Processor.Stats.GaugeQuerySeriesCount.WithLabelValues(
|
||||
fmt.Sprintf("%v", arw.Rule.Id),
|
||||
@@ -798,7 +800,7 @@ func (arw *AlertRuleWorker) GetHostAnomalyPoint(ruleConfig string) ([]models.Ano
|
||||
// 如果是中心节点, 将不再上报数据的主机 engineName 为空的机器,也加入到 targets 中
|
||||
missEngineIdents, exists = arw.Processor.TargetsOfAlertRuleCache.Get("", arw.Rule.Id)
|
||||
if !exists {
|
||||
logger.Debugf("rule_eval:%s targets not found engineName:%s", arw.Key(), arw.Processor.EngineName)
|
||||
logger.Debugf("alert_eval_%d datasource_%d targets not found engineName:%s", arw.Rule.Id, arw.DatasourceId, arw.Processor.EngineName)
|
||||
arw.Processor.Stats.CounterRuleEvalErrorTotal.WithLabelValues(fmt.Sprintf("%v", arw.Processor.DatasourceId()), QUERY_DATA, arw.Processor.BusiGroupCache.GetNameByBusiGroupId(arw.Rule.GroupId), fmt.Sprintf("%v", arw.Rule.Id)).Inc()
|
||||
}
|
||||
}
|
||||
@@ -806,7 +808,7 @@ func (arw *AlertRuleWorker) GetHostAnomalyPoint(ruleConfig string) ([]models.Ano
|
||||
|
||||
engineIdents, exists = arw.Processor.TargetsOfAlertRuleCache.Get(arw.Processor.EngineName, arw.Rule.Id)
|
||||
if !exists {
|
||||
logger.Warningf("rule_eval:%s targets not found engineName:%s", arw.Key(), arw.Processor.EngineName)
|
||||
logger.Warningf("alert_eval_%d datasource_%d targets not found engineName:%s", arw.Rule.Id, arw.DatasourceId, arw.Processor.EngineName)
|
||||
arw.Processor.Stats.CounterRuleEvalErrorTotal.WithLabelValues(fmt.Sprintf("%v", arw.Processor.DatasourceId()), QUERY_DATA, arw.Processor.BusiGroupCache.GetNameByBusiGroupId(arw.Rule.GroupId), fmt.Sprintf("%v", arw.Rule.Id)).Inc()
|
||||
}
|
||||
idents = append(idents, engineIdents...)
|
||||
@@ -833,7 +835,7 @@ func (arw *AlertRuleWorker) GetHostAnomalyPoint(ruleConfig string) ([]models.Ano
|
||||
"",
|
||||
).Set(float64(len(missTargets)))
|
||||
|
||||
logger.Debugf("rule_eval:%s missTargets:%v", arw.Key(), missTargets)
|
||||
logger.Debugf("alert_eval_%d datasource_%d missTargets:%v", arw.Rule.Id, arw.DatasourceId, missTargets)
|
||||
targets := arw.Processor.TargetCache.Gets(missTargets)
|
||||
for _, target := range targets {
|
||||
m := make(map[string]string)
|
||||
@@ -842,7 +844,7 @@ func (arw *AlertRuleWorker) GetHostAnomalyPoint(ruleConfig string) ([]models.Ano
|
||||
}
|
||||
m["ident"] = target.Ident
|
||||
|
||||
lst = append(lst, models.NewAnomalyPoint(trigger.Type, m, now, float64(now-target.UpdateAt), trigger.Severity))
|
||||
lst = append(lst, models.NewAnomalyPoint(trigger.Type, m, now, float64(now-target.BeatTime), trigger.Severity))
|
||||
}
|
||||
case "offset":
|
||||
idents, exists := arw.Processor.TargetsOfAlertRuleCache.Get(arw.Processor.EngineName, arw.Rule.Id)
|
||||
@@ -852,7 +854,7 @@ func (arw *AlertRuleWorker) GetHostAnomalyPoint(ruleConfig string) ([]models.Ano
|
||||
fmt.Sprintf("%v", arw.Processor.DatasourceId()),
|
||||
"",
|
||||
).Set(0)
|
||||
logger.Warningf("rule_eval:%s targets not found", arw.Key())
|
||||
logger.Warningf("alert_eval_%d datasource_%d targets not found", arw.Rule.Id, arw.DatasourceId)
|
||||
arw.Processor.Stats.CounterRuleEvalErrorTotal.WithLabelValues(fmt.Sprintf("%v", arw.Processor.DatasourceId()), QUERY_DATA, arw.Processor.BusiGroupCache.GetNameByBusiGroupId(arw.Rule.GroupId), fmt.Sprintf("%v", arw.Rule.Id)).Inc()
|
||||
continue
|
||||
}
|
||||
@@ -871,7 +873,7 @@ func (arw *AlertRuleWorker) GetHostAnomalyPoint(ruleConfig string) ([]models.Ano
|
||||
continue
|
||||
}
|
||||
if target, exists := targetMap[ident]; exists {
|
||||
if now-target.UpdateAt > 120 {
|
||||
if now-target.BeatTime > 120 {
|
||||
// means this target is not a active host, do not check offset
|
||||
continue
|
||||
}
|
||||
@@ -883,7 +885,7 @@ func (arw *AlertRuleWorker) GetHostAnomalyPoint(ruleConfig string) ([]models.Ano
|
||||
}
|
||||
}
|
||||
|
||||
logger.Debugf("rule_eval:%s offsetIdents:%v", arw.Key(), offsetIdents)
|
||||
logger.Debugf("alert_eval_%d datasource_%d offsetIdents:%v", arw.Rule.Id, arw.DatasourceId, offsetIdents)
|
||||
arw.Processor.Stats.GaugeQuerySeriesCount.WithLabelValues(
|
||||
fmt.Sprintf("%v", arw.Rule.Id),
|
||||
fmt.Sprintf("%v", arw.Processor.DatasourceId()),
|
||||
@@ -910,7 +912,7 @@ func (arw *AlertRuleWorker) GetHostAnomalyPoint(ruleConfig string) ([]models.Ano
|
||||
fmt.Sprintf("%v", arw.Processor.DatasourceId()),
|
||||
"",
|
||||
).Set(0)
|
||||
logger.Warningf("rule_eval:%s targets not found", arw.Key())
|
||||
logger.Warningf("alert_eval_%d datasource_%d targets not found", arw.Rule.Id, arw.DatasourceId)
|
||||
arw.Processor.Stats.CounterRuleEvalErrorTotal.WithLabelValues(fmt.Sprintf("%v", arw.Processor.DatasourceId()), QUERY_DATA, arw.Processor.BusiGroupCache.GetNameByBusiGroupId(arw.Rule.GroupId), fmt.Sprintf("%v", arw.Rule.Id)).Inc()
|
||||
continue
|
||||
}
|
||||
@@ -922,7 +924,7 @@ func (arw *AlertRuleWorker) GetHostAnomalyPoint(ruleConfig string) ([]models.Ano
|
||||
missTargets = append(missTargets, ident)
|
||||
}
|
||||
}
|
||||
logger.Debugf("rule_eval:%s missTargets:%v", arw.Key(), missTargets)
|
||||
logger.Debugf("alert_eval_%d datasource_%d missTargets:%v", arw.Rule.Id, arw.DatasourceId, missTargets)
|
||||
arw.Processor.Stats.GaugeQuerySeriesCount.WithLabelValues(
|
||||
fmt.Sprintf("%v", arw.Rule.Id),
|
||||
fmt.Sprintf("%v", arw.Processor.DatasourceId()),
|
||||
@@ -1118,7 +1120,7 @@ func ProcessJoins(ruleId int64, trigger models.Trigger, seriesTagIndexes map[str
|
||||
|
||||
// 有 join 条件,按条件依次合并
|
||||
if len(seriesTagIndexes) < len(trigger.Joins)+1 {
|
||||
logger.Errorf("rule_eval rid:%d queries' count: %d not match join condition's count: %d", ruleId, len(seriesTagIndexes), len(trigger.Joins))
|
||||
logger.Errorf("alert_eval_%d queries' count: %d not match join condition's count: %d", ruleId, len(seriesTagIndexes), len(trigger.Joins))
|
||||
return nil
|
||||
}
|
||||
|
||||
@@ -1154,7 +1156,7 @@ func ProcessJoins(ruleId int64, trigger models.Trigger, seriesTagIndexes map[str
|
||||
lastRehashed = exclude(curRehashed, lastRehashed)
|
||||
last = flatten(lastRehashed)
|
||||
default:
|
||||
logger.Warningf("rule_eval rid:%d join type:%s not support", ruleId, trigger.Joins[i].JoinType)
|
||||
logger.Warningf("alert_eval_%d join type:%s not support", ruleId, trigger.Joins[i].JoinType)
|
||||
}
|
||||
}
|
||||
return last
|
||||
@@ -1274,7 +1276,7 @@ func (arw *AlertRuleWorker) VarFillingBeforeQuery(query models.PromQuery, reader
|
||||
// 得到参数变量的所有组合
|
||||
paramPermutation, err := arw.getParamPermutation(param, ParamKeys, varToLabel, query.PromQl, readerClient)
|
||||
if err != nil {
|
||||
logger.Errorf("rule_eval:%s, paramPermutation error:%v", arw.Key(), err)
|
||||
logger.Errorf("alert_eval_%d datasource_%d paramPermutation error:%v", arw.Rule.Id, arw.DatasourceId, err)
|
||||
continue
|
||||
}
|
||||
|
||||
@@ -1302,10 +1304,10 @@ func (arw *AlertRuleWorker) VarFillingBeforeQuery(query models.PromQuery, reader
|
||||
arw.Processor.Stats.CounterQueryDataTotal.WithLabelValues(fmt.Sprintf("%d", arw.DatasourceId), fmt.Sprintf("%d", arw.Rule.Id)).Inc()
|
||||
value, _, err := readerClient.Query(context.Background(), promql, time.Now())
|
||||
if err != nil {
|
||||
logger.Errorf("rule_eval:%s, promql:%s, error:%v", arw.Key(), promql, err)
|
||||
logger.Errorf("alert_eval_%d datasource_%d promql:%s, error:%v", arw.Rule.Id, arw.DatasourceId, promql, err)
|
||||
return
|
||||
}
|
||||
logger.Infof("rule_eval:%s, promql:%s, value:%+v", arw.Key(), promql, value)
|
||||
logger.Infof("alert_eval_%d datasource_%d promql:%s, value:%+v", arw.Rule.Id, arw.DatasourceId, promql, value)
|
||||
|
||||
points := models.ConvertAnomalyPoints(value)
|
||||
if len(points) == 0 {
|
||||
@@ -1444,7 +1446,7 @@ func (arw *AlertRuleWorker) GetAnomalyPoint(rule *models.AlertRule, dsId int64)
|
||||
recoverPoints := []models.AnomalyPoint{}
|
||||
ruleConfig := strings.TrimSpace(rule.RuleConfig)
|
||||
if ruleConfig == "" {
|
||||
logger.Warningf("rule_eval:%d ruleConfig is blank", rule.Id)
|
||||
logger.Warningf("alert_eval_%d datasource_%d ruleConfig is blank", rule.Id, dsId)
|
||||
arw.Processor.Stats.CounterRuleEvalErrorTotal.WithLabelValues(fmt.Sprintf("%v", arw.Processor.DatasourceId()), GET_RULE_CONFIG, arw.Processor.BusiGroupCache.GetNameByBusiGroupId(arw.Rule.GroupId), fmt.Sprintf("%v", arw.Rule.Id)).Inc()
|
||||
arw.Processor.Stats.GaugeQuerySeriesCount.WithLabelValues(
|
||||
fmt.Sprintf("%v", arw.Rule.Id),
|
||||
@@ -1452,15 +1454,15 @@ func (arw *AlertRuleWorker) GetAnomalyPoint(rule *models.AlertRule, dsId int64)
|
||||
"",
|
||||
).Set(0)
|
||||
|
||||
return points, recoverPoints, fmt.Errorf("rule_eval:%d ruleConfig is blank", rule.Id)
|
||||
return points, recoverPoints, fmt.Errorf("alert_eval_%d datasource_%d ruleConfig is blank", rule.Id, dsId)
|
||||
}
|
||||
|
||||
var ruleQuery models.RuleQuery
|
||||
err := json.Unmarshal([]byte(ruleConfig), &ruleQuery)
|
||||
if err != nil {
|
||||
logger.Warningf("rule_eval:%d promql parse error:%s", rule.Id, err.Error())
|
||||
logger.Warningf("alert_eval_%d datasource_%d promql parse error:%s", rule.Id, dsId, err.Error())
|
||||
arw.Processor.Stats.CounterRuleEvalErrorTotal.WithLabelValues(fmt.Sprintf("%v", arw.Processor.DatasourceId()), GET_RULE_CONFIG, arw.Processor.BusiGroupCache.GetNameByBusiGroupId(arw.Rule.GroupId), fmt.Sprintf("%v", arw.Rule.Id)).Inc()
|
||||
return points, recoverPoints, fmt.Errorf("rule_eval:%d promql parse error:%s", rule.Id, err.Error())
|
||||
return points, recoverPoints, fmt.Errorf("alert_eval_%d datasource_%d promql parse error:%s", rule.Id, dsId, err.Error())
|
||||
}
|
||||
|
||||
arw.Inhibit = ruleQuery.Inhibit
|
||||
@@ -1472,7 +1474,7 @@ func (arw *AlertRuleWorker) GetAnomalyPoint(rule *models.AlertRule, dsId int64)
|
||||
|
||||
plug, exists := dscache.DsCache.Get(rule.Cate, dsId)
|
||||
if !exists {
|
||||
logger.Warningf("rule_eval rid:%d datasource:%d not exists", rule.Id, dsId)
|
||||
logger.Warningf("alert_eval_%d datasource_%d not exists", rule.Id, dsId)
|
||||
arw.Processor.Stats.CounterRuleEvalErrorTotal.WithLabelValues(fmt.Sprintf("%v", arw.Processor.DatasourceId()), GET_CLIENT, arw.Processor.BusiGroupCache.GetNameByBusiGroupId(arw.Rule.GroupId), fmt.Sprintf("%v", arw.Rule.Id)).Inc()
|
||||
|
||||
arw.Processor.Stats.GaugeQuerySeriesCount.WithLabelValues(
|
||||
@@ -1481,14 +1483,24 @@ func (arw *AlertRuleWorker) GetAnomalyPoint(rule *models.AlertRule, dsId int64)
|
||||
fmt.Sprintf("%v", i),
|
||||
).Set(-2)
|
||||
|
||||
return points, recoverPoints, fmt.Errorf("rule_eval:%d datasource:%d not exists", rule.Id, dsId)
|
||||
return points, recoverPoints, fmt.Errorf("alert_eval_%d datasource_%d not exists", rule.Id, dsId)
|
||||
}
|
||||
|
||||
if err = ExecuteQueryTemplate(rule.Cate, query, nil); err != nil {
|
||||
logger.Warningf("alert_eval_%d datasource_%d execute query template error: %v", rule.Id, dsId, err)
|
||||
arw.Processor.Stats.CounterRuleEvalErrorTotal.WithLabelValues(fmt.Sprintf("%v", arw.Processor.DatasourceId()), EXEC_TEMPLATE, arw.Processor.BusiGroupCache.GetNameByBusiGroupId(arw.Rule.GroupId), fmt.Sprintf("%v", arw.Rule.Id)).Inc()
|
||||
arw.Processor.Stats.GaugeQuerySeriesCount.WithLabelValues(
|
||||
fmt.Sprintf("%v", arw.Rule.Id),
|
||||
fmt.Sprintf("%v", arw.Processor.DatasourceId()),
|
||||
fmt.Sprintf("%v", i),
|
||||
).Set(-3)
|
||||
}
|
||||
|
||||
ctx := context.WithValue(context.Background(), "delay", int64(rule.Delay))
|
||||
series, err := plug.QueryData(ctx, query)
|
||||
arw.Processor.Stats.CounterQueryDataTotal.WithLabelValues(fmt.Sprintf("%d", arw.DatasourceId), fmt.Sprintf("%d", rule.Id)).Inc()
|
||||
if err != nil {
|
||||
logger.Warningf("rule_eval rid:%d query data error: %v", rule.Id, err)
|
||||
logger.Warningf("alert_eval_%d datasource_%d query data error: %v", rule.Id, dsId, err)
|
||||
arw.Processor.Stats.CounterRuleEvalErrorTotal.WithLabelValues(fmt.Sprintf("%v", arw.Processor.DatasourceId()), GET_CLIENT, arw.Processor.BusiGroupCache.GetNameByBusiGroupId(arw.Rule.GroupId), fmt.Sprintf("%v", arw.Rule.Id)).Inc()
|
||||
arw.Processor.Stats.GaugeQuerySeriesCount.WithLabelValues(
|
||||
fmt.Sprintf("%v", arw.Rule.Id),
|
||||
@@ -1496,7 +1508,7 @@ func (arw *AlertRuleWorker) GetAnomalyPoint(rule *models.AlertRule, dsId int64)
|
||||
fmt.Sprintf("%v", i),
|
||||
).Set(-1)
|
||||
|
||||
return points, recoverPoints, fmt.Errorf("rule_eval:%d query data error: %v", rule.Id, err)
|
||||
return points, recoverPoints, fmt.Errorf("alert_eval_%d datasource_%d query data error: %v", rule.Id, dsId, err)
|
||||
}
|
||||
|
||||
arw.Processor.Stats.GaugeQuerySeriesCount.WithLabelValues(
|
||||
@@ -1506,7 +1518,7 @@ func (arw *AlertRuleWorker) GetAnomalyPoint(rule *models.AlertRule, dsId int64)
|
||||
).Set(float64(len(series)))
|
||||
|
||||
// 此条日志很重要,是告警判断的现场值
|
||||
logger.Infof("rule_eval rid:%d req:%+v resp:%v", rule.Id, query, series)
|
||||
logger.Infof("alert_eval_%d datasource_%d req:%+v resp:%v", rule.Id, dsId, query, series)
|
||||
for i := 0; i < len(series); i++ {
|
||||
seriesHash := hash.GetHash(series[i].Metric, series[i].Ref)
|
||||
tagHash := hash.GetTagHash(series[i].Metric)
|
||||
@@ -1520,7 +1532,7 @@ func (arw *AlertRuleWorker) GetAnomalyPoint(rule *models.AlertRule, dsId int64)
|
||||
}
|
||||
ref, err := GetQueryRef(query)
|
||||
if err != nil {
|
||||
logger.Warningf("rule_eval rid:%d query:%+v get ref error:%s", rule.Id, query, err.Error())
|
||||
logger.Warningf("alert_eval_%d datasource_%d query:%+v get ref error:%s", rule.Id, dsId, query, err.Error())
|
||||
continue
|
||||
}
|
||||
seriesTagIndexes[ref] = seriesTagIndex
|
||||
@@ -1530,7 +1542,7 @@ func (arw *AlertRuleWorker) GetAnomalyPoint(rule *models.AlertRule, dsId int64)
|
||||
for _, query := range ruleQuery.Queries {
|
||||
ref, unit, err := GetQueryRefAndUnit(query)
|
||||
if err != nil {
|
||||
logger.Warningf("rule_eval rid:%d query:%+v get ref and unit error:%s", rule.Id, query, err.Error())
|
||||
logger.Warningf("alert_eval_%d datasource_%d query:%+v get ref and unit error:%s", rule.Id, dsId, query, err.Error())
|
||||
continue
|
||||
}
|
||||
unitMap[ref] = unit
|
||||
@@ -1553,12 +1565,12 @@ func (arw *AlertRuleWorker) GetAnomalyPoint(rule *models.AlertRule, dsId int64)
|
||||
for _, seriesHash := range seriesHash {
|
||||
series, exists := seriesStore[seriesHash]
|
||||
if !exists {
|
||||
logger.Warningf("rule_eval rid:%d series:%+v not found", rule.Id, series)
|
||||
logger.Warningf("alert_eval_%d datasource_%d series:%+v not found", rule.Id, dsId, series)
|
||||
continue
|
||||
}
|
||||
t, v, exists := series.Last()
|
||||
if !exists {
|
||||
logger.Warningf("rule_eval rid:%d series:%+v value not found", rule.Id, series)
|
||||
logger.Warningf("alert_eval_%d datasource_%d series:%+v value not found", rule.Id, dsId, series)
|
||||
continue
|
||||
}
|
||||
|
||||
@@ -1589,12 +1601,12 @@ func (arw *AlertRuleWorker) GetAnomalyPoint(rule *models.AlertRule, dsId int64)
|
||||
ts = int64(t)
|
||||
sample = series
|
||||
value = v
|
||||
logger.Infof("rule_eval rid:%d origin series labels:%+v", rule.Id, series.Metric)
|
||||
logger.Infof("alert_eval_%d datasource_%d origin series labels:%+v", rule.Id, dsId, series.Metric)
|
||||
}
|
||||
|
||||
isTriggered := parser.CalcWithRid(trigger.Exp, m, rule.Id)
|
||||
// 此条日志很重要,是告警判断的现场值
|
||||
logger.Infof("rule_eval rid:%d trigger:%+v exp:%s res:%v m:%v", rule.Id, trigger, trigger.Exp, isTriggered, m)
|
||||
logger.Infof("alert_eval_%d datasource_%d trigger:%+v exp:%s res:%v m:%v", rule.Id, dsId, trigger, trigger.Exp, isTriggered, m)
|
||||
|
||||
var values string
|
||||
for k, v := range m {
|
||||
@@ -1602,11 +1614,15 @@ func (arw *AlertRuleWorker) GetAnomalyPoint(rule *models.AlertRule, dsId int64)
|
||||
continue
|
||||
}
|
||||
|
||||
switch v.(type) {
|
||||
case float64:
|
||||
values += fmt.Sprintf("%s:%.3f ", k, v)
|
||||
case string:
|
||||
values += fmt.Sprintf("%s:%s ", k, v)
|
||||
if u, exists := valuesUnitMap[k]; exists { // 配置了单位,优先用配置了单位的值
|
||||
values += fmt.Sprintf("%s:%s ", k, u.Text)
|
||||
} else {
|
||||
switch v.(type) {
|
||||
case float64:
|
||||
values += fmt.Sprintf("%s:%.3f ", k, v)
|
||||
case string:
|
||||
values += fmt.Sprintf("%s:%s ", k, v)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1663,7 +1679,7 @@ func (arw *AlertRuleWorker) GetAnomalyPoint(rule *models.AlertRule, dsId int64)
|
||||
|
||||
// 检查是否超过 resolve_after 时间
|
||||
if now-int64(lastTs) > int64(ruleQuery.NodataTrigger.ResolveAfter) {
|
||||
logger.Infof("rule_eval rid:%d series:%+v resolve after %d seconds now:%d lastTs:%d", rule.Id, lastSeries, ruleQuery.NodataTrigger.ResolveAfter, now, int64(lastTs))
|
||||
logger.Infof("alert_eval_%d datasource_%d series:%+v resolve after %d seconds now:%d lastTs:%d", rule.Id, dsId, lastSeries, ruleQuery.NodataTrigger.ResolveAfter, now, int64(lastTs))
|
||||
delete(arw.LastSeriesStore, hash)
|
||||
continue
|
||||
}
|
||||
@@ -1684,7 +1700,7 @@ func (arw *AlertRuleWorker) GetAnomalyPoint(rule *models.AlertRule, dsId int64)
|
||||
TriggerType: models.TriggerTypeNodata,
|
||||
}
|
||||
points = append(points, point)
|
||||
logger.Infof("rule_eval rid:%d nodata point:%+v", rule.Id, point)
|
||||
logger.Infof("alert_eval_%d datasource_%d nodata point:%+v", rule.Id, dsId, point)
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1699,3 +1715,61 @@ func (arw *AlertRuleWorker) GetAnomalyPoint(rule *models.AlertRule, dsId int64)
|
||||
|
||||
return points, recoverPoints, nil
|
||||
}
|
||||
|
||||
// ExecuteQueryTemplate 根据数据源类型对 Query 进行模板渲染处理
|
||||
// cate: 数据源类别,如 "mysql", "pgsql" 等
|
||||
// query: 查询对象,如果是数据库类型的数据源,会处理其中的 sql 字段
|
||||
// data: 模板数据对象,如果为 nil 则使用空结构体(不支持变量渲染),如果不为 nil 则使用传入的数据(支持变量渲染)
|
||||
func ExecuteQueryTemplate(cate string, query interface{}, data interface{}) error {
|
||||
// 检查 query 是否是 map,且包含 sql 字段
|
||||
queryMap, ok := query.(map[string]interface{})
|
||||
if !ok {
|
||||
return nil
|
||||
}
|
||||
|
||||
sqlVal, exists := queryMap["sql"]
|
||||
if !exists {
|
||||
return nil
|
||||
}
|
||||
|
||||
sqlStr, ok := sqlVal.(string)
|
||||
if !ok {
|
||||
return nil
|
||||
}
|
||||
|
||||
// 调用 ExecuteSqlTemplate 处理 sql 字段
|
||||
processedSQL, err := ExecuteSqlTemplate(sqlStr, data)
|
||||
if err != nil {
|
||||
return fmt.Errorf("execute sql template error: %w", err)
|
||||
}
|
||||
|
||||
// 更新 query 中的 sql 字段
|
||||
queryMap["sql"] = processedSQL
|
||||
return nil
|
||||
}
|
||||
|
||||
// ExecuteSqlTemplate 执行 query 中的 golang 模板语法函数
|
||||
// query: 要处理的 query 字符串
|
||||
// data: 模板数据对象,如果为 nil 则使用空结构体(不支持变量渲染),如果不为 nil 则使用传入的数据(支持变量渲染)
|
||||
func ExecuteSqlTemplate(query string, data interface{}) (string, error) {
|
||||
if !strings.Contains(query, "{{") || !strings.Contains(query, "}}") {
|
||||
return query, nil
|
||||
}
|
||||
|
||||
tmpl, err := template.New("query").Funcs(tplx.TemplateFuncMap).Parse(query)
|
||||
if err != nil {
|
||||
return "", fmt.Errorf("query tmpl parse error: %w", err)
|
||||
}
|
||||
|
||||
var buf strings.Builder
|
||||
templateData := data
|
||||
if templateData == nil {
|
||||
templateData = struct{}{}
|
||||
}
|
||||
|
||||
if err := tmpl.Execute(&buf, templateData); err != nil {
|
||||
return "", fmt.Errorf("query tmpl execute error: %w", err)
|
||||
}
|
||||
|
||||
return buf.String(), nil
|
||||
}
|
||||
|
||||
@@ -41,8 +41,28 @@ func IsMuted(rule *models.AlertRule, event *models.AlertCurEvent, targetCache *m
|
||||
|
||||
// TimeSpanMuteStrategy 根据规则配置的告警生效时间段过滤,如果产生的告警不在规则配置的告警生效时间段内,则不告警,即被mute
|
||||
// 时间范围,左闭右开,默认范围:00:00-24:00
|
||||
// 如果规则配置了时区,则在该时区下进行时间判断;如果时区为空,则使用系统时区
|
||||
func TimeSpanMuteStrategy(rule *models.AlertRule, event *models.AlertCurEvent) bool {
|
||||
tm := time.Unix(event.TriggerTime, 0)
|
||||
// 确定使用的时区
|
||||
var targetLoc *time.Location
|
||||
var err error
|
||||
|
||||
timezone := rule.TimeZone
|
||||
if timezone == "" {
|
||||
// 如果时区为空,使用系统时区(保持原有逻辑)
|
||||
targetLoc = time.Local
|
||||
} else {
|
||||
// 加载规则配置的时区
|
||||
targetLoc, err = time.LoadLocation(timezone)
|
||||
if err != nil {
|
||||
// 如果时区加载失败,记录错误并使用系统时区
|
||||
logger.Warningf("Failed to load timezone %s for rule %d, using system timezone: %v", timezone, rule.Id, err)
|
||||
targetLoc = time.Local
|
||||
}
|
||||
}
|
||||
|
||||
// 将触发时间转换到目标时区
|
||||
tm := time.Unix(event.TriggerTime, 0).In(targetLoc)
|
||||
triggerTime := tm.Format("15:04")
|
||||
triggerWeek := strconv.Itoa(int(tm.Weekday()))
|
||||
|
||||
@@ -102,7 +122,7 @@ func IdentNotExistsMuteStrategy(rule *models.AlertRule, event *models.AlertCurEv
|
||||
// 如果是target_up的告警,且ident已经不存在了,直接过滤掉
|
||||
// 这里的判断有点太粗暴了,但是目前没有更好的办法
|
||||
if !exists && strings.Contains(rule.PromQl, "target_up") {
|
||||
logger.Debugf("[%s] mute: rule_eval:%d cluster:%s ident:%s", "IdentNotExistsMuteStrategy", rule.Id, event.Cluster, ident)
|
||||
logger.Debugf("alert_eval_%d [IdentNotExistsMuteStrategy] mute: cluster:%s ident:%s", rule.Id, event.Cluster, ident)
|
||||
return true
|
||||
}
|
||||
return false
|
||||
@@ -124,7 +144,7 @@ func BgNotMatchMuteStrategy(rule *models.AlertRule, event *models.AlertCurEvent,
|
||||
// 对于包含ident的告警事件,check一下ident所属bg和rule所属bg是否相同
|
||||
// 如果告警规则选择了只在本BG生效,那其他BG的机器就不能因此规则产生告警
|
||||
if exists && !target.MatchGroupId(rule.GroupId) {
|
||||
logger.Debugf("[%s] mute: rule_eval:%d cluster:%s", "BgNotMatchMuteStrategy", rule.Id, event.Cluster)
|
||||
logger.Debugf("alert_eval_%d [BgNotMatchMuteStrategy] mute: cluster:%s", rule.Id, event.Cluster)
|
||||
return true
|
||||
}
|
||||
return false
|
||||
|
||||
380
alert/pipeline/engine/engine.go
Normal file
380
alert/pipeline/engine/engine.go
Normal file
@@ -0,0 +1,380 @@
|
||||
package engine
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"time"
|
||||
|
||||
"github.com/ccfos/nightingale/v6/models"
|
||||
"github.com/ccfos/nightingale/v6/pkg/ctx"
|
||||
"github.com/google/uuid"
|
||||
"github.com/toolkits/pkg/logger"
|
||||
)
|
||||
|
||||
type WorkflowEngine struct {
|
||||
ctx *ctx.Context
|
||||
}
|
||||
|
||||
func NewWorkflowEngine(c *ctx.Context) *WorkflowEngine {
|
||||
return &WorkflowEngine{ctx: c}
|
||||
}
|
||||
|
||||
func (e *WorkflowEngine) Execute(pipeline *models.EventPipeline, event *models.AlertCurEvent, triggerCtx *models.WorkflowTriggerContext) (*models.AlertCurEvent, *models.WorkflowResult, error) {
|
||||
startTime := time.Now()
|
||||
|
||||
wfCtx := e.initWorkflowContext(pipeline, event, triggerCtx)
|
||||
|
||||
nodes := pipeline.GetWorkflowNodes()
|
||||
connections := pipeline.GetWorkflowConnections()
|
||||
|
||||
if len(nodes) == 0 {
|
||||
return event, &models.WorkflowResult{
|
||||
Event: event,
|
||||
Status: models.ExecutionStatusSuccess,
|
||||
Message: "no nodes to execute",
|
||||
}, nil
|
||||
}
|
||||
|
||||
nodeMap := make(map[string]*models.WorkflowNode)
|
||||
for i := range nodes {
|
||||
if nodes[i].RetryInterval == 0 {
|
||||
nodes[i].RetryInterval = 1
|
||||
}
|
||||
|
||||
if nodes[i].MaxRetries == 0 {
|
||||
nodes[i].MaxRetries = 1
|
||||
}
|
||||
|
||||
nodeMap[nodes[i].ID] = &nodes[i]
|
||||
}
|
||||
|
||||
result := e.executeDAG(nodeMap, connections, wfCtx)
|
||||
result.Event = wfCtx.Event
|
||||
|
||||
duration := time.Since(startTime).Milliseconds()
|
||||
|
||||
if triggerCtx != nil && triggerCtx.Mode != "" {
|
||||
e.saveExecutionRecord(pipeline, wfCtx, result, triggerCtx, startTime.Unix(), duration)
|
||||
}
|
||||
|
||||
return wfCtx.Event, result, nil
|
||||
}
|
||||
|
||||
func (e *WorkflowEngine) initWorkflowContext(pipeline *models.EventPipeline, event *models.AlertCurEvent, triggerCtx *models.WorkflowTriggerContext) *models.WorkflowContext {
|
||||
// 合并输入参数
|
||||
inputs := pipeline.GetInputsMap()
|
||||
if triggerCtx != nil && triggerCtx.InputsOverrides != nil {
|
||||
for k, v := range triggerCtx.InputsOverrides {
|
||||
inputs[k] = v
|
||||
}
|
||||
}
|
||||
|
||||
metadata := map[string]string{
|
||||
"start_time": fmt.Sprintf("%d", time.Now().Unix()),
|
||||
"pipeline_id": fmt.Sprintf("%d", pipeline.ID),
|
||||
}
|
||||
|
||||
// 是否启用流式输出
|
||||
stream := false
|
||||
if triggerCtx != nil {
|
||||
metadata["request_id"] = triggerCtx.RequestID
|
||||
metadata["trigger_mode"] = triggerCtx.Mode
|
||||
metadata["trigger_by"] = triggerCtx.TriggerBy
|
||||
stream = triggerCtx.Stream
|
||||
}
|
||||
|
||||
return &models.WorkflowContext{
|
||||
Event: event,
|
||||
Inputs: inputs,
|
||||
Vars: make(map[string]interface{}), // 初始化空的 Vars,供节点间传递数据
|
||||
Metadata: metadata,
|
||||
Stream: stream,
|
||||
}
|
||||
}
|
||||
|
||||
// executeDAG 使用 Kahn 算法执行 DAG
|
||||
func (e *WorkflowEngine) executeDAG(nodeMap map[string]*models.WorkflowNode, connections models.Connections, wfCtx *models.WorkflowContext) *models.WorkflowResult {
|
||||
result := &models.WorkflowResult{
|
||||
Status: models.ExecutionStatusSuccess,
|
||||
NodeResults: make([]*models.NodeExecutionResult, 0),
|
||||
Stream: wfCtx.Stream, // 从上下文继承流式输出设置
|
||||
}
|
||||
|
||||
// 计算每个节点的入度
|
||||
inDegree := make(map[string]int)
|
||||
for nodeID := range nodeMap {
|
||||
inDegree[nodeID] = 0
|
||||
}
|
||||
|
||||
// 遍历连接,计算入度
|
||||
for _, nodeConns := range connections {
|
||||
for _, targets := range nodeConns.Main {
|
||||
for _, target := range targets {
|
||||
inDegree[target.Node]++
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// 找到所有入度为 0 的节点(起始节点)
|
||||
queue := make([]string, 0)
|
||||
for nodeID, degree := range inDegree {
|
||||
if degree == 0 {
|
||||
queue = append(queue, nodeID)
|
||||
}
|
||||
}
|
||||
|
||||
// 如果没有起始节点,说明存在循环依赖
|
||||
if len(queue) == 0 && len(nodeMap) > 0 {
|
||||
result.Status = models.ExecutionStatusFailed
|
||||
result.Message = "workflow has circular dependency"
|
||||
return result
|
||||
}
|
||||
|
||||
// 记录已执行的节点
|
||||
executed := make(map[string]bool)
|
||||
// 记录节点的分支选择结果
|
||||
branchResults := make(map[string]*int)
|
||||
|
||||
for len(queue) > 0 {
|
||||
// 取出队首节点
|
||||
nodeID := queue[0]
|
||||
queue = queue[1:]
|
||||
|
||||
// 检查是否已执行
|
||||
if executed[nodeID] {
|
||||
continue
|
||||
}
|
||||
|
||||
node, exists := nodeMap[nodeID]
|
||||
if !exists {
|
||||
continue
|
||||
}
|
||||
|
||||
// 执行节点
|
||||
nodeResult, nodeOutput := e.executeNode(node, wfCtx)
|
||||
result.NodeResults = append(result.NodeResults, nodeResult)
|
||||
|
||||
if nodeOutput != nil && nodeOutput.Stream && nodeOutput.StreamChan != nil {
|
||||
// 流式输出节点通常是最后一个节点
|
||||
// 直接传递 StreamChan 给 WorkflowResult,不阻塞等待
|
||||
result.Stream = true
|
||||
result.StreamChan = nodeOutput.StreamChan
|
||||
result.Event = wfCtx.Event
|
||||
result.Status = "streaming"
|
||||
result.Message = fmt.Sprintf("streaming output from node: %s", node.Name)
|
||||
|
||||
// 更新节点状态为 streaming
|
||||
nodeResult.Status = "streaming"
|
||||
nodeResult.Message = "streaming in progress"
|
||||
|
||||
// 立即返回,让 API 层处理流式响应
|
||||
return result
|
||||
}
|
||||
executed[nodeID] = true
|
||||
|
||||
// 保存分支结果
|
||||
if nodeResult.BranchIndex != nil {
|
||||
branchResults[nodeID] = nodeResult.BranchIndex
|
||||
}
|
||||
|
||||
// 检查执行状态
|
||||
if nodeResult.Status == "failed" {
|
||||
if !node.ContinueOnFail {
|
||||
result.Status = models.ExecutionStatusFailed
|
||||
result.ErrorNode = nodeID
|
||||
result.Message = fmt.Sprintf("node %s failed: %s", node.Name, nodeResult.Error)
|
||||
}
|
||||
}
|
||||
|
||||
// 检查是否终止
|
||||
if nodeResult.Status == "terminated" {
|
||||
result.Message = fmt.Sprintf("workflow terminated at node %s", node.Name)
|
||||
return result
|
||||
}
|
||||
|
||||
// 更新后继节点的入度
|
||||
if nodeConns, ok := connections[nodeID]; ok {
|
||||
for outputIndex, targets := range nodeConns.Main {
|
||||
// 检查是否应该走这个分支
|
||||
if !e.shouldFollowBranch(nodeID, outputIndex, branchResults) {
|
||||
continue
|
||||
}
|
||||
|
||||
for _, target := range targets {
|
||||
inDegree[target.Node]--
|
||||
if inDegree[target.Node] == 0 {
|
||||
queue = append(queue, target.Node)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return result
|
||||
}
|
||||
|
||||
// executeNode 执行单个节点
|
||||
// 返回:节点执行结果、节点输出(用于流式输出检测)
|
||||
func (e *WorkflowEngine) executeNode(node *models.WorkflowNode, wfCtx *models.WorkflowContext) (*models.NodeExecutionResult, *models.NodeOutput) {
|
||||
startTime := time.Now()
|
||||
nodeResult := &models.NodeExecutionResult{
|
||||
NodeID: node.ID,
|
||||
NodeName: node.Name,
|
||||
NodeType: node.Type,
|
||||
StartedAt: startTime.Unix(),
|
||||
}
|
||||
|
||||
var nodeOutput *models.NodeOutput
|
||||
|
||||
// 跳过禁用的节点
|
||||
if node.Disabled {
|
||||
nodeResult.Status = "skipped"
|
||||
nodeResult.Message = "node is disabled"
|
||||
nodeResult.FinishedAt = time.Now().Unix()
|
||||
nodeResult.DurationMs = time.Since(startTime).Milliseconds()
|
||||
return nodeResult, nil
|
||||
}
|
||||
|
||||
// 获取处理器
|
||||
processor, err := models.GetProcessorByType(node.Type, node.Config)
|
||||
if err != nil {
|
||||
nodeResult.Status = "failed"
|
||||
nodeResult.Error = fmt.Sprintf("failed to get processor: %v", err)
|
||||
nodeResult.FinishedAt = time.Now().Unix()
|
||||
nodeResult.DurationMs = time.Since(startTime).Milliseconds()
|
||||
return nodeResult, nil
|
||||
}
|
||||
|
||||
// 执行处理器(带重试)
|
||||
var retries int
|
||||
maxRetries := node.MaxRetries
|
||||
if !node.RetryOnFail {
|
||||
maxRetries = 0
|
||||
}
|
||||
|
||||
for retries <= maxRetries {
|
||||
// 检查是否为分支处理器
|
||||
if branchProcessor, ok := processor.(models.BranchProcessor); ok {
|
||||
output, err := branchProcessor.ProcessWithBranch(e.ctx, wfCtx)
|
||||
if err != nil {
|
||||
if retries < maxRetries {
|
||||
retries++
|
||||
time.Sleep(time.Duration(node.RetryInterval) * time.Second)
|
||||
continue
|
||||
}
|
||||
nodeResult.Status = "failed"
|
||||
nodeResult.Error = err.Error()
|
||||
} else {
|
||||
nodeResult.Status = "success"
|
||||
if output != nil {
|
||||
nodeOutput = output
|
||||
if output.WfCtx != nil {
|
||||
wfCtx = output.WfCtx
|
||||
}
|
||||
nodeResult.Message = output.Message
|
||||
nodeResult.BranchIndex = output.BranchIndex
|
||||
if output.Terminate {
|
||||
nodeResult.Status = "terminated"
|
||||
}
|
||||
}
|
||||
}
|
||||
break
|
||||
}
|
||||
|
||||
// 普通处理器
|
||||
newWfCtx, msg, err := processor.Process(e.ctx, wfCtx)
|
||||
if err != nil {
|
||||
if retries < maxRetries {
|
||||
retries++
|
||||
time.Sleep(time.Duration(node.RetryInterval) * time.Second)
|
||||
continue
|
||||
}
|
||||
nodeResult.Status = "failed"
|
||||
nodeResult.Error = err.Error()
|
||||
} else {
|
||||
nodeResult.Status = "success"
|
||||
nodeResult.Message = msg
|
||||
if newWfCtx != nil {
|
||||
wfCtx = newWfCtx
|
||||
|
||||
// 检测流式输出标记
|
||||
if newWfCtx.Stream && newWfCtx.StreamChan != nil {
|
||||
nodeOutput = &models.NodeOutput{
|
||||
WfCtx: newWfCtx,
|
||||
Message: msg,
|
||||
Stream: true,
|
||||
StreamChan: newWfCtx.StreamChan,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// 如果事件被 drop(返回 nil 或 Event 为 nil),标记为终止
|
||||
if newWfCtx == nil || newWfCtx.Event == nil {
|
||||
nodeResult.Status = "terminated"
|
||||
nodeResult.Message = msg
|
||||
}
|
||||
}
|
||||
break
|
||||
}
|
||||
|
||||
nodeResult.FinishedAt = time.Now().Unix()
|
||||
nodeResult.DurationMs = time.Since(startTime).Milliseconds()
|
||||
|
||||
logger.Infof("workflow: executed node %s (type=%s) status=%s msg=%s duration=%dms",
|
||||
node.Name, node.Type, nodeResult.Status, nodeResult.Message, nodeResult.DurationMs)
|
||||
|
||||
return nodeResult, nodeOutput
|
||||
}
|
||||
|
||||
// shouldFollowBranch 判断是否应该走某个分支
|
||||
func (e *WorkflowEngine) shouldFollowBranch(nodeID string, outputIndex int, branchResults map[string]*int) bool {
|
||||
branchIndex, hasBranch := branchResults[nodeID]
|
||||
if !hasBranch {
|
||||
// 没有分支结果,说明不是分支节点,只走第一个输出
|
||||
return outputIndex == 0
|
||||
}
|
||||
|
||||
if branchIndex == nil {
|
||||
// branchIndex 为 nil,走默认分支(通常是最后一个)
|
||||
return true
|
||||
}
|
||||
|
||||
// 只走选中的分支
|
||||
return outputIndex == *branchIndex
|
||||
}
|
||||
|
||||
func (e *WorkflowEngine) saveExecutionRecord(pipeline *models.EventPipeline, wfCtx *models.WorkflowContext, result *models.WorkflowResult, triggerCtx *models.WorkflowTriggerContext, startTime int64, duration int64) {
|
||||
executionID := triggerCtx.RequestID
|
||||
if executionID == "" {
|
||||
executionID = uuid.New().String()
|
||||
}
|
||||
|
||||
execution := &models.EventPipelineExecution{
|
||||
ID: executionID,
|
||||
PipelineID: pipeline.ID,
|
||||
PipelineName: pipeline.Name,
|
||||
Mode: triggerCtx.Mode,
|
||||
Status: result.Status,
|
||||
ErrorMessage: result.Message,
|
||||
ErrorNode: result.ErrorNode,
|
||||
CreatedAt: startTime,
|
||||
FinishedAt: time.Now().Unix(),
|
||||
DurationMs: duration,
|
||||
TriggerBy: triggerCtx.TriggerBy,
|
||||
}
|
||||
|
||||
if wfCtx.Event != nil {
|
||||
execution.EventID = wfCtx.Event.Id
|
||||
}
|
||||
|
||||
if err := execution.SetNodeResults(result.NodeResults); err != nil {
|
||||
logger.Errorf("workflow: failed to set node results: pipeline_id=%d, error=%v", pipeline.ID, err)
|
||||
}
|
||||
|
||||
if err := execution.SetInputsSnapshot(wfCtx.Inputs); err != nil {
|
||||
logger.Errorf("workflow: failed to set inputs snapshot: pipeline_id=%d, error=%v", pipeline.ID, err)
|
||||
}
|
||||
|
||||
if err := models.CreateEventPipelineExecution(e.ctx, execution); err != nil {
|
||||
logger.Errorf("workflow: failed to save execution record: pipeline_id=%d, error=%v", pipeline.ID, err)
|
||||
}
|
||||
}
|
||||
@@ -5,6 +5,7 @@ import (
|
||||
_ "github.com/ccfos/nightingale/v6/alert/pipeline/processor/callback"
|
||||
_ "github.com/ccfos/nightingale/v6/alert/pipeline/processor/eventdrop"
|
||||
_ "github.com/ccfos/nightingale/v6/alert/pipeline/processor/eventupdate"
|
||||
_ "github.com/ccfos/nightingale/v6/alert/pipeline/processor/logic"
|
||||
_ "github.com/ccfos/nightingale/v6/alert/pipeline/processor/relabel"
|
||||
)
|
||||
|
||||
|
||||
@@ -55,23 +55,24 @@ func (c *AISummaryConfig) Init(settings interface{}) (models.Processor, error) {
|
||||
return result, err
|
||||
}
|
||||
|
||||
func (c *AISummaryConfig) Process(ctx *ctx.Context, event *models.AlertCurEvent) (*models.AlertCurEvent, string, error) {
|
||||
func (c *AISummaryConfig) Process(ctx *ctx.Context, wfCtx *models.WorkflowContext) (*models.WorkflowContext, string, error) {
|
||||
event := wfCtx.Event
|
||||
if c.Client == nil {
|
||||
if err := c.initHTTPClient(); err != nil {
|
||||
return event, "", fmt.Errorf("failed to initialize HTTP client: %v processor: %v", err, c)
|
||||
return wfCtx, "", fmt.Errorf("failed to initialize HTTP client: %v processor: %v", err, c)
|
||||
}
|
||||
}
|
||||
|
||||
// 准备告警事件信息
|
||||
eventInfo, err := c.prepareEventInfo(event)
|
||||
eventInfo, err := c.prepareEventInfo(wfCtx)
|
||||
if err != nil {
|
||||
return event, "", fmt.Errorf("failed to prepare event info: %v processor: %v", err, c)
|
||||
return wfCtx, "", fmt.Errorf("failed to prepare event info: %v processor: %v", err, c)
|
||||
}
|
||||
|
||||
// 调用AI模型生成总结
|
||||
summary, err := c.generateAISummary(eventInfo)
|
||||
if err != nil {
|
||||
return event, "", fmt.Errorf("failed to generate AI summary: %v processor: %v", err, c)
|
||||
return wfCtx, "", fmt.Errorf("failed to generate AI summary: %v processor: %v", err, c)
|
||||
}
|
||||
|
||||
// 将总结添加到annotations字段
|
||||
@@ -83,11 +84,11 @@ func (c *AISummaryConfig) Process(ctx *ctx.Context, event *models.AlertCurEvent)
|
||||
// 更新Annotations字段
|
||||
b, err := json.Marshal(event.AnnotationsJSON)
|
||||
if err != nil {
|
||||
return event, "", fmt.Errorf("failed to marshal annotations: %v processor: %v", err, c)
|
||||
return wfCtx, "", fmt.Errorf("failed to marshal annotations: %v processor: %v", err, c)
|
||||
}
|
||||
event.Annotations = string(b)
|
||||
|
||||
return event, "", nil
|
||||
return wfCtx, "", nil
|
||||
}
|
||||
|
||||
func (c *AISummaryConfig) initHTTPClient() error {
|
||||
@@ -110,9 +111,10 @@ func (c *AISummaryConfig) initHTTPClient() error {
|
||||
return nil
|
||||
}
|
||||
|
||||
func (c *AISummaryConfig) prepareEventInfo(event *models.AlertCurEvent) (string, error) {
|
||||
func (c *AISummaryConfig) prepareEventInfo(wfCtx *models.WorkflowContext) (string, error) {
|
||||
var defs = []string{
|
||||
"{{$event := .}}",
|
||||
"{{$event := .Event}}",
|
||||
"{{$inputs := .Inputs}}",
|
||||
}
|
||||
|
||||
text := strings.Join(append(defs, c.PromptTemplate), "")
|
||||
@@ -122,7 +124,7 @@ func (c *AISummaryConfig) prepareEventInfo(event *models.AlertCurEvent) (string,
|
||||
}
|
||||
|
||||
var body bytes.Buffer
|
||||
err = t.Execute(&body, event)
|
||||
err = t.Execute(&body, wfCtx)
|
||||
if err != nil {
|
||||
return "", fmt.Errorf("failed to execute prompt template: %v", err)
|
||||
}
|
||||
|
||||
@@ -42,8 +42,14 @@ func TestAISummaryConfig_Process(t *testing.T) {
|
||||
},
|
||||
}
|
||||
|
||||
// 创建 WorkflowContext
|
||||
wfCtx := &models.WorkflowContext{
|
||||
Event: event,
|
||||
Inputs: map[string]string{},
|
||||
}
|
||||
|
||||
// 测试模板处理
|
||||
eventInfo, err := config.prepareEventInfo(event)
|
||||
eventInfo, err := config.prepareEventInfo(wfCtx)
|
||||
assert.NoError(t, err)
|
||||
assert.Contains(t, eventInfo, "Test Rule")
|
||||
assert.Contains(t, eventInfo, "1")
|
||||
@@ -54,18 +60,18 @@ func TestAISummaryConfig_Process(t *testing.T) {
|
||||
assert.NotNil(t, processor)
|
||||
|
||||
// 测试处理函数
|
||||
result, _, err := processor.Process(&ctx.Context{}, event)
|
||||
result, _, err := processor.Process(&ctx.Context{}, wfCtx)
|
||||
assert.NoError(t, err)
|
||||
assert.NotNil(t, result)
|
||||
assert.NotEmpty(t, result.AnnotationsJSON["ai_summary"])
|
||||
assert.NotEmpty(t, result.Event.AnnotationsJSON["ai_summary"])
|
||||
|
||||
// 展示处理结果
|
||||
t.Log("\n=== 处理结果 ===")
|
||||
t.Logf("告警规则: %s", result.RuleName)
|
||||
t.Logf("严重程度: %d", result.Severity)
|
||||
t.Logf("标签: %v", result.TagsMap)
|
||||
t.Logf("原始注释: %v", result.AnnotationsJSON["description"])
|
||||
t.Logf("AI总结: %s", result.AnnotationsJSON["ai_summary"])
|
||||
t.Logf("告警规则: %s", result.Event.RuleName)
|
||||
t.Logf("严重程度: %d", result.Event.Severity)
|
||||
t.Logf("标签: %v", result.Event.TagsMap)
|
||||
t.Logf("原始注释: %v", result.Event.AnnotationsJSON["description"])
|
||||
t.Logf("AI总结: %s", result.Event.AnnotationsJSON["ai_summary"])
|
||||
}
|
||||
|
||||
func TestConvertCustomParam(t *testing.T) {
|
||||
|
||||
@@ -11,6 +11,7 @@ import (
|
||||
"time"
|
||||
|
||||
"github.com/ccfos/nightingale/v6/alert/pipeline/processor/common"
|
||||
"github.com/ccfos/nightingale/v6/alert/pipeline/processor/utils"
|
||||
"github.com/ccfos/nightingale/v6/models"
|
||||
"github.com/ccfos/nightingale/v6/pkg/ctx"
|
||||
"github.com/toolkits/pkg/logger"
|
||||
@@ -43,7 +44,8 @@ func (c *CallbackConfig) Init(settings interface{}) (models.Processor, error) {
|
||||
return result, err
|
||||
}
|
||||
|
||||
func (c *CallbackConfig) Process(ctx *ctx.Context, event *models.AlertCurEvent) (*models.AlertCurEvent, string, error) {
|
||||
func (c *CallbackConfig) Process(ctx *ctx.Context, wfCtx *models.WorkflowContext) (*models.WorkflowContext, string, error) {
|
||||
event := wfCtx.Event
|
||||
if c.Client == nil {
|
||||
transport := &http.Transport{
|
||||
TLSClientConfig: &tls.Config{InsecureSkipVerify: c.SkipSSLVerify},
|
||||
@@ -52,7 +54,7 @@ func (c *CallbackConfig) Process(ctx *ctx.Context, event *models.AlertCurEvent)
|
||||
if c.Proxy != "" {
|
||||
proxyURL, err := url.Parse(c.Proxy)
|
||||
if err != nil {
|
||||
return event, "", fmt.Errorf("failed to parse proxy url: %v processor: %v", err, c)
|
||||
return wfCtx, "", fmt.Errorf("failed to parse proxy url: %v processor: %v", err, c)
|
||||
} else {
|
||||
transport.Proxy = http.ProxyURL(proxyURL)
|
||||
}
|
||||
@@ -70,14 +72,19 @@ func (c *CallbackConfig) Process(ctx *ctx.Context, event *models.AlertCurEvent)
|
||||
headers[k] = v
|
||||
}
|
||||
|
||||
body, err := json.Marshal(event)
|
||||
url, err := utils.TplRender(wfCtx, c.URL)
|
||||
if err != nil {
|
||||
return event, "", fmt.Errorf("failed to marshal event: %v processor: %v", err, c)
|
||||
return wfCtx, "", fmt.Errorf("failed to render url template: %v processor: %v", err, c)
|
||||
}
|
||||
|
||||
req, err := http.NewRequest("POST", c.URL, strings.NewReader(string(body)))
|
||||
body, err := json.Marshal(event)
|
||||
if err != nil {
|
||||
return event, "", fmt.Errorf("failed to create request: %v processor: %v", err, c)
|
||||
return wfCtx, "", fmt.Errorf("failed to marshal event: %v processor: %v", err, c)
|
||||
}
|
||||
|
||||
req, err := http.NewRequest("POST", url, strings.NewReader(string(body)))
|
||||
if err != nil {
|
||||
return wfCtx, "", fmt.Errorf("failed to create request: %v processor: %v", err, c)
|
||||
}
|
||||
|
||||
for k, v := range headers {
|
||||
@@ -90,14 +97,14 @@ func (c *CallbackConfig) Process(ctx *ctx.Context, event *models.AlertCurEvent)
|
||||
|
||||
resp, err := c.Client.Do(req)
|
||||
if err != nil {
|
||||
return event, "", fmt.Errorf("failed to send request: %v processor: %v", err, c)
|
||||
return wfCtx, "", fmt.Errorf("failed to send request: %v processor: %v", err, c)
|
||||
}
|
||||
|
||||
b, err := io.ReadAll(resp.Body)
|
||||
if err != nil {
|
||||
return event, "", fmt.Errorf("failed to read response body: %v processor: %v", err, c)
|
||||
return wfCtx, "", fmt.Errorf("failed to read response body: %v processor: %v", err, c)
|
||||
}
|
||||
|
||||
logger.Debugf("callback processor response body: %s", string(b))
|
||||
return event, "callback success", nil
|
||||
return wfCtx, "callback success", nil
|
||||
}
|
||||
|
||||
@@ -26,35 +26,38 @@ func (c *EventDropConfig) Init(settings interface{}) (models.Processor, error) {
|
||||
return result, err
|
||||
}
|
||||
|
||||
func (c *EventDropConfig) Process(ctx *ctx.Context, event *models.AlertCurEvent) (*models.AlertCurEvent, string, error) {
|
||||
func (c *EventDropConfig) Process(ctx *ctx.Context, wfCtx *models.WorkflowContext) (*models.WorkflowContext, string, error) {
|
||||
// 使用背景是可以根据此处理器,实现对事件进行更加灵活的过滤的逻辑
|
||||
// 在标签过滤和属性过滤都不满足需求时可以使用
|
||||
// 如果模板执行结果为 true,则删除该事件
|
||||
event := wfCtx.Event
|
||||
|
||||
var defs = []string{
|
||||
"{{ $event := . }}",
|
||||
"{{ $labels := .TagsMap }}",
|
||||
"{{ $value := .TriggerValue }}",
|
||||
"{{ $event := .Event }}",
|
||||
"{{ $labels := .Event.TagsMap }}",
|
||||
"{{ $value := .Event.TriggerValue }}",
|
||||
"{{ $inputs := .Inputs }}",
|
||||
}
|
||||
|
||||
text := strings.Join(append(defs, c.Content), "")
|
||||
|
||||
tpl, err := texttemplate.New("eventdrop").Funcs(tplx.TemplateFuncMap).Parse(text)
|
||||
if err != nil {
|
||||
return event, "", fmt.Errorf("processor failed to parse template: %v processor: %v", err, c)
|
||||
return wfCtx, "", fmt.Errorf("processor failed to parse template: %v processor: %v", err, c)
|
||||
}
|
||||
|
||||
var body bytes.Buffer
|
||||
if err = tpl.Execute(&body, event); err != nil {
|
||||
return event, "", fmt.Errorf("processor failed to execute template: %v processor: %v", err, c)
|
||||
if err = tpl.Execute(&body, wfCtx); err != nil {
|
||||
return wfCtx, "", fmt.Errorf("processor failed to execute template: %v processor: %v", err, c)
|
||||
}
|
||||
|
||||
result := strings.TrimSpace(body.String())
|
||||
logger.Infof("processor eventdrop result: %v", result)
|
||||
if result == "true" {
|
||||
logger.Infof("processor eventdrop drop event: %v", event)
|
||||
return nil, "drop event success", nil
|
||||
wfCtx.Event = nil
|
||||
logger.Infof("processor eventdrop drop event: %s", event.Hash)
|
||||
return wfCtx, "drop event success", nil
|
||||
}
|
||||
|
||||
return event, "drop event failed", nil
|
||||
return wfCtx, "drop event failed", nil
|
||||
}
|
||||
|
||||
@@ -31,7 +31,8 @@ func (c *EventUpdateConfig) Init(settings interface{}) (models.Processor, error)
|
||||
return result, err
|
||||
}
|
||||
|
||||
func (c *EventUpdateConfig) Process(ctx *ctx.Context, event *models.AlertCurEvent) (*models.AlertCurEvent, string, error) {
|
||||
func (c *EventUpdateConfig) Process(ctx *ctx.Context, wfCtx *models.WorkflowContext) (*models.WorkflowContext, string, error) {
|
||||
event := wfCtx.Event
|
||||
if c.Client == nil {
|
||||
transport := &http.Transport{
|
||||
TLSClientConfig: &tls.Config{InsecureSkipVerify: c.SkipSSLVerify},
|
||||
@@ -40,7 +41,7 @@ func (c *EventUpdateConfig) Process(ctx *ctx.Context, event *models.AlertCurEven
|
||||
if c.Proxy != "" {
|
||||
proxyURL, err := url.Parse(c.Proxy)
|
||||
if err != nil {
|
||||
return event, "", fmt.Errorf("failed to parse proxy url: %v processor: %v", err, c)
|
||||
return wfCtx, "", fmt.Errorf("failed to parse proxy url: %v processor: %v", err, c)
|
||||
} else {
|
||||
transport.Proxy = http.ProxyURL(proxyURL)
|
||||
}
|
||||
@@ -60,12 +61,12 @@ func (c *EventUpdateConfig) Process(ctx *ctx.Context, event *models.AlertCurEven
|
||||
|
||||
body, err := json.Marshal(event)
|
||||
if err != nil {
|
||||
return event, "", fmt.Errorf("failed to marshal event: %v processor: %v", err, c)
|
||||
return wfCtx, "", fmt.Errorf("failed to marshal event: %v processor: %v", err, c)
|
||||
}
|
||||
|
||||
req, err := http.NewRequest("POST", c.URL, strings.NewReader(string(body)))
|
||||
if err != nil {
|
||||
return event, "", fmt.Errorf("failed to create request: %v processor: %v", err, c)
|
||||
return wfCtx, "", fmt.Errorf("failed to create request: %v processor: %v", err, c)
|
||||
}
|
||||
|
||||
for k, v := range headers {
|
||||
@@ -78,7 +79,7 @@ func (c *EventUpdateConfig) Process(ctx *ctx.Context, event *models.AlertCurEven
|
||||
|
||||
resp, err := c.Client.Do(req)
|
||||
if err != nil {
|
||||
return event, "", fmt.Errorf("failed to send request: %v processor: %v", err, c)
|
||||
return wfCtx, "", fmt.Errorf("failed to send request: %v processor: %v", err, c)
|
||||
}
|
||||
|
||||
b, err := io.ReadAll(resp.Body)
|
||||
@@ -89,8 +90,8 @@ func (c *EventUpdateConfig) Process(ctx *ctx.Context, event *models.AlertCurEven
|
||||
|
||||
err = json.Unmarshal(b, &event)
|
||||
if err != nil {
|
||||
return event, "", fmt.Errorf("failed to unmarshal response body: %v processor: %v", err, c)
|
||||
return wfCtx, "", fmt.Errorf("failed to unmarshal response body: %v processor: %v", err, c)
|
||||
}
|
||||
|
||||
return event, "", nil
|
||||
return wfCtx, "", nil
|
||||
}
|
||||
|
||||
197
alert/pipeline/processor/logic/if.go
Normal file
197
alert/pipeline/processor/logic/if.go
Normal file
@@ -0,0 +1,197 @@
|
||||
package logic
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"fmt"
|
||||
"strings"
|
||||
"text/template"
|
||||
|
||||
alertCommon "github.com/ccfos/nightingale/v6/alert/common"
|
||||
"github.com/ccfos/nightingale/v6/alert/pipeline/processor/common"
|
||||
"github.com/ccfos/nightingale/v6/models"
|
||||
"github.com/ccfos/nightingale/v6/pkg/ctx"
|
||||
"github.com/ccfos/nightingale/v6/pkg/tplx"
|
||||
)
|
||||
|
||||
// 判断模式常量
|
||||
const (
|
||||
ConditionModeExpression = "expression" // 表达式模式(默认)
|
||||
ConditionModeTags = "tags" // 标签/属性模式
|
||||
)
|
||||
|
||||
// IfConfig If 条件处理器配置
|
||||
type IfConfig struct {
|
||||
// 判断模式:expression(表达式)或 tags(标签/属性)
|
||||
Mode string `json:"mode,omitempty"`
|
||||
|
||||
// 表达式模式配置
|
||||
// 条件表达式(支持 Go 模板语法)
|
||||
// 例如:{{ if eq .Severity 1 }}true{{ end }}
|
||||
Condition string `json:"condition,omitempty"`
|
||||
|
||||
// 标签/属性模式配置
|
||||
LabelKeys []models.TagFilter `json:"label_keys,omitempty"` // 适用标签
|
||||
Attributes []models.TagFilter `json:"attributes,omitempty"` // 适用属性
|
||||
|
||||
// 内部使用,解析后的过滤器
|
||||
parsedLabelKeys []models.TagFilter `json:"-"`
|
||||
parsedAttributes []models.TagFilter `json:"-"`
|
||||
}
|
||||
|
||||
func init() {
|
||||
models.RegisterProcessor("logic.if", &IfConfig{})
|
||||
}
|
||||
|
||||
func (c *IfConfig) Init(settings interface{}) (models.Processor, error) {
|
||||
result, err := common.InitProcessor[*IfConfig](settings)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
// 解析标签过滤器
|
||||
if len(result.LabelKeys) > 0 {
|
||||
// Deep copy to avoid concurrent map writes on cached objects
|
||||
labelKeysCopy := make([]models.TagFilter, len(result.LabelKeys))
|
||||
copy(labelKeysCopy, result.LabelKeys)
|
||||
for i := range labelKeysCopy {
|
||||
if labelKeysCopy[i].Func == "" {
|
||||
labelKeysCopy[i].Func = labelKeysCopy[i].Op
|
||||
}
|
||||
}
|
||||
result.parsedLabelKeys, err = models.ParseTagFilter(labelKeysCopy)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to parse label_keys: %v", err)
|
||||
}
|
||||
}
|
||||
|
||||
// 解析属性过滤器
|
||||
if len(result.Attributes) > 0 {
|
||||
// Deep copy to avoid concurrent map writes on cached objects
|
||||
attributesCopy := make([]models.TagFilter, len(result.Attributes))
|
||||
copy(attributesCopy, result.Attributes)
|
||||
for i := range attributesCopy {
|
||||
if attributesCopy[i].Func == "" {
|
||||
attributesCopy[i].Func = attributesCopy[i].Op
|
||||
}
|
||||
}
|
||||
result.parsedAttributes, err = models.ParseTagFilter(attributesCopy)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to parse attributes: %v", err)
|
||||
}
|
||||
}
|
||||
|
||||
return result, nil
|
||||
}
|
||||
|
||||
// Process 实现 Processor 接口(兼容旧模式)
|
||||
func (c *IfConfig) Process(ctx *ctx.Context, wfCtx *models.WorkflowContext) (*models.WorkflowContext, string, error) {
|
||||
result, err := c.evaluateCondition(wfCtx)
|
||||
if err != nil {
|
||||
return wfCtx, "", fmt.Errorf("if processor: failed to evaluate condition: %v", err)
|
||||
}
|
||||
|
||||
if result {
|
||||
return wfCtx, "condition matched (true branch)", nil
|
||||
}
|
||||
return wfCtx, "condition not matched (false branch)", nil
|
||||
}
|
||||
|
||||
// ProcessWithBranch 实现 BranchProcessor 接口
|
||||
func (c *IfConfig) ProcessWithBranch(ctx *ctx.Context, wfCtx *models.WorkflowContext) (*models.NodeOutput, error) {
|
||||
result, err := c.evaluateCondition(wfCtx)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("if processor: failed to evaluate condition: %v", err)
|
||||
}
|
||||
|
||||
output := &models.NodeOutput{
|
||||
WfCtx: wfCtx,
|
||||
}
|
||||
|
||||
if result {
|
||||
// 条件为 true,走输出 0(true 分支)
|
||||
branchIndex := 0
|
||||
output.BranchIndex = &branchIndex
|
||||
output.Message = "condition matched (true branch)"
|
||||
} else {
|
||||
// 条件为 false,走输出 1(false 分支)
|
||||
branchIndex := 1
|
||||
output.BranchIndex = &branchIndex
|
||||
output.Message = "condition not matched (false branch)"
|
||||
}
|
||||
|
||||
return output, nil
|
||||
}
|
||||
|
||||
// evaluateCondition 评估条件
|
||||
func (c *IfConfig) evaluateCondition(wfCtx *models.WorkflowContext) (bool, error) {
|
||||
mode := c.Mode
|
||||
if mode == "" {
|
||||
mode = ConditionModeExpression // 默认表达式模式
|
||||
}
|
||||
|
||||
switch mode {
|
||||
case ConditionModeTags:
|
||||
return c.evaluateTagsCondition(wfCtx.Event)
|
||||
default:
|
||||
return c.evaluateExpressionCondition(wfCtx)
|
||||
}
|
||||
}
|
||||
|
||||
// evaluateExpressionCondition 评估表达式条件
|
||||
func (c *IfConfig) evaluateExpressionCondition(wfCtx *models.WorkflowContext) (bool, error) {
|
||||
if c.Condition == "" {
|
||||
return true, nil
|
||||
}
|
||||
|
||||
// 构建模板数据
|
||||
var defs = []string{
|
||||
"{{ $event := .Event }}",
|
||||
"{{ $labels := .Event.TagsMap }}",
|
||||
"{{ $value := .Event.TriggerValue }}",
|
||||
"{{ $inputs := .Inputs }}",
|
||||
}
|
||||
|
||||
text := strings.Join(append(defs, c.Condition), "")
|
||||
|
||||
tpl, err := template.New("if_condition").Funcs(tplx.TemplateFuncMap).Parse(text)
|
||||
if err != nil {
|
||||
return false, err
|
||||
}
|
||||
|
||||
var buf bytes.Buffer
|
||||
if err = tpl.Execute(&buf, wfCtx); err != nil {
|
||||
return false, err
|
||||
}
|
||||
|
||||
result := strings.TrimSpace(strings.ToLower(buf.String()))
|
||||
return result == "true" || result == "1", nil
|
||||
}
|
||||
|
||||
// evaluateTagsCondition 评估标签/属性条件
|
||||
func (c *IfConfig) evaluateTagsCondition(event *models.AlertCurEvent) (bool, error) {
|
||||
// 如果没有配置任何过滤条件,默认返回 true
|
||||
if len(c.parsedLabelKeys) == 0 && len(c.parsedAttributes) == 0 {
|
||||
return true, nil
|
||||
}
|
||||
|
||||
// 匹配标签 (TagsMap)
|
||||
if len(c.parsedLabelKeys) > 0 {
|
||||
tagsMap := event.TagsMap
|
||||
if tagsMap == nil {
|
||||
tagsMap = make(map[string]string)
|
||||
}
|
||||
if !alertCommon.MatchTags(tagsMap, c.parsedLabelKeys) {
|
||||
return false, nil
|
||||
}
|
||||
}
|
||||
|
||||
// 匹配属性 (JsonTagsAndValue - 所有 JSON 字段)
|
||||
if len(c.parsedAttributes) > 0 {
|
||||
attributesMap := event.JsonTagsAndValue()
|
||||
if !alertCommon.MatchTags(attributesMap, c.parsedAttributes) {
|
||||
return false, nil
|
||||
}
|
||||
}
|
||||
|
||||
return true, nil
|
||||
}
|
||||
224
alert/pipeline/processor/logic/switch.go
Normal file
224
alert/pipeline/processor/logic/switch.go
Normal file
@@ -0,0 +1,224 @@
|
||||
package logic
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"fmt"
|
||||
"strings"
|
||||
"text/template"
|
||||
|
||||
alertCommon "github.com/ccfos/nightingale/v6/alert/common"
|
||||
"github.com/ccfos/nightingale/v6/alert/pipeline/processor/common"
|
||||
"github.com/ccfos/nightingale/v6/models"
|
||||
"github.com/ccfos/nightingale/v6/pkg/ctx"
|
||||
"github.com/ccfos/nightingale/v6/pkg/tplx"
|
||||
)
|
||||
|
||||
// SwitchCase Switch 分支定义
|
||||
type SwitchCase struct {
|
||||
// 判断模式:expression(表达式)或 tags(标签/属性)
|
||||
Mode string `json:"mode,omitempty"`
|
||||
|
||||
// 表达式模式配置
|
||||
// 条件表达式(支持 Go 模板语法)
|
||||
Condition string `json:"condition,omitempty"`
|
||||
|
||||
// 标签/属性模式配置
|
||||
LabelKeys []models.TagFilter `json:"label_keys,omitempty"` // 适用标签
|
||||
Attributes []models.TagFilter `json:"attributes,omitempty"` // 适用属性
|
||||
|
||||
// 分支名称(可选,用于日志)
|
||||
Name string `json:"name,omitempty"`
|
||||
|
||||
// 内部使用,解析后的过滤器
|
||||
parsedLabelKeys []models.TagFilter `json:"-"`
|
||||
parsedAttributes []models.TagFilter `json:"-"`
|
||||
}
|
||||
|
||||
// SwitchConfig Switch 多分支处理器配置
|
||||
type SwitchConfig struct {
|
||||
// 分支条件列表
|
||||
// 按顺序匹配,第一个为 true 的分支将被选中
|
||||
Cases []SwitchCase `json:"cases"`
|
||||
// 是否允许多个分支同时匹配(默认 false,只走第一个匹配的)
|
||||
AllowMultiple bool `json:"allow_multiple,omitempty"`
|
||||
}
|
||||
|
||||
func init() {
|
||||
models.RegisterProcessor("logic.switch", &SwitchConfig{})
|
||||
}
|
||||
|
||||
func (c *SwitchConfig) Init(settings interface{}) (models.Processor, error) {
|
||||
result, err := common.InitProcessor[*SwitchConfig](settings)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
// 解析每个 case 的标签和属性过滤器
|
||||
for i := range result.Cases {
|
||||
if len(result.Cases[i].LabelKeys) > 0 {
|
||||
// Deep copy to avoid concurrent map writes on cached objects
|
||||
labelKeysCopy := make([]models.TagFilter, len(result.Cases[i].LabelKeys))
|
||||
copy(labelKeysCopy, result.Cases[i].LabelKeys)
|
||||
for j := range labelKeysCopy {
|
||||
if labelKeysCopy[j].Func == "" {
|
||||
labelKeysCopy[j].Func = labelKeysCopy[j].Op
|
||||
}
|
||||
}
|
||||
result.Cases[i].parsedLabelKeys, err = models.ParseTagFilter(labelKeysCopy)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to parse label_keys for case[%d]: %v", i, err)
|
||||
}
|
||||
}
|
||||
|
||||
if len(result.Cases[i].Attributes) > 0 {
|
||||
// Deep copy to avoid concurrent map writes on cached objects
|
||||
attributesCopy := make([]models.TagFilter, len(result.Cases[i].Attributes))
|
||||
copy(attributesCopy, result.Cases[i].Attributes)
|
||||
for j := range attributesCopy {
|
||||
if attributesCopy[j].Func == "" {
|
||||
attributesCopy[j].Func = attributesCopy[j].Op
|
||||
}
|
||||
}
|
||||
result.Cases[i].parsedAttributes, err = models.ParseTagFilter(attributesCopy)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to parse attributes for case[%d]: %v", i, err)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return result, nil
|
||||
}
|
||||
|
||||
// Process 实现 Processor 接口(兼容旧模式)
|
||||
func (c *SwitchConfig) Process(ctx *ctx.Context, wfCtx *models.WorkflowContext) (*models.WorkflowContext, string, error) {
|
||||
index, caseName, err := c.evaluateCases(wfCtx)
|
||||
if err != nil {
|
||||
return wfCtx, "", fmt.Errorf("switch processor: failed to evaluate cases: %v", err)
|
||||
}
|
||||
|
||||
if index >= 0 {
|
||||
if caseName != "" {
|
||||
return wfCtx, fmt.Sprintf("matched case[%d]: %s", index, caseName), nil
|
||||
}
|
||||
return wfCtx, fmt.Sprintf("matched case[%d]", index), nil
|
||||
}
|
||||
|
||||
// 走默认分支(最后一个输出)
|
||||
return wfCtx, "no case matched, using default branch", nil
|
||||
}
|
||||
|
||||
// ProcessWithBranch 实现 BranchProcessor 接口
|
||||
func (c *SwitchConfig) ProcessWithBranch(ctx *ctx.Context, wfCtx *models.WorkflowContext) (*models.NodeOutput, error) {
|
||||
index, caseName, err := c.evaluateCases(wfCtx)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("switch processor: failed to evaluate cases: %v", err)
|
||||
}
|
||||
|
||||
output := &models.NodeOutput{
|
||||
WfCtx: wfCtx,
|
||||
}
|
||||
|
||||
if index >= 0 {
|
||||
output.BranchIndex = &index
|
||||
if caseName != "" {
|
||||
output.Message = fmt.Sprintf("matched case[%d]: %s", index, caseName)
|
||||
} else {
|
||||
output.Message = fmt.Sprintf("matched case[%d]", index)
|
||||
}
|
||||
} else {
|
||||
// 默认分支的索引是 cases 数量(即最后一个输出端口)
|
||||
defaultIndex := len(c.Cases)
|
||||
output.BranchIndex = &defaultIndex
|
||||
output.Message = "no case matched, using default branch"
|
||||
}
|
||||
|
||||
return output, nil
|
||||
}
|
||||
|
||||
// evaluateCases 评估所有分支条件
|
||||
// 返回匹配的分支索引和分支名称,如果没有匹配返回 -1
|
||||
func (c *SwitchConfig) evaluateCases(wfCtx *models.WorkflowContext) (int, string, error) {
|
||||
for i := range c.Cases {
|
||||
matched, err := c.evaluateCaseCondition(&c.Cases[i], wfCtx)
|
||||
if err != nil {
|
||||
return -1, "", fmt.Errorf("case[%d] evaluation error: %v", i, err)
|
||||
}
|
||||
if matched {
|
||||
return i, c.Cases[i].Name, nil
|
||||
}
|
||||
}
|
||||
return -1, "", nil
|
||||
}
|
||||
|
||||
// evaluateCaseCondition 评估单个分支条件
|
||||
func (c *SwitchConfig) evaluateCaseCondition(caseItem *SwitchCase, wfCtx *models.WorkflowContext) (bool, error) {
|
||||
mode := caseItem.Mode
|
||||
if mode == "" {
|
||||
mode = ConditionModeExpression // 默认表达式模式
|
||||
}
|
||||
|
||||
switch mode {
|
||||
case ConditionModeTags:
|
||||
return c.evaluateTagsCondition(caseItem, wfCtx.Event)
|
||||
default:
|
||||
return c.evaluateExpressionCondition(caseItem.Condition, wfCtx)
|
||||
}
|
||||
}
|
||||
|
||||
// evaluateExpressionCondition 评估表达式条件
|
||||
func (c *SwitchConfig) evaluateExpressionCondition(condition string, wfCtx *models.WorkflowContext) (bool, error) {
|
||||
if condition == "" {
|
||||
return false, nil
|
||||
}
|
||||
|
||||
var defs = []string{
|
||||
"{{ $event := .Event }}",
|
||||
"{{ $labels := .Event.TagsMap }}",
|
||||
"{{ $value := .Event.TriggerValue }}",
|
||||
"{{ $inputs := .Inputs }}",
|
||||
}
|
||||
|
||||
text := strings.Join(append(defs, condition), "")
|
||||
|
||||
tpl, err := template.New("switch_condition").Funcs(tplx.TemplateFuncMap).Parse(text)
|
||||
if err != nil {
|
||||
return false, err
|
||||
}
|
||||
|
||||
var buf bytes.Buffer
|
||||
if err = tpl.Execute(&buf, wfCtx); err != nil {
|
||||
return false, err
|
||||
}
|
||||
|
||||
result := strings.TrimSpace(strings.ToLower(buf.String()))
|
||||
return result == "true" || result == "1", nil
|
||||
}
|
||||
|
||||
// evaluateTagsCondition 评估标签/属性条件
|
||||
func (c *SwitchConfig) evaluateTagsCondition(caseItem *SwitchCase, event *models.AlertCurEvent) (bool, error) {
|
||||
// 如果没有配置任何过滤条件,默认返回 false(不匹配)
|
||||
if len(caseItem.parsedLabelKeys) == 0 && len(caseItem.parsedAttributes) == 0 {
|
||||
return false, nil
|
||||
}
|
||||
|
||||
// 匹配标签 (TagsMap)
|
||||
if len(caseItem.parsedLabelKeys) > 0 {
|
||||
tagsMap := event.TagsMap
|
||||
if tagsMap == nil {
|
||||
tagsMap = make(map[string]string)
|
||||
}
|
||||
if !alertCommon.MatchTags(tagsMap, caseItem.parsedLabelKeys) {
|
||||
return false, nil
|
||||
}
|
||||
}
|
||||
|
||||
// 匹配属性 (JsonTagsAndValue - 所有 JSON 字段)
|
||||
if len(caseItem.parsedAttributes) > 0 {
|
||||
attributesMap := event.JsonTagsAndValue()
|
||||
if !alertCommon.MatchTags(attributesMap, caseItem.parsedAttributes) {
|
||||
return false, nil
|
||||
}
|
||||
}
|
||||
|
||||
return true, nil
|
||||
}
|
||||
@@ -42,7 +42,7 @@ func (r *RelabelConfig) Init(settings interface{}) (models.Processor, error) {
|
||||
return result, err
|
||||
}
|
||||
|
||||
func (r *RelabelConfig) Process(ctx *ctx.Context, event *models.AlertCurEvent) (*models.AlertCurEvent, string, error) {
|
||||
func (r *RelabelConfig) Process(ctx *ctx.Context, wfCtx *models.WorkflowContext) (*models.WorkflowContext, string, error) {
|
||||
sourceLabels := make([]model.LabelName, len(r.SourceLabels))
|
||||
for i := range r.SourceLabels {
|
||||
sourceLabels[i] = model.LabelName(strings.ReplaceAll(r.SourceLabels[i], ".", REPLACE_DOT))
|
||||
@@ -63,8 +63,8 @@ func (r *RelabelConfig) Process(ctx *ctx.Context, event *models.AlertCurEvent) (
|
||||
},
|
||||
}
|
||||
|
||||
EventRelabel(event, relabelConfigs)
|
||||
return event, "", nil
|
||||
EventRelabel(wfCtx.Event, relabelConfigs)
|
||||
return wfCtx, "", nil
|
||||
}
|
||||
|
||||
func EventRelabel(event *models.AlertCurEvent, relabelConfigs []*pconf.RelabelConfig) {
|
||||
|
||||
32
alert/pipeline/processor/utils/utils.go
Normal file
32
alert/pipeline/processor/utils/utils.go
Normal file
@@ -0,0 +1,32 @@
|
||||
package utils
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"fmt"
|
||||
"strings"
|
||||
"text/template"
|
||||
|
||||
"github.com/ccfos/nightingale/v6/models"
|
||||
"github.com/ccfos/nightingale/v6/pkg/tplx"
|
||||
)
|
||||
|
||||
func TplRender(wfCtx *models.WorkflowContext, content string) (string, error) {
|
||||
var defs = []string{
|
||||
"{{ $event := .Event }}",
|
||||
"{{ $labels := .Event.TagsMap }}",
|
||||
"{{ $value := .Event.TriggerValue }}",
|
||||
"{{ $inputs := .Inputs }}",
|
||||
}
|
||||
text := strings.Join(append(defs, content), "")
|
||||
tpl, err := template.New("tpl").Funcs(tplx.TemplateFuncMap).Parse(text)
|
||||
if err != nil {
|
||||
return "", fmt.Errorf("failed to parse template: %v", err)
|
||||
}
|
||||
|
||||
var body bytes.Buffer
|
||||
if err = tpl.Execute(&body, wfCtx); err != nil {
|
||||
return "", fmt.Errorf("failed to execute template: %v", err)
|
||||
}
|
||||
|
||||
return strings.TrimSpace(body.String()), nil
|
||||
}
|
||||
@@ -131,7 +131,7 @@ func (p *Processor) Handle(anomalyPoints []models.AnomalyPoint, from string, inh
|
||||
p.inhibit = inhibit
|
||||
cachedRule := p.alertRuleCache.Get(p.rule.Id)
|
||||
if cachedRule == nil {
|
||||
logger.Warningf("process handle error: rule not found %+v rule_id:%d maybe rule has been deleted", anomalyPoints, p.rule.Id)
|
||||
logger.Warningf("alert_eval_%d datasource_%d handle error: rule not found, maybe rule has been deleted, anomalyPoints:%+v", p.rule.Id, p.datasourceId, anomalyPoints)
|
||||
p.Stats.CounterRuleEvalErrorTotal.WithLabelValues(fmt.Sprintf("%v", p.DatasourceId()), "handle_event", p.BusiGroupCache.GetNameByBusiGroupId(p.rule.GroupId), fmt.Sprintf("%v", p.rule.Id)).Inc()
|
||||
return
|
||||
}
|
||||
@@ -156,14 +156,14 @@ func (p *Processor) Handle(anomalyPoints []models.AnomalyPoint, from string, inh
|
||||
eventCopy := event.DeepCopy()
|
||||
event = dispatch.HandleEventPipeline(cachedRule.PipelineConfigs, eventCopy, event, dispatch.EventProcessorCache, p.ctx, cachedRule.Id, "alert_rule")
|
||||
if event == nil {
|
||||
logger.Infof("rule_eval:%s is muted drop by pipeline event:%v", p.Key(), eventCopy)
|
||||
logger.Infof("alert_eval_%d datasource_%d is muted drop by pipeline event:%s", p.rule.Id, p.datasourceId, eventCopy.Hash)
|
||||
continue
|
||||
}
|
||||
|
||||
// event mute
|
||||
isMuted, detail, muteId := mute.IsMuted(cachedRule, event, p.TargetCache, p.alertMuteCache)
|
||||
if isMuted {
|
||||
logger.Infof("rule_eval:%s is muted, detail:%s event:%v", p.Key(), detail, event)
|
||||
logger.Infof("alert_eval_%d datasource_%d is muted, detail:%s event:%s", p.rule.Id, p.datasourceId, detail, event.Hash)
|
||||
p.Stats.CounterMuteTotal.WithLabelValues(
|
||||
fmt.Sprintf("%v", event.GroupName),
|
||||
fmt.Sprintf("%v", p.rule.Id),
|
||||
@@ -174,7 +174,7 @@ func (p *Processor) Handle(anomalyPoints []models.AnomalyPoint, from string, inh
|
||||
}
|
||||
|
||||
if dispatch.EventMuteHook(event) {
|
||||
logger.Infof("rule_eval:%s is muted by hook event:%v", p.Key(), event)
|
||||
logger.Infof("alert_eval_%d datasource_%d is muted by hook event:%s", p.rule.Id, p.datasourceId, event.Hash)
|
||||
p.Stats.CounterMuteTotal.WithLabelValues(
|
||||
fmt.Sprintf("%v", event.GroupName),
|
||||
fmt.Sprintf("%v", p.rule.Id),
|
||||
@@ -247,7 +247,7 @@ func (p *Processor) BuildEvent(anomalyPoint models.AnomalyPoint, from string, no
|
||||
|
||||
if err := json.Unmarshal([]byte(p.rule.Annotations), &event.AnnotationsJSON); err != nil {
|
||||
event.AnnotationsJSON = make(map[string]string) // 解析失败时使用空 map
|
||||
logger.Warningf("unmarshal annotations json failed: %v, rule: %d", err, p.rule.Id)
|
||||
logger.Warningf("alert_eval_%d datasource_%d unmarshal annotations json failed: %v", p.rule.Id, p.datasourceId, err)
|
||||
}
|
||||
|
||||
if event.TriggerValues != "" && strings.Count(event.TriggerValues, "$") > 1 {
|
||||
@@ -272,7 +272,7 @@ func (p *Processor) BuildEvent(anomalyPoint models.AnomalyPoint, from string, no
|
||||
pt.GroupNames = p.BusiGroupCache.GetNamesByBusiGroupIds(pt.GroupIds)
|
||||
event.Target = pt
|
||||
} else {
|
||||
logger.Infof("fill event target error, ident: %s doesn't exist in cache.", event.TargetIdent)
|
||||
logger.Infof("alert_eval_%d datasource_%d fill event target error, ident: %s doesn't exist in cache.", p.rule.Id, p.datasourceId, event.TargetIdent)
|
||||
}
|
||||
}
|
||||
|
||||
@@ -371,19 +371,19 @@ func (p *Processor) RecoverSingle(byRecover bool, hash string, now int64, value
|
||||
lastPendingEvent, has := p.pendingsUseByRecover.Get(hash)
|
||||
if !has {
|
||||
// 说明没有产生过异常点,就不需要恢复了
|
||||
logger.Debugf("rule_eval:%s event:%v do not has pending event, not recover", p.Key(), event)
|
||||
logger.Debugf("alert_eval_%d datasource_%d event:%s do not has pending event, not recover", p.rule.Id, p.datasourceId, event.Hash)
|
||||
return
|
||||
}
|
||||
|
||||
if now-lastPendingEvent.LastEvalTime < cachedRule.RecoverDuration {
|
||||
logger.Debugf("rule_eval:%s event:%v not recover", p.Key(), event)
|
||||
logger.Debugf("alert_eval_%d datasource_%d event:%s not recover", p.rule.Id, p.datasourceId, event.Hash)
|
||||
return
|
||||
}
|
||||
}
|
||||
|
||||
// 如果设置了恢复条件,则不能在此处恢复,必须依靠 recoverPoint 来恢复
|
||||
if event.RecoverConfig.JudgeType != models.Origin && !byRecover {
|
||||
logger.Debugf("rule_eval:%s event:%v not recover", p.Key(), event)
|
||||
logger.Debugf("alert_eval_%d datasource_%d event:%s not recover", p.rule.Id, p.datasourceId, event.Hash)
|
||||
return
|
||||
}
|
||||
|
||||
@@ -460,7 +460,7 @@ func (p *Processor) handleEvent(events []*models.AlertCurEvent) {
|
||||
func (p *Processor) inhibitEvent(events []*models.AlertCurEvent, highSeverity int) {
|
||||
for _, event := range events {
|
||||
if p.inhibit && event.Severity > highSeverity {
|
||||
logger.Debugf("rule_eval:%s event:%+v inhibit highSeverity:%d", p.Key(), event, highSeverity)
|
||||
logger.Debugf("alert_eval_%d datasource_%d event:%s inhibit highSeverity:%d", p.rule.Id, p.datasourceId, event.Hash, highSeverity)
|
||||
continue
|
||||
}
|
||||
p.fireEvent(event)
|
||||
@@ -476,7 +476,7 @@ func (p *Processor) fireEvent(event *models.AlertCurEvent) {
|
||||
|
||||
message := "unknown"
|
||||
defer func() {
|
||||
logger.Infof("rule_eval:%s event-hash-%s %s", p.Key(), event.Hash, message)
|
||||
logger.Infof("alert_eval_%d datasource_%d event-hash-%s %s", p.rule.Id, p.datasourceId, event.Hash, message)
|
||||
}()
|
||||
|
||||
if fired, has := p.fires.Get(event.Hash); has {
|
||||
@@ -527,7 +527,7 @@ func (p *Processor) pushEventToQueue(e *models.AlertCurEvent) {
|
||||
|
||||
dispatch.LogEvent(e, "push_queue")
|
||||
if !queue.EventQueue.PushFront(e) {
|
||||
logger.Warningf("event_push_queue: queue is full, event:%+v", e)
|
||||
logger.Warningf("alert_eval_%d datasource_%d event_push_queue: queue is full, event:%s", p.rule.Id, p.datasourceId, e.Hash)
|
||||
p.Stats.CounterRuleEvalErrorTotal.WithLabelValues(fmt.Sprintf("%v", p.DatasourceId()), "push_event_queue", p.BusiGroupCache.GetNameByBusiGroupId(p.rule.GroupId), fmt.Sprintf("%v", p.rule.Id)).Inc()
|
||||
}
|
||||
}
|
||||
@@ -538,7 +538,7 @@ func (p *Processor) RecoverAlertCurEventFromDb() {
|
||||
|
||||
curEvents, err := models.AlertCurEventGetByRuleIdAndDsId(p.ctx, p.rule.Id, p.datasourceId)
|
||||
if err != nil {
|
||||
logger.Errorf("recover event from db for rule:%s failed, err:%s", p.Key(), err)
|
||||
logger.Errorf("alert_eval_%d datasource_%d recover event from db failed, err:%s", p.rule.Id, p.datasourceId, err)
|
||||
p.Stats.CounterRuleEvalErrorTotal.WithLabelValues(fmt.Sprintf("%v", p.DatasourceId()), "get_recover_event", p.BusiGroupCache.GetNameByBusiGroupId(p.rule.GroupId), fmt.Sprintf("%v", p.rule.Id)).Inc()
|
||||
p.fires = NewAlertCurEventMap(nil)
|
||||
return
|
||||
|
||||
@@ -22,10 +22,11 @@ type Router struct {
|
||||
AlertStats *astats.Stats
|
||||
Ctx *ctx.Context
|
||||
ExternalProcessors *process.ExternalProcessorsType
|
||||
LogDir string
|
||||
}
|
||||
|
||||
func New(httpConfig httpx.Config, alert aconf.Alert, amc *memsto.AlertMuteCacheType, tc *memsto.TargetCacheType, bgc *memsto.BusiGroupCacheType,
|
||||
astats *astats.Stats, ctx *ctx.Context, externalProcessors *process.ExternalProcessorsType) *Router {
|
||||
astats *astats.Stats, ctx *ctx.Context, externalProcessors *process.ExternalProcessorsType, logDir string) *Router {
|
||||
return &Router{
|
||||
HTTP: httpConfig,
|
||||
Alert: alert,
|
||||
@@ -35,6 +36,7 @@ func New(httpConfig httpx.Config, alert aconf.Alert, amc *memsto.AlertMuteCacheT
|
||||
AlertStats: astats,
|
||||
Ctx: ctx,
|
||||
ExternalProcessors: externalProcessors,
|
||||
LogDir: logDir,
|
||||
}
|
||||
}
|
||||
|
||||
@@ -50,6 +52,9 @@ func (rt *Router) Config(r *gin.Engine) {
|
||||
service.POST("/event", rt.pushEventToQueue)
|
||||
service.POST("/event-persist", rt.eventPersist)
|
||||
service.POST("/make-event", rt.makeEvent)
|
||||
service.GET("/event-detail/:hash", rt.eventDetail)
|
||||
service.GET("/alert-eval-detail/:id", rt.alertEvalDetail)
|
||||
service.GET("/trace-logs/:traceid", rt.traceLogs)
|
||||
}
|
||||
|
||||
func Render(c *gin.Context, data, msg interface{}) {
|
||||
|
||||
28
alert/router/router_alert_eval_detail.go
Normal file
28
alert/router/router_alert_eval_detail.go
Normal file
@@ -0,0 +1,28 @@
|
||||
package router
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
|
||||
"github.com/ccfos/nightingale/v6/pkg/loggrep"
|
||||
"github.com/ccfos/nightingale/v6/pkg/ginx"
|
||||
|
||||
"github.com/gin-gonic/gin"
|
||||
)
|
||||
|
||||
func (rt *Router) alertEvalDetail(c *gin.Context) {
|
||||
id := ginx.UrlParamStr(c, "id")
|
||||
if !loggrep.IsValidRuleID(id) {
|
||||
ginx.Bomb(200, "invalid rule id format")
|
||||
}
|
||||
|
||||
instance := fmt.Sprintf("%s:%d", rt.Alert.Heartbeat.IP, rt.HTTP.Port)
|
||||
|
||||
keyword := fmt.Sprintf("alert_eval_%s", id)
|
||||
logs, err := loggrep.GrepLogDir(rt.LogDir, keyword)
|
||||
ginx.Dangerous(err)
|
||||
|
||||
ginx.NewRender(c).Data(loggrep.EventDetailResp{
|
||||
Logs: logs,
|
||||
Instance: instance,
|
||||
}, nil)
|
||||
}
|
||||
@@ -13,9 +13,9 @@ import (
|
||||
"github.com/ccfos/nightingale/v6/alert/queue"
|
||||
"github.com/ccfos/nightingale/v6/models"
|
||||
"github.com/ccfos/nightingale/v6/pkg/poster"
|
||||
"github.com/ccfos/nightingale/v6/pkg/ginx"
|
||||
|
||||
"github.com/gin-gonic/gin"
|
||||
"github.com/toolkits/pkg/ginx"
|
||||
"github.com/toolkits/pkg/logger"
|
||||
)
|
||||
|
||||
@@ -75,7 +75,7 @@ func (rt *Router) pushEventToQueue(c *gin.Context) {
|
||||
|
||||
dispatch.LogEvent(event, "http_push_queue")
|
||||
if !queue.EventQueue.PushFront(event) {
|
||||
msg := fmt.Sprintf("event:%+v push_queue err: queue is full", event)
|
||||
msg := fmt.Sprintf("event:%s push_queue err: queue is full", event.Hash)
|
||||
ginx.Bomb(200, msg)
|
||||
logger.Warningf(msg)
|
||||
}
|
||||
@@ -105,21 +105,21 @@ func (rt *Router) makeEvent(c *gin.Context) {
|
||||
for i := 0; i < len(events); i++ {
|
||||
node, err := naming.DatasourceHashRing.GetNode(strconv.FormatInt(events[i].DatasourceId, 10), fmt.Sprintf("%d", events[i].RuleId))
|
||||
if err != nil {
|
||||
logger.Warningf("event:%+v get node err:%v", events[i], err)
|
||||
logger.Warningf("event(rule_id=%d ds_id=%d) get node err:%v", events[i].RuleId, events[i].DatasourceId, err)
|
||||
ginx.Bomb(200, "event node not exists")
|
||||
}
|
||||
|
||||
if node != rt.Alert.Heartbeat.Endpoint {
|
||||
err := forwardEvent(events[i], node)
|
||||
if err != nil {
|
||||
logger.Warningf("event:%+v forward err:%v", events[i], err)
|
||||
logger.Warningf("event(rule_id=%d ds_id=%d) forward err:%v", events[i].RuleId, events[i].DatasourceId, err)
|
||||
ginx.Bomb(200, "event forward error")
|
||||
}
|
||||
continue
|
||||
}
|
||||
|
||||
ruleWorker, exists := rt.ExternalProcessors.GetExternalAlertRule(events[i].DatasourceId, events[i].RuleId)
|
||||
logger.Debugf("handle event:%+v exists:%v", events[i], exists)
|
||||
logger.Debugf("handle event(rule_id=%d ds_id=%d) exists:%v", events[i].RuleId, events[i].DatasourceId, exists)
|
||||
if !exists {
|
||||
ginx.Bomb(200, "rule not exists")
|
||||
}
|
||||
@@ -143,6 +143,6 @@ func forwardEvent(event *eventForm, instance string) error {
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
logger.Infof("forward event: result=succ url=%s code=%d event:%v response=%s", ur, code, event, string(res))
|
||||
logger.Infof("forward event: result=succ url=%s code=%d rule_id=%d response=%s", ur, code, event.RuleId, string(res))
|
||||
return nil
|
||||
}
|
||||
|
||||
27
alert/router/router_event_detail.go
Normal file
27
alert/router/router_event_detail.go
Normal file
@@ -0,0 +1,27 @@
|
||||
package router
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
|
||||
"github.com/ccfos/nightingale/v6/pkg/loggrep"
|
||||
"github.com/ccfos/nightingale/v6/pkg/ginx"
|
||||
|
||||
"github.com/gin-gonic/gin"
|
||||
)
|
||||
|
||||
func (rt *Router) eventDetail(c *gin.Context) {
|
||||
hash := ginx.UrlParamStr(c, "hash")
|
||||
if !loggrep.IsValidHash(hash) {
|
||||
ginx.Bomb(200, "invalid hash format")
|
||||
}
|
||||
|
||||
instance := fmt.Sprintf("%s:%d", rt.Alert.Heartbeat.IP, rt.HTTP.Port)
|
||||
|
||||
logs, err := loggrep.GrepLogDir(rt.LogDir, hash)
|
||||
ginx.Dangerous(err)
|
||||
|
||||
ginx.NewRender(c).Data(loggrep.EventDetailResp{
|
||||
Logs: logs,
|
||||
Instance: instance,
|
||||
}, nil)
|
||||
}
|
||||
28
alert/router/router_trace_logs.go
Normal file
28
alert/router/router_trace_logs.go
Normal file
@@ -0,0 +1,28 @@
|
||||
package router
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
|
||||
"github.com/ccfos/nightingale/v6/pkg/ginx"
|
||||
"github.com/ccfos/nightingale/v6/pkg/loggrep"
|
||||
|
||||
"github.com/gin-gonic/gin"
|
||||
)
|
||||
|
||||
func (rt *Router) traceLogs(c *gin.Context) {
|
||||
traceId := ginx.UrlParamStr(c, "traceid")
|
||||
if !loggrep.IsValidTraceID(traceId) {
|
||||
ginx.Bomb(200, "invalid trace id format")
|
||||
}
|
||||
|
||||
instance := fmt.Sprintf("%s:%d", rt.Alert.Heartbeat.IP, rt.HTTP.Port)
|
||||
|
||||
keyword := "trace_id=" + traceId
|
||||
logs, err := loggrep.GrepLatestLogFiles(rt.LogDir, keyword)
|
||||
ginx.Dangerous(err)
|
||||
|
||||
ginx.NewRender(c).Data(loggrep.EventDetailResp{
|
||||
Logs: logs,
|
||||
Instance: instance,
|
||||
}, nil)
|
||||
}
|
||||
@@ -135,9 +135,7 @@ func (c *DefaultCallBacker) CallBack(ctx CallBackContext) {
|
||||
|
||||
func doSendAndRecord(ctx *ctx.Context, url, token string, body interface{}, channel string,
|
||||
stats *astats.Stats, events []*models.AlertCurEvent) {
|
||||
start := time.Now()
|
||||
res, err := doSend(url, body, channel, stats)
|
||||
res = fmt.Sprintf("duration: %d ms %s", time.Since(start).Milliseconds(), res)
|
||||
NotifyRecord(ctx, events, 0, channel, token, res, err)
|
||||
}
|
||||
|
||||
@@ -171,11 +169,11 @@ func doSend(url string, body interface{}, channel string, stats *astats.Stats) (
|
||||
|
||||
start := time.Now()
|
||||
res, code, err := poster.PostJSON(url, time.Second*5, body, 3)
|
||||
res = []byte(fmt.Sprintf("duration: %d ms %s", time.Since(start).Milliseconds(), res))
|
||||
res = []byte(fmt.Sprintf("duration: %d ms status_code:%d, response:%s", time.Since(start).Milliseconds(), code, string(res)))
|
||||
if err != nil {
|
||||
logger.Errorf("%s_sender: result=fail url=%s code=%d error=%v req:%v response=%s", channel, url, code, err, body, string(res))
|
||||
stats.AlertNotifyErrorTotal.WithLabelValues(channel).Inc()
|
||||
return "", err
|
||||
return string(res), err
|
||||
}
|
||||
|
||||
logger.Infof("%s_sender: result=succ url=%s code=%d req:%v response=%s", channel, url, code, body, string(res))
|
||||
@@ -207,6 +205,6 @@ func PushCallbackEvent(ctx *ctx.Context, webhook *models.Webhook, event *models.
|
||||
|
||||
succ := queue.eventQueue.Push(event)
|
||||
if !succ {
|
||||
logger.Warningf("Write channel(%s) full, current channel size: %d event:%v", webhook.Url, queue.eventQueue.Len(), event)
|
||||
logger.Warningf("Write channel(%s) full, current channel size: %d event:%s", webhook.Url, queue.eventQueue.Len(), event.Hash)
|
||||
}
|
||||
}
|
||||
|
||||
@@ -30,14 +30,14 @@ type IbexCallBacker struct {
|
||||
|
||||
func (c *IbexCallBacker) CallBack(ctx CallBackContext) {
|
||||
if len(ctx.CallBackURL) == 0 || len(ctx.Events) == 0 {
|
||||
logger.Warningf("event_callback_ibex: url or events is empty, url: %s, events: %+v", ctx.CallBackURL, ctx.Events)
|
||||
logger.Warningf("event_callback_ibex: url or events is empty, url: %s", ctx.CallBackURL)
|
||||
return
|
||||
}
|
||||
|
||||
event := ctx.Events[0]
|
||||
|
||||
if event.IsRecovered {
|
||||
logger.Infof("event_callback_ibex: event is recovered, event: %+v", event)
|
||||
logger.Infof("event_callback_ibex: event is recovered, event: %s", event.Hash)
|
||||
return
|
||||
}
|
||||
|
||||
@@ -45,9 +45,9 @@ func (c *IbexCallBacker) CallBack(ctx CallBackContext) {
|
||||
}
|
||||
|
||||
func (c *IbexCallBacker) handleIbex(ctx *ctx.Context, url string, event *models.AlertCurEvent) {
|
||||
logger.Infof("event_callback_ibex: url: %s, event: %+v", url, event)
|
||||
logger.Infof("event_callback_ibex: url: %s, event: %s", url, event.Hash)
|
||||
if imodels.DB() == nil && ctx.IsCenter {
|
||||
logger.Warningf("event_callback_ibex: db is nil, event: %+v", event)
|
||||
logger.Warningf("event_callback_ibex: db is nil, event: %s", event.Hash)
|
||||
return
|
||||
}
|
||||
|
||||
@@ -66,7 +66,7 @@ func (c *IbexCallBacker) handleIbex(ctx *ctx.Context, url string, event *models.
|
||||
|
||||
id, err := strconv.ParseInt(idstr, 10, 64)
|
||||
if err != nil {
|
||||
logger.Errorf("event_callback_ibex: failed to parse url: %s event: %+v", url, event)
|
||||
logger.Errorf("event_callback_ibex: failed to parse url: %s event: %s", url, event.Hash)
|
||||
return
|
||||
}
|
||||
|
||||
@@ -82,34 +82,37 @@ func (c *IbexCallBacker) handleIbex(ctx *ctx.Context, url string, event *models.
|
||||
}
|
||||
|
||||
if host == "" {
|
||||
logger.Errorf("event_callback_ibex: failed to get host, id: %d, event: %+v", id, event)
|
||||
logger.Errorf("event_callback_ibex: failed to get host, id: %d, event: %s", id, event.Hash)
|
||||
return
|
||||
}
|
||||
|
||||
CallIbex(ctx, id, host, c.taskTplCache, c.targetCache, c.userCache, event)
|
||||
CallIbex(ctx, id, host, c.taskTplCache, c.targetCache, c.userCache, event, "")
|
||||
}
|
||||
|
||||
func CallIbex(ctx *ctx.Context, id int64, host string,
|
||||
taskTplCache *memsto.TaskTplCache, targetCache *memsto.TargetCacheType,
|
||||
userCache *memsto.UserCacheType, event *models.AlertCurEvent) {
|
||||
logger.Infof("event_callback_ibex: id: %d, host: %s, event: %+v", id, host, event)
|
||||
userCache *memsto.UserCacheType, event *models.AlertCurEvent, args string) (int64, error) {
|
||||
logger.Infof("event_callback_ibex: id: %d, host: %s, args: %s, event: %s", id, host, args, event.Hash)
|
||||
|
||||
tpl := taskTplCache.Get(id)
|
||||
if tpl == nil {
|
||||
logger.Errorf("event_callback_ibex: no such tpl(%d), event: %+v", id, event)
|
||||
return
|
||||
err := fmt.Errorf("event_callback_ibex: no such tpl(%d), event: %s", id, event.Hash)
|
||||
logger.Errorf("%s", err)
|
||||
return 0, err
|
||||
}
|
||||
// check perm
|
||||
// tpl.GroupId - host - account 三元组校验权限
|
||||
can, err := canDoIbex(tpl.UpdateBy, tpl, host, targetCache, userCache)
|
||||
can, err := CanDoIbex(tpl.UpdateBy, tpl, host, targetCache, userCache)
|
||||
if err != nil {
|
||||
logger.Errorf("event_callback_ibex: check perm fail: %v, event: %+v", err, event)
|
||||
return
|
||||
err = fmt.Errorf("event_callback_ibex: check perm fail: %v, event: %s", err, event.Hash)
|
||||
logger.Errorf("%s", err)
|
||||
return 0, err
|
||||
}
|
||||
|
||||
if !can {
|
||||
logger.Errorf("event_callback_ibex: user(%s) no permission, event: %+v", tpl.UpdateBy, event)
|
||||
return
|
||||
err = fmt.Errorf("event_callback_ibex: user(%s) no permission, event: %s", tpl.UpdateBy, event.Hash)
|
||||
logger.Errorf("%s", err)
|
||||
return 0, err
|
||||
}
|
||||
|
||||
tagsMap := make(map[string]string)
|
||||
@@ -133,11 +136,16 @@ func CallIbex(ctx *ctx.Context, id int64, host string,
|
||||
|
||||
tags, err := json.Marshal(tagsMap)
|
||||
if err != nil {
|
||||
logger.Errorf("event_callback_ibex: failed to marshal tags to json: %v, event: %+v", tagsMap, event)
|
||||
return
|
||||
err = fmt.Errorf("event_callback_ibex: failed to marshal tags to json: %v, event: %s", tagsMap, event.Hash)
|
||||
logger.Errorf("%s", err)
|
||||
return 0, err
|
||||
}
|
||||
|
||||
// call ibex
|
||||
taskArgs := tpl.Args
|
||||
if args != "" {
|
||||
taskArgs = args
|
||||
}
|
||||
in := models.TaskForm{
|
||||
Title: tpl.Title + " FH: " + host,
|
||||
Account: tpl.Account,
|
||||
@@ -146,7 +154,7 @@ func CallIbex(ctx *ctx.Context, id int64, host string,
|
||||
Timeout: tpl.Timeout,
|
||||
Pause: tpl.Pause,
|
||||
Script: tpl.Script,
|
||||
Args: tpl.Args,
|
||||
Args: taskArgs,
|
||||
Stdin: string(tags),
|
||||
Action: "start",
|
||||
Creator: tpl.UpdateBy,
|
||||
@@ -156,8 +164,9 @@ func CallIbex(ctx *ctx.Context, id int64, host string,
|
||||
|
||||
id, err = TaskAdd(in, tpl.UpdateBy, ctx.IsCenter)
|
||||
if err != nil {
|
||||
logger.Errorf("event_callback_ibex: call ibex fail: %v, event: %+v", err, event)
|
||||
return
|
||||
err = fmt.Errorf("event_callback_ibex: call ibex fail: %v, event: %s", err, event.Hash)
|
||||
logger.Errorf("%s", err)
|
||||
return 0, err
|
||||
}
|
||||
|
||||
// write db
|
||||
@@ -178,11 +187,14 @@ func CallIbex(ctx *ctx.Context, id int64, host string,
|
||||
}
|
||||
|
||||
if err = record.Add(ctx); err != nil {
|
||||
logger.Errorf("event_callback_ibex: persist task_record fail: %v, event: %+v", err, event)
|
||||
err = fmt.Errorf("event_callback_ibex: persist task_record fail: %v, event: %s", err, event.Hash)
|
||||
logger.Errorf("%s", err)
|
||||
return id, err
|
||||
}
|
||||
return id, nil
|
||||
}
|
||||
|
||||
func canDoIbex(username string, tpl *models.TaskTpl, host string, targetCache *memsto.TargetCacheType, userCache *memsto.UserCacheType) (bool, error) {
|
||||
func CanDoIbex(username string, tpl *models.TaskTpl, host string, targetCache *memsto.TargetCacheType, userCache *memsto.UserCacheType) (bool, error) {
|
||||
user := userCache.GetByUsername(username)
|
||||
if user != nil && user.IsAdmin() {
|
||||
return true, nil
|
||||
|
||||
@@ -89,7 +89,7 @@ func alertingCallScript(ctx *ctx.Context, stdinBytes []byte, notifyScript models
|
||||
err, isTimeout := sys.WrapTimeout(cmd, time.Duration(config.Timeout)*time.Second)
|
||||
|
||||
res := buf.String()
|
||||
res = fmt.Sprintf("duration: %d ms %s", time.Since(start).Milliseconds(), res)
|
||||
res = fmt.Sprintf("send_time: %s duration: %d ms %s", time.Now().Format("2006-01-02 15:04:05"), time.Since(start).Milliseconds(), res)
|
||||
|
||||
// 截断超出长度的输出
|
||||
if len(res) > 512 {
|
||||
|
||||
@@ -13,10 +13,53 @@ import (
|
||||
"github.com/ccfos/nightingale/v6/alert/astats"
|
||||
"github.com/ccfos/nightingale/v6/models"
|
||||
"github.com/ccfos/nightingale/v6/pkg/ctx"
|
||||
"github.com/ccfos/nightingale/v6/pkg/poster"
|
||||
|
||||
"github.com/toolkits/pkg/logger"
|
||||
)
|
||||
|
||||
// webhookClientCache 缓存 http.Client,避免每次请求都创建新的 Client 导致连接泄露
|
||||
var webhookClientCache sync.Map // key: clientKey (string), value: *http.Client
|
||||
|
||||
// 相同配置的 webhook 会复用同一个 Client
|
||||
func getWebhookClient(webhook *models.Webhook) *http.Client {
|
||||
clientKey := webhook.Hash()
|
||||
|
||||
if client, ok := webhookClientCache.Load(clientKey); ok {
|
||||
return client.(*http.Client)
|
||||
}
|
||||
|
||||
// 创建新的 Client
|
||||
transport := &http.Transport{
|
||||
TLSClientConfig: &tls.Config{InsecureSkipVerify: webhook.SkipVerify},
|
||||
MaxIdleConns: 100,
|
||||
MaxIdleConnsPerHost: 10,
|
||||
IdleConnTimeout: 90 * time.Second,
|
||||
}
|
||||
|
||||
if poster.UseProxy(webhook.Url) {
|
||||
transport.Proxy = http.ProxyFromEnvironment
|
||||
}
|
||||
|
||||
timeout := webhook.Timeout
|
||||
if timeout <= 0 {
|
||||
timeout = 10
|
||||
}
|
||||
|
||||
newClient := &http.Client{
|
||||
Timeout: time.Duration(timeout) * time.Second,
|
||||
Transport: transport,
|
||||
}
|
||||
|
||||
// 使用 LoadOrStore 确保并发安全,避免重复创建
|
||||
actual, loaded := webhookClientCache.LoadOrStore(clientKey, newClient)
|
||||
if loaded {
|
||||
return actual.(*http.Client)
|
||||
}
|
||||
|
||||
return newClient
|
||||
}
|
||||
|
||||
func sendWebhook(webhook *models.Webhook, event interface{}, stats *astats.Stats) (bool, string, error) {
|
||||
channel := "webhook"
|
||||
if webhook.Type == models.RuleCallback {
|
||||
@@ -29,7 +72,7 @@ func sendWebhook(webhook *models.Webhook, event interface{}, stats *astats.Stats
|
||||
}
|
||||
bs, err := json.Marshal(event)
|
||||
if err != nil {
|
||||
logger.Errorf("%s alertingWebhook failed to marshal event:%+v err:%v", channel, event, err)
|
||||
logger.Errorf("%s alertingWebhook failed to marshal event err:%v", channel, err)
|
||||
return false, "", err
|
||||
}
|
||||
|
||||
@@ -55,25 +98,13 @@ func sendWebhook(webhook *models.Webhook, event interface{}, stats *astats.Stats
|
||||
req.Header.Set(conf.Headers[i], conf.Headers[i+1])
|
||||
}
|
||||
}
|
||||
insecureSkipVerify := false
|
||||
if webhook != nil {
|
||||
insecureSkipVerify = webhook.SkipVerify
|
||||
}
|
||||
|
||||
if conf.Client == nil {
|
||||
logger.Warningf("event_%s, event:%s, url: [%s], error: [%s]", channel, string(bs), conf.Url, "client is nil")
|
||||
conf.Client = &http.Client{
|
||||
Timeout: time.Duration(conf.Timeout) * time.Second,
|
||||
Transport: &http.Transport{
|
||||
TLSClientConfig: &tls.Config{InsecureSkipVerify: insecureSkipVerify},
|
||||
},
|
||||
}
|
||||
}
|
||||
// 使用全局 Client 缓存,避免每次请求都创建新的 Client 导致连接泄露
|
||||
client := getWebhookClient(conf)
|
||||
|
||||
stats.AlertNotifyTotal.WithLabelValues(channel).Inc()
|
||||
var resp *http.Response
|
||||
var body []byte
|
||||
resp, err = conf.Client.Do(req)
|
||||
resp, err = client.Do(req)
|
||||
|
||||
if err != nil {
|
||||
stats.AlertNotifyErrorTotal.WithLabelValues(channel).Inc()
|
||||
@@ -88,11 +119,11 @@ func sendWebhook(webhook *models.Webhook, event interface{}, stats *astats.Stats
|
||||
|
||||
if resp.StatusCode == 429 {
|
||||
logger.Errorf("event_%s_fail, url: %s, response code: %d, body: %s event:%s", channel, conf.Url, resp.StatusCode, string(body), string(bs))
|
||||
return true, string(body), fmt.Errorf("status code is 429")
|
||||
return true, fmt.Sprintf("status_code:%d, response:%s", resp.StatusCode, string(body)), fmt.Errorf("status code is 429")
|
||||
}
|
||||
|
||||
logger.Debugf("event_%s_succ, url: %s, response code: %d, body: %s event:%s", channel, conf.Url, resp.StatusCode, string(body), string(bs))
|
||||
return false, string(body), nil
|
||||
return false, fmt.Sprintf("status_code:%d, response:%s", resp.StatusCode, string(body)), nil
|
||||
}
|
||||
|
||||
func SingleSendWebhooks(ctx *ctx.Context, webhooks map[string]*models.Webhook, event *models.AlertCurEvent, stats *astats.Stats) {
|
||||
@@ -101,7 +132,7 @@ func SingleSendWebhooks(ctx *ctx.Context, webhooks map[string]*models.Webhook, e
|
||||
for retryCount < 3 {
|
||||
start := time.Now()
|
||||
needRetry, res, err := sendWebhook(conf, event, stats)
|
||||
res = fmt.Sprintf("duration: %d ms %s", time.Since(start).Milliseconds(), res)
|
||||
res = fmt.Sprintf("send_time: %s duration: %d ms %s", time.Now().Format("2006-01-02 15:04:05"), time.Since(start).Milliseconds(), res)
|
||||
NotifyRecord(ctx, []*models.AlertCurEvent{event}, 0, "webhook", conf.Url, res, err)
|
||||
if !needRetry {
|
||||
break
|
||||
@@ -114,7 +145,7 @@ func SingleSendWebhooks(ctx *ctx.Context, webhooks map[string]*models.Webhook, e
|
||||
|
||||
func BatchSendWebhooks(ctx *ctx.Context, webhooks map[string]*models.Webhook, event *models.AlertCurEvent, stats *astats.Stats) {
|
||||
for _, conf := range webhooks {
|
||||
logger.Infof("push event:%+v to queue:%v", event, conf)
|
||||
logger.Infof("push event:%s to queue:%v", event.Hash, conf)
|
||||
PushEvent(ctx, conf, event, stats)
|
||||
}
|
||||
}
|
||||
@@ -152,7 +183,7 @@ func PushEvent(ctx *ctx.Context, webhook *models.Webhook, event *models.AlertCur
|
||||
succ := queue.eventQueue.Push(event)
|
||||
if !succ {
|
||||
stats.AlertNotifyErrorTotal.WithLabelValues("push_event_queue").Inc()
|
||||
logger.Warningf("Write channel(%s) full, current channel size: %d event:%v", webhook.Url, queue.eventQueue.Len(), event)
|
||||
logger.Warningf("Write channel(%s) full, current channel size: %d event:%s", webhook.Url, queue.eventQueue.Len(), event.Hash)
|
||||
}
|
||||
}
|
||||
|
||||
@@ -173,7 +204,7 @@ func StartConsumer(ctx *ctx.Context, queue *WebhookQueue, popSize int, webhook *
|
||||
for retryCount < webhook.RetryCount {
|
||||
start := time.Now()
|
||||
needRetry, res, err := sendWebhook(webhook, events, stats)
|
||||
res = fmt.Sprintf("duration: %d ms %s", time.Since(start).Milliseconds(), res)
|
||||
res = fmt.Sprintf("send_time: %s duration: %d ms %s", time.Now().Format("2006-01-02 15:04:05"), time.Since(start).Milliseconds(), res)
|
||||
go NotifyRecord(ctx, events, 0, "webhook", webhook.Url, res, err)
|
||||
if !needRetry {
|
||||
break
|
||||
|
||||
@@ -7,19 +7,20 @@ import (
|
||||
)
|
||||
|
||||
type Center struct {
|
||||
Plugins []Plugin
|
||||
MetricsYamlFile string
|
||||
OpsYamlFile string
|
||||
BuiltinIntegrationsDir string
|
||||
I18NHeaderKey string
|
||||
MetricDesc MetricDescType
|
||||
AnonymousAccess AnonymousAccess
|
||||
UseFileAssets bool
|
||||
FlashDuty FlashDuty
|
||||
EventHistoryGroupView bool
|
||||
CleanNotifyRecordDay int
|
||||
MigrateBusiGroupLabel bool
|
||||
RSA httpx.RSAConfig
|
||||
Plugins []Plugin
|
||||
MetricsYamlFile string
|
||||
OpsYamlFile string
|
||||
BuiltinIntegrationsDir string
|
||||
I18NHeaderKey string
|
||||
MetricDesc MetricDescType
|
||||
AnonymousAccess AnonymousAccess
|
||||
UseFileAssets bool
|
||||
FlashDuty FlashDuty
|
||||
EventHistoryGroupView bool
|
||||
CleanNotifyRecordDay int
|
||||
CleanPipelineExecutionDay int
|
||||
MigrateBusiGroupLabel bool
|
||||
RSA httpx.RSAConfig
|
||||
}
|
||||
|
||||
type Plugin struct {
|
||||
|
||||
@@ -134,11 +134,12 @@ func Initialize(configDir string, cryptoKey string) (func(), error) {
|
||||
go version.GetGithubVersion()
|
||||
|
||||
go cron.CleanNotifyRecord(ctx, config.Center.CleanNotifyRecordDay)
|
||||
go cron.CleanPipelineExecution(ctx, config.Center.CleanPipelineExecutionDay)
|
||||
|
||||
alertrtRouter := alertrt.New(config.HTTP, config.Alert, alertMuteCache, targetCache, busiGroupCache, alertStats, ctx, externalProcessors)
|
||||
alertrtRouter := alertrt.New(config.HTTP, config.Alert, alertMuteCache, targetCache, busiGroupCache, alertStats, ctx, externalProcessors, config.Log.Dir)
|
||||
centerRouter := centerrt.New(config.HTTP, config.Center, config.Alert, config.Ibex,
|
||||
cconf.Operations, dsCache, notifyConfigCache, promClients,
|
||||
redis, sso, ctx, metas, idents, targetCache, userCache, userGroupCache, userTokenCache)
|
||||
redis, sso, ctx, metas, idents, targetCache, userCache, userGroupCache, userTokenCache, config.Log.Dir)
|
||||
pushgwRouter := pushgwrt.New(config.HTTP, config.Pushgw, config.Alert, targetCache, busiGroupCache, idents, metas, writers, ctx)
|
||||
|
||||
r := httpx.GinEngine(config.Global.RunMode, config.HTTP, configCvalCache.PrintBodyPaths, configCvalCache.PrintAccessLog)
|
||||
|
||||
@@ -271,10 +271,8 @@ func Init(ctx *ctx.Context, builtinIntegrationsDir string) {
|
||||
}
|
||||
|
||||
for _, metric := range metrics {
|
||||
if metric.UUID == 0 {
|
||||
time.Sleep(time.Microsecond)
|
||||
metric.UUID = time.Now().UnixMicro()
|
||||
}
|
||||
time.Sleep(time.Microsecond)
|
||||
metric.UUID = time.Now().UnixMicro()
|
||||
metric.ID = metric.UUID
|
||||
metric.CreatedBy = SYSTEM
|
||||
metric.UpdatedBy = SYSTEM
|
||||
|
||||
@@ -118,7 +118,7 @@ func (s *Set) updateTargets(m map[string]models.HostMeta) error {
|
||||
}
|
||||
|
||||
start := time.Now()
|
||||
err := storage.MSet(context.Background(), s.redis, newMap)
|
||||
err := storage.MSet(context.Background(), s.redis, newMap, 7*24*time.Hour)
|
||||
if err != nil {
|
||||
cstats.RedisOperationLatency.WithLabelValues("mset_target_meta", "fail").Observe(time.Since(start).Seconds())
|
||||
return err
|
||||
@@ -127,7 +127,7 @@ func (s *Set) updateTargets(m map[string]models.HostMeta) error {
|
||||
}
|
||||
|
||||
if len(extendMap) > 0 {
|
||||
err = storage.MSet(context.Background(), s.redis, extendMap)
|
||||
err = storage.MSet(context.Background(), s.redis, extendMap, 7*24*time.Hour)
|
||||
if err != nil {
|
||||
cstats.RedisOperationLatency.WithLabelValues("mset_target_extend", "fail").Observe(time.Since(start).Seconds())
|
||||
return err
|
||||
|
||||
@@ -24,11 +24,11 @@ import (
|
||||
"github.com/ccfos/nightingale/v6/prom"
|
||||
"github.com/ccfos/nightingale/v6/pushgw/idents"
|
||||
"github.com/ccfos/nightingale/v6/storage"
|
||||
"github.com/ccfos/nightingale/v6/pkg/ginx"
|
||||
"gorm.io/gorm"
|
||||
|
||||
"github.com/gin-gonic/gin"
|
||||
"github.com/rakyll/statik/fs"
|
||||
"github.com/toolkits/pkg/ginx"
|
||||
"github.com/toolkits/pkg/logger"
|
||||
"github.com/toolkits/pkg/runner"
|
||||
)
|
||||
@@ -51,6 +51,7 @@ type Router struct {
|
||||
UserGroupCache *memsto.UserGroupCacheType
|
||||
UserTokenCache *memsto.UserTokenCacheType
|
||||
Ctx *ctx.Context
|
||||
LogDir string
|
||||
|
||||
HeartbeatHook HeartbeatHookFunc
|
||||
TargetDeleteHook models.TargetDeleteHookFunc
|
||||
@@ -61,7 +62,7 @@ func New(httpConfig httpx.Config, center cconf.Center, alert aconf.Alert, ibex c
|
||||
operations cconf.Operation, ds *memsto.DatasourceCacheType, ncc *memsto.NotifyConfigCacheType,
|
||||
pc *prom.PromClientMap, redis storage.Redis,
|
||||
sso *sso.SsoClient, ctx *ctx.Context, metaSet *metas.Set, idents *idents.Set,
|
||||
tc *memsto.TargetCacheType, uc *memsto.UserCacheType, ugc *memsto.UserGroupCacheType, utc *memsto.UserTokenCacheType) *Router {
|
||||
tc *memsto.TargetCacheType, uc *memsto.UserCacheType, ugc *memsto.UserGroupCacheType, utc *memsto.UserTokenCacheType, logDir string) *Router {
|
||||
return &Router{
|
||||
HTTP: httpConfig,
|
||||
Center: center,
|
||||
@@ -80,6 +81,7 @@ func New(httpConfig httpx.Config, center cconf.Center, alert aconf.Alert, ibex c
|
||||
UserGroupCache: ugc,
|
||||
UserTokenCache: utc,
|
||||
Ctx: ctx,
|
||||
LogDir: logDir,
|
||||
HeartbeatHook: func(ident string) map[string]interface{} { return nil },
|
||||
TargetDeleteHook: func(tx *gorm.DB, idents []string) error { return nil },
|
||||
AlertRuleModifyHook: func(ar *models.AlertRule) {},
|
||||
@@ -211,8 +213,8 @@ func (rt *Router) Config(r *gin.Engine) {
|
||||
pages.GET("/datasource/brief", rt.auth(), rt.user(), rt.datasourceBriefs)
|
||||
pages.POST("/datasource/query", rt.auth(), rt.user(), rt.datasourceQuery)
|
||||
|
||||
pages.POST("/ds-query", rt.auth(), rt.QueryData)
|
||||
pages.POST("/logs-query", rt.auth(), rt.QueryLogV2)
|
||||
pages.POST("/ds-query", rt.auth(), rt.user(), rt.QueryData)
|
||||
pages.POST("/logs-query", rt.auth(), rt.user(), rt.QueryLogV2)
|
||||
|
||||
pages.POST("/tdengine-databases", rt.auth(), rt.tdengineDatabases)
|
||||
pages.POST("/tdengine-tables", rt.auth(), rt.tdengineTables)
|
||||
@@ -251,10 +253,12 @@ func (rt *Router) Config(r *gin.Engine) {
|
||||
pages.GET("/auth/redirect/cas", rt.loginRedirectCas)
|
||||
pages.GET("/auth/redirect/oauth", rt.loginRedirectOAuth)
|
||||
pages.GET("/auth/redirect/dingtalk", rt.loginRedirectDingTalk)
|
||||
pages.GET("/auth/redirect/feishu", rt.loginRedirectFeiShu)
|
||||
pages.GET("/auth/callback", rt.loginCallback)
|
||||
pages.GET("/auth/callback/cas", rt.loginCallbackCas)
|
||||
pages.GET("/auth/callback/oauth", rt.loginCallbackOAuth)
|
||||
pages.GET("/auth/callback/dingtalk", rt.loginCallbackDingTalk)
|
||||
pages.GET("/auth/callback/feishu", rt.loginCallbackFeiShu)
|
||||
pages.GET("/auth/perms", rt.allPerms)
|
||||
|
||||
pages.GET("/metrics/desc", rt.metricsDescGetFile)
|
||||
@@ -366,6 +370,7 @@ func (rt *Router) Config(r *gin.Engine) {
|
||||
// pages.GET("/alert-rules/builtin/alerts-cates", rt.auth(), rt.user(), rt.builtinAlertCateGets)
|
||||
// pages.GET("/alert-rules/builtin/list", rt.auth(), rt.user(), rt.builtinAlertRules)
|
||||
pages.GET("/alert-rules/callbacks", rt.auth(), rt.user(), rt.alertRuleCallbacks)
|
||||
pages.GET("/timezones", rt.auth(), rt.user(), rt.timezonesGet)
|
||||
|
||||
pages.GET("/busi-groups/alert-rules", rt.auth(), rt.user(), rt.perm("/alert-rules"), rt.alertRuleGetsByGids)
|
||||
pages.GET("/busi-group/:id/alert-rules", rt.auth(), rt.user(), rt.perm("/alert-rules"), rt.alertRuleGets)
|
||||
@@ -389,8 +394,8 @@ func (rt *Router) Config(r *gin.Engine) {
|
||||
pages.GET("/busi-group/:id/recording-rules", rt.auth(), rt.user(), rt.perm("/recording-rules"), rt.recordingRuleGets)
|
||||
pages.POST("/busi-group/:id/recording-rules", rt.auth(), rt.user(), rt.perm("/recording-rules/add"), rt.bgrw(), rt.recordingRuleAddByFE)
|
||||
pages.DELETE("/busi-group/:id/recording-rules", rt.auth(), rt.user(), rt.perm("/recording-rules/del"), rt.bgrw(), rt.recordingRuleDel)
|
||||
pages.PUT("/busi-group/:id/recording-rule/:rrid", rt.auth(), rt.user(), rt.perm("/recording-rules/put"), rt.bgrw(), rt.recordingRulePutByFE)
|
||||
pages.GET("/recording-rule/:rrid", rt.auth(), rt.user(), rt.perm("/recording-rules"), rt.recordingRuleGet)
|
||||
pages.PUT("/recording-rule/:rrid", rt.auth(), rt.user(), rt.perm("/recording-rules"), rt.recordingRulePutByFE)
|
||||
pages.PUT("/busi-group/:id/recording-rules/fields", rt.auth(), rt.user(), rt.perm("/recording-rules/put"), rt.recordingRulePutFields)
|
||||
|
||||
pages.GET("/busi-groups/alert-mutes", rt.auth(), rt.user(), rt.perm("/alert-mutes"), rt.alertMuteGetsByGids)
|
||||
@@ -414,6 +419,9 @@ func (rt *Router) Config(r *gin.Engine) {
|
||||
pages.GET("/alert-cur-event/:eid", rt.alertCurEventGet)
|
||||
pages.GET("/alert-his-event/:eid", rt.alertHisEventGet)
|
||||
pages.GET("/event-notify-records/:eid", rt.notificationRecordList)
|
||||
pages.GET("/event-detail/:hash", rt.eventDetailPage)
|
||||
pages.GET("/alert-eval-detail/:id", rt.alertEvalDetailPage)
|
||||
pages.GET("/trace-logs/:traceid", rt.traceLogsPage)
|
||||
|
||||
// card logic
|
||||
pages.GET("/alert-cur-events/list", rt.auth(), rt.user(), rt.alertCurEventsList)
|
||||
@@ -558,6 +566,19 @@ func (rt *Router) Config(r *gin.Engine) {
|
||||
pages.POST("/event-pipeline-tryrun", rt.auth(), rt.user(), rt.perm("/event-pipelines"), rt.tryRunEventPipeline)
|
||||
pages.POST("/event-processor-tryrun", rt.auth(), rt.user(), rt.perm("/event-pipelines"), rt.tryRunEventProcessor)
|
||||
|
||||
// API 触发工作流
|
||||
pages.POST("/event-pipeline/:id/trigger", rt.auth(), rt.user(), rt.perm("/event-pipelines"), rt.triggerEventPipelineByAPI)
|
||||
// SSE 流式执行工作流
|
||||
pages.POST("/event-pipeline/:id/stream", rt.auth(), rt.user(), rt.perm("/event-pipelines"), rt.streamEventPipeline)
|
||||
|
||||
// 事件Pipeline执行记录路由
|
||||
pages.GET("/event-pipeline-executions", rt.auth(), rt.user(), rt.perm("/event-pipelines"), rt.listAllEventPipelineExecutions)
|
||||
pages.GET("/event-pipeline/:id/executions", rt.auth(), rt.user(), rt.perm("/event-pipelines"), rt.listEventPipelineExecutions)
|
||||
pages.GET("/event-pipeline/:id/execution/:exec_id", rt.auth(), rt.user(), rt.perm("/event-pipelines"), rt.getEventPipelineExecution)
|
||||
pages.GET("/event-pipeline-execution/:exec_id", rt.auth(), rt.user(), rt.perm("/event-pipelines"), rt.getEventPipelineExecution)
|
||||
pages.GET("/event-pipeline/:id/execution-stats", rt.auth(), rt.user(), rt.perm("/event-pipelines"), rt.getEventPipelineExecutionStats)
|
||||
pages.POST("/event-pipeline-executions/clean", rt.auth(), rt.user(), rt.admin(), rt.cleanEventPipelineExecutions)
|
||||
|
||||
pages.POST("/notify-channel-configs", rt.auth(), rt.user(), rt.perm("/notification-channels/add"), rt.notifyChannelsAdd)
|
||||
pages.DELETE("/notify-channel-configs", rt.auth(), rt.user(), rt.perm("/notification-channels/del"), rt.notifyChannelsDel)
|
||||
pages.PUT("/notify-channel-config/:id", rt.auth(), rt.user(), rt.perm("/notification-channels/put"), rt.notifyChannelPut)
|
||||
@@ -569,6 +590,14 @@ func (rt *Router) Config(r *gin.Engine) {
|
||||
pages.GET("/pagerduty-service-list/:id", rt.auth(), rt.user(), rt.pagerDutyNotifyServicesGet)
|
||||
pages.GET("/notify-channel-config", rt.auth(), rt.user(), rt.notifyChannelGetBy)
|
||||
pages.GET("/notify-channel-config/idents", rt.notifyChannelIdentsGet)
|
||||
|
||||
// saved view 查询条件保存相关路由
|
||||
pages.GET("/saved-views", rt.auth(), rt.user(), rt.savedViewGets)
|
||||
pages.POST("/saved-views", rt.auth(), rt.user(), rt.savedViewAdd)
|
||||
pages.PUT("/saved-view/:id", rt.auth(), rt.user(), rt.savedViewPut)
|
||||
pages.DELETE("/saved-view/:id", rt.auth(), rt.user(), rt.savedViewDel)
|
||||
pages.POST("/saved-view/:id/favorite", rt.auth(), rt.user(), rt.savedViewFavoriteAdd)
|
||||
pages.DELETE("/saved-view/:id/favorite", rt.auth(), rt.user(), rt.savedViewFavoriteDel)
|
||||
}
|
||||
|
||||
r.GET("/api/n9e/versions", func(c *gin.Context) {
|
||||
@@ -682,6 +711,9 @@ func (rt *Router) Config(r *gin.Engine) {
|
||||
service.GET("/message-templates", rt.messageTemplateGets)
|
||||
|
||||
service.GET("/event-pipelines", rt.eventPipelinesListByService)
|
||||
service.POST("/event-pipeline/:id/trigger", rt.triggerEventPipelineByService)
|
||||
service.POST("/event-pipeline/:id/stream", rt.streamEventPipelineByService)
|
||||
service.POST("/event-pipeline-execution", rt.eventPipelineExecutionAdd)
|
||||
|
||||
// 手机号加密存储配置接口
|
||||
service.POST("/users/phone/encrypt", rt.usersPhoneEncrypt)
|
||||
|
||||
@@ -4,9 +4,9 @@ import (
|
||||
"net/http"
|
||||
|
||||
"github.com/ccfos/nightingale/v6/models"
|
||||
"github.com/ccfos/nightingale/v6/pkg/ginx"
|
||||
|
||||
"github.com/gin-gonic/gin"
|
||||
"github.com/toolkits/pkg/ginx"
|
||||
)
|
||||
|
||||
// no param
|
||||
|
||||
@@ -10,9 +10,9 @@ import (
|
||||
"github.com/ccfos/nightingale/v6/models"
|
||||
"github.com/ccfos/nightingale/v6/pkg/ctx"
|
||||
"github.com/ccfos/nightingale/v6/pkg/strx"
|
||||
"github.com/ccfos/nightingale/v6/pkg/ginx"
|
||||
|
||||
"github.com/gin-gonic/gin"
|
||||
"github.com/toolkits/pkg/ginx"
|
||||
"github.com/toolkits/pkg/logger"
|
||||
)
|
||||
|
||||
|
||||
168
center/router/router_alert_eval_detail.go
Normal file
168
center/router/router_alert_eval_detail.go
Normal file
@@ -0,0 +1,168 @@
|
||||
package router
|
||||
|
||||
import (
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"io"
|
||||
"net/http"
|
||||
"sort"
|
||||
"strconv"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
"github.com/ccfos/nightingale/v6/models"
|
||||
"github.com/ccfos/nightingale/v6/pkg/loggrep"
|
||||
"github.com/ccfos/nightingale/v6/pkg/ginx"
|
||||
|
||||
"github.com/gin-gonic/gin"
|
||||
)
|
||||
|
||||
// alertEvalDetailPage renders an HTML log viewer page for alert rule evaluation logs.
|
||||
func (rt *Router) alertEvalDetailPage(c *gin.Context) {
|
||||
id := ginx.UrlParamStr(c, "id")
|
||||
if !loggrep.IsValidRuleID(id) {
|
||||
c.String(http.StatusBadRequest, "invalid rule id format")
|
||||
return
|
||||
}
|
||||
|
||||
logs, instance, err := rt.getAlertEvalLogs(id)
|
||||
if err != nil {
|
||||
c.String(http.StatusInternalServerError, "Error: %v", err)
|
||||
return
|
||||
}
|
||||
|
||||
c.Header("Content-Type", "text/html; charset=utf-8")
|
||||
err = loggrep.RenderAlertEvalHTML(c.Writer, loggrep.AlertEvalPageData{
|
||||
RuleID: id,
|
||||
Instance: instance,
|
||||
Logs: logs,
|
||||
Total: len(logs),
|
||||
})
|
||||
if err != nil {
|
||||
c.String(http.StatusInternalServerError, "render error: %v", err)
|
||||
}
|
||||
}
|
||||
|
||||
// alertEvalDetailJSON returns JSON for alert rule evaluation logs.
|
||||
func (rt *Router) alertEvalDetailJSON(c *gin.Context) {
|
||||
id := ginx.UrlParamStr(c, "id")
|
||||
if !loggrep.IsValidRuleID(id) {
|
||||
ginx.Bomb(200, "invalid rule id format")
|
||||
}
|
||||
|
||||
logs, instance, err := rt.getAlertEvalLogs(id)
|
||||
ginx.Dangerous(err)
|
||||
|
||||
ginx.NewRender(c).Data(loggrep.EventDetailResp{
|
||||
Logs: logs,
|
||||
Instance: instance,
|
||||
}, nil)
|
||||
}
|
||||
|
||||
// getAlertEvalLogs resolves the target instance(s) and retrieves alert eval logs.
|
||||
func (rt *Router) getAlertEvalLogs(id string) ([]string, string, error) {
|
||||
ruleId, _ := strconv.ParseInt(id, 10, 64)
|
||||
rule, err := models.AlertRuleGetById(rt.Ctx, ruleId)
|
||||
if err != nil {
|
||||
return nil, "", err
|
||||
}
|
||||
if rule == nil {
|
||||
return nil, "", fmt.Errorf("no such alert rule")
|
||||
}
|
||||
|
||||
instance := fmt.Sprintf("%s:%d", rt.Alert.Heartbeat.IP, rt.HTTP.Port)
|
||||
keyword := fmt.Sprintf("alert_eval_%s", id)
|
||||
|
||||
// Get datasource IDs for this rule
|
||||
dsIds := rt.DatasourceCache.GetIDsByDsCateAndQueries(rule.Cate, rule.DatasourceQueries)
|
||||
if len(dsIds) == 0 {
|
||||
// No datasources found (e.g. host rule), try local grep
|
||||
logs, err := loggrep.GrepLogDir(rt.LogDir, keyword)
|
||||
return logs, instance, err
|
||||
}
|
||||
|
||||
// Find unique target nodes via hash ring, with DB fallback
|
||||
nodeSet := make(map[string]struct{})
|
||||
for _, dsId := range dsIds {
|
||||
node, err := rt.getNodeForDatasource(dsId, id)
|
||||
if err != nil {
|
||||
continue
|
||||
}
|
||||
nodeSet[node] = struct{}{}
|
||||
}
|
||||
|
||||
if len(nodeSet) == 0 {
|
||||
// Hash ring not ready, grep locally
|
||||
logs, err := loggrep.GrepLogDir(rt.LogDir, keyword)
|
||||
return logs, instance, err
|
||||
}
|
||||
|
||||
// Collect logs from all target nodes
|
||||
var allLogs []string
|
||||
var instances []string
|
||||
|
||||
for node := range nodeSet {
|
||||
if node == instance {
|
||||
logs, err := loggrep.GrepLogDir(rt.LogDir, keyword)
|
||||
if err == nil {
|
||||
allLogs = append(allLogs, logs...)
|
||||
instances = append(instances, node)
|
||||
}
|
||||
} else {
|
||||
logs, nodeAddr, err := rt.forwardAlertEvalDetail(node, id)
|
||||
if err == nil {
|
||||
allLogs = append(allLogs, logs...)
|
||||
instances = append(instances, nodeAddr)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Sort logs by timestamp descending
|
||||
sort.Slice(allLogs, func(i, j int) bool {
|
||||
return allLogs[i] > allLogs[j]
|
||||
})
|
||||
|
||||
if len(allLogs) > loggrep.MaxLogLines {
|
||||
allLogs = allLogs[:loggrep.MaxLogLines]
|
||||
}
|
||||
|
||||
return allLogs, strings.Join(instances, ", "), nil
|
||||
}
|
||||
|
||||
func (rt *Router) forwardAlertEvalDetail(node, id string) ([]string, string, error) {
|
||||
url := fmt.Sprintf("http://%s/v1/n9e/alert-eval-detail/%s", node, id)
|
||||
req, err := http.NewRequest("GET", url, nil)
|
||||
if err != nil {
|
||||
return nil, node, err
|
||||
}
|
||||
|
||||
for user, pass := range rt.HTTP.APIForService.BasicAuth {
|
||||
req.SetBasicAuth(user, pass)
|
||||
break
|
||||
}
|
||||
|
||||
client := &http.Client{Timeout: 15 * time.Second}
|
||||
resp, err := client.Do(req)
|
||||
if err != nil {
|
||||
return nil, node, fmt.Errorf("forward to %s failed: %v", node, err)
|
||||
}
|
||||
defer resp.Body.Close()
|
||||
|
||||
body, err := io.ReadAll(io.LimitReader(resp.Body, 10*1024*1024)) // 10MB limit
|
||||
if err != nil {
|
||||
return nil, node, err
|
||||
}
|
||||
|
||||
var result struct {
|
||||
Dat loggrep.EventDetailResp `json:"dat"`
|
||||
Err string `json:"err"`
|
||||
}
|
||||
if err := json.Unmarshal(body, &result); err != nil {
|
||||
return nil, node, err
|
||||
}
|
||||
if result.Err != "" {
|
||||
return nil, node, fmt.Errorf("%s", result.Err)
|
||||
}
|
||||
|
||||
return result.Dat.Logs, result.Dat.Instance, nil
|
||||
}
|
||||
@@ -8,9 +8,9 @@ import (
|
||||
|
||||
"github.com/ccfos/nightingale/v6/models"
|
||||
"github.com/ccfos/nightingale/v6/pkg/ctx"
|
||||
"github.com/ccfos/nightingale/v6/pkg/ginx"
|
||||
|
||||
"github.com/gin-gonic/gin"
|
||||
"github.com/toolkits/pkg/ginx"
|
||||
"github.com/toolkits/pkg/logger"
|
||||
"golang.org/x/exp/slices"
|
||||
)
|
||||
|
||||
@@ -16,12 +16,12 @@ import (
|
||||
"github.com/ccfos/nightingale/v6/pkg/strx"
|
||||
"github.com/ccfos/nightingale/v6/pushgw/pconf"
|
||||
"github.com/ccfos/nightingale/v6/pushgw/writer"
|
||||
"github.com/ccfos/nightingale/v6/pkg/ginx"
|
||||
|
||||
"github.com/gin-gonic/gin"
|
||||
"github.com/jinzhu/copier"
|
||||
"github.com/pkg/errors"
|
||||
"github.com/prometheus/prometheus/prompb"
|
||||
"github.com/toolkits/pkg/ginx"
|
||||
"github.com/toolkits/pkg/i18n"
|
||||
)
|
||||
|
||||
@@ -36,6 +36,7 @@ func (rt *Router) alertRuleGets(c *gin.Context) {
|
||||
for i := 0; i < len(ars); i++ {
|
||||
ars[i].FillNotifyGroups(rt.Ctx, cache)
|
||||
}
|
||||
models.FillUpdateByNicknames(rt.Ctx, ars)
|
||||
}
|
||||
ginx.NewRender(c).Data(ars, err)
|
||||
}
|
||||
@@ -76,7 +77,6 @@ func (rt *Router) alertRuleGetsByGids(c *gin.Context) {
|
||||
if err == nil {
|
||||
cache := make(map[int64]*models.UserGroup)
|
||||
rids := make([]int64, 0, len(ars))
|
||||
names := make([]string, 0, len(ars))
|
||||
for i := 0; i < len(ars); i++ {
|
||||
ars[i].FillNotifyGroups(rt.Ctx, cache)
|
||||
|
||||
@@ -85,7 +85,6 @@ func (rt *Router) alertRuleGetsByGids(c *gin.Context) {
|
||||
}
|
||||
|
||||
rids = append(rids, ars[i].Id)
|
||||
names = append(names, ars[i].UpdateBy)
|
||||
}
|
||||
|
||||
stime, etime := GetAlertCueEventTimeRange(c)
|
||||
@@ -96,14 +95,7 @@ func (rt *Router) alertRuleGetsByGids(c *gin.Context) {
|
||||
}
|
||||
}
|
||||
|
||||
users := models.UserMapGet(rt.Ctx, "username in (?)", names)
|
||||
if users != nil {
|
||||
for i := 0; i < len(ars); i++ {
|
||||
if user, exist := users[ars[i].UpdateBy]; exist {
|
||||
ars[i].UpdateByNickname = user.Nickname
|
||||
}
|
||||
}
|
||||
}
|
||||
models.FillUpdateByNicknames(rt.Ctx, ars)
|
||||
}
|
||||
ginx.NewRender(c).Data(ars, err)
|
||||
}
|
||||
@@ -135,6 +127,7 @@ func (rt *Router) alertRulesGetByService(c *gin.Context) {
|
||||
ars[i].DatasourceIdsJson = rt.DatasourceCache.GetIDsByDsCateAndQueries(ars[i].Cate, ars[i].DatasourceQueries)
|
||||
}
|
||||
}
|
||||
models.FillUpdateByNicknames(rt.Ctx, ars)
|
||||
}
|
||||
ginx.NewRender(c).Data(ars, err)
|
||||
}
|
||||
@@ -889,3 +882,27 @@ func (rt *Router) batchAlertRuleClone(c *gin.Context) {
|
||||
|
||||
ginx.NewRender(c).Data(reterr, nil)
|
||||
}
|
||||
|
||||
func (rt *Router) timezonesGet(c *gin.Context) {
|
||||
// 返回常用时区列表(按时差去重,每个时差只保留一个代表性时区)
|
||||
timezones := []string{
|
||||
"UTC",
|
||||
"Asia/Shanghai", // UTC+8 (代表 Asia/Hong_Kong, Asia/Singapore 等)
|
||||
"Asia/Tokyo", // UTC+9 (代表 Asia/Seoul 等)
|
||||
"Asia/Dubai", // UTC+4
|
||||
"Asia/Kolkata", // UTC+5:30
|
||||
"Asia/Bangkok", // UTC+7 (代表 Asia/Jakarta 等)
|
||||
"Europe/London", // UTC+0 (代表 UTC)
|
||||
"Europe/Paris", // UTC+1 (代表 Europe/Berlin, Europe/Rome, Europe/Madrid 等)
|
||||
"Europe/Moscow", // UTC+3
|
||||
"America/New_York", // UTC-5 (代表 America/Toronto 等)
|
||||
"America/Chicago", // UTC-6 (代表 America/Mexico_City 等)
|
||||
"America/Denver", // UTC-7
|
||||
"America/Los_Angeles", // UTC-8
|
||||
"America/Sao_Paulo", // UTC-3
|
||||
"Australia/Sydney", // UTC+10 (代表 Australia/Melbourne 等)
|
||||
"Pacific/Auckland", // UTC+12
|
||||
}
|
||||
|
||||
ginx.NewRender(c).Data(timezones, nil)
|
||||
}
|
||||
|
||||
@@ -9,9 +9,9 @@ import (
|
||||
"github.com/ccfos/nightingale/v6/alert/common"
|
||||
"github.com/ccfos/nightingale/v6/models"
|
||||
"github.com/ccfos/nightingale/v6/pkg/strx"
|
||||
"github.com/ccfos/nightingale/v6/pkg/ginx"
|
||||
|
||||
"github.com/gin-gonic/gin"
|
||||
"github.com/toolkits/pkg/ginx"
|
||||
"github.com/toolkits/pkg/i18n"
|
||||
)
|
||||
|
||||
@@ -30,6 +30,7 @@ func (rt *Router) alertSubscribeGets(c *gin.Context) {
|
||||
ginx.Dangerous(lst[i].FillDatasourceIds(rt.Ctx))
|
||||
ginx.Dangerous(lst[i].DB2FE())
|
||||
}
|
||||
models.FillUpdateByNicknames(rt.Ctx, lst)
|
||||
|
||||
ginx.NewRender(c).Data(lst, err)
|
||||
}
|
||||
@@ -66,6 +67,7 @@ func (rt *Router) alertSubscribeGetsByGids(c *gin.Context) {
|
||||
ginx.Dangerous(lst[i].FillDatasourceIds(rt.Ctx))
|
||||
ginx.Dangerous(lst[i].DB2FE())
|
||||
}
|
||||
models.FillUpdateByNicknames(rt.Ctx, lst)
|
||||
|
||||
ginx.NewRender(c).Data(lst, err)
|
||||
}
|
||||
|
||||
@@ -7,9 +7,9 @@ import (
|
||||
|
||||
"github.com/ccfos/nightingale/v6/models"
|
||||
"github.com/ccfos/nightingale/v6/pkg/strx"
|
||||
"github.com/ccfos/nightingale/v6/pkg/ginx"
|
||||
|
||||
"github.com/gin-gonic/gin"
|
||||
"github.com/toolkits/pkg/ginx"
|
||||
"github.com/toolkits/pkg/i18n"
|
||||
)
|
||||
|
||||
@@ -260,6 +260,9 @@ func (rt *Router) boardGets(c *gin.Context) {
|
||||
query := ginx.QueryStr(c, "query", "")
|
||||
|
||||
boards, err := models.BoardGetsByGroupId(rt.Ctx, bgid, query)
|
||||
if err == nil {
|
||||
models.FillUpdateByNicknames(rt.Ctx, boards)
|
||||
}
|
||||
ginx.NewRender(c).Data(boards, err)
|
||||
}
|
||||
|
||||
@@ -273,6 +276,9 @@ func (rt *Router) publicBoardGets(c *gin.Context) {
|
||||
ginx.Dangerous(err)
|
||||
|
||||
boards, err := models.BoardGets(rt.Ctx, "", "public=1 and (public_cate in (?) or id in (?))", []int64{0, 1}, boardIds)
|
||||
if err == nil {
|
||||
models.FillUpdateByNicknames(rt.Ctx, boards)
|
||||
}
|
||||
ginx.NewRender(c).Data(boards, err)
|
||||
}
|
||||
|
||||
@@ -312,6 +318,7 @@ func (rt *Router) boardGetsByGids(c *gin.Context) {
|
||||
boards[i].Bgids = ids
|
||||
}
|
||||
}
|
||||
models.FillUpdateByNicknames(rt.Ctx, boards)
|
||||
|
||||
ginx.NewRender(c).Data(boards, err)
|
||||
}
|
||||
|
||||
@@ -8,10 +8,10 @@ import (
|
||||
"strings"
|
||||
|
||||
"github.com/ccfos/nightingale/v6/models"
|
||||
"github.com/ccfos/nightingale/v6/pkg/ginx"
|
||||
|
||||
"github.com/gin-gonic/gin"
|
||||
"github.com/toolkits/pkg/file"
|
||||
"github.com/toolkits/pkg/ginx"
|
||||
"github.com/toolkits/pkg/logger"
|
||||
"github.com/toolkits/pkg/runner"
|
||||
)
|
||||
|
||||
@@ -5,9 +5,9 @@ import (
|
||||
|
||||
"github.com/ccfos/nightingale/v6/models"
|
||||
"github.com/ccfos/nightingale/v6/pkg/ctx"
|
||||
"github.com/ccfos/nightingale/v6/pkg/ginx"
|
||||
|
||||
"github.com/gin-gonic/gin"
|
||||
"github.com/toolkits/pkg/ginx"
|
||||
"gorm.io/gorm"
|
||||
)
|
||||
|
||||
|
||||
@@ -3,8 +3,8 @@ package router
|
||||
import (
|
||||
"github.com/ccfos/nightingale/v6/models"
|
||||
"github.com/ccfos/nightingale/v6/pkg/prom"
|
||||
"github.com/ccfos/nightingale/v6/pkg/ginx"
|
||||
"github.com/gin-gonic/gin"
|
||||
"github.com/toolkits/pkg/ginx"
|
||||
)
|
||||
|
||||
func (rt *Router) metricFilterGets(c *gin.Context) {
|
||||
@@ -27,6 +27,8 @@ func (rt *Router) metricFilterGets(c *gin.Context) {
|
||||
}
|
||||
}
|
||||
|
||||
models.FillUpdateByNicknames(rt.Ctx, arr)
|
||||
|
||||
ginx.NewRender(c).Data(arr, err)
|
||||
}
|
||||
|
||||
|
||||
@@ -7,9 +7,9 @@ import (
|
||||
|
||||
"github.com/ccfos/nightingale/v6/center/integration"
|
||||
"github.com/ccfos/nightingale/v6/models"
|
||||
"github.com/ccfos/nightingale/v6/pkg/ginx"
|
||||
|
||||
"github.com/gin-gonic/gin"
|
||||
"github.com/toolkits/pkg/ginx"
|
||||
"github.com/toolkits/pkg/i18n"
|
||||
)
|
||||
|
||||
|
||||
@@ -9,8 +9,8 @@ import (
|
||||
"github.com/BurntSushi/toml"
|
||||
"github.com/ccfos/nightingale/v6/center/integration"
|
||||
"github.com/ccfos/nightingale/v6/models"
|
||||
"github.com/ccfos/nightingale/v6/pkg/ginx"
|
||||
"github.com/gin-gonic/gin"
|
||||
"github.com/toolkits/pkg/ginx"
|
||||
"github.com/toolkits/pkg/i18n"
|
||||
)
|
||||
|
||||
|
||||
@@ -5,9 +5,9 @@ import (
|
||||
|
||||
"github.com/ccfos/nightingale/v6/models"
|
||||
"github.com/ccfos/nightingale/v6/pkg/strx"
|
||||
"github.com/ccfos/nightingale/v6/pkg/ginx"
|
||||
|
||||
"github.com/gin-gonic/gin"
|
||||
"github.com/toolkits/pkg/ginx"
|
||||
"github.com/toolkits/pkg/logger"
|
||||
)
|
||||
|
||||
@@ -119,6 +119,9 @@ func (rt *Router) busiGroupGets(c *gin.Context) {
|
||||
if len(lst) == 0 {
|
||||
lst = []models.BusiGroup{}
|
||||
}
|
||||
if err == nil {
|
||||
models.FillUpdateByNicknames(rt.Ctx, lst)
|
||||
}
|
||||
|
||||
ginx.NewRender(c).Data(lst, err)
|
||||
}
|
||||
|
||||
@@ -5,9 +5,9 @@ import (
|
||||
"time"
|
||||
|
||||
"github.com/ccfos/nightingale/v6/storage"
|
||||
"github.com/ccfos/nightingale/v6/pkg/ginx"
|
||||
"github.com/gin-gonic/gin"
|
||||
captcha "github.com/mojocn/base64Captcha"
|
||||
"github.com/toolkits/pkg/ginx"
|
||||
"github.com/toolkits/pkg/logger"
|
||||
)
|
||||
|
||||
|
||||
@@ -5,9 +5,9 @@ import (
|
||||
|
||||
"github.com/ccfos/nightingale/v6/models"
|
||||
"github.com/ccfos/nightingale/v6/pkg/strx"
|
||||
"github.com/ccfos/nightingale/v6/pkg/ginx"
|
||||
|
||||
"github.com/gin-gonic/gin"
|
||||
"github.com/toolkits/pkg/ginx"
|
||||
)
|
||||
|
||||
func (rt *Router) chartShareGets(c *gin.Context) {
|
||||
|
||||
@@ -4,9 +4,9 @@ import (
|
||||
"encoding/json"
|
||||
|
||||
"github.com/ccfos/nightingale/v6/models"
|
||||
"github.com/ccfos/nightingale/v6/pkg/ginx"
|
||||
|
||||
"github.com/gin-gonic/gin"
|
||||
"github.com/toolkits/pkg/ginx"
|
||||
)
|
||||
|
||||
func (rt *Router) notifyChannelsGets(c *gin.Context) {
|
||||
|
||||
@@ -4,9 +4,9 @@ import (
|
||||
"time"
|
||||
|
||||
"github.com/ccfos/nightingale/v6/models"
|
||||
"github.com/ccfos/nightingale/v6/pkg/ginx"
|
||||
|
||||
"github.com/gin-gonic/gin"
|
||||
"github.com/toolkits/pkg/ginx"
|
||||
)
|
||||
|
||||
const EMBEDDEDDASHBOARD = "embedded-dashboards"
|
||||
@@ -15,6 +15,9 @@ func (rt *Router) configsGet(c *gin.Context) {
|
||||
prefix := ginx.QueryStr(c, "prefix", "")
|
||||
limit := ginx.QueryInt(c, "limit", 10)
|
||||
configs, err := models.ConfigsGets(rt.Ctx, prefix, limit, ginx.Offset(c, limit))
|
||||
if err == nil {
|
||||
models.FillUpdateByNicknames(rt.Ctx, configs)
|
||||
}
|
||||
ginx.NewRender(c).Data(configs, err)
|
||||
}
|
||||
|
||||
|
||||
@@ -2,9 +2,9 @@ package router
|
||||
|
||||
import (
|
||||
"github.com/ccfos/nightingale/v6/pkg/secu"
|
||||
"github.com/ccfos/nightingale/v6/pkg/ginx"
|
||||
|
||||
"github.com/gin-gonic/gin"
|
||||
"github.com/toolkits/pkg/ginx"
|
||||
)
|
||||
|
||||
type confPropCrypto struct {
|
||||
|
||||
@@ -7,9 +7,9 @@ import (
|
||||
|
||||
"github.com/ccfos/nightingale/v6/models"
|
||||
"github.com/ccfos/nightingale/v6/pkg/ctx"
|
||||
"github.com/ccfos/nightingale/v6/pkg/ginx"
|
||||
|
||||
"github.com/gin-gonic/gin"
|
||||
"github.com/toolkits/pkg/ginx"
|
||||
)
|
||||
|
||||
func checkAnnotationPermission(c *gin.Context, ctx *ctx.Context, dashboardId int64) {
|
||||
|
||||
@@ -15,8 +15,8 @@ import (
|
||||
"github.com/ccfos/nightingale/v6/datasource/opensearch"
|
||||
"github.com/ccfos/nightingale/v6/dskit/clickhouse"
|
||||
"github.com/ccfos/nightingale/v6/models"
|
||||
"github.com/ccfos/nightingale/v6/pkg/ginx"
|
||||
"github.com/gin-gonic/gin"
|
||||
"github.com/toolkits/pkg/ginx"
|
||||
"github.com/toolkits/pkg/i18n"
|
||||
"github.com/toolkits/pkg/logger"
|
||||
)
|
||||
@@ -276,7 +276,7 @@ func (rt *Router) datasourceUpsert(c *gin.Context) {
|
||||
}
|
||||
err = req.Add(rt.Ctx)
|
||||
} else {
|
||||
err = req.Update(rt.Ctx, "name", "identifier", "description", "cluster_name", "settings", "http", "auth", "updated_by", "updated_at", "is_default")
|
||||
err = req.Update(rt.Ctx, "name", "identifier", "description", "cluster_name", "settings", "http", "auth", "updated_by", "updated_at", "is_default", "weight")
|
||||
}
|
||||
|
||||
Render(c, nil, err)
|
||||
@@ -293,11 +293,15 @@ func DatasourceCheck(c *gin.Context, ds models.Datasource) error {
|
||||
}
|
||||
}
|
||||
|
||||
// 使用 TLS 配置(支持 mTLS)
|
||||
tlsConfig, err := ds.HTTPJson.TLS.TLSConfig()
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to create TLS config: %v", err)
|
||||
}
|
||||
|
||||
client := &http.Client{
|
||||
Transport: &http.Transport{
|
||||
TLSClientConfig: &tls.Config{
|
||||
InsecureSkipVerify: ds.HTTPJson.TLS.SkipTlsVerify,
|
||||
},
|
||||
TLSClientConfig: tlsConfig,
|
||||
},
|
||||
}
|
||||
|
||||
|
||||
@@ -6,10 +6,10 @@ import (
|
||||
"github.com/ccfos/nightingale/v6/dscache"
|
||||
"github.com/ccfos/nightingale/v6/dskit/types"
|
||||
"github.com/ccfos/nightingale/v6/models"
|
||||
"github.com/ccfos/nightingale/v6/pkg/ginx"
|
||||
"github.com/ccfos/nightingale/v6/pkg/logx"
|
||||
|
||||
"github.com/gin-gonic/gin"
|
||||
"github.com/toolkits/pkg/ginx"
|
||||
"github.com/toolkits/pkg/logger"
|
||||
)
|
||||
|
||||
func (rt *Router) ShowDatabases(c *gin.Context) {
|
||||
@@ -18,7 +18,7 @@ func (rt *Router) ShowDatabases(c *gin.Context) {
|
||||
|
||||
plug, exists := dscache.DsCache.Get(f.Cate, f.DatasourceId)
|
||||
if !exists {
|
||||
logger.Warningf("cluster:%d not exists", f.DatasourceId)
|
||||
logx.Warningf(c.Request.Context(), "cluster:%d not exists", f.DatasourceId)
|
||||
ginx.Bomb(200, "cluster not exists")
|
||||
}
|
||||
|
||||
@@ -48,7 +48,7 @@ func (rt *Router) ShowTables(c *gin.Context) {
|
||||
|
||||
plug, exists := dscache.DsCache.Get(f.Cate, f.DatasourceId)
|
||||
if !exists {
|
||||
logger.Warningf("cluster:%d not exists", f.DatasourceId)
|
||||
logx.Warningf(c.Request.Context(), "cluster:%d not exists", f.DatasourceId)
|
||||
ginx.Bomb(200, "cluster not exists")
|
||||
}
|
||||
|
||||
@@ -78,7 +78,7 @@ func (rt *Router) DescribeTable(c *gin.Context) {
|
||||
|
||||
plug, exists := dscache.DsCache.Get(f.Cate, f.DatasourceId)
|
||||
if !exists {
|
||||
logger.Warningf("cluster:%d not exists", f.DatasourceId)
|
||||
logx.Warningf(c.Request.Context(), "cluster:%d not exists", f.DatasourceId)
|
||||
ginx.Bomb(200, "cluster not exists")
|
||||
}
|
||||
// 只接受一个入参
|
||||
|
||||
@@ -5,14 +5,15 @@ import (
|
||||
|
||||
"github.com/ccfos/nightingale/v6/models"
|
||||
"github.com/ccfos/nightingale/v6/pkg/ctx"
|
||||
"github.com/ccfos/nightingale/v6/pkg/ginx"
|
||||
|
||||
"github.com/gin-gonic/gin"
|
||||
"github.com/toolkits/pkg/ginx"
|
||||
)
|
||||
|
||||
func (rt *Router) embeddedProductGets(c *gin.Context) {
|
||||
products, err := models.EmbeddedProductGets(rt.Ctx)
|
||||
ginx.Dangerous(err)
|
||||
models.FillUpdateByNicknames(rt.Ctx, products)
|
||||
// 获取当前用户可访问的Group ID 列表
|
||||
me := c.MustGet("user").(*models.User)
|
||||
|
||||
|
||||
@@ -3,10 +3,10 @@ package router
|
||||
import (
|
||||
"github.com/ccfos/nightingale/v6/datasource/es"
|
||||
"github.com/ccfos/nightingale/v6/dscache"
|
||||
"github.com/ccfos/nightingale/v6/pkg/ginx"
|
||||
"github.com/ccfos/nightingale/v6/pkg/logx"
|
||||
|
||||
"github.com/gin-gonic/gin"
|
||||
"github.com/toolkits/pkg/ginx"
|
||||
"github.com/toolkits/pkg/logger"
|
||||
)
|
||||
|
||||
type IndexReq struct {
|
||||
@@ -34,7 +34,7 @@ func (rt *Router) QueryIndices(c *gin.Context) {
|
||||
|
||||
plug, exists := dscache.DsCache.Get(f.Cate, f.DatasourceId)
|
||||
if !exists {
|
||||
logger.Warningf("cluster:%d not exists", f.DatasourceId)
|
||||
logx.Warningf(c.Request.Context(), "cluster:%d not exists", f.DatasourceId)
|
||||
ginx.Bomb(200, "cluster not exists")
|
||||
}
|
||||
|
||||
@@ -50,7 +50,7 @@ func (rt *Router) QueryFields(c *gin.Context) {
|
||||
|
||||
plug, exists := dscache.DsCache.Get(f.Cate, f.DatasourceId)
|
||||
if !exists {
|
||||
logger.Warningf("cluster:%d not exists", f.DatasourceId)
|
||||
logx.Warningf(c.Request.Context(), "cluster:%d not exists", f.DatasourceId)
|
||||
ginx.Bomb(200, "cluster not exists")
|
||||
}
|
||||
|
||||
@@ -66,7 +66,7 @@ func (rt *Router) QueryESVariable(c *gin.Context) {
|
||||
|
||||
plug, exists := dscache.DsCache.Get(f.Cate, f.DatasourceId)
|
||||
if !exists {
|
||||
logger.Warningf("cluster:%d not exists", f.DatasourceId)
|
||||
logx.Warningf(c.Request.Context(), "cluster:%d not exists", f.DatasourceId)
|
||||
ginx.Bomb(200, "cluster not exists")
|
||||
}
|
||||
|
||||
|
||||
@@ -5,8 +5,8 @@ import (
|
||||
"time"
|
||||
|
||||
"github.com/ccfos/nightingale/v6/models"
|
||||
"github.com/ccfos/nightingale/v6/pkg/ginx"
|
||||
"github.com/gin-gonic/gin"
|
||||
"github.com/toolkits/pkg/ginx"
|
||||
)
|
||||
|
||||
// 创建 ES Index Pattern
|
||||
@@ -69,6 +69,10 @@ func (rt *Router) esIndexPatternGetList(c *gin.Context) {
|
||||
lst, err = models.EsIndexPatternGets(rt.Ctx, "")
|
||||
}
|
||||
|
||||
if err == nil {
|
||||
models.FillUpdateByNicknames(rt.Ctx, lst)
|
||||
}
|
||||
|
||||
ginx.NewRender(c).Data(lst, err)
|
||||
}
|
||||
|
||||
|
||||
149
center/router/router_event_detail.go
Normal file
149
center/router/router_event_detail.go
Normal file
@@ -0,0 +1,149 @@
|
||||
package router
|
||||
|
||||
import (
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"io"
|
||||
"net/http"
|
||||
"strconv"
|
||||
"time"
|
||||
|
||||
"github.com/ccfos/nightingale/v6/alert/naming"
|
||||
"github.com/ccfos/nightingale/v6/models"
|
||||
"github.com/ccfos/nightingale/v6/pkg/loggrep"
|
||||
"github.com/ccfos/nightingale/v6/pkg/ginx"
|
||||
|
||||
"github.com/gin-gonic/gin"
|
||||
)
|
||||
|
||||
// eventDetailPage renders an HTML log viewer page (for pages group).
|
||||
func (rt *Router) eventDetailPage(c *gin.Context) {
|
||||
hash := ginx.UrlParamStr(c, "hash")
|
||||
if !loggrep.IsValidHash(hash) {
|
||||
c.String(http.StatusBadRequest, "invalid hash format")
|
||||
return
|
||||
}
|
||||
|
||||
logs, instance, err := rt.getEventLogs(hash)
|
||||
if err != nil {
|
||||
c.String(http.StatusInternalServerError, "Error: %v", err)
|
||||
return
|
||||
}
|
||||
|
||||
c.Header("Content-Type", "text/html; charset=utf-8")
|
||||
err = loggrep.RenderHTML(c.Writer, loggrep.PageData{
|
||||
Hash: hash,
|
||||
Instance: instance,
|
||||
Logs: logs,
|
||||
Total: len(logs),
|
||||
})
|
||||
if err != nil {
|
||||
c.String(http.StatusInternalServerError, "render error: %v", err)
|
||||
}
|
||||
}
|
||||
|
||||
// eventDetailJSON returns JSON (for service group).
|
||||
func (rt *Router) eventDetailJSON(c *gin.Context) {
|
||||
hash := ginx.UrlParamStr(c, "hash")
|
||||
if !loggrep.IsValidHash(hash) {
|
||||
ginx.Bomb(200, "invalid hash format")
|
||||
}
|
||||
|
||||
logs, instance, err := rt.getEventLogs(hash)
|
||||
ginx.Dangerous(err)
|
||||
|
||||
ginx.NewRender(c).Data(loggrep.EventDetailResp{
|
||||
Logs: logs,
|
||||
Instance: instance,
|
||||
}, nil)
|
||||
}
|
||||
|
||||
// getNodeForDatasource returns the alert engine instance responsible for the given
|
||||
// datasource and primary key. It first checks the local hashring, and falls back
|
||||
// to querying the database for active instances if the hashring is empty
|
||||
// (e.g. when the datasource belongs to another engine cluster).
|
||||
func (rt *Router) getNodeForDatasource(datasourceId int64, pk string) (string, error) {
|
||||
dsIdStr := strconv.FormatInt(datasourceId, 10)
|
||||
node, err := naming.DatasourceHashRing.GetNode(dsIdStr, pk)
|
||||
if err == nil {
|
||||
return node, nil
|
||||
}
|
||||
|
||||
// Hashring is empty for this datasource (likely belongs to another engine cluster).
|
||||
// Query the DB for active instances.
|
||||
servers, dbErr := models.AlertingEngineGetsInstances(rt.Ctx,
|
||||
"datasource_id = ? and clock > ?",
|
||||
datasourceId, time.Now().Unix()-30)
|
||||
if dbErr != nil {
|
||||
return "", dbErr
|
||||
}
|
||||
if len(servers) == 0 {
|
||||
return "", fmt.Errorf("no active instances for datasource %d", datasourceId)
|
||||
}
|
||||
|
||||
ring := naming.NewConsistentHashRing(int32(naming.NodeReplicas), servers)
|
||||
return ring.Get(pk)
|
||||
}
|
||||
|
||||
// getEventLogs resolves the target instance and retrieves logs.
|
||||
func (rt *Router) getEventLogs(hash string) ([]string, string, error) {
|
||||
event, err := models.AlertHisEventGetByHash(rt.Ctx, hash)
|
||||
if err != nil {
|
||||
return nil, "", err
|
||||
}
|
||||
if event == nil {
|
||||
return nil, "", fmt.Errorf("no such alert event")
|
||||
}
|
||||
|
||||
ruleId := strconv.FormatInt(event.RuleId, 10)
|
||||
|
||||
instance := fmt.Sprintf("%s:%d", rt.Alert.Heartbeat.IP, rt.HTTP.Port)
|
||||
|
||||
node, err := rt.getNodeForDatasource(event.DatasourceId, ruleId)
|
||||
if err != nil || node == instance {
|
||||
// hashring not ready or target is self, handle locally
|
||||
logs, err := loggrep.GrepLogDir(rt.LogDir, hash)
|
||||
return logs, instance, err
|
||||
}
|
||||
|
||||
// forward to the target alert instance
|
||||
return rt.forwardEventDetail(node, hash)
|
||||
}
|
||||
|
||||
func (rt *Router) forwardEventDetail(node, hash string) ([]string, string, error) {
|
||||
url := fmt.Sprintf("http://%s/v1/n9e/event-detail/%s", node, hash)
|
||||
req, err := http.NewRequest("GET", url, nil)
|
||||
if err != nil {
|
||||
return nil, node, err
|
||||
}
|
||||
|
||||
for user, pass := range rt.HTTP.APIForService.BasicAuth {
|
||||
req.SetBasicAuth(user, pass)
|
||||
break
|
||||
}
|
||||
|
||||
client := &http.Client{Timeout: 15 * time.Second}
|
||||
resp, err := client.Do(req)
|
||||
if err != nil {
|
||||
return nil, node, fmt.Errorf("forward to %s failed: %v", node, err)
|
||||
}
|
||||
defer resp.Body.Close()
|
||||
|
||||
body, err := io.ReadAll(io.LimitReader(resp.Body, 10*1024*1024)) // 10MB limit
|
||||
if err != nil {
|
||||
return nil, node, err
|
||||
}
|
||||
|
||||
var result struct {
|
||||
Dat loggrep.EventDetailResp `json:"dat"`
|
||||
Err string `json:"err"`
|
||||
}
|
||||
if err := json.Unmarshal(body, &result); err != nil {
|
||||
return nil, node, err
|
||||
}
|
||||
if result.Err != "" {
|
||||
return nil, node, fmt.Errorf("%s", result.Err)
|
||||
}
|
||||
|
||||
return result.Dat.Logs, result.Dat.Instance, nil
|
||||
}
|
||||
@@ -1,14 +1,19 @@
|
||||
package router
|
||||
|
||||
import (
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"net/http"
|
||||
"time"
|
||||
|
||||
"github.com/ccfos/nightingale/v6/alert/pipeline/engine"
|
||||
"github.com/ccfos/nightingale/v6/models"
|
||||
"github.com/ccfos/nightingale/v6/pkg/ginx"
|
||||
|
||||
"github.com/gin-gonic/gin"
|
||||
"github.com/toolkits/pkg/ginx"
|
||||
"github.com/google/uuid"
|
||||
"github.com/toolkits/pkg/i18n"
|
||||
"github.com/toolkits/pkg/logger"
|
||||
)
|
||||
|
||||
// 获取事件Pipeline列表
|
||||
@@ -27,18 +32,38 @@ func (rt *Router) eventPipelinesList(c *gin.Context) {
|
||||
for _, tid := range pipeline.TeamIds {
|
||||
pipeline.TeamNames = append(pipeline.TeamNames, ugMap[tid])
|
||||
}
|
||||
// 兼容处理:自动填充工作流字段
|
||||
pipeline.FillWorkflowFields()
|
||||
}
|
||||
models.FillUpdateByNicknames(rt.Ctx, pipelines)
|
||||
|
||||
gids, err := models.MyGroupIdsMap(rt.Ctx, me.Id)
|
||||
ginx.Dangerous(err)
|
||||
|
||||
if me.IsAdmin() {
|
||||
for _, pipeline := range pipelines {
|
||||
if pipeline.TriggerMode == "" {
|
||||
pipeline.TriggerMode = models.TriggerModeEvent
|
||||
}
|
||||
|
||||
if pipeline.UseCase == "" {
|
||||
pipeline.UseCase = models.UseCaseEventPipeline
|
||||
}
|
||||
}
|
||||
ginx.NewRender(c).Data(pipelines, nil)
|
||||
return
|
||||
}
|
||||
|
||||
res := make([]*models.EventPipeline, 0)
|
||||
for _, pipeline := range pipelines {
|
||||
if pipeline.TriggerMode == "" {
|
||||
pipeline.TriggerMode = models.TriggerModeEvent
|
||||
}
|
||||
|
||||
if pipeline.UseCase == "" {
|
||||
pipeline.UseCase = models.UseCaseEventPipeline
|
||||
}
|
||||
|
||||
for _, tid := range pipeline.TeamIds {
|
||||
if _, ok := gids[tid]; ok {
|
||||
res = append(res, pipeline)
|
||||
@@ -61,6 +86,15 @@ func (rt *Router) getEventPipeline(c *gin.Context) {
|
||||
err = pipeline.FillTeamNames(rt.Ctx)
|
||||
ginx.Dangerous(err)
|
||||
|
||||
// 兼容处理:自动填充工作流字段
|
||||
pipeline.FillWorkflowFields()
|
||||
if pipeline.TriggerMode == "" {
|
||||
pipeline.TriggerMode = models.TriggerModeEvent
|
||||
}
|
||||
if pipeline.UseCase == "" {
|
||||
pipeline.UseCase = models.UseCaseEventPipeline
|
||||
}
|
||||
|
||||
ginx.NewRender(c).Data(pipeline, nil)
|
||||
}
|
||||
|
||||
@@ -131,7 +165,9 @@ func (rt *Router) tryRunEventPipeline(c *gin.Context) {
|
||||
var f struct {
|
||||
EventId int64 `json:"event_id"`
|
||||
PipelineConfig models.EventPipeline `json:"pipeline_config"`
|
||||
InputVariables map[string]string `json:"input_variables,omitempty"`
|
||||
}
|
||||
|
||||
ginx.BindJSON(c, &f)
|
||||
|
||||
hisEvent, err := models.AlertHisEventGetById(rt.Ctx, f.EventId)
|
||||
@@ -141,30 +177,33 @@ func (rt *Router) tryRunEventPipeline(c *gin.Context) {
|
||||
event := hisEvent.ToCur()
|
||||
|
||||
lang := c.GetHeader("X-Language")
|
||||
var result string
|
||||
for _, p := range f.PipelineConfig.ProcessorConfigs {
|
||||
processor, err := models.GetProcessorByType(p.Typ, p.Config)
|
||||
if err != nil {
|
||||
ginx.Bomb(http.StatusBadRequest, "get processor: %+v err: %+v", p, err)
|
||||
}
|
||||
event, result, err = processor.Process(rt.Ctx, event)
|
||||
if err != nil {
|
||||
ginx.Bomb(http.StatusBadRequest, "processor: %+v err: %+v", p, err)
|
||||
}
|
||||
me := c.MustGet("user").(*models.User)
|
||||
|
||||
if event == nil {
|
||||
ginx.NewRender(c).Data(map[string]interface{}{
|
||||
"event": event,
|
||||
"result": i18n.Sprintf(lang, "event is dropped"),
|
||||
}, nil)
|
||||
return
|
||||
}
|
||||
// 统一使用工作流引擎执行(兼容线性模式和工作流模式)
|
||||
workflowEngine := engine.NewWorkflowEngine(rt.Ctx)
|
||||
|
||||
triggerCtx := &models.WorkflowTriggerContext{
|
||||
Mode: models.TriggerModeAPI,
|
||||
TriggerBy: me.Username,
|
||||
InputsOverrides: f.InputVariables,
|
||||
}
|
||||
|
||||
resultEvent, result, err := workflowEngine.Execute(&f.PipelineConfig, event, triggerCtx)
|
||||
if err != nil {
|
||||
ginx.Bomb(http.StatusBadRequest, "pipeline execute error: %v", err)
|
||||
}
|
||||
|
||||
m := map[string]interface{}{
|
||||
"event": event,
|
||||
"result": i18n.Sprintf(lang, result),
|
||||
"event": resultEvent,
|
||||
"result": i18n.Sprintf(lang, result.Message),
|
||||
"status": result.Status,
|
||||
"node_results": result.NodeResults,
|
||||
}
|
||||
|
||||
if resultEvent == nil {
|
||||
m["result"] = i18n.Sprintf(lang, "event is dropped")
|
||||
}
|
||||
|
||||
ginx.NewRender(c).Data(m, nil)
|
||||
}
|
||||
|
||||
@@ -186,14 +225,18 @@ func (rt *Router) tryRunEventProcessor(c *gin.Context) {
|
||||
if err != nil {
|
||||
ginx.Bomb(200, "get processor err: %+v", err)
|
||||
}
|
||||
event, res, err := processor.Process(rt.Ctx, event)
|
||||
wfCtx := &models.WorkflowContext{
|
||||
Event: event,
|
||||
Vars: make(map[string]interface{}),
|
||||
}
|
||||
wfCtx, res, err := processor.Process(rt.Ctx, wfCtx)
|
||||
if err != nil {
|
||||
ginx.Bomb(200, "processor err: %+v", err)
|
||||
}
|
||||
|
||||
lang := c.GetHeader("X-Language")
|
||||
ginx.NewRender(c).Data(map[string]interface{}{
|
||||
"event": event,
|
||||
"event": wfCtx.Event,
|
||||
"result": i18n.Sprintf(lang, res),
|
||||
}, nil)
|
||||
}
|
||||
@@ -223,6 +266,10 @@ func (rt *Router) tryRunEventProcessorByNotifyRule(c *gin.Context) {
|
||||
ginx.Bomb(http.StatusBadRequest, "processors not found")
|
||||
}
|
||||
|
||||
wfCtx := &models.WorkflowContext{
|
||||
Event: event,
|
||||
Vars: make(map[string]interface{}),
|
||||
}
|
||||
for _, pl := range pipelines {
|
||||
for _, p := range pl.ProcessorConfigs {
|
||||
processor, err := models.GetProcessorByType(p.Typ, p.Config)
|
||||
@@ -230,14 +277,14 @@ func (rt *Router) tryRunEventProcessorByNotifyRule(c *gin.Context) {
|
||||
ginx.Bomb(http.StatusBadRequest, "get processor: %+v err: %+v", p, err)
|
||||
}
|
||||
|
||||
event, _, err := processor.Process(rt.Ctx, event)
|
||||
wfCtx, _, err = processor.Process(rt.Ctx, wfCtx)
|
||||
if err != nil {
|
||||
ginx.Bomb(http.StatusBadRequest, "processor: %+v err: %+v", p, err)
|
||||
}
|
||||
if event == nil {
|
||||
if wfCtx == nil || wfCtx.Event == nil {
|
||||
lang := c.GetHeader("X-Language")
|
||||
ginx.NewRender(c).Data(map[string]interface{}{
|
||||
"event": event,
|
||||
"event": nil,
|
||||
"result": i18n.Sprintf(lang, "event is dropped"),
|
||||
}, nil)
|
||||
return
|
||||
@@ -245,10 +292,348 @@ func (rt *Router) tryRunEventProcessorByNotifyRule(c *gin.Context) {
|
||||
}
|
||||
}
|
||||
|
||||
ginx.NewRender(c).Data(event, nil)
|
||||
ginx.NewRender(c).Data(wfCtx.Event, nil)
|
||||
}
|
||||
|
||||
func (rt *Router) eventPipelinesListByService(c *gin.Context) {
|
||||
pipelines, err := models.ListEventPipelines(rt.Ctx)
|
||||
ginx.NewRender(c).Data(pipelines, err)
|
||||
}
|
||||
|
||||
type EventPipelineRequest struct {
|
||||
// 事件数据(可选,如果不传则使用空事件)
|
||||
Event *models.AlertCurEvent `json:"event,omitempty"`
|
||||
// 输入参数覆盖
|
||||
InputsOverrides map[string]string `json:"inputs_overrides,omitempty"`
|
||||
|
||||
Username string `json:"username,omitempty"`
|
||||
}
|
||||
|
||||
// executePipelineTrigger 执行 Pipeline 触发的公共逻辑
|
||||
func (rt *Router) executePipelineTrigger(pipeline *models.EventPipeline, req *EventPipelineRequest, triggerBy string) (string, error) {
|
||||
// 准备事件数据
|
||||
var event *models.AlertCurEvent
|
||||
if req.Event != nil {
|
||||
event = req.Event
|
||||
} else {
|
||||
// 创建空事件
|
||||
event = &models.AlertCurEvent{
|
||||
TriggerTime: time.Now().Unix(),
|
||||
}
|
||||
}
|
||||
|
||||
// 生成执行ID
|
||||
executionID := uuid.New().String()
|
||||
|
||||
// 创建触发上下文
|
||||
triggerCtx := &models.WorkflowTriggerContext{
|
||||
Mode: models.TriggerModeAPI,
|
||||
TriggerBy: triggerBy,
|
||||
InputsOverrides: req.InputsOverrides,
|
||||
RequestID: executionID,
|
||||
}
|
||||
|
||||
// 异步执行工作流
|
||||
go func() {
|
||||
workflowEngine := engine.NewWorkflowEngine(rt.Ctx)
|
||||
_, _, err := workflowEngine.Execute(pipeline, event, triggerCtx)
|
||||
if err != nil {
|
||||
logger.Errorf("async workflow execute error: pipeline_id=%d execution_id=%s err=%v",
|
||||
pipeline.ID, executionID, err)
|
||||
}
|
||||
}()
|
||||
|
||||
return executionID, nil
|
||||
}
|
||||
|
||||
// triggerEventPipelineByService Service 调用触发工作流执行
|
||||
func (rt *Router) triggerEventPipelineByService(c *gin.Context) {
|
||||
pipelineID := ginx.UrlParamInt64(c, "id")
|
||||
var f EventPipelineRequest
|
||||
ginx.BindJSON(c, &f)
|
||||
|
||||
// 获取 Pipeline
|
||||
pipeline, err := models.GetEventPipeline(rt.Ctx, pipelineID)
|
||||
if err != nil {
|
||||
ginx.Bomb(http.StatusNotFound, "pipeline not found: %v", err)
|
||||
}
|
||||
|
||||
executionID, err := rt.executePipelineTrigger(pipeline, &f, f.Username)
|
||||
if err != nil {
|
||||
ginx.Bomb(http.StatusBadRequest, "%v", err)
|
||||
}
|
||||
|
||||
ginx.NewRender(c).Data(gin.H{
|
||||
"execution_id": executionID,
|
||||
"message": "workflow execution started",
|
||||
}, nil)
|
||||
}
|
||||
|
||||
// triggerEventPipelineByAPI API 触发工作流执行
|
||||
func (rt *Router) triggerEventPipelineByAPI(c *gin.Context) {
|
||||
pipelineID := ginx.UrlParamInt64(c, "id")
|
||||
var f EventPipelineRequest
|
||||
ginx.BindJSON(c, &f)
|
||||
|
||||
// 获取 Pipeline
|
||||
pipeline, err := models.GetEventPipeline(rt.Ctx, pipelineID)
|
||||
if err != nil {
|
||||
ginx.Bomb(http.StatusNotFound, "pipeline not found: %v", err)
|
||||
}
|
||||
|
||||
// 检查权限
|
||||
me := c.MustGet("user").(*models.User)
|
||||
ginx.Dangerous(me.CheckGroupPermission(rt.Ctx, pipeline.TeamIds))
|
||||
|
||||
executionID, err := rt.executePipelineTrigger(pipeline, &f, me.Username)
|
||||
if err != nil {
|
||||
ginx.Bomb(http.StatusBadRequest, err.Error())
|
||||
}
|
||||
|
||||
ginx.NewRender(c).Data(gin.H{
|
||||
"execution_id": executionID,
|
||||
"message": "workflow execution started",
|
||||
}, nil)
|
||||
}
|
||||
|
||||
func (rt *Router) listAllEventPipelineExecutions(c *gin.Context) {
|
||||
pipelineId := ginx.QueryInt64(c, "pipeline_id", 0)
|
||||
pipelineName := ginx.QueryStr(c, "pipeline_name", "")
|
||||
mode := ginx.QueryStr(c, "mode", "")
|
||||
status := ginx.QueryStr(c, "status", "")
|
||||
limit := ginx.QueryInt(c, "limit", 20)
|
||||
offset := ginx.QueryInt(c, "p", 1)
|
||||
|
||||
if limit <= 0 || limit > 1000 {
|
||||
limit = 20
|
||||
}
|
||||
if offset <= 0 {
|
||||
offset = 1
|
||||
}
|
||||
|
||||
executions, total, err := models.ListAllEventPipelineExecutions(rt.Ctx, pipelineId, pipelineName, mode, status, limit, (offset-1)*limit)
|
||||
ginx.Dangerous(err)
|
||||
|
||||
ginx.NewRender(c).Data(gin.H{
|
||||
"list": executions,
|
||||
"total": total,
|
||||
}, nil)
|
||||
}
|
||||
|
||||
func (rt *Router) listEventPipelineExecutions(c *gin.Context) {
|
||||
pipelineID := ginx.UrlParamInt64(c, "id")
|
||||
mode := ginx.QueryStr(c, "mode", "")
|
||||
status := ginx.QueryStr(c, "status", "")
|
||||
limit := ginx.QueryInt(c, "limit", 20)
|
||||
offset := ginx.QueryInt(c, "p", 1)
|
||||
|
||||
if limit <= 0 || limit > 1000 {
|
||||
limit = 20
|
||||
}
|
||||
if offset <= 0 {
|
||||
offset = 1
|
||||
}
|
||||
|
||||
executions, total, err := models.ListEventPipelineExecutions(rt.Ctx, pipelineID, mode, status, limit, (offset-1)*limit)
|
||||
ginx.Dangerous(err)
|
||||
|
||||
ginx.NewRender(c).Data(gin.H{
|
||||
"list": executions,
|
||||
"total": total,
|
||||
}, nil)
|
||||
}
|
||||
|
||||
func (rt *Router) getEventPipelineExecution(c *gin.Context) {
|
||||
execID := ginx.UrlParamStr(c, "exec_id")
|
||||
|
||||
detail, err := models.GetEventPipelineExecutionDetail(rt.Ctx, execID)
|
||||
if err != nil {
|
||||
ginx.Bomb(http.StatusNotFound, "execution not found: %v", err)
|
||||
}
|
||||
|
||||
ginx.NewRender(c).Data(detail, nil)
|
||||
}
|
||||
|
||||
func (rt *Router) getEventPipelineExecutionStats(c *gin.Context) {
|
||||
pipelineID := ginx.UrlParamInt64(c, "id")
|
||||
|
||||
stats, err := models.GetEventPipelineExecutionStatistics(rt.Ctx, pipelineID)
|
||||
ginx.Dangerous(err)
|
||||
|
||||
ginx.NewRender(c).Data(stats, nil)
|
||||
}
|
||||
|
||||
func (rt *Router) cleanEventPipelineExecutions(c *gin.Context) {
|
||||
var f struct {
|
||||
BeforeDays int `json:"before_days"`
|
||||
}
|
||||
ginx.BindJSON(c, &f)
|
||||
|
||||
if f.BeforeDays <= 0 {
|
||||
f.BeforeDays = 30
|
||||
}
|
||||
|
||||
beforeTime := time.Now().AddDate(0, 0, -f.BeforeDays).Unix()
|
||||
affected, err := models.DeleteEventPipelineExecutions(rt.Ctx, beforeTime)
|
||||
ginx.Dangerous(err)
|
||||
|
||||
ginx.NewRender(c).Data(gin.H{
|
||||
"deleted": affected,
|
||||
}, nil)
|
||||
}
|
||||
|
||||
func (rt *Router) streamEventPipeline(c *gin.Context) {
|
||||
pipelineID := ginx.UrlParamInt64(c, "id")
|
||||
|
||||
var f EventPipelineRequest
|
||||
ginx.BindJSON(c, &f)
|
||||
|
||||
pipeline, err := models.GetEventPipeline(rt.Ctx, pipelineID)
|
||||
if err != nil {
|
||||
ginx.Bomb(http.StatusNotFound, "pipeline not found: %v", err)
|
||||
}
|
||||
|
||||
me := c.MustGet("user").(*models.User)
|
||||
ginx.Dangerous(me.CheckGroupPermission(rt.Ctx, pipeline.TeamIds))
|
||||
|
||||
var event *models.AlertCurEvent
|
||||
if f.Event != nil {
|
||||
event = f.Event
|
||||
} else {
|
||||
event = &models.AlertCurEvent{
|
||||
TriggerTime: time.Now().Unix(),
|
||||
}
|
||||
}
|
||||
|
||||
triggerCtx := &models.WorkflowTriggerContext{
|
||||
Mode: models.TriggerModeAPI,
|
||||
TriggerBy: me.Username,
|
||||
InputsOverrides: f.InputsOverrides,
|
||||
RequestID: uuid.New().String(),
|
||||
Stream: true, // 流式端点强制启用流式输出
|
||||
}
|
||||
|
||||
workflowEngine := engine.NewWorkflowEngine(rt.Ctx)
|
||||
_, result, err := workflowEngine.Execute(pipeline, event, triggerCtx)
|
||||
if err != nil {
|
||||
ginx.Bomb(http.StatusInternalServerError, "execute failed: %v", err)
|
||||
}
|
||||
|
||||
if result.Stream && result.StreamChan != nil {
|
||||
rt.handleStreamResponse(c, result, triggerCtx.RequestID)
|
||||
return
|
||||
}
|
||||
|
||||
ginx.NewRender(c).Data(result, nil)
|
||||
}
|
||||
|
||||
func (rt *Router) handleStreamResponse(c *gin.Context, result *models.WorkflowResult, requestID string) {
|
||||
// 设置 SSE 响应头
|
||||
c.Header("Content-Type", "text/event-stream")
|
||||
c.Header("Cache-Control", "no-cache")
|
||||
c.Header("Connection", "keep-alive")
|
||||
c.Header("X-Accel-Buffering", "no") // 禁用 nginx 缓冲
|
||||
c.Header("X-Request-ID", requestID)
|
||||
|
||||
flusher, ok := c.Writer.(http.Flusher)
|
||||
if !ok {
|
||||
ginx.Bomb(http.StatusInternalServerError, "streaming not supported")
|
||||
return
|
||||
}
|
||||
|
||||
// 发送初始连接成功消息
|
||||
initData := fmt.Sprintf(`{"type":"connected","request_id":"%s","timestamp":%d}`, requestID, time.Now().UnixMilli())
|
||||
fmt.Fprintf(c.Writer, "data: %s\n\n", initData)
|
||||
flusher.Flush()
|
||||
|
||||
// 从 channel 读取并发送 SSE
|
||||
timeout := time.After(30 * time.Minute) // 最长流式输出时间
|
||||
for {
|
||||
select {
|
||||
case chunk, ok := <-result.StreamChan:
|
||||
if !ok {
|
||||
// channel 关闭,发送结束标记
|
||||
return
|
||||
}
|
||||
|
||||
data, err := json.Marshal(chunk)
|
||||
if err != nil {
|
||||
logger.Errorf("stream: failed to marshal chunk: %v", err)
|
||||
continue
|
||||
}
|
||||
|
||||
fmt.Fprintf(c.Writer, "data: %s\n\n", data)
|
||||
flusher.Flush()
|
||||
|
||||
if chunk.Done {
|
||||
return
|
||||
}
|
||||
|
||||
case <-c.Request.Context().Done():
|
||||
// 客户端断开连接
|
||||
logger.Infof("stream: client disconnected, request_id=%s", requestID)
|
||||
return
|
||||
case <-timeout:
|
||||
logger.Errorf("stream: timeout, request_id=%s", requestID)
|
||||
return
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func (rt *Router) streamEventPipelineByService(c *gin.Context) {
|
||||
pipelineID := ginx.UrlParamInt64(c, "id")
|
||||
|
||||
var f EventPipelineRequest
|
||||
ginx.BindJSON(c, &f)
|
||||
|
||||
pipeline, err := models.GetEventPipeline(rt.Ctx, pipelineID)
|
||||
if err != nil {
|
||||
ginx.Bomb(http.StatusNotFound, "pipeline not found: %v", err)
|
||||
}
|
||||
|
||||
var event *models.AlertCurEvent
|
||||
if f.Event != nil {
|
||||
event = f.Event
|
||||
} else {
|
||||
event = &models.AlertCurEvent{
|
||||
TriggerTime: time.Now().Unix(),
|
||||
}
|
||||
}
|
||||
|
||||
triggerCtx := &models.WorkflowTriggerContext{
|
||||
Mode: models.TriggerModeAPI,
|
||||
TriggerBy: f.Username,
|
||||
InputsOverrides: f.InputsOverrides,
|
||||
RequestID: uuid.New().String(),
|
||||
Stream: true, // 流式端点强制启用流式输出
|
||||
}
|
||||
|
||||
workflowEngine := engine.NewWorkflowEngine(rt.Ctx)
|
||||
_, result, err := workflowEngine.Execute(pipeline, event, triggerCtx)
|
||||
if err != nil {
|
||||
ginx.Bomb(http.StatusInternalServerError, "execute failed: %v", err)
|
||||
}
|
||||
|
||||
// 检查是否是流式输出
|
||||
if result.Stream && result.StreamChan != nil {
|
||||
rt.handleStreamResponse(c, result, triggerCtx.RequestID)
|
||||
return
|
||||
}
|
||||
|
||||
ginx.NewRender(c).Data(result, nil)
|
||||
}
|
||||
|
||||
// eventPipelineExecutionAdd 接收 edge 节点同步的 Pipeline 执行记录
|
||||
func (rt *Router) eventPipelineExecutionAdd(c *gin.Context) {
|
||||
var execution models.EventPipelineExecution
|
||||
ginx.BindJSON(c, &execution)
|
||||
|
||||
if execution.ID == "" {
|
||||
ginx.Bomb(http.StatusBadRequest, "id is required")
|
||||
}
|
||||
if execution.PipelineID <= 0 {
|
||||
ginx.Bomb(http.StatusBadRequest, "pipeline_id is required")
|
||||
}
|
||||
|
||||
ginx.NewRender(c).Message(models.DB(rt.Ctx).Create(&execution).Error)
|
||||
}
|
||||
|
||||
@@ -7,9 +7,9 @@ import (
|
||||
|
||||
"github.com/ccfos/nightingale/v6/models"
|
||||
"github.com/ccfos/nightingale/v6/pkg/ctx"
|
||||
"github.com/ccfos/nightingale/v6/pkg/ginx"
|
||||
|
||||
"github.com/gin-gonic/gin"
|
||||
"github.com/toolkits/pkg/ginx"
|
||||
)
|
||||
|
||||
const defaultLimit = 300
|
||||
|
||||
@@ -15,9 +15,9 @@ import (
|
||||
"github.com/ccfos/nightingale/v6/models"
|
||||
"github.com/ccfos/nightingale/v6/pkg/ctx"
|
||||
"github.com/ccfos/nightingale/v6/pushgw/idents"
|
||||
"github.com/ccfos/nightingale/v6/pkg/ginx"
|
||||
|
||||
"github.com/gin-gonic/gin"
|
||||
"github.com/toolkits/pkg/ginx"
|
||||
"github.com/toolkits/pkg/logger"
|
||||
)
|
||||
|
||||
|
||||
@@ -12,17 +12,18 @@ import (
|
||||
"github.com/ccfos/nightingale/v6/models"
|
||||
"github.com/ccfos/nightingale/v6/pkg/cas"
|
||||
"github.com/ccfos/nightingale/v6/pkg/dingtalk"
|
||||
"github.com/ccfos/nightingale/v6/pkg/feishu"
|
||||
"github.com/ccfos/nightingale/v6/pkg/ldapx"
|
||||
"github.com/ccfos/nightingale/v6/pkg/logx"
|
||||
"github.com/ccfos/nightingale/v6/pkg/oauth2x"
|
||||
"github.com/ccfos/nightingale/v6/pkg/oidcx"
|
||||
"github.com/ccfos/nightingale/v6/pkg/secu"
|
||||
"github.com/ccfos/nightingale/v6/pkg/ginx"
|
||||
|
||||
"github.com/dgrijalva/jwt-go"
|
||||
"github.com/gin-gonic/gin"
|
||||
"github.com/pelletier/go-toml/v2"
|
||||
"github.com/pkg/errors"
|
||||
"github.com/toolkits/pkg/ginx"
|
||||
"github.com/toolkits/pkg/logger"
|
||||
"gorm.io/gorm"
|
||||
)
|
||||
|
||||
@@ -36,7 +37,9 @@ type loginForm struct {
|
||||
func (rt *Router) loginPost(c *gin.Context) {
|
||||
var f loginForm
|
||||
ginx.BindJSON(c, &f)
|
||||
logger.Infof("username:%s login from:%s", f.Username, c.ClientIP())
|
||||
|
||||
rctx := c.Request.Context()
|
||||
logx.Infof(rctx, "username:%s login from:%s", f.Username, c.ClientIP())
|
||||
|
||||
if rt.HTTP.ShowCaptcha.Enable {
|
||||
if !CaptchaVerify(f.Captchaid, f.Verifyvalue) {
|
||||
@@ -49,23 +52,25 @@ func (rt *Router) loginPost(c *gin.Context) {
|
||||
if rt.HTTP.RSA.OpenRSA {
|
||||
decPassWord, err := secu.Decrypt(f.Password, rt.HTTP.RSA.RSAPrivateKey, rt.HTTP.RSA.RSAPassWord)
|
||||
if err != nil {
|
||||
logger.Errorf("RSA Decrypt failed: %v username: %s", err, f.Username)
|
||||
logx.Errorf(rctx, "RSA Decrypt failed: %v username: %s", err, f.Username)
|
||||
ginx.NewRender(c).Message(err)
|
||||
return
|
||||
}
|
||||
authPassWord = decPassWord
|
||||
}
|
||||
|
||||
reqCtx := rt.Ctx.WithContext(rctx)
|
||||
|
||||
var user *models.User
|
||||
var err error
|
||||
lc := rt.Sso.LDAP.Copy()
|
||||
if lc.Enable {
|
||||
user, err = ldapx.LdapLogin(rt.Ctx, f.Username, authPassWord, lc.DefaultRoles, lc.DefaultTeams, lc)
|
||||
user, err = ldapx.LdapLogin(reqCtx, f.Username, authPassWord, lc.DefaultRoles, lc.DefaultTeams, lc)
|
||||
if err != nil {
|
||||
logger.Debugf("ldap login failed: %v username: %s", err, f.Username)
|
||||
logx.Debugf(rctx, "ldap login failed: %v username: %s", err, f.Username)
|
||||
var errLoginInN9e error
|
||||
// to use n9e as the minimum guarantee for login
|
||||
if user, errLoginInN9e = models.PassLogin(rt.Ctx, rt.Redis, f.Username, authPassWord); errLoginInN9e != nil {
|
||||
if user, errLoginInN9e = models.PassLogin(reqCtx, rt.Redis, f.Username, authPassWord); errLoginInN9e != nil {
|
||||
ginx.NewRender(c).Message("ldap login failed: %v; n9e login failed: %v", err, errLoginInN9e)
|
||||
return
|
||||
}
|
||||
@@ -73,7 +78,7 @@ func (rt *Router) loginPost(c *gin.Context) {
|
||||
user.RolesLst = strings.Fields(user.Roles)
|
||||
}
|
||||
} else {
|
||||
user, err = models.PassLogin(rt.Ctx, rt.Redis, f.Username, authPassWord)
|
||||
user, err = models.PassLogin(reqCtx, rt.Redis, f.Username, authPassWord)
|
||||
ginx.Dangerous(err)
|
||||
}
|
||||
|
||||
@@ -97,7 +102,8 @@ func (rt *Router) loginPost(c *gin.Context) {
|
||||
}
|
||||
|
||||
func (rt *Router) logoutPost(c *gin.Context) {
|
||||
logger.Infof("username:%s logout from:%s", c.GetString("username"), c.ClientIP())
|
||||
rctx := c.Request.Context()
|
||||
logx.Infof(rctx, "username:%s logout from:%s", c.GetString("username"), c.ClientIP())
|
||||
metadata, err := rt.extractTokenMetadata(c.Request)
|
||||
if err != nil {
|
||||
ginx.NewRender(c, http.StatusBadRequest).Message("failed to parse jwt token")
|
||||
@@ -116,7 +122,7 @@ func (rt *Router) logoutPost(c *gin.Context) {
|
||||
// 获取用户的 id_token
|
||||
idToken, err := rt.fetchIdToken(c.Request.Context(), user.Id)
|
||||
if err != nil {
|
||||
logger.Debugf("fetch id_token failed: %v, user_id: %d", err, user.Id)
|
||||
logx.Debugf(rctx, "fetch id_token failed: %v, user_id: %d", err, user.Id)
|
||||
idToken = "" // 如果获取失败,使用空字符串
|
||||
}
|
||||
|
||||
@@ -219,7 +225,7 @@ func (rt *Router) refreshPost(c *gin.Context) {
|
||||
// 注意:这里不会获取新的 id_token,只是延长 Redis 中现有 id_token 的 TTL
|
||||
if idToken, err := rt.fetchIdToken(c.Request.Context(), userid); err == nil && idToken != "" {
|
||||
if err := rt.saveIdToken(c.Request.Context(), userid, idToken); err != nil {
|
||||
logger.Debugf("refresh id_token ttl failed: %v, user_id: %d", err, userid)
|
||||
logx.Debugf(c.Request.Context(), "refresh id_token ttl failed: %v, user_id: %d", err, userid)
|
||||
}
|
||||
}
|
||||
|
||||
@@ -270,12 +276,13 @@ type CallbackOutput struct {
|
||||
}
|
||||
|
||||
func (rt *Router) loginCallback(c *gin.Context) {
|
||||
rctx := c.Request.Context()
|
||||
code := ginx.QueryStr(c, "code", "")
|
||||
state := ginx.QueryStr(c, "state", "")
|
||||
|
||||
ret, err := rt.Sso.OIDC.Callback(rt.Redis, c.Request.Context(), code, state)
|
||||
ret, err := rt.Sso.OIDC.Callback(rt.Redis, rctx, code, state)
|
||||
if err != nil {
|
||||
logger.Errorf("sso_callback fail. code:%s, state:%s, get ret: %+v. error: %v", code, state, ret, err)
|
||||
logx.Errorf(rctx, "sso_callback fail. code:%s, state:%s, get ret: %+v. error: %v", code, state, ret, err)
|
||||
ginx.NewRender(c).Data(CallbackOutput{}, err)
|
||||
return
|
||||
}
|
||||
@@ -298,7 +305,7 @@ func (rt *Router) loginCallback(c *gin.Context) {
|
||||
for _, gid := range rt.Sso.OIDC.DefaultTeams {
|
||||
err = models.UserGroupMemberAdd(rt.Ctx, gid, user.Id)
|
||||
if err != nil {
|
||||
logger.Errorf("user:%v UserGroupMemberAdd: %s", user, err)
|
||||
logx.Errorf(rctx, "user:%v UserGroupMemberAdd: %s", user, err)
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -308,12 +315,12 @@ func (rt *Router) loginCallback(c *gin.Context) {
|
||||
userIdentity := fmt.Sprintf("%d-%s", user.Id, user.Username)
|
||||
ts, err := rt.createTokens(rt.HTTP.JWTAuth.SigningKey, userIdentity)
|
||||
ginx.Dangerous(err)
|
||||
ginx.Dangerous(rt.createAuth(c.Request.Context(), userIdentity, ts))
|
||||
ginx.Dangerous(rt.createAuth(rctx, userIdentity, ts))
|
||||
|
||||
// 保存 id_token 到 Redis,用于登出时使用
|
||||
if ret.IdToken != "" {
|
||||
if err := rt.saveIdToken(c.Request.Context(), user.Id, ret.IdToken); err != nil {
|
||||
logger.Errorf("save id_token failed: %v, user_id: %d", err, user.Id)
|
||||
if err := rt.saveIdToken(rctx, user.Id, ret.IdToken); err != nil {
|
||||
logx.Errorf(rctx, "save id_token failed: %v, user_id: %d", err, user.Id)
|
||||
}
|
||||
}
|
||||
|
||||
@@ -354,7 +361,7 @@ func (rt *Router) loginRedirectCas(c *gin.Context) {
|
||||
}
|
||||
|
||||
if !rt.Sso.CAS.Enable {
|
||||
logger.Error("cas is not enable")
|
||||
logx.Errorf(c.Request.Context(), "cas is not enable")
|
||||
ginx.NewRender(c).Data("", nil)
|
||||
return
|
||||
}
|
||||
@@ -369,17 +376,18 @@ func (rt *Router) loginRedirectCas(c *gin.Context) {
|
||||
}
|
||||
|
||||
func (rt *Router) loginCallbackCas(c *gin.Context) {
|
||||
rctx := c.Request.Context()
|
||||
ticket := ginx.QueryStr(c, "ticket", "")
|
||||
state := ginx.QueryStr(c, "state", "")
|
||||
ret, err := rt.Sso.CAS.ValidateServiceTicket(c.Request.Context(), ticket, state, rt.Redis)
|
||||
ret, err := rt.Sso.CAS.ValidateServiceTicket(rctx, ticket, state, rt.Redis)
|
||||
if err != nil {
|
||||
logger.Errorf("ValidateServiceTicket: %s", err)
|
||||
logx.Errorf(rctx, "ValidateServiceTicket: %s", err)
|
||||
ginx.NewRender(c).Data("", err)
|
||||
return
|
||||
}
|
||||
user, err := models.UserGet(rt.Ctx, "username=?", ret.Username)
|
||||
if err != nil {
|
||||
logger.Errorf("UserGet: %s", err)
|
||||
logx.Errorf(rctx, "UserGet: %s", err)
|
||||
}
|
||||
ginx.Dangerous(err)
|
||||
if user != nil {
|
||||
@@ -398,10 +406,10 @@ func (rt *Router) loginCallbackCas(c *gin.Context) {
|
||||
userIdentity := fmt.Sprintf("%d-%s", user.Id, user.Username)
|
||||
ts, err := rt.createTokens(rt.HTTP.JWTAuth.SigningKey, userIdentity)
|
||||
if err != nil {
|
||||
logger.Errorf("createTokens: %s", err)
|
||||
logx.Errorf(rctx, "createTokens: %s", err)
|
||||
}
|
||||
ginx.Dangerous(err)
|
||||
ginx.Dangerous(rt.createAuth(c.Request.Context(), userIdentity, ts))
|
||||
ginx.Dangerous(rt.createAuth(rctx, userIdentity, ts))
|
||||
|
||||
redirect := "/"
|
||||
if ret.Redirect != "/login" {
|
||||
@@ -474,12 +482,13 @@ func (rt *Router) loginRedirectDingTalk(c *gin.Context) {
|
||||
}
|
||||
|
||||
func (rt *Router) loginCallbackDingTalk(c *gin.Context) {
|
||||
rctx := c.Request.Context()
|
||||
code := ginx.QueryStr(c, "code", "")
|
||||
state := ginx.QueryStr(c, "state", "")
|
||||
|
||||
ret, err := rt.Sso.DingTalk.Callback(rt.Redis, c.Request.Context(), code, state)
|
||||
ret, err := rt.Sso.DingTalk.Callback(rt.Redis, rctx, code, state)
|
||||
if err != nil {
|
||||
logger.Errorf("sso_callback DingTalk fail. code:%s, state:%s, get ret: %+v. error: %v", code, state, ret, err)
|
||||
logx.Errorf(rctx, "sso_callback DingTalk fail. code:%s, state:%s, get ret: %+v. error: %v", code, state, ret, err)
|
||||
ginx.NewRender(c).Data(CallbackOutput{}, err)
|
||||
return
|
||||
}
|
||||
@@ -519,13 +528,104 @@ func (rt *Router) loginCallbackDingTalk(c *gin.Context) {
|
||||
|
||||
}
|
||||
|
||||
func (rt *Router) loginCallbackOAuth(c *gin.Context) {
|
||||
func (rt *Router) loginRedirectFeiShu(c *gin.Context) {
|
||||
redirect := ginx.QueryStr(c, "redirect", "/")
|
||||
|
||||
v, exists := c.Get("userid")
|
||||
if exists {
|
||||
userid := v.(int64)
|
||||
user, err := models.UserGetById(rt.Ctx, userid)
|
||||
ginx.Dangerous(err)
|
||||
if user == nil {
|
||||
ginx.Bomb(200, "user not found")
|
||||
}
|
||||
|
||||
if user.Username != "" { // already login
|
||||
ginx.NewRender(c).Data(redirect, nil)
|
||||
return
|
||||
}
|
||||
}
|
||||
|
||||
if rt.Sso.FeiShu == nil || !rt.Sso.FeiShu.Enable {
|
||||
ginx.NewRender(c).Data("", nil)
|
||||
return
|
||||
}
|
||||
|
||||
redirect, err := rt.Sso.FeiShu.Authorize(rt.Redis, redirect)
|
||||
ginx.Dangerous(err)
|
||||
|
||||
ginx.NewRender(c).Data(redirect, err)
|
||||
}
|
||||
|
||||
func (rt *Router) loginCallbackFeiShu(c *gin.Context) {
|
||||
rctx := c.Request.Context()
|
||||
code := ginx.QueryStr(c, "code", "")
|
||||
state := ginx.QueryStr(c, "state", "")
|
||||
|
||||
ret, err := rt.Sso.OAuth2.Callback(rt.Redis, c.Request.Context(), code, state)
|
||||
ret, err := rt.Sso.FeiShu.Callback(rt.Redis, rctx, code, state)
|
||||
if err != nil {
|
||||
logger.Debugf("sso.callback() get ret %+v error %v", ret, err)
|
||||
logx.Errorf(rctx, "sso_callback FeiShu fail. code:%s, state:%s, get ret: %+v. error: %v", code, state, ret, err)
|
||||
ginx.NewRender(c).Data(CallbackOutput{}, err)
|
||||
return
|
||||
}
|
||||
|
||||
user, err := models.UserGet(rt.Ctx, "username=?", ret.Username)
|
||||
ginx.Dangerous(err)
|
||||
|
||||
if user != nil {
|
||||
if rt.Sso.FeiShu != nil && rt.Sso.FeiShu.FeiShuConfig != nil && rt.Sso.FeiShu.FeiShuConfig.CoverAttributes {
|
||||
updatedFields := user.UpdateSsoFields(feishu.SsoTypeName, ret.Nickname, ret.Phone, ret.Email)
|
||||
ginx.Dangerous(user.Update(rt.Ctx, "update_at", updatedFields...))
|
||||
}
|
||||
} else {
|
||||
user = new(models.User)
|
||||
defaultRoles := []string{}
|
||||
defaultUserGroups := []int64{}
|
||||
if rt.Sso.FeiShu != nil && rt.Sso.FeiShu.FeiShuConfig != nil {
|
||||
defaultRoles = rt.Sso.FeiShu.FeiShuConfig.DefaultRoles
|
||||
defaultUserGroups = rt.Sso.FeiShu.FeiShuConfig.DefaultUserGroups
|
||||
}
|
||||
|
||||
user.FullSsoFields(feishu.SsoTypeName, ret.Username, ret.Nickname, ret.Phone, ret.Email, defaultRoles)
|
||||
ginx.Dangerous(user.Add(rt.Ctx))
|
||||
|
||||
if len(defaultUserGroups) > 0 {
|
||||
err = user.AddToUserGroups(rt.Ctx, defaultUserGroups)
|
||||
if err != nil {
|
||||
logx.Errorf(rctx, "sso feishu add user group error %v %v", ret, err)
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
// set user login state
|
||||
userIdentity := fmt.Sprintf("%d-%s", user.Id, user.Username)
|
||||
ts, err := rt.createTokens(rt.HTTP.JWTAuth.SigningKey, userIdentity)
|
||||
ginx.Dangerous(err)
|
||||
ginx.Dangerous(rt.createAuth(c.Request.Context(), userIdentity, ts))
|
||||
|
||||
redirect := "/"
|
||||
if ret.Redirect != "/login" {
|
||||
redirect = ret.Redirect
|
||||
}
|
||||
|
||||
ginx.NewRender(c).Data(CallbackOutput{
|
||||
Redirect: redirect,
|
||||
User: user,
|
||||
AccessToken: ts.AccessToken,
|
||||
RefreshToken: ts.RefreshToken,
|
||||
}, nil)
|
||||
|
||||
}
|
||||
|
||||
func (rt *Router) loginCallbackOAuth(c *gin.Context) {
|
||||
rctx := c.Request.Context()
|
||||
code := ginx.QueryStr(c, "code", "")
|
||||
state := ginx.QueryStr(c, "state", "")
|
||||
|
||||
ret, err := rt.Sso.OAuth2.Callback(rt.Redis, rctx, code, state)
|
||||
if err != nil {
|
||||
logx.Debugf(rctx, "sso.callback() get ret %+v error %v", ret, err)
|
||||
ginx.NewRender(c).Data(CallbackOutput{}, err)
|
||||
return
|
||||
}
|
||||
@@ -569,10 +669,11 @@ type SsoConfigOutput struct {
|
||||
CasDisplayName string `json:"casDisplayName"`
|
||||
OauthDisplayName string `json:"oauthDisplayName"`
|
||||
DingTalkDisplayName string `json:"dingTalkDisplayName"`
|
||||
FeiShuDisplayName string `json:"feishuDisplayName"`
|
||||
}
|
||||
|
||||
func (rt *Router) ssoConfigNameGet(c *gin.Context) {
|
||||
var oidcDisplayName, casDisplayName, oauthDisplayName, dingTalkDisplayName string
|
||||
var oidcDisplayName, casDisplayName, oauthDisplayName, dingTalkDisplayName, feiShuDisplayName string
|
||||
if rt.Sso.OIDC != nil {
|
||||
oidcDisplayName = rt.Sso.OIDC.GetDisplayName()
|
||||
}
|
||||
@@ -589,11 +690,16 @@ func (rt *Router) ssoConfigNameGet(c *gin.Context) {
|
||||
dingTalkDisplayName = rt.Sso.DingTalk.GetDisplayName()
|
||||
}
|
||||
|
||||
if rt.Sso.FeiShu != nil {
|
||||
feiShuDisplayName = rt.Sso.FeiShu.GetDisplayName()
|
||||
}
|
||||
|
||||
ginx.NewRender(c).Data(SsoConfigOutput{
|
||||
OidcDisplayName: oidcDisplayName,
|
||||
CasDisplayName: casDisplayName,
|
||||
OauthDisplayName: oauthDisplayName,
|
||||
DingTalkDisplayName: dingTalkDisplayName,
|
||||
FeiShuDisplayName: feiShuDisplayName,
|
||||
}, nil)
|
||||
}
|
||||
|
||||
@@ -608,6 +714,7 @@ func (rt *Router) ssoConfigGets(c *gin.Context) {
|
||||
|
||||
// TODO: dingTalkExist 为了兼容当前前端配置, 后期单点登陆统一调整后不在预先设置默认内容
|
||||
dingTalkExist := false
|
||||
feiShuExist := false
|
||||
for _, config := range lst {
|
||||
var ssoReqConfig models.SsoConfig
|
||||
ssoReqConfig.Id = config.Id
|
||||
@@ -618,6 +725,10 @@ func (rt *Router) ssoConfigGets(c *gin.Context) {
|
||||
dingTalkExist = true
|
||||
err := json.Unmarshal([]byte(config.Content), &ssoReqConfig.SettingJson)
|
||||
ginx.Dangerous(err)
|
||||
case feishu.SsoTypeName:
|
||||
feiShuExist = true
|
||||
err := json.Unmarshal([]byte(config.Content), &ssoReqConfig.SettingJson)
|
||||
ginx.Dangerous(err)
|
||||
default:
|
||||
ssoReqConfig.Content = config.Content
|
||||
}
|
||||
@@ -630,6 +741,11 @@ func (rt *Router) ssoConfigGets(c *gin.Context) {
|
||||
ssoConfig.Name = dingtalk.SsoTypeName
|
||||
ssoConfigs = append(ssoConfigs, ssoConfig)
|
||||
}
|
||||
if !feiShuExist {
|
||||
var ssoConfig models.SsoConfig
|
||||
ssoConfig.Name = feishu.SsoTypeName
|
||||
ssoConfigs = append(ssoConfigs, ssoConfig)
|
||||
}
|
||||
|
||||
ginx.NewRender(c).Data(ssoConfigs, nil)
|
||||
}
|
||||
@@ -657,6 +773,23 @@ func (rt *Router) ssoConfigUpdate(c *gin.Context) {
|
||||
err = f.Update(rt.Ctx)
|
||||
}
|
||||
ginx.Dangerous(err)
|
||||
case feishu.SsoTypeName:
|
||||
f.Name = ssoConfig.Name
|
||||
setting, err := json.Marshal(ssoConfig.SettingJson)
|
||||
ginx.Dangerous(err)
|
||||
f.Content = string(setting)
|
||||
f.UpdateAt = time.Now().Unix()
|
||||
sso, err := f.Query(rt.Ctx)
|
||||
if !errors.Is(err, gorm.ErrRecordNotFound) {
|
||||
ginx.Dangerous(err)
|
||||
}
|
||||
if errors.Is(err, gorm.ErrRecordNotFound) {
|
||||
err = f.Create(rt.Ctx)
|
||||
} else {
|
||||
f.Id = sso.Id
|
||||
err = f.Update(rt.Ctx)
|
||||
}
|
||||
ginx.Dangerous(err)
|
||||
default:
|
||||
f.Id = ssoConfig.Id
|
||||
f.Name = ssoConfig.Name
|
||||
@@ -695,6 +828,14 @@ func (rt *Router) ssoConfigUpdate(c *gin.Context) {
|
||||
rt.Sso.DingTalk = dingtalk.New(config)
|
||||
}
|
||||
rt.Sso.DingTalk.Reload(config)
|
||||
case feishu.SsoTypeName:
|
||||
var config feishu.Config
|
||||
err := json.Unmarshal([]byte(f.Content), &config)
|
||||
ginx.Dangerous(err)
|
||||
if rt.Sso.FeiShu == nil {
|
||||
rt.Sso.FeiShu = feishu.New(config)
|
||||
}
|
||||
rt.Sso.FeiShu.Reload(config)
|
||||
}
|
||||
|
||||
ginx.NewRender(c).Message(nil)
|
||||
|
||||
@@ -12,10 +12,10 @@ import (
|
||||
"github.com/ccfos/nightingale/v6/pkg/slice"
|
||||
"github.com/ccfos/nightingale/v6/pkg/strx"
|
||||
"github.com/ccfos/nightingale/v6/pkg/tplx"
|
||||
"github.com/ccfos/nightingale/v6/pkg/ginx"
|
||||
|
||||
"github.com/gin-gonic/gin"
|
||||
"github.com/google/uuid"
|
||||
"github.com/toolkits/pkg/ginx"
|
||||
)
|
||||
|
||||
func (rt *Router) messageTemplatesAdd(c *gin.Context) {
|
||||
@@ -154,6 +154,7 @@ func (rt *Router) messageTemplatesGet(c *gin.Context) {
|
||||
|
||||
lst, err := models.MessageTemplatesGetBy(rt.Ctx, notifyChannelIdents)
|
||||
ginx.Dangerous(err)
|
||||
models.FillUpdateByNicknames(rt.Ctx, lst)
|
||||
|
||||
if me.IsAdmin() {
|
||||
ginx.NewRender(c).Data(lst, nil)
|
||||
|
||||
@@ -2,9 +2,9 @@ package router
|
||||
|
||||
import (
|
||||
"github.com/ccfos/nightingale/v6/center/cconf"
|
||||
"github.com/ccfos/nightingale/v6/pkg/ginx"
|
||||
|
||||
"github.com/gin-gonic/gin"
|
||||
"github.com/toolkits/pkg/ginx"
|
||||
)
|
||||
|
||||
func (rt *Router) metricsDescGetFile(c *gin.Context) {
|
||||
|
||||
@@ -4,9 +4,9 @@ import (
|
||||
"net/http"
|
||||
|
||||
"github.com/ccfos/nightingale/v6/models"
|
||||
"github.com/ccfos/nightingale/v6/pkg/ginx"
|
||||
|
||||
"github.com/gin-gonic/gin"
|
||||
"github.com/toolkits/pkg/ginx"
|
||||
)
|
||||
|
||||
// no param
|
||||
|
||||
@@ -9,9 +9,9 @@ import (
|
||||
"github.com/ccfos/nightingale/v6/alert/mute"
|
||||
"github.com/ccfos/nightingale/v6/models"
|
||||
"github.com/ccfos/nightingale/v6/pkg/strx"
|
||||
"github.com/ccfos/nightingale/v6/pkg/ginx"
|
||||
|
||||
"github.com/gin-gonic/gin"
|
||||
"github.com/toolkits/pkg/ginx"
|
||||
"github.com/toolkits/pkg/i18n"
|
||||
)
|
||||
|
||||
@@ -22,6 +22,9 @@ func (rt *Router) alertMuteGetsByBG(c *gin.Context) {
|
||||
query := ginx.QueryStr(c, "query", "")
|
||||
expired := ginx.QueryInt(c, "expired", -1)
|
||||
lst, err := models.AlertMuteGets(rt.Ctx, prods, bgid, -1, expired, query)
|
||||
if err == nil {
|
||||
models.FillUpdateByNicknames(rt.Ctx, lst)
|
||||
}
|
||||
|
||||
ginx.NewRender(c).Data(lst, err)
|
||||
}
|
||||
@@ -47,6 +50,9 @@ func (rt *Router) alertMuteGetsByGids(c *gin.Context) {
|
||||
}
|
||||
|
||||
lst, err := models.AlertMuteGetsByBGIds(rt.Ctx, gids)
|
||||
if err == nil {
|
||||
models.FillUpdateByNicknames(rt.Ctx, lst)
|
||||
}
|
||||
|
||||
ginx.NewRender(c).Data(lst, err)
|
||||
}
|
||||
@@ -58,6 +64,9 @@ func (rt *Router) alertMuteGets(c *gin.Context) {
|
||||
disabled := ginx.QueryInt(c, "disabled", -1)
|
||||
expired := ginx.QueryInt(c, "expired", -1)
|
||||
lst, err := models.AlertMuteGets(rt.Ctx, prods, bgid, disabled, expired, query)
|
||||
if err == nil {
|
||||
models.FillUpdateByNicknames(rt.Ctx, lst)
|
||||
}
|
||||
|
||||
ginx.NewRender(c).Data(lst, err)
|
||||
}
|
||||
|
||||
@@ -11,11 +11,11 @@ import (
|
||||
|
||||
"github.com/ccfos/nightingale/v6/center/cstats"
|
||||
"github.com/ccfos/nightingale/v6/models"
|
||||
"github.com/ccfos/nightingale/v6/pkg/ginx"
|
||||
|
||||
"github.com/gin-gonic/gin"
|
||||
"github.com/golang-jwt/jwt"
|
||||
"github.com/google/uuid"
|
||||
"github.com/toolkits/pkg/ginx"
|
||||
)
|
||||
|
||||
const (
|
||||
|
||||
@@ -6,9 +6,9 @@ import (
|
||||
"github.com/ccfos/nightingale/v6/alert/sender"
|
||||
"github.com/ccfos/nightingale/v6/models"
|
||||
"github.com/ccfos/nightingale/v6/pkg/ctx"
|
||||
"github.com/ccfos/nightingale/v6/pkg/ginx"
|
||||
|
||||
"github.com/gin-gonic/gin"
|
||||
"github.com/toolkits/pkg/ginx"
|
||||
"github.com/toolkits/pkg/logger"
|
||||
)
|
||||
|
||||
|
||||
@@ -11,8 +11,8 @@ import (
|
||||
"time"
|
||||
|
||||
"github.com/ccfos/nightingale/v6/models"
|
||||
"github.com/ccfos/nightingale/v6/pkg/ginx"
|
||||
"github.com/gin-gonic/gin"
|
||||
"github.com/toolkits/pkg/ginx"
|
||||
)
|
||||
|
||||
func (rt *Router) notifyChannelsAdd(c *gin.Context) {
|
||||
@@ -118,6 +118,9 @@ func (rt *Router) notifyChannelGetBy(c *gin.Context) {
|
||||
|
||||
func (rt *Router) notifyChannelsGet(c *gin.Context) {
|
||||
lst, err := models.NotifyChannelsGet(rt.Ctx, "", nil)
|
||||
if err == nil {
|
||||
models.FillUpdateByNicknames(rt.Ctx, lst)
|
||||
}
|
||||
ginx.NewRender(c).Data(lst, err)
|
||||
}
|
||||
|
||||
|
||||
@@ -10,10 +10,10 @@ import (
|
||||
"github.com/ccfos/nightingale/v6/memsto"
|
||||
"github.com/ccfos/nightingale/v6/models"
|
||||
"github.com/ccfos/nightingale/v6/pkg/tplx"
|
||||
"github.com/ccfos/nightingale/v6/pkg/ginx"
|
||||
|
||||
"github.com/gin-gonic/gin"
|
||||
"github.com/pelletier/go-toml/v2"
|
||||
"github.com/toolkits/pkg/ginx"
|
||||
"github.com/toolkits/pkg/str"
|
||||
)
|
||||
|
||||
|
||||
@@ -10,9 +10,9 @@ import (
|
||||
"github.com/ccfos/nightingale/v6/models"
|
||||
"github.com/ccfos/nightingale/v6/pkg/ctx"
|
||||
"github.com/ccfos/nightingale/v6/pkg/slice"
|
||||
"github.com/ccfos/nightingale/v6/pkg/ginx"
|
||||
|
||||
"github.com/gin-gonic/gin"
|
||||
"github.com/toolkits/pkg/ginx"
|
||||
"github.com/toolkits/pkg/logger"
|
||||
)
|
||||
|
||||
@@ -118,6 +118,7 @@ func (rt *Router) notifyRulesGet(c *gin.Context) {
|
||||
|
||||
lst, err := models.NotifyRulesGet(rt.Ctx, "", nil)
|
||||
ginx.Dangerous(err)
|
||||
models.FillUpdateByNicknames(rt.Ctx, lst)
|
||||
if me.IsAdmin() {
|
||||
ginx.NewRender(c).Data(lst, nil)
|
||||
return
|
||||
@@ -221,7 +222,7 @@ func SendNotifyChannelMessage(ctx *ctx.Context, userCache *memsto.UserCacheType,
|
||||
return "", fmt.Errorf("failed to send flashduty notify: %v", err)
|
||||
}
|
||||
}
|
||||
logger.Infof("channel_name: %v, event:%+v, tplContent:%s, customParams:%v, respBody: %v, err: %v", notifyChannel.Name, events[0], tplContent, customParams, resp, err)
|
||||
logger.Infof("channel_name: %v, event:%s, tplContent:%s, customParams:%v, respBody: %v, err: %v", notifyChannel.Name, events[0].Hash, tplContent, customParams, resp, err)
|
||||
return resp, nil
|
||||
case "pagerduty":
|
||||
client, err := models.GetHTTPClient(notifyChannel)
|
||||
@@ -235,7 +236,7 @@ func SendNotifyChannelMessage(ctx *ctx.Context, userCache *memsto.UserCacheType,
|
||||
return "", fmt.Errorf("failed to send pagerduty notify: %v", err)
|
||||
}
|
||||
}
|
||||
logger.Infof("channel_name: %v, event:%+v, tplContent:%s, customParams:%v, respBody: %v, err: %v", notifyChannel.Name, events[0], tplContent, customParams, resp, err)
|
||||
logger.Infof("channel_name: %v, event:%s, tplContent:%s, customParams:%v, respBody: %v, err: %v", notifyChannel.Name, events[0].Hash, tplContent, customParams, resp, err)
|
||||
return resp, nil
|
||||
case "http":
|
||||
client, err := models.GetHTTPClient(notifyChannel)
|
||||
@@ -253,7 +254,7 @@ func SendNotifyChannelMessage(ctx *ctx.Context, userCache *memsto.UserCacheType,
|
||||
|
||||
if dispatch.NeedBatchContacts(notifyChannel.RequestConfig.HTTPRequestConfig) || len(sendtos) == 0 {
|
||||
resp, err = notifyChannel.SendHTTP(events, tplContent, customParams, sendtos, client)
|
||||
logger.Infof("channel_name: %v, event:%+v, sendtos:%+v, tplContent:%s, customParams:%v, respBody: %v, err: %v", notifyChannel.Name, events[0], sendtos, tplContent, customParams, resp, err)
|
||||
logger.Infof("channel_name: %v, event:%s, sendtos:%+v, tplContent:%s, customParams:%v, respBody: %v, err: %v", notifyChannel.Name, events[0].Hash, sendtos, tplContent, customParams, resp, err)
|
||||
if err != nil {
|
||||
return "", fmt.Errorf("failed to send http notify: %v", err)
|
||||
}
|
||||
@@ -261,7 +262,7 @@ func SendNotifyChannelMessage(ctx *ctx.Context, userCache *memsto.UserCacheType,
|
||||
} else {
|
||||
for i := range sendtos {
|
||||
resp, err = notifyChannel.SendHTTP(events, tplContent, customParams, []string{sendtos[i]}, client)
|
||||
logger.Infof("channel_name: %v, event:%+v, tplContent:%s, customParams:%v, sendto:%+v, respBody: %v, err: %v", notifyChannel.Name, events[0], tplContent, customParams, sendtos[i], resp, err)
|
||||
logger.Infof("channel_name: %v, event:%s, tplContent:%s, customParams:%v, sendto:%+v, respBody: %v, err: %v", notifyChannel.Name, events[0].Hash, tplContent, customParams, sendtos[i], resp, err)
|
||||
if err != nil {
|
||||
return "", fmt.Errorf("failed to send http notify: %v", err)
|
||||
}
|
||||
@@ -280,7 +281,7 @@ func SendNotifyChannelMessage(ctx *ctx.Context, userCache *memsto.UserCacheType,
|
||||
return resp, nil
|
||||
case "script":
|
||||
resp, _, err := notifyChannel.SendScript(events, tplContent, customParams, sendtos)
|
||||
logger.Infof("channel_name: %v, event:%+v, tplContent:%s, customParams:%v, respBody: %v, err: %v", notifyChannel.Name, events[0], tplContent, customParams, resp, err)
|
||||
logger.Infof("channel_name: %v, event:%s, tplContent:%s, customParams:%v, respBody: %v, err: %v", notifyChannel.Name, events[0].Hash, tplContent, customParams, resp, err)
|
||||
return resp, err
|
||||
default:
|
||||
logger.Errorf("unsupported request type: %v", notifyChannel.RequestType)
|
||||
|
||||
@@ -11,9 +11,9 @@ import (
|
||||
"github.com/ccfos/nightingale/v6/center/cconf"
|
||||
"github.com/ccfos/nightingale/v6/models"
|
||||
"github.com/ccfos/nightingale/v6/pkg/tplx"
|
||||
"github.com/ccfos/nightingale/v6/pkg/ginx"
|
||||
|
||||
"github.com/gin-gonic/gin"
|
||||
"github.com/toolkits/pkg/ginx"
|
||||
"github.com/toolkits/pkg/str"
|
||||
)
|
||||
|
||||
@@ -25,11 +25,14 @@ func (rt *Router) notifyTplGets(c *gin.Context) {
|
||||
m[models.EmailSubject] = struct{}{}
|
||||
|
||||
lst, err := models.NotifyTplGets(rt.Ctx)
|
||||
ginx.Dangerous(err)
|
||||
|
||||
for i := 0; i < len(lst); i++ {
|
||||
if _, exists := m[lst[i].Channel]; exists {
|
||||
lst[i].BuiltIn = true
|
||||
}
|
||||
}
|
||||
models.FillUpdateByNicknames(rt.Ctx, lst)
|
||||
|
||||
ginx.NewRender(c).Data(lst, err)
|
||||
}
|
||||
@@ -200,6 +203,9 @@ func (rt *Router) messageTemplateGets(c *gin.Context) {
|
||||
ident := ginx.QueryStr(c, "ident", "")
|
||||
|
||||
tpls, err := models.MessageTemplateGets(rt.Ctx, id, name, ident)
|
||||
if err == nil {
|
||||
models.FillUpdateByNicknames(rt.Ctx, tpls)
|
||||
}
|
||||
|
||||
ginx.NewRender(c).Data(tpls, err)
|
||||
}
|
||||
|
||||
@@ -3,9 +3,9 @@ package router
|
||||
import (
|
||||
"github.com/ccfos/nightingale/v6/datasource/opensearch"
|
||||
"github.com/ccfos/nightingale/v6/dscache"
|
||||
"github.com/ccfos/nightingale/v6/pkg/ginx"
|
||||
|
||||
"github.com/gin-gonic/gin"
|
||||
"github.com/toolkits/pkg/ginx"
|
||||
"github.com/toolkits/pkg/logger"
|
||||
)
|
||||
|
||||
|
||||
@@ -2,7 +2,6 @@ package router
|
||||
|
||||
import (
|
||||
"context"
|
||||
"crypto/tls"
|
||||
"fmt"
|
||||
"net"
|
||||
"net/http"
|
||||
@@ -13,12 +12,13 @@ import (
|
||||
"sync"
|
||||
"time"
|
||||
|
||||
"github.com/ccfos/nightingale/v6/pkg/logx"
|
||||
"github.com/ccfos/nightingale/v6/pkg/poster"
|
||||
pkgprom "github.com/ccfos/nightingale/v6/pkg/prom"
|
||||
"github.com/ccfos/nightingale/v6/prom"
|
||||
"github.com/ccfos/nightingale/v6/pkg/ginx"
|
||||
"github.com/gin-gonic/gin"
|
||||
"github.com/prometheus/common/model"
|
||||
"github.com/toolkits/pkg/ginx"
|
||||
"github.com/toolkits/pkg/logger"
|
||||
"github.com/toolkits/pkg/net/httplib"
|
||||
)
|
||||
@@ -39,15 +39,16 @@ func (rt *Router) promBatchQueryRange(c *gin.Context) {
|
||||
var f BatchQueryForm
|
||||
ginx.Dangerous(c.BindJSON(&f))
|
||||
|
||||
lst, err := PromBatchQueryRange(rt.PromClients, f)
|
||||
lst, err := PromBatchQueryRange(c.Request.Context(), rt.PromClients, f)
|
||||
ginx.NewRender(c).Data(lst, err)
|
||||
}
|
||||
|
||||
func PromBatchQueryRange(pc *prom.PromClientMap, f BatchQueryForm) ([]model.Value, error) {
|
||||
func PromBatchQueryRange(ctx context.Context, pc *prom.PromClientMap, f BatchQueryForm) ([]model.Value, error) {
|
||||
var lst []model.Value
|
||||
|
||||
cli := pc.GetCli(f.DatasourceId)
|
||||
if cli == nil {
|
||||
logx.Warningf(ctx, "no such datasource id: %d", f.DatasourceId)
|
||||
return lst, fmt.Errorf("no such datasource id: %d", f.DatasourceId)
|
||||
}
|
||||
|
||||
@@ -58,8 +59,9 @@ func PromBatchQueryRange(pc *prom.PromClientMap, f BatchQueryForm) ([]model.Valu
|
||||
Step: time.Duration(item.Step) * time.Second,
|
||||
}
|
||||
|
||||
resp, _, err := cli.QueryRange(context.Background(), item.Query, r)
|
||||
resp, _, err := cli.QueryRange(ctx, item.Query, r)
|
||||
if err != nil {
|
||||
logx.Warningf(ctx, "query range error: query:%s err:%v", item.Query, err)
|
||||
return lst, err
|
||||
}
|
||||
|
||||
@@ -82,22 +84,23 @@ func (rt *Router) promBatchQueryInstant(c *gin.Context) {
|
||||
var f BatchInstantForm
|
||||
ginx.Dangerous(c.BindJSON(&f))
|
||||
|
||||
lst, err := PromBatchQueryInstant(rt.PromClients, f)
|
||||
lst, err := PromBatchQueryInstant(c.Request.Context(), rt.PromClients, f)
|
||||
ginx.NewRender(c).Data(lst, err)
|
||||
}
|
||||
|
||||
func PromBatchQueryInstant(pc *prom.PromClientMap, f BatchInstantForm) ([]model.Value, error) {
|
||||
func PromBatchQueryInstant(ctx context.Context, pc *prom.PromClientMap, f BatchInstantForm) ([]model.Value, error) {
|
||||
var lst []model.Value
|
||||
|
||||
cli := pc.GetCli(f.DatasourceId)
|
||||
if cli == nil {
|
||||
logger.Warningf("no such datasource id: %d", f.DatasourceId)
|
||||
logx.Warningf(ctx, "no such datasource id: %d", f.DatasourceId)
|
||||
return lst, fmt.Errorf("no such datasource id: %d", f.DatasourceId)
|
||||
}
|
||||
|
||||
for _, item := range f.Queries {
|
||||
resp, _, err := cli.Query(context.Background(), item.Query, time.Unix(item.Time, 0))
|
||||
resp, _, err := cli.Query(ctx, item.Query, time.Unix(item.Time, 0))
|
||||
if err != nil {
|
||||
logx.Warningf(ctx, "query instant error: query:%s err:%v", item.Query, err)
|
||||
return lst, err
|
||||
}
|
||||
|
||||
@@ -169,8 +172,15 @@ func (rt *Router) dsProxy(c *gin.Context) {
|
||||
|
||||
transport, has := transportGet(dsId, ds.UpdatedAt)
|
||||
if !has {
|
||||
// 使用 TLS 配置(支持 mTLS)
|
||||
tlsConfig, err := ds.HTTPJson.TLS.TLSConfig()
|
||||
if err != nil {
|
||||
c.String(http.StatusInternalServerError, "failed to create TLS config: %s", err.Error())
|
||||
return
|
||||
}
|
||||
|
||||
transport = &http.Transport{
|
||||
TLSClientConfig: &tls.Config{InsecureSkipVerify: ds.HTTPJson.TLS.SkipTlsVerify},
|
||||
TLSClientConfig: tlsConfig,
|
||||
Proxy: http.ProxyFromEnvironment,
|
||||
DialContext: (&net.Dialer{
|
||||
Timeout: time.Duration(ds.HTTPJson.DialTimeout) * time.Millisecond,
|
||||
@@ -183,7 +193,7 @@ func (rt *Router) dsProxy(c *gin.Context) {
|
||||
|
||||
modifyResponse := func(r *http.Response) error {
|
||||
if r.StatusCode == http.StatusUnauthorized {
|
||||
logger.Warningf("proxy path:%s unauthorized access ", c.Request.URL.Path)
|
||||
logx.Warningf(c.Request.Context(), "proxy path:%s unauthorized access ", c.Request.URL.Path)
|
||||
return fmt.Errorf("unauthorized access")
|
||||
}
|
||||
|
||||
|
||||
@@ -5,14 +5,17 @@ import (
|
||||
"sort"
|
||||
"sync"
|
||||
|
||||
"github.com/ccfos/nightingale/v6/alert/eval"
|
||||
"github.com/ccfos/nightingale/v6/dscache"
|
||||
"github.com/ccfos/nightingale/v6/models"
|
||||
"github.com/ccfos/nightingale/v6/pkg/logx"
|
||||
"github.com/ccfos/nightingale/v6/pkg/ginx"
|
||||
"github.com/gin-gonic/gin"
|
||||
"github.com/toolkits/pkg/ginx"
|
||||
"github.com/toolkits/pkg/logger"
|
||||
)
|
||||
|
||||
func CheckDsPerm(c *gin.Context, dsId int64, cate string, q interface{}) bool {
|
||||
type CheckDsPermFunc func(c *gin.Context, dsId int64, cate string, q interface{}) bool
|
||||
|
||||
var CheckDsPerm CheckDsPermFunc = func(c *gin.Context, dsId int64, cate string, q interface{}) bool {
|
||||
// todo: 后续需要根据 cate 判断是否需要权限
|
||||
return true
|
||||
}
|
||||
@@ -44,6 +47,7 @@ func QueryLogBatchConcurrently(anonymousAccess bool, ctx *gin.Context, f QueryFr
|
||||
var mu sync.Mutex
|
||||
var wg sync.WaitGroup
|
||||
var errs []error
|
||||
rctx := ctx.Request.Context()
|
||||
|
||||
for _, q := range f.Queries {
|
||||
if !anonymousAccess && !CheckDsPerm(ctx, q.Did, q.DsCate, q) {
|
||||
@@ -52,20 +56,27 @@ func QueryLogBatchConcurrently(anonymousAccess bool, ctx *gin.Context, f QueryFr
|
||||
|
||||
plug, exists := dscache.DsCache.Get(q.DsCate, q.Did)
|
||||
if !exists {
|
||||
logger.Warningf("cluster:%d not exists query:%+v", q.Did, q)
|
||||
logx.Warningf(rctx, "cluster:%d not exists query:%+v", q.Did, q)
|
||||
return LogResp{}, fmt.Errorf("cluster not exists")
|
||||
}
|
||||
|
||||
// 根据数据源类型对 Query 进行模板渲染处理
|
||||
err := eval.ExecuteQueryTemplate(q.DsCate, q.Query, nil)
|
||||
if err != nil {
|
||||
logx.Warningf(rctx, "query template execute error: %v", err)
|
||||
return LogResp{}, fmt.Errorf("query template execute error: %v", err)
|
||||
}
|
||||
|
||||
wg.Add(1)
|
||||
go func(query Query) {
|
||||
defer wg.Done()
|
||||
|
||||
data, total, err := plug.QueryLog(ctx.Request.Context(), query.Query)
|
||||
data, total, err := plug.QueryLog(rctx, query.Query)
|
||||
mu.Lock()
|
||||
defer mu.Unlock()
|
||||
if err != nil {
|
||||
errMsg := fmt.Sprintf("query data error: %v query:%v\n ", err, query)
|
||||
logger.Warningf(errMsg)
|
||||
logx.Warningf(rctx, "%s", errMsg)
|
||||
errs = append(errs, err)
|
||||
return
|
||||
}
|
||||
@@ -111,6 +122,7 @@ func QueryDataConcurrently(anonymousAccess bool, ctx *gin.Context, f models.Quer
|
||||
var mu sync.Mutex
|
||||
var wg sync.WaitGroup
|
||||
var errs []error
|
||||
rctx := ctx.Request.Context()
|
||||
|
||||
for _, q := range f.Queries {
|
||||
if !anonymousAccess && !CheckDsPerm(ctx, f.DatasourceId, f.Cate, q) {
|
||||
@@ -119,7 +131,7 @@ func QueryDataConcurrently(anonymousAccess bool, ctx *gin.Context, f models.Quer
|
||||
|
||||
plug, exists := dscache.DsCache.Get(f.Cate, f.DatasourceId)
|
||||
if !exists {
|
||||
logger.Warningf("cluster:%d not exists", f.DatasourceId)
|
||||
logx.Warningf(rctx, "cluster:%d not exists", f.DatasourceId)
|
||||
return nil, fmt.Errorf("cluster not exists")
|
||||
}
|
||||
|
||||
@@ -127,16 +139,16 @@ func QueryDataConcurrently(anonymousAccess bool, ctx *gin.Context, f models.Quer
|
||||
go func(query interface{}) {
|
||||
defer wg.Done()
|
||||
|
||||
data, err := plug.QueryData(ctx.Request.Context(), query)
|
||||
data, err := plug.QueryData(rctx, query)
|
||||
if err != nil {
|
||||
logger.Warningf("query data error: req:%+v err:%v", query, err)
|
||||
logx.Warningf(rctx, "query data error: req:%+v err:%v", query, err)
|
||||
mu.Lock()
|
||||
errs = append(errs, err)
|
||||
mu.Unlock()
|
||||
return
|
||||
}
|
||||
|
||||
logger.Debugf("query data: req:%+v resp:%+v", query, data)
|
||||
logx.Debugf(rctx, "query data: req:%+v resp:%+v", query, data)
|
||||
mu.Lock()
|
||||
resp = append(resp, data...)
|
||||
mu.Unlock()
|
||||
@@ -182,6 +194,7 @@ func QueryLogConcurrently(anonymousAccess bool, ctx *gin.Context, f models.Query
|
||||
var mu sync.Mutex
|
||||
var wg sync.WaitGroup
|
||||
var errs []error
|
||||
rctx := ctx.Request.Context()
|
||||
|
||||
for _, q := range f.Queries {
|
||||
if !anonymousAccess && !CheckDsPerm(ctx, f.DatasourceId, f.Cate, q) {
|
||||
@@ -190,7 +203,7 @@ func QueryLogConcurrently(anonymousAccess bool, ctx *gin.Context, f models.Query
|
||||
|
||||
plug, exists := dscache.DsCache.Get(f.Cate, f.DatasourceId)
|
||||
if !exists {
|
||||
logger.Warningf("cluster:%d not exists query:%+v", f.DatasourceId, f)
|
||||
logx.Warningf(rctx, "cluster:%d not exists query:%+v", f.DatasourceId, f)
|
||||
return LogResp{}, fmt.Errorf("cluster not exists")
|
||||
}
|
||||
|
||||
@@ -198,11 +211,11 @@ func QueryLogConcurrently(anonymousAccess bool, ctx *gin.Context, f models.Query
|
||||
go func(query interface{}) {
|
||||
defer wg.Done()
|
||||
|
||||
data, total, err := plug.QueryLog(ctx.Request.Context(), query)
|
||||
logger.Debugf("query log: req:%+v resp:%+v", query, data)
|
||||
data, total, err := plug.QueryLog(rctx, query)
|
||||
logx.Debugf(rctx, "query log: req:%+v resp:%+v", query, data)
|
||||
if err != nil {
|
||||
errMsg := fmt.Sprintf("query data error: %v query:%v\n ", err, query)
|
||||
logger.Warningf(errMsg)
|
||||
logx.Warningf(rctx, "%s", errMsg)
|
||||
mu.Lock()
|
||||
errs = append(errs, err)
|
||||
mu.Unlock()
|
||||
@@ -240,6 +253,7 @@ func (rt *Router) QueryLogV2(c *gin.Context) {
|
||||
func (rt *Router) QueryLog(c *gin.Context) {
|
||||
var f models.QueryParam
|
||||
ginx.BindJSON(c, &f)
|
||||
rctx := c.Request.Context()
|
||||
|
||||
var resp []interface{}
|
||||
for _, q := range f.Queries {
|
||||
@@ -249,13 +263,13 @@ func (rt *Router) QueryLog(c *gin.Context) {
|
||||
|
||||
plug, exists := dscache.DsCache.Get("elasticsearch", f.DatasourceId)
|
||||
if !exists {
|
||||
logger.Warningf("cluster:%d not exists", f.DatasourceId)
|
||||
logx.Warningf(rctx, "cluster:%d not exists", f.DatasourceId)
|
||||
ginx.Bomb(200, "cluster not exists")
|
||||
}
|
||||
|
||||
data, _, err := plug.QueryLog(c.Request.Context(), q)
|
||||
data, _, err := plug.QueryLog(rctx, q)
|
||||
if err != nil {
|
||||
logger.Warningf("query data error: %v", err)
|
||||
logx.Warningf(rctx, "query data error: %v", err)
|
||||
ginx.Bomb(200, "err:%v", err)
|
||||
continue
|
||||
}
|
||||
|
||||
@@ -7,14 +7,17 @@ import (
|
||||
|
||||
"github.com/ccfos/nightingale/v6/models"
|
||||
"github.com/ccfos/nightingale/v6/pkg/strx"
|
||||
"github.com/ccfos/nightingale/v6/pkg/ginx"
|
||||
|
||||
"github.com/gin-gonic/gin"
|
||||
"github.com/toolkits/pkg/ginx"
|
||||
)
|
||||
|
||||
func (rt *Router) recordingRuleGets(c *gin.Context) {
|
||||
busiGroupId := ginx.UrlParamInt64(c, "id")
|
||||
ars, err := models.RecordingRuleGets(rt.Ctx, busiGroupId)
|
||||
if err == nil {
|
||||
models.FillUpdateByNicknames(rt.Ctx, ars)
|
||||
}
|
||||
ginx.NewRender(c).Data(ars, err)
|
||||
}
|
||||
|
||||
@@ -39,6 +42,9 @@ func (rt *Router) recordingRuleGetsByGids(c *gin.Context) {
|
||||
}
|
||||
|
||||
ars, err := models.RecordingRuleGetsByBGIds(rt.Ctx, gids)
|
||||
if err == nil {
|
||||
models.FillUpdateByNicknames(rt.Ctx, ars)
|
||||
}
|
||||
ginx.NewRender(c).Data(ars, err)
|
||||
}
|
||||
|
||||
@@ -112,6 +118,7 @@ func (rt *Router) recordingRulePutByFE(c *gin.Context) {
|
||||
}
|
||||
|
||||
rt.bgrwCheck(c, ar.GroupId)
|
||||
rt.bgroCheck(c, f.GroupId)
|
||||
|
||||
f.UpdateBy = c.MustGet("username").(string)
|
||||
ginx.NewRender(c).Message(ar.Update(rt.Ctx, f))
|
||||
|
||||
@@ -6,9 +6,9 @@ import (
|
||||
|
||||
"github.com/ccfos/nightingale/v6/center/cconf"
|
||||
"github.com/ccfos/nightingale/v6/models"
|
||||
"github.com/ccfos/nightingale/v6/pkg/ginx"
|
||||
|
||||
"github.com/gin-gonic/gin"
|
||||
"github.com/toolkits/pkg/ginx"
|
||||
)
|
||||
|
||||
func (rt *Router) rolesGets(c *gin.Context) {
|
||||
|
||||
@@ -5,8 +5,8 @@ import (
|
||||
|
||||
"github.com/ccfos/nightingale/v6/center/cconf"
|
||||
"github.com/ccfos/nightingale/v6/models"
|
||||
"github.com/ccfos/nightingale/v6/pkg/ginx"
|
||||
"github.com/gin-gonic/gin"
|
||||
"github.com/toolkits/pkg/ginx"
|
||||
"github.com/toolkits/pkg/i18n"
|
||||
)
|
||||
|
||||
|
||||
145
center/router/router_saved_view.go
Normal file
145
center/router/router_saved_view.go
Normal file
@@ -0,0 +1,145 @@
|
||||
package router
|
||||
|
||||
import (
|
||||
"net/http"
|
||||
|
||||
"github.com/ccfos/nightingale/v6/models"
|
||||
"github.com/ccfos/nightingale/v6/pkg/slice"
|
||||
"github.com/ccfos/nightingale/v6/pkg/ginx"
|
||||
|
||||
"github.com/gin-gonic/gin"
|
||||
)
|
||||
|
||||
func (rt *Router) savedViewGets(c *gin.Context) {
|
||||
page := ginx.QueryStr(c, "page", "")
|
||||
|
||||
me := c.MustGet("user").(*models.User)
|
||||
|
||||
lst, err := models.SavedViewGets(rt.Ctx, page)
|
||||
if err != nil {
|
||||
ginx.NewRender(c).Data(nil, err)
|
||||
return
|
||||
}
|
||||
models.FillUpdateByNicknames(rt.Ctx, lst)
|
||||
|
||||
userGids, err := models.MyGroupIds(rt.Ctx, me.Id)
|
||||
if err != nil {
|
||||
ginx.NewRender(c).Data(nil, err)
|
||||
return
|
||||
}
|
||||
|
||||
favoriteMap, err := models.SavedViewFavoriteGetByUserId(rt.Ctx, me.Id)
|
||||
if err != nil {
|
||||
ginx.NewRender(c).Data(nil, err)
|
||||
return
|
||||
}
|
||||
|
||||
favoriteViews := make([]models.SavedView, 0)
|
||||
normalViews := make([]models.SavedView, 0)
|
||||
|
||||
for _, view := range lst {
|
||||
visible := view.CreateBy == me.Username ||
|
||||
view.PublicCate == 2 ||
|
||||
(view.PublicCate == 1 && slice.HaveIntersection[int64](userGids, view.Gids))
|
||||
|
||||
if !visible {
|
||||
continue
|
||||
}
|
||||
|
||||
view.IsFavorite = favoriteMap[view.Id]
|
||||
|
||||
// 收藏的排前面
|
||||
if view.IsFavorite {
|
||||
favoriteViews = append(favoriteViews, view)
|
||||
} else {
|
||||
normalViews = append(normalViews, view)
|
||||
}
|
||||
}
|
||||
|
||||
ginx.NewRender(c).Data(append(favoriteViews, normalViews...), nil)
|
||||
}
|
||||
|
||||
func (rt *Router) savedViewAdd(c *gin.Context) {
|
||||
var f models.SavedView
|
||||
ginx.BindJSON(c, &f)
|
||||
|
||||
me := c.MustGet("user").(*models.User)
|
||||
f.Id = 0
|
||||
f.CreateBy = me.Username
|
||||
f.UpdateBy = me.Username
|
||||
|
||||
err := models.SavedViewAdd(rt.Ctx, &f)
|
||||
ginx.NewRender(c).Data(f.Id, err)
|
||||
}
|
||||
|
||||
func (rt *Router) savedViewPut(c *gin.Context) {
|
||||
id := ginx.UrlParamInt64(c, "id")
|
||||
|
||||
view, err := models.SavedViewGetById(rt.Ctx, id)
|
||||
if err != nil {
|
||||
ginx.NewRender(c).Data(nil, err)
|
||||
return
|
||||
}
|
||||
if view == nil {
|
||||
ginx.NewRender(c, http.StatusNotFound).Message("saved view not found")
|
||||
return
|
||||
}
|
||||
|
||||
me := c.MustGet("user").(*models.User)
|
||||
// 只有创建者可以更新
|
||||
if view.CreateBy != me.Username && !me.IsAdmin() {
|
||||
ginx.NewRender(c, http.StatusForbidden).Message("forbidden")
|
||||
return
|
||||
}
|
||||
|
||||
var f models.SavedView
|
||||
ginx.BindJSON(c, &f)
|
||||
|
||||
view.Name = f.Name
|
||||
view.Filter = f.Filter
|
||||
view.PublicCate = f.PublicCate
|
||||
view.Gids = f.Gids
|
||||
|
||||
err = models.SavedViewUpdate(rt.Ctx, view, me.Username)
|
||||
ginx.NewRender(c).Message(err)
|
||||
}
|
||||
|
||||
func (rt *Router) savedViewDel(c *gin.Context) {
|
||||
id := ginx.UrlParamInt64(c, "id")
|
||||
|
||||
view, err := models.SavedViewGetById(rt.Ctx, id)
|
||||
if err != nil {
|
||||
ginx.NewRender(c).Data(nil, err)
|
||||
return
|
||||
}
|
||||
if view == nil {
|
||||
ginx.NewRender(c, http.StatusNotFound).Message("saved view not found")
|
||||
return
|
||||
}
|
||||
|
||||
me := c.MustGet("user").(*models.User)
|
||||
// 只有创建者或管理员可以删除
|
||||
if view.CreateBy != me.Username && !me.IsAdmin() {
|
||||
ginx.NewRender(c, http.StatusForbidden).Message("forbidden")
|
||||
return
|
||||
}
|
||||
|
||||
err = models.SavedViewDel(rt.Ctx, id)
|
||||
ginx.NewRender(c).Message(err)
|
||||
}
|
||||
|
||||
func (rt *Router) savedViewFavoriteAdd(c *gin.Context) {
|
||||
id := ginx.UrlParamInt64(c, "id")
|
||||
me := c.MustGet("user").(*models.User)
|
||||
|
||||
err := models.UserViewFavoriteAdd(rt.Ctx, id, me.Id)
|
||||
ginx.NewRender(c).Message(err)
|
||||
}
|
||||
|
||||
func (rt *Router) savedViewFavoriteDel(c *gin.Context) {
|
||||
id := ginx.UrlParamInt64(c, "id")
|
||||
me := c.MustGet("user").(*models.User)
|
||||
|
||||
err := models.UserViewFavoriteDel(rt.Ctx, id, me.Id)
|
||||
ginx.NewRender(c).Message(err)
|
||||
}
|
||||
@@ -5,10 +5,10 @@ import (
|
||||
"github.com/ccfos/nightingale/v6/pkg/flashduty"
|
||||
"github.com/ccfos/nightingale/v6/pkg/ormx"
|
||||
"github.com/ccfos/nightingale/v6/pkg/secu"
|
||||
"github.com/ccfos/nightingale/v6/pkg/ginx"
|
||||
"github.com/google/uuid"
|
||||
|
||||
"github.com/gin-gonic/gin"
|
||||
"github.com/toolkits/pkg/ginx"
|
||||
"github.com/toolkits/pkg/logger"
|
||||
)
|
||||
|
||||
|
||||
@@ -4,9 +4,9 @@ import (
|
||||
"time"
|
||||
|
||||
"github.com/ccfos/nightingale/v6/models"
|
||||
"github.com/ccfos/nightingale/v6/pkg/ginx"
|
||||
|
||||
"github.com/gin-gonic/gin"
|
||||
"github.com/toolkits/pkg/ginx"
|
||||
)
|
||||
|
||||
func (rt *Router) serversGet(c *gin.Context) {
|
||||
|
||||
@@ -5,10 +5,10 @@ import (
|
||||
"time"
|
||||
|
||||
"github.com/ccfos/nightingale/v6/models"
|
||||
"github.com/ccfos/nightingale/v6/pkg/ginx"
|
||||
"github.com/google/uuid"
|
||||
|
||||
"github.com/gin-gonic/gin"
|
||||
"github.com/toolkits/pkg/ginx"
|
||||
)
|
||||
|
||||
// sourceTokenAdd 生成新的源令牌
|
||||
|
||||
@@ -13,10 +13,10 @@ import (
|
||||
"github.com/ccfos/nightingale/v6/pkg/strx"
|
||||
"github.com/ccfos/nightingale/v6/pushgw/idents"
|
||||
"github.com/ccfos/nightingale/v6/storage"
|
||||
"github.com/ccfos/nightingale/v6/pkg/ginx"
|
||||
|
||||
"github.com/gin-gonic/gin"
|
||||
"github.com/prometheus/common/model"
|
||||
"github.com/toolkits/pkg/ginx"
|
||||
"github.com/toolkits/pkg/logger"
|
||||
)
|
||||
|
||||
@@ -38,6 +38,16 @@ func (rt *Router) targetGetsByHostFilter(c *gin.Context) {
|
||||
total, err := models.TargetCountByFilter(rt.Ctx, query)
|
||||
ginx.Dangerous(err)
|
||||
|
||||
models.FillTargetsBeatTime(rt.Redis, hosts)
|
||||
now := time.Now().Unix()
|
||||
for i := 0; i < len(hosts); i++ {
|
||||
if now-hosts[i].BeatTime < 60 {
|
||||
hosts[i].TargetUp = 2
|
||||
} else if now-hosts[i].BeatTime < 180 {
|
||||
hosts[i].TargetUp = 1
|
||||
}
|
||||
}
|
||||
|
||||
ginx.NewRender(c).Data(gin.H{
|
||||
"list": hosts,
|
||||
"total": total,
|
||||
@@ -81,9 +91,24 @@ func (rt *Router) targetGets(c *gin.Context) {
|
||||
models.BuildTargetWhereWithBgids(bgids),
|
||||
models.BuildTargetWhereWithDsIds(dsIds),
|
||||
models.BuildTargetWhereWithQuery(query),
|
||||
models.BuildTargetWhereWithDowntime(downtime),
|
||||
models.BuildTargetWhereWithHosts(hosts),
|
||||
}
|
||||
|
||||
// downtime 筛选:从缓存获取心跳时间,选择较小的集合用 IN 或 NOT IN 过滤
|
||||
if downtime != 0 {
|
||||
downtimeOpt, hasMatch := rt.downtimeFilter(downtime)
|
||||
if !hasMatch {
|
||||
ginx.NewRender(c).Data(gin.H{
|
||||
"list": []*models.Target{},
|
||||
"total": 0,
|
||||
}, nil)
|
||||
return
|
||||
}
|
||||
if downtimeOpt != nil {
|
||||
options = append(options, downtimeOpt)
|
||||
}
|
||||
}
|
||||
|
||||
total, err := models.TargetTotal(rt.Ctx, options...)
|
||||
ginx.Dangerous(err)
|
||||
|
||||
@@ -102,14 +127,17 @@ func (rt *Router) targetGets(c *gin.Context) {
|
||||
now := time.Now()
|
||||
cache := make(map[int64]*models.BusiGroup)
|
||||
|
||||
// 从 Redis 补全 BeatTime
|
||||
models.FillTargetsBeatTime(rt.Redis, list)
|
||||
|
||||
var keys []string
|
||||
for i := 0; i < len(list); i++ {
|
||||
ginx.Dangerous(list[i].FillGroup(rt.Ctx, cache))
|
||||
keys = append(keys, models.WrapIdent(list[i].Ident))
|
||||
|
||||
if now.Unix()-list[i].UpdateAt < 60 {
|
||||
if now.Unix()-list[i].BeatTime < 60 {
|
||||
list[i].TargetUp = 2
|
||||
} else if now.Unix()-list[i].UpdateAt < 180 {
|
||||
} else if now.Unix()-list[i].BeatTime < 180 {
|
||||
list[i].TargetUp = 1
|
||||
}
|
||||
}
|
||||
@@ -148,6 +176,43 @@ func (rt *Router) targetGets(c *gin.Context) {
|
||||
}, nil)
|
||||
}
|
||||
|
||||
// downtimeFilter 从缓存获取心跳时间,生成 downtime 筛选条件
|
||||
// 选择匹配集和非匹配集中较小的一方,用 IN 或 NOT IN 来减少 SQL 参数量
|
||||
// 返回值:
|
||||
// - option: 筛选条件,nil 表示所有 target 都符合条件(无需过滤)
|
||||
// - hasMatch: 是否有符合条件的 target,false 表示无匹配应返回空结果
|
||||
func (rt *Router) downtimeFilter(downtime int64) (option models.BuildTargetWhereOption, hasMatch bool) {
|
||||
now := time.Now().Unix()
|
||||
targets := rt.TargetCache.GetAll()
|
||||
var matchIdents, nonMatchIdents []string
|
||||
for _, target := range targets {
|
||||
matched := false
|
||||
if downtime > 0 {
|
||||
matched = target.BeatTime < now-downtime
|
||||
} else if downtime < 0 {
|
||||
matched = target.BeatTime > now+downtime
|
||||
}
|
||||
if matched {
|
||||
matchIdents = append(matchIdents, target.Ident)
|
||||
} else {
|
||||
nonMatchIdents = append(nonMatchIdents, target.Ident)
|
||||
}
|
||||
}
|
||||
|
||||
if len(matchIdents) == 0 {
|
||||
return nil, false
|
||||
}
|
||||
|
||||
if len(nonMatchIdents) == 0 {
|
||||
return nil, true
|
||||
}
|
||||
|
||||
if len(matchIdents) <= len(nonMatchIdents) {
|
||||
return models.BuildTargetWhereWithIdents(matchIdents), true
|
||||
}
|
||||
return models.BuildTargetWhereExcludeIdents(nonMatchIdents), true
|
||||
}
|
||||
|
||||
func (rt *Router) targetExtendInfoByIdent(c *gin.Context) {
|
||||
ident := ginx.QueryStr(c, "ident", "")
|
||||
key := models.WrapExtendIdent(ident)
|
||||
|
||||
@@ -7,9 +7,9 @@ import (
|
||||
"github.com/ccfos/nightingale/v6/alert/sender"
|
||||
"github.com/ccfos/nightingale/v6/models"
|
||||
"github.com/ccfos/nightingale/v6/pkg/strx"
|
||||
"github.com/ccfos/nightingale/v6/pkg/ginx"
|
||||
|
||||
"github.com/gin-gonic/gin"
|
||||
"github.com/toolkits/pkg/ginx"
|
||||
"github.com/toolkits/pkg/i18n"
|
||||
)
|
||||
|
||||
|
||||
@@ -8,9 +8,9 @@ import (
|
||||
|
||||
"github.com/ccfos/nightingale/v6/models"
|
||||
"github.com/ccfos/nightingale/v6/pkg/strx"
|
||||
"github.com/ccfos/nightingale/v6/pkg/ginx"
|
||||
|
||||
"github.com/gin-gonic/gin"
|
||||
"github.com/toolkits/pkg/ginx"
|
||||
"github.com/toolkits/pkg/i18n"
|
||||
"github.com/toolkits/pkg/str"
|
||||
)
|
||||
@@ -25,6 +25,7 @@ func (rt *Router) taskTplGets(c *gin.Context) {
|
||||
|
||||
list, err := models.TaskTplGets(rt.Ctx, []int64{groupId}, query, limit, ginx.Offset(c, limit))
|
||||
ginx.Dangerous(err)
|
||||
models.FillUpdateByNicknames(rt.Ctx, list)
|
||||
|
||||
ginx.NewRender(c).Data(gin.H{
|
||||
"total": total,
|
||||
@@ -60,6 +61,7 @@ func (rt *Router) taskTplGetsByGids(c *gin.Context) {
|
||||
|
||||
list, err := models.TaskTplGets(rt.Ctx, gids, query, limit, ginx.Offset(c, limit))
|
||||
ginx.Dangerous(err)
|
||||
models.FillUpdateByNicknames(rt.Ctx, list)
|
||||
|
||||
ginx.NewRender(c).Data(gin.H{
|
||||
"total": total,
|
||||
|
||||
@@ -2,13 +2,14 @@ package router
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"net/http"
|
||||
|
||||
"github.com/ccfos/nightingale/v6/center/cconf"
|
||||
"github.com/ccfos/nightingale/v6/datasource/tdengine"
|
||||
"github.com/ccfos/nightingale/v6/dscache"
|
||||
"github.com/ccfos/nightingale/v6/models"
|
||||
"github.com/ccfos/nightingale/v6/pkg/ginx"
|
||||
"github.com/gin-gonic/gin"
|
||||
"github.com/toolkits/pkg/ginx"
|
||||
"net/http"
|
||||
)
|
||||
|
||||
type databasesQueryForm struct {
|
||||
|
||||
136
center/router/router_trace_logs.go
Normal file
136
center/router/router_trace_logs.go
Normal file
@@ -0,0 +1,136 @@
|
||||
package router
|
||||
|
||||
import (
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"io"
|
||||
"net/http"
|
||||
"time"
|
||||
|
||||
"github.com/ccfos/nightingale/v6/models"
|
||||
"github.com/ccfos/nightingale/v6/pkg/ginx"
|
||||
"github.com/ccfos/nightingale/v6/pkg/loggrep"
|
||||
"github.com/toolkits/pkg/logger"
|
||||
|
||||
"github.com/gin-gonic/gin"
|
||||
)
|
||||
|
||||
// traceLogsPage renders an HTML log viewer page for trace logs.
|
||||
func (rt *Router) traceLogsPage(c *gin.Context) {
|
||||
traceId := ginx.UrlParamStr(c, "traceid")
|
||||
if !loggrep.IsValidTraceID(traceId) {
|
||||
c.String(http.StatusBadRequest, "invalid trace id format")
|
||||
return
|
||||
}
|
||||
|
||||
logs, instance, err := rt.getTraceLogs(traceId)
|
||||
if err != nil {
|
||||
c.String(http.StatusInternalServerError, "Error: %v", err)
|
||||
return
|
||||
}
|
||||
|
||||
c.Header("Content-Type", "text/html; charset=utf-8")
|
||||
err = loggrep.RenderTraceLogsHTML(c.Writer, loggrep.TraceLogsPageData{
|
||||
TraceID: traceId,
|
||||
Instance: instance,
|
||||
Logs: logs,
|
||||
Total: len(logs),
|
||||
})
|
||||
if err != nil {
|
||||
c.String(http.StatusInternalServerError, "render error: %v", err)
|
||||
}
|
||||
}
|
||||
|
||||
// traceLogsJSON returns JSON for trace logs.
|
||||
func (rt *Router) traceLogsJSON(c *gin.Context) {
|
||||
traceId := ginx.UrlParamStr(c, "traceid")
|
||||
if !loggrep.IsValidTraceID(traceId) {
|
||||
ginx.Bomb(200, "invalid trace id format")
|
||||
}
|
||||
|
||||
logs, instance, err := rt.getTraceLogs(traceId)
|
||||
ginx.Dangerous(err)
|
||||
|
||||
ginx.NewRender(c).Data(loggrep.EventDetailResp{
|
||||
Logs: logs,
|
||||
Instance: instance,
|
||||
}, nil)
|
||||
}
|
||||
|
||||
// getTraceLogs finds the same-engine instances and queries each one
|
||||
// until trace logs are found. Trace logs belong to a single instance.
|
||||
func (rt *Router) getTraceLogs(traceId string) ([]string, string, error) {
|
||||
keyword := "trace_id=" + traceId
|
||||
instance := fmt.Sprintf("%s:%d", rt.Alert.Heartbeat.IP, rt.HTTP.Port)
|
||||
engineName := rt.Alert.Heartbeat.EngineName
|
||||
|
||||
// try local first
|
||||
logs, err := loggrep.GrepLatestLogFiles(rt.LogDir, keyword)
|
||||
if err == nil && len(logs) > 0 {
|
||||
return logs, instance, nil
|
||||
}
|
||||
|
||||
// find all instances with the same engineName
|
||||
servers, err := models.AlertingEngineGetsInstances(rt.Ctx,
|
||||
"engine_cluster = ? and clock > ?",
|
||||
engineName, time.Now().Unix()-30)
|
||||
if err != nil {
|
||||
return nil, "", err
|
||||
}
|
||||
|
||||
// loop through remote instances until we find logs
|
||||
for _, node := range servers {
|
||||
if node == instance {
|
||||
continue // already tried local
|
||||
}
|
||||
|
||||
logs, nodeAddr, err := rt.forwardTraceLogs(node, traceId)
|
||||
if err != nil {
|
||||
logger.Errorf("forwardTraceLogs failed: %v", err)
|
||||
continue
|
||||
}
|
||||
if len(logs) > 0 {
|
||||
return logs, nodeAddr, nil
|
||||
}
|
||||
}
|
||||
|
||||
return nil, instance, nil
|
||||
}
|
||||
|
||||
func (rt *Router) forwardTraceLogs(node, traceId string) ([]string, string, error) {
|
||||
url := fmt.Sprintf("http://%s/v1/n9e/trace-logs/%s", node, traceId)
|
||||
req, err := http.NewRequest("GET", url, nil)
|
||||
if err != nil {
|
||||
return nil, node, err
|
||||
}
|
||||
|
||||
for user, pass := range rt.HTTP.APIForService.BasicAuth {
|
||||
req.SetBasicAuth(user, pass)
|
||||
break
|
||||
}
|
||||
|
||||
client := &http.Client{Timeout: 15 * time.Second}
|
||||
resp, err := client.Do(req)
|
||||
if err != nil {
|
||||
return nil, node, fmt.Errorf("forward to %s failed: %v", node, err)
|
||||
}
|
||||
defer resp.Body.Close()
|
||||
|
||||
body, err := io.ReadAll(io.LimitReader(resp.Body, 10*1024*1024))
|
||||
if err != nil {
|
||||
return nil, node, err
|
||||
}
|
||||
|
||||
var result struct {
|
||||
Dat loggrep.EventDetailResp `json:"dat"`
|
||||
Err string `json:"err"`
|
||||
}
|
||||
if err := json.Unmarshal(body, &result); err != nil {
|
||||
return nil, node, err
|
||||
}
|
||||
if result.Err != "" {
|
||||
return nil, node, fmt.Errorf("%s", result.Err)
|
||||
}
|
||||
|
||||
return result.Dat.Logs, result.Dat.Instance, nil
|
||||
}
|
||||
@@ -9,9 +9,9 @@ import (
|
||||
"github.com/ccfos/nightingale/v6/pkg/flashduty"
|
||||
"github.com/ccfos/nightingale/v6/pkg/ormx"
|
||||
"github.com/ccfos/nightingale/v6/pkg/secu"
|
||||
"github.com/ccfos/nightingale/v6/pkg/ginx"
|
||||
|
||||
"github.com/gin-gonic/gin"
|
||||
"github.com/toolkits/pkg/ginx"
|
||||
"github.com/toolkits/pkg/logger"
|
||||
"gorm.io/gorm"
|
||||
)
|
||||
|
||||
@@ -7,9 +7,9 @@ import (
|
||||
"github.com/ccfos/nightingale/v6/models"
|
||||
"github.com/ccfos/nightingale/v6/pkg/flashduty"
|
||||
"github.com/ccfos/nightingale/v6/pkg/strx"
|
||||
"github.com/ccfos/nightingale/v6/pkg/ginx"
|
||||
|
||||
"github.com/gin-gonic/gin"
|
||||
"github.com/toolkits/pkg/ginx"
|
||||
"github.com/toolkits/pkg/logger"
|
||||
)
|
||||
|
||||
@@ -27,6 +27,9 @@ func (rt *Router) userGroupGets(c *gin.Context) {
|
||||
|
||||
me := c.MustGet("user").(*models.User)
|
||||
lst, err := me.UserGroups(rt.Ctx, limit, query)
|
||||
if err == nil {
|
||||
models.FillUpdateByNicknames(rt.Ctx, lst)
|
||||
}
|
||||
|
||||
ginx.NewRender(c).Data(lst, err)
|
||||
}
|
||||
|
||||
@@ -5,9 +5,9 @@ import (
|
||||
"time"
|
||||
|
||||
"github.com/ccfos/nightingale/v6/models"
|
||||
"github.com/ccfos/nightingale/v6/pkg/ginx"
|
||||
|
||||
"github.com/gin-gonic/gin"
|
||||
"github.com/toolkits/pkg/ginx"
|
||||
)
|
||||
|
||||
func (rt *Router) userVariableConfigGets(context *gin.Context) {
|
||||
|
||||
@@ -12,6 +12,7 @@ import (
|
||||
"github.com/ccfos/nightingale/v6/pkg/cas"
|
||||
"github.com/ccfos/nightingale/v6/pkg/ctx"
|
||||
"github.com/ccfos/nightingale/v6/pkg/dingtalk"
|
||||
"github.com/ccfos/nightingale/v6/pkg/feishu"
|
||||
"github.com/ccfos/nightingale/v6/pkg/ldapx"
|
||||
"github.com/ccfos/nightingale/v6/pkg/oauth2x"
|
||||
"github.com/ccfos/nightingale/v6/pkg/oidcx"
|
||||
@@ -27,6 +28,7 @@ type SsoClient struct {
|
||||
CAS *cas.SsoClient
|
||||
OAuth2 *oauth2x.SsoClient
|
||||
DingTalk *dingtalk.SsoClient
|
||||
FeiShu *feishu.SsoClient
|
||||
LastUpdateTime int64
|
||||
configCache *memsto.ConfigCache
|
||||
configLastUpdateTime int64
|
||||
@@ -203,6 +205,13 @@ func Init(center cconf.Center, ctx *ctx.Context, configCache *memsto.ConfigCache
|
||||
log.Fatalf("init %s failed: %s", dingtalk.SsoTypeName, err)
|
||||
}
|
||||
ssoClient.DingTalk = dingtalk.New(config)
|
||||
case feishu.SsoTypeName:
|
||||
var config feishu.Config
|
||||
err := json.Unmarshal([]byte(cfg.Content), &config)
|
||||
if err != nil {
|
||||
log.Fatalf("init %s failed: %s", feishu.SsoTypeName, err)
|
||||
}
|
||||
ssoClient.FeiShu = feishu.New(config)
|
||||
}
|
||||
}
|
||||
|
||||
@@ -291,6 +300,22 @@ func (s *SsoClient) reload(ctx *ctx.Context) error {
|
||||
s.DingTalk = nil
|
||||
}
|
||||
|
||||
if feiShuConfig, ok := ssoConfigMap[feishu.SsoTypeName]; ok {
|
||||
var config feishu.Config
|
||||
err := json.Unmarshal([]byte(feiShuConfig.Content), &config)
|
||||
if err != nil {
|
||||
logger.Warningf("reload %s failed: %s", feishu.SsoTypeName, err)
|
||||
} else {
|
||||
if s.FeiShu != nil {
|
||||
s.FeiShu.Reload(config)
|
||||
} else {
|
||||
s.FeiShu = feishu.New(config)
|
||||
}
|
||||
}
|
||||
} else {
|
||||
s.FeiShu = nil
|
||||
}
|
||||
|
||||
s.LastUpdateTime = lastUpdateTime
|
||||
s.configLastUpdateTime = lastCacheUpdateTime
|
||||
return nil
|
||||
|
||||
@@ -71,7 +71,10 @@ CREATE TABLE `datasource`
|
||||
`updated_at` bigint not null default 0,
|
||||
`updated_by` varchar(64) not null default '',
|
||||
PRIMARY KEY (`id`)
|
||||
) ENGINE = InnoDB DEFAULT CHARSET = utf8mb4;
|
||||
) ENGINE = InnoDB DEFAULT CHARSET = utf8mb4;
|
||||
|
||||
-- datasource add weight field
|
||||
alter table `datasource` add `weight` int not null default 0;
|
||||
|
||||
CREATE TABLE `builtin_cate` (
|
||||
`id` bigint unsigned not null auto_increment,
|
||||
|
||||
@@ -87,7 +87,7 @@ func Initialize(configDir string, cryptoKey string) (func(), error) {
|
||||
alert.Start(config.Alert, config.Pushgw, syncStats, alertStats, externalProcessors, targetCache, busiGroupCache, alertMuteCache,
|
||||
alertRuleCache, notifyConfigCache, taskTplsCache, dsCache, ctx, promClients, userCache, userGroupCache, notifyRuleCache, notifyChannelCache, messageTemplateCache, configCvalCache)
|
||||
|
||||
alertrtRouter := alertrt.New(config.HTTP, config.Alert, alertMuteCache, targetCache, busiGroupCache, alertStats, ctx, externalProcessors)
|
||||
alertrtRouter := alertrt.New(config.HTTP, config.Alert, alertMuteCache, targetCache, busiGroupCache, alertStats, ctx, externalProcessors, config.Log.Dir)
|
||||
|
||||
alertrtRouter.Config(r)
|
||||
|
||||
|
||||
67
cron/clean_pipeline_execution.go
Normal file
67
cron/clean_pipeline_execution.go
Normal file
@@ -0,0 +1,67 @@
|
||||
package cron
|
||||
|
||||
import (
|
||||
"time"
|
||||
|
||||
"github.com/ccfos/nightingale/v6/models"
|
||||
"github.com/ccfos/nightingale/v6/pkg/ctx"
|
||||
|
||||
"github.com/robfig/cron/v3"
|
||||
"github.com/toolkits/pkg/logger"
|
||||
)
|
||||
|
||||
const (
|
||||
defaultBatchSize = 100 // 每批删除数量
|
||||
defaultSleepMs = 10 // 每批删除后休眠时间(毫秒)
|
||||
)
|
||||
|
||||
// cleanPipelineExecutionInBatches 分批删除执行记录,避免大批量删除影响数据库性能
|
||||
func cleanPipelineExecutionInBatches(ctx *ctx.Context, day int) {
|
||||
threshold := time.Now().Unix() - 86400*int64(day)
|
||||
var totalDeleted int64
|
||||
|
||||
for {
|
||||
deleted, err := models.DeleteEventPipelineExecutionsInBatches(ctx, threshold, defaultBatchSize)
|
||||
if err != nil {
|
||||
logger.Errorf("Failed to clean pipeline execution records in batch: %v", err)
|
||||
return
|
||||
}
|
||||
|
||||
totalDeleted += deleted
|
||||
|
||||
// 如果本批删除数量小于 batchSize,说明已删除完毕
|
||||
if deleted < int64(defaultBatchSize) {
|
||||
break
|
||||
}
|
||||
|
||||
// 休眠一段时间,降低数据库压力
|
||||
time.Sleep(time.Duration(defaultSleepMs) * time.Millisecond)
|
||||
}
|
||||
|
||||
if totalDeleted > 0 {
|
||||
logger.Infof("Cleaned %d pipeline execution records older than %d days", totalDeleted, day)
|
||||
}
|
||||
}
|
||||
|
||||
// CleanPipelineExecution starts a cron job to clean old pipeline execution records in batches
|
||||
// Runs daily at 6:00 AM
|
||||
// day: 数据保留天数,默认 7 天
|
||||
// 使用分批删除方式,每批 100 条,间隔 10ms,避免大批量删除影响数据库性能
|
||||
func CleanPipelineExecution(ctx *ctx.Context, day int) {
|
||||
c := cron.New()
|
||||
if day < 1 {
|
||||
day = 7 // default retention: 7 days
|
||||
}
|
||||
|
||||
_, err := c.AddFunc("0 6 * * *", func() {
|
||||
cleanPipelineExecutionInBatches(ctx, day)
|
||||
})
|
||||
|
||||
if err != nil {
|
||||
logger.Errorf("Failed to add clean pipeline execution cron job: %v", err)
|
||||
return
|
||||
}
|
||||
|
||||
c.Start()
|
||||
logger.Infof("Pipeline execution cleanup cron started, retention: %d days, batch_size: %d, sleep_ms: %d", day, defaultBatchSize, defaultSleepMs)
|
||||
}
|
||||
@@ -12,6 +12,8 @@ import (
|
||||
|
||||
"github.com/mitchellh/mapstructure"
|
||||
"github.com/toolkits/pkg/logger"
|
||||
|
||||
"github.com/ccfos/nightingale/v6/pkg/logx"
|
||||
)
|
||||
|
||||
const (
|
||||
@@ -178,14 +180,9 @@ func (c *Clickhouse) QueryData(ctx context.Context, query interface{}) ([]models
|
||||
|
||||
rows, err := c.QueryTimeseries(ctx, ckQueryParam)
|
||||
if err != nil {
|
||||
logger.Warningf("query:%+v get data err:%v", ckQueryParam, err)
|
||||
logx.Warningf(ctx, "query:%+v get data err:%v", ckQueryParam, err)
|
||||
return nil, err
|
||||
}
|
||||
|
||||
if err != nil {
|
||||
logger.Warningf("query:%+v get data err:%v", ckQueryParam, err)
|
||||
return []models.DataResp{}, err
|
||||
}
|
||||
data := make([]models.DataResp, 0)
|
||||
for i := range rows {
|
||||
data = append(data, models.DataResp{
|
||||
@@ -214,7 +211,7 @@ func (c *Clickhouse) QueryLog(ctx context.Context, query interface{}) ([]interfa
|
||||
|
||||
rows, err := c.Query(ctx, ckQueryParam)
|
||||
if err != nil {
|
||||
logger.Warningf("query:%+v get data err:%v", ckQueryParam, err)
|
||||
logx.Warningf(ctx, "query:%+v get data err:%v", ckQueryParam, err)
|
||||
return nil, 0, err
|
||||
}
|
||||
|
||||
|
||||
@@ -17,6 +17,7 @@ import (
|
||||
|
||||
"github.com/ccfos/nightingale/v6/memsto"
|
||||
"github.com/ccfos/nightingale/v6/models"
|
||||
"github.com/ccfos/nightingale/v6/pkg/logx"
|
||||
)
|
||||
|
||||
type FixedField string
|
||||
@@ -26,6 +27,10 @@ const (
|
||||
FieldId FixedField = "_id"
|
||||
)
|
||||
|
||||
// LabelSeparator 用于分隔多个标签的分隔符
|
||||
// 使用 ASCII 控制字符 Record Separator (0x1E),避免与用户数据中的 "--" 冲突
|
||||
const LabelSeparator = "\x1e"
|
||||
|
||||
type Query struct {
|
||||
Ref string `json:"ref" mapstructure:"ref"`
|
||||
IndexType string `json:"index_type" mapstructure:"index_type"` // 普通索引:index 索引模式:index_pattern
|
||||
@@ -128,7 +133,7 @@ func TransferData(metric, ref string, m map[string][][]float64) []models.DataRes
|
||||
}
|
||||
|
||||
data.Metric["__name__"] = model.LabelValue(metric)
|
||||
labels := strings.Split(k, "--")
|
||||
labels := strings.Split(k, LabelSeparator)
|
||||
for _, label := range labels {
|
||||
arr := strings.SplitN(label, "=", 2)
|
||||
if len(arr) == 2 {
|
||||
@@ -197,7 +202,7 @@ func GetBuckets(labelKey string, keys []string, arr []interface{}, metrics *Metr
|
||||
case json.Number, string:
|
||||
if !getTs {
|
||||
if labels != "" {
|
||||
newlabels = fmt.Sprintf("%s--%s=%v", labels, labelKey, keyValue)
|
||||
newlabels = fmt.Sprintf("%s%s%s=%v", labels, LabelSeparator, labelKey, keyValue)
|
||||
} else {
|
||||
newlabels = fmt.Sprintf("%s=%v", labelKey, keyValue)
|
||||
}
|
||||
@@ -386,8 +391,8 @@ func QueryData(ctx context.Context, queryParam interface{}, cliTimeout int64, ve
|
||||
now := time.Now().Unix()
|
||||
var start, end int64
|
||||
if param.End != 0 && param.Start != 0 {
|
||||
end = param.End - param.End%param.Interval
|
||||
start = param.Start - param.Start%param.Interval
|
||||
end = param.End
|
||||
start = param.Start
|
||||
} else {
|
||||
end = now
|
||||
start = end - param.Interval
|
||||
@@ -539,7 +544,7 @@ func QueryData(ctx context.Context, queryParam interface{}, cliTimeout int64, ve
|
||||
|
||||
source, _ := queryString.Source()
|
||||
b, _ := json.Marshal(source)
|
||||
logger.Debugf("query_data q:%+v indexArr:%+v tsAggr:%+v query_string:%s", param, indexArr, tsAggr, string(b))
|
||||
logx.Debugf(ctx, "query_data q:%+v indexArr:%+v tsAggr:%+v query_string:%s", param, indexArr, tsAggr, string(b))
|
||||
|
||||
searchSource := elastic.NewSearchSource().
|
||||
Query(queryString).
|
||||
@@ -547,21 +552,29 @@ func QueryData(ctx context.Context, queryParam interface{}, cliTimeout int64, ve
|
||||
|
||||
searchSourceString, err := searchSource.Source()
|
||||
if err != nil {
|
||||
logger.Warningf("query_data searchSource:%s to string error:%v", searchSourceString, err)
|
||||
logx.Warningf(ctx, "query_data searchSource:%s to string error:%v", searchSourceString, err)
|
||||
}
|
||||
|
||||
jsonSearchSource, err := json.Marshal(searchSourceString)
|
||||
if err != nil {
|
||||
logger.Warningf("query_data searchSource:%s to json error:%v", searchSourceString, err)
|
||||
logx.Warningf(ctx, "query_data searchSource:%s to json error:%v", searchSourceString, err)
|
||||
}
|
||||
|
||||
result, err := search(ctx, indexArr, searchSource, param.Timeout, param.MaxShard)
|
||||
if err != nil {
|
||||
logger.Warningf("query_data searchSource:%s query_data error:%v", searchSourceString, err)
|
||||
logx.Warningf(ctx, "query_data searchSource:%s query_data error:%v", searchSourceString, err)
|
||||
return nil, err
|
||||
}
|
||||
|
||||
logger.Debugf("query_data searchSource:%s resp:%s", string(jsonSearchSource), string(result.Aggregations["ts"]))
|
||||
// 检查是否有 shard failures,有部分数据时仅记录警告继续处理
|
||||
if shardErr := checkShardFailures(ctx, result.Shards, "query_data", searchSourceString); shardErr != nil {
|
||||
if len(result.Aggregations["ts"]) == 0 {
|
||||
return nil, shardErr
|
||||
}
|
||||
// 有部分数据,checkShardFailures 已记录警告,继续处理
|
||||
}
|
||||
|
||||
logx.Infof(ctx, "query_data searchSource:%s resp:%s", string(jsonSearchSource), string(result.Aggregations["ts"]))
|
||||
|
||||
js, err := simplejson.NewJson(result.Aggregations["ts"])
|
||||
if err != nil {
|
||||
@@ -598,6 +611,40 @@ func QueryData(ctx context.Context, queryParam interface{}, cliTimeout int64, ve
|
||||
return items, nil
|
||||
}
|
||||
|
||||
// checkShardFailures 检查 ES 查询结果中的 shard failures,返回格式化的错误信息
|
||||
func checkShardFailures(ctx context.Context, shards *elastic.ShardsInfo, logPrefix string, queryContext interface{}) error {
|
||||
if shards == nil || shards.Failed == 0 || len(shards.Failures) == 0 {
|
||||
return nil
|
||||
}
|
||||
|
||||
var failureReasons []string
|
||||
for _, failure := range shards.Failures {
|
||||
reason := ""
|
||||
if failure.Reason != nil {
|
||||
if reasonType, ok := failure.Reason["type"].(string); ok {
|
||||
reason = reasonType
|
||||
}
|
||||
if reasonMsg, ok := failure.Reason["reason"].(string); ok {
|
||||
if reason != "" {
|
||||
reason += ": " + reasonMsg
|
||||
} else {
|
||||
reason = reasonMsg
|
||||
}
|
||||
}
|
||||
}
|
||||
if reason != "" {
|
||||
failureReasons = append(failureReasons, fmt.Sprintf("index=%s shard=%d: %s", failure.Index, failure.Shard, reason))
|
||||
}
|
||||
}
|
||||
|
||||
if len(failureReasons) > 0 {
|
||||
errMsg := fmt.Sprintf("elasticsearch shard failures (%d/%d failed): %s", shards.Failed, shards.Total, strings.Join(failureReasons, "; "))
|
||||
logx.Warningf(ctx, "%s query:%v %s", logPrefix, queryContext, errMsg)
|
||||
return fmt.Errorf("%s", errMsg)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func HitFilter(typ string) bool {
|
||||
switch typ {
|
||||
case "keyword", "date", "long", "integer", "short", "byte", "double", "float", "half_float", "scaled_float", "unsigned_long":
|
||||
@@ -674,28 +721,34 @@ func QueryLog(ctx context.Context, queryParam interface{}, timeout int64, versio
|
||||
} else {
|
||||
source = source.From(param.P).Sort(param.DateField, param.Ascending)
|
||||
}
|
||||
sourceBytes, _ := json.Marshal(source)
|
||||
result, err := search(ctx, indexArr, source, param.Timeout, param.MaxShard)
|
||||
if err != nil {
|
||||
logger.Warningf("query data error:%v", err)
|
||||
logx.Warningf(ctx, "query_log source:%s error:%v", string(sourceBytes), err)
|
||||
return nil, 0, err
|
||||
}
|
||||
|
||||
// 检查是否有 shard failures,有部分数据时仅记录警告继续处理
|
||||
if shardErr := checkShardFailures(ctx, result.Shards, "query_log", string(sourceBytes)); shardErr != nil {
|
||||
if len(result.Hits.Hits) == 0 {
|
||||
return nil, 0, shardErr
|
||||
}
|
||||
// 有部分数据,checkShardFailures 已记录警告,继续处理
|
||||
}
|
||||
|
||||
total := result.TotalHits()
|
||||
|
||||
var ret []interface{}
|
||||
|
||||
b, _ := json.Marshal(source)
|
||||
logger.Debugf("query data result query source:%s len:%d total:%d", string(b), len(result.Hits.Hits), total)
|
||||
logx.Debugf(ctx, "query_log source:%s len:%d total:%d", string(sourceBytes), len(result.Hits.Hits), total)
|
||||
|
||||
resultBytes, _ := json.Marshal(result)
|
||||
logger.Debugf("query data result query source:%s result:%s", string(b), string(resultBytes))
|
||||
logx.Debugf(ctx, "query_log source:%s result:%s", string(sourceBytes), string(resultBytes))
|
||||
|
||||
if strings.HasPrefix(version, "6") {
|
||||
for i := 0; i < len(result.Hits.Hits); i++ {
|
||||
var x map[string]interface{}
|
||||
err := json.Unmarshal(result.Hits.Hits[i].Source, &x)
|
||||
if err != nil {
|
||||
logger.Warningf("Unmarshal source error:%v", err)
|
||||
logx.Warningf(ctx, "Unmarshal source error:%v", err)
|
||||
continue
|
||||
}
|
||||
|
||||
|
||||
@@ -133,4 +133,5 @@ type DatasourceInfo struct {
|
||||
CreatedAt int64 `json:"created_at"`
|
||||
UpdatedAt int64 `json:"updated_at"`
|
||||
IsDefault bool `json:"is_default"`
|
||||
Weight int `json:"weight"`
|
||||
}
|
||||
|
||||
@@ -4,15 +4,18 @@ import (
|
||||
"context"
|
||||
"fmt"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
"github.com/ccfos/nightingale/v6/datasource"
|
||||
"github.com/ccfos/nightingale/v6/dskit/doris"
|
||||
"github.com/ccfos/nightingale/v6/dskit/types"
|
||||
"github.com/ccfos/nightingale/v6/pkg/macros"
|
||||
"github.com/ccfos/nightingale/v6/models"
|
||||
"github.com/ccfos/nightingale/v6/pkg/macros"
|
||||
|
||||
"github.com/mitchellh/mapstructure"
|
||||
"github.com/toolkits/pkg/logger"
|
||||
|
||||
"github.com/ccfos/nightingale/v6/pkg/logx"
|
||||
)
|
||||
|
||||
const (
|
||||
@@ -38,6 +41,8 @@ type QueryParam struct {
|
||||
To int64 `json:"to" mapstructure:"to"`
|
||||
TimeField string `json:"time_field" mapstructure:"time_field"`
|
||||
TimeFormat string `json:"time_format" mapstructure:"time_format"`
|
||||
Interval int64 `json:"interval" mapstructure:"interval"` // 查询时间间隔(秒)
|
||||
Offset int `json:"offset" mapstructure:"offset"` // 延迟计算,不在使用通用配置delay
|
||||
}
|
||||
|
||||
func (d *Doris) InitClient() error {
|
||||
@@ -76,52 +81,19 @@ func (d *Doris) Equal(p datasource.Datasource) bool {
|
||||
return false
|
||||
}
|
||||
|
||||
// only compare first shard
|
||||
if d.Addr != newest.Addr {
|
||||
return false
|
||||
}
|
||||
|
||||
if d.User != newest.User {
|
||||
return false
|
||||
}
|
||||
|
||||
if d.Password != newest.Password {
|
||||
return false
|
||||
}
|
||||
|
||||
if d.EnableWrite != newest.EnableWrite {
|
||||
return false
|
||||
}
|
||||
|
||||
if d.FeAddr != newest.FeAddr {
|
||||
return false
|
||||
}
|
||||
|
||||
if d.MaxQueryRows != newest.MaxQueryRows {
|
||||
return false
|
||||
}
|
||||
|
||||
if d.Timeout != newest.Timeout {
|
||||
return false
|
||||
}
|
||||
|
||||
if d.MaxIdleConns != newest.MaxIdleConns {
|
||||
return false
|
||||
}
|
||||
|
||||
if d.MaxOpenConns != newest.MaxOpenConns {
|
||||
return false
|
||||
}
|
||||
|
||||
if d.ConnMaxLifetime != newest.ConnMaxLifetime {
|
||||
return false
|
||||
}
|
||||
|
||||
if d.ClusterName != newest.ClusterName {
|
||||
return false
|
||||
}
|
||||
|
||||
return true
|
||||
return d.Addr == newest.Addr &&
|
||||
d.FeAddr == newest.FeAddr &&
|
||||
d.User == newest.User &&
|
||||
d.Password == newest.Password &&
|
||||
d.EnableWrite == newest.EnableWrite &&
|
||||
d.UserWrite == newest.UserWrite &&
|
||||
d.PasswordWrite == newest.PasswordWrite &&
|
||||
d.MaxQueryRows == newest.MaxQueryRows &&
|
||||
d.Timeout == newest.Timeout &&
|
||||
d.MaxIdleConns == newest.MaxIdleConns &&
|
||||
d.MaxOpenConns == newest.MaxOpenConns &&
|
||||
d.ConnMaxLifetime == newest.ConnMaxLifetime &&
|
||||
d.ClusterName == newest.ClusterName
|
||||
}
|
||||
|
||||
func (d *Doris) MakeLogQuery(ctx context.Context, query interface{}, eventTags []string, start, end int64) (interface{}, error) {
|
||||
@@ -146,6 +118,30 @@ func (d *Doris) QueryData(ctx context.Context, query interface{}) ([]models.Data
|
||||
return nil, fmt.Errorf("valueKey is required")
|
||||
}
|
||||
|
||||
// 设置默认 interval
|
||||
if dorisQueryParam.Interval == 0 {
|
||||
dorisQueryParam.Interval = 60
|
||||
}
|
||||
|
||||
// 计算时间范围
|
||||
now := time.Now().Unix()
|
||||
var start, end int64
|
||||
if dorisQueryParam.To != 0 && dorisQueryParam.From != 0 {
|
||||
end = dorisQueryParam.To
|
||||
start = dorisQueryParam.From
|
||||
} else {
|
||||
end = now
|
||||
start = end - dorisQueryParam.Interval
|
||||
}
|
||||
|
||||
if dorisQueryParam.Offset != 0 {
|
||||
end -= int64(dorisQueryParam.Offset)
|
||||
start -= int64(dorisQueryParam.Offset)
|
||||
}
|
||||
|
||||
dorisQueryParam.From = start
|
||||
dorisQueryParam.To = end
|
||||
|
||||
if strings.Contains(dorisQueryParam.SQL, "$__") {
|
||||
var err error
|
||||
dorisQueryParam.SQL, err = macros.Macro(dorisQueryParam.SQL, dorisQueryParam.From, dorisQueryParam.To)
|
||||
@@ -154,17 +150,18 @@ func (d *Doris) QueryData(ctx context.Context, query interface{}) ([]models.Data
|
||||
}
|
||||
}
|
||||
|
||||
items, err := d.QueryTimeseries(context.TODO(), &doris.QueryParam{
|
||||
items, err := d.QueryTimeseries(ctx, &doris.QueryParam{
|
||||
Database: dorisQueryParam.Database,
|
||||
Sql: dorisQueryParam.SQL,
|
||||
Keys: types.Keys{
|
||||
ValueKey: dorisQueryParam.Keys.ValueKey,
|
||||
LabelKey: dorisQueryParam.Keys.LabelKey,
|
||||
TimeKey: dorisQueryParam.Keys.TimeKey,
|
||||
Offset: dorisQueryParam.Offset,
|
||||
},
|
||||
})
|
||||
if err != nil {
|
||||
logger.Warningf("query:%+v get data err:%v", dorisQueryParam, err)
|
||||
logx.Warningf(ctx, "query:%+v get data err:%v", dorisQueryParam, err)
|
||||
return []models.DataResp{}, err
|
||||
}
|
||||
data := make([]models.DataResp, 0)
|
||||
@@ -177,7 +174,7 @@ func (d *Doris) QueryData(ctx context.Context, query interface{}) ([]models.Data
|
||||
}
|
||||
|
||||
// parse resp to time series data
|
||||
logger.Infof("req:%+v keys:%+v \n data:%v", dorisQueryParam, dorisQueryParam.Keys, data)
|
||||
logx.Infof(ctx, "req:%+v keys:%+v \n data:%v", dorisQueryParam, dorisQueryParam.Keys, data)
|
||||
|
||||
return data, nil
|
||||
}
|
||||
@@ -188,6 +185,18 @@ func (d *Doris) QueryLog(ctx context.Context, query interface{}) ([]interface{},
|
||||
return nil, 0, err
|
||||
}
|
||||
|
||||
// 记录规则预览场景下,只传了interval, 没有传From和To
|
||||
now := time.Now().Unix()
|
||||
if dorisQueryParam.To == 0 && dorisQueryParam.From == 0 && dorisQueryParam.Interval != 0 {
|
||||
dorisQueryParam.To = now
|
||||
dorisQueryParam.From = now - dorisQueryParam.Interval
|
||||
}
|
||||
|
||||
if dorisQueryParam.Offset != 0 {
|
||||
dorisQueryParam.To -= int64(dorisQueryParam.Offset)
|
||||
dorisQueryParam.From -= int64(dorisQueryParam.Offset)
|
||||
}
|
||||
|
||||
if strings.Contains(dorisQueryParam.SQL, "$__") {
|
||||
var err error
|
||||
dorisQueryParam.SQL, err = macros.Macro(dorisQueryParam.SQL, dorisQueryParam.From, dorisQueryParam.To)
|
||||
@@ -201,7 +210,7 @@ func (d *Doris) QueryLog(ctx context.Context, query interface{}) ([]interface{},
|
||||
Sql: dorisQueryParam.SQL,
|
||||
})
|
||||
if err != nil {
|
||||
logger.Warningf("query:%+v get data err:%v", dorisQueryParam, err)
|
||||
logx.Warningf(ctx, "query:%+v get data err:%v", dorisQueryParam, err)
|
||||
return []interface{}{}, 0, err
|
||||
}
|
||||
logs := make([]interface{}, 0)
|
||||
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user