Compare commits

..

20 Commits

Author SHA1 Message Date
huangjie
bec92fdc60 refactor: feishu sso support default usergroup (#3077) 2026-02-10 17:49:58 +08:00
Yening Qin
671f14092c update list api (#3075) 2026-02-09 20:26:18 +08:00
liufuniu
99d6ba9508 fix: doris datasource equal (#3073) 2026-02-05 17:46:05 +08:00
liufuniu
47f3eae09d refactor: update doris datasource (#3070) 2026-02-05 13:42:32 +08:00
liufuniu
5e89c670a8 doris:add write user (#3066) 2026-02-04 17:27:26 +08:00
ning
e1cc37c753 refactor: recording rule api 2026-02-02 16:29:24 +08:00
ning
2be94f592c refactor: change ident meta mset 2026-02-02 15:14:37 +08:00
ning
5babc4310a refactor: recording rule api 2026-02-02 14:42:02 +08:00
ning
f968fcd593 Merge branch 'release-21' of github.com:ccfos/nightingale into release-21 2026-01-28 10:41:24 +08:00
ning
4dc7035550 brain fix get datasource 2026-01-28 10:28:26 +08:00
huangjie
2a2b46ca7b feishu userid (#3058)
Co-authored-by: jie210 <huangjie@flashcat.com>
2026-01-26 20:20:45 +08:00
ning
ed96ab9d5b optimize drop sample 2026-01-23 15:26:46 +08:00
Yening Qin
2e2bbd6aeb Update workflow (#3051) 2026-01-22 19:45:41 +08:00
ning
c93694a2a9 refactor: update init metrics tpl 2026-01-21 19:45:57 +08:00
ning
cfb8c3b66a refactor: update doris check max rows 2026-01-21 16:03:04 +08:00
ning
cb5e62b7bb fix save workflow execution 2026-01-20 21:28:51 +08:00
yuansheng
ebfde8d6a0 refactor: record_rule support writeback_enabled (#3048) 2026-01-20 19:32:09 +08:00
ning
b4dcaebf83 refactor: update doris check max rows 2026-01-20 16:34:50 +08:00
huangjie
fa491e313a sso add feishu (#3046) 2026-01-19 14:12:38 +08:00
ning
4fe2b5042f refactor: update trigger value 2026-01-14 19:41:32 +08:00
123 changed files with 450 additions and 2383 deletions

View File

@@ -31,9 +31,7 @@
Nightingale is an open-source monitoring project that focuses on alerting. Similar to Grafana, Nightingale also connects with various existing data sources. However, while Grafana emphasizes visualization, Nightingale places greater emphasis on the alerting engine, as well as the processing and distribution of alarms.
> 💡 Nightingale has now officially launched the [MCP-Server](https://github.com/n9e/n9e-mcp-server/). This MCP Server enables AI assistants to interact with the Nightingale API using natural language, facilitating alert management, monitoring, and observability tasks.
>
> The Nightingale project was initially developed and open-sourced by DiDi.inc. On May 11, 2022, it was donated to the Open Source Development Committee of the China Computer Federation (CCF ODTC).
> The Nightingale project was initially developed and open-sourced by DiDi.inc. On May 11, 2022, it was donated to the Open Source Development Committee of the China Computer Federation (CCF ODC).
![](https://n9e.github.io/img/global/arch-bg.png)

View File

@@ -3,7 +3,7 @@
<img src="doc/img/Nightingale_L_V.png" alt="nightingale - cloud native monitoring" width="100" /></a>
</p>
<p align="center">
<b>开源监控告警管理专家</b>
<b>开源告警管理专家</b>
</p>
<p align="center">
@@ -33,8 +33,7 @@
夜莺侧重于监控告警,类似于 Grafana 的数据源集成方式,夜莺也是对接多种既有的数据源,不过 Grafana 侧重于可视化,夜莺则是侧重于告警引擎、告警事件的处理和分发。
> - 💡夜莺正式推出了 [MCP-Server](https://github.com/n9e/n9e-mcp-server/),此 MCP Server 允许 AI 助手通过自然语言与夜莺 API 交互,实现告警管理、监控和可观测性任务
> - 夜莺监控项目,最初由滴滴开发和开源,并于 2022 年 5 月 11 日捐赠予中国计算机学会开源发展技术委员会(CCF ODTC),为 CCF ODTC 成立后接受捐赠的第一个开源项目。
> 夜莺监控项目,最初由滴滴开发和开源,并于 2022 年 5 月 11 日捐赠予中国计算机学会开源发展技术委员会(CCF ODTC),为 CCF ODTC 成立后接受捐赠的第一个开源项目
![](https://n9e.github.io/img/global/arch-bg.png)

View File

@@ -79,7 +79,7 @@ func Initialize(configDir string, cryptoKey string) (func(), error) {
r := httpx.GinEngine(config.Global.RunMode, config.HTTP,
configCvalCache.PrintBodyPaths, configCvalCache.PrintAccessLog)
rt := router.New(config.HTTP, config.Alert, alertMuteCache, targetCache, busiGroupCache, alertStats, ctx, externalProcessors, config.Log.Dir)
rt := router.New(config.HTTP, config.Alert, alertMuteCache, targetCache, busiGroupCache, alertStats, ctx, externalProcessors)
if config.Ibex.Enable {
ibex.ServerStart(false, nil, redis, config.HTTP.APIForService.BasicAuth, config.Alert.Heartbeat, &config.CenterApi, r, nil, config.Ibex, config.HTTP.Port)

View File

@@ -8,6 +8,7 @@ import (
"time"
"github.com/ccfos/nightingale/v6/alert/aconf"
"github.com/ccfos/nightingale/v6/alert/common"
"github.com/ccfos/nightingale/v6/alert/queue"
"github.com/ccfos/nightingale/v6/memsto"
"github.com/ccfos/nightingale/v6/models"
@@ -98,12 +99,12 @@ func (e *Consumer) consumeOne(event *models.AlertCurEvent) {
e.dispatch.Astats.CounterAlertsTotal.WithLabelValues(event.Cluster, eventType, event.GroupName).Inc()
if err := event.ParseRule("rule_name"); err != nil {
logger.Warningf("alert_eval_%d datasource_%d failed to parse rule name: %v", event.RuleId, event.DatasourceId, err)
logger.Warningf("ruleid:%d failed to parse rule name: %v", event.RuleId, err)
event.RuleName = fmt.Sprintf("failed to parse rule name: %v", err)
}
if err := event.ParseRule("annotations"); err != nil {
logger.Warningf("alert_eval_%d datasource_%d failed to parse annotations: %v", event.RuleId, event.DatasourceId, err)
logger.Warningf("ruleid:%d failed to parse annotations: %v", event.RuleId, err)
event.Annotations = fmt.Sprintf("failed to parse annotations: %v", err)
event.AnnotationsJSON["error"] = event.Annotations
}
@@ -111,7 +112,7 @@ func (e *Consumer) consumeOne(event *models.AlertCurEvent) {
e.queryRecoveryVal(event)
if err := event.ParseRule("rule_note"); err != nil {
logger.Warningf("alert_eval_%d datasource_%d failed to parse rule note: %v", event.RuleId, event.DatasourceId, err)
logger.Warningf("ruleid:%d failed to parse rule note: %v", event.RuleId, err)
event.RuleNote = fmt.Sprintf("failed to parse rule note: %v", err)
}
@@ -130,7 +131,7 @@ func (e *Consumer) persist(event *models.AlertCurEvent) {
var err error
event.Id, err = poster.PostByUrlsWithResp[int64](e.ctx, "/v1/n9e/event-persist", event)
if err != nil {
logger.Errorf("event:%s persist err:%v", event.Hash, err)
logger.Errorf("event:%+v persist err:%v", event, err)
e.dispatch.Astats.CounterRuleEvalErrorTotal.WithLabelValues(fmt.Sprintf("%v", event.DatasourceId), "persist_event", event.GroupName, fmt.Sprintf("%v", event.RuleId)).Inc()
}
return
@@ -138,7 +139,7 @@ func (e *Consumer) persist(event *models.AlertCurEvent) {
err := models.EventPersist(e.ctx, event)
if err != nil {
logger.Errorf("event:%s persist err:%v", event.Hash, err)
logger.Errorf("event%+v persist err:%v", event, err)
e.dispatch.Astats.CounterRuleEvalErrorTotal.WithLabelValues(fmt.Sprintf("%v", event.DatasourceId), "persist_event", event.GroupName, fmt.Sprintf("%v", event.RuleId)).Inc()
}
}
@@ -156,12 +157,12 @@ func (e *Consumer) queryRecoveryVal(event *models.AlertCurEvent) {
promql = strings.TrimSpace(promql)
if promql == "" {
logger.Warningf("alert_eval_%d datasource_%d promql is blank", event.RuleId, event.DatasourceId)
logger.Warningf("rule_eval:%s promql is blank", getKey(event))
return
}
if e.promClients.IsNil(event.DatasourceId) {
logger.Warningf("alert_eval_%d datasource_%d error reader client is nil", event.RuleId, event.DatasourceId)
logger.Warningf("rule_eval:%s error reader client is nil", getKey(event))
return
}
@@ -170,7 +171,7 @@ func (e *Consumer) queryRecoveryVal(event *models.AlertCurEvent) {
var warnings promsdk.Warnings
value, warnings, err := readerClient.Query(e.ctx.Ctx, promql, time.Now())
if err != nil {
logger.Errorf("alert_eval_%d datasource_%d promql:%s, error:%v", event.RuleId, event.DatasourceId, promql, err)
logger.Errorf("rule_eval:%s promql:%s, error:%v", getKey(event), promql, err)
event.AnnotationsJSON["recovery_promql_error"] = fmt.Sprintf("promql:%s error:%v", promql, err)
b, err := json.Marshal(event.AnnotationsJSON)
@@ -184,12 +185,12 @@ func (e *Consumer) queryRecoveryVal(event *models.AlertCurEvent) {
}
if len(warnings) > 0 {
logger.Errorf("alert_eval_%d datasource_%d promql:%s, warnings:%v", event.RuleId, event.DatasourceId, promql, warnings)
logger.Errorf("rule_eval:%s promql:%s, warnings:%v", getKey(event), promql, warnings)
}
anomalyPoints := models.ConvertAnomalyPoints(value)
if len(anomalyPoints) == 0 {
logger.Warningf("alert_eval_%d datasource_%d promql:%s, result is empty", event.RuleId, event.DatasourceId, promql)
logger.Warningf("rule_eval:%s promql:%s, result is empty", getKey(event), promql)
event.AnnotationsJSON["recovery_promql_error"] = fmt.Sprintf("promql:%s error:%s", promql, "result is empty")
} else {
event.AnnotationsJSON["recovery_value"] = fmt.Sprintf("%v", anomalyPoints[0].Value)
@@ -204,3 +205,6 @@ func (e *Consumer) queryRecoveryVal(event *models.AlertCurEvent) {
}
}
func getKey(event *models.AlertCurEvent) string {
return common.RuleKey(event.DatasourceId, event.RuleId)
}

View File

@@ -171,7 +171,7 @@ func (e *Dispatch) HandleEventWithNotifyRule(eventOrigin *models.AlertCurEvent)
// 深拷贝新的 event避免并发修改 event 冲突
eventCopy := eventOrigin.DeepCopy()
logger.Infof("notify rule ids: %v, event: %s", notifyRuleId, eventCopy.Hash)
logger.Infof("notify rule ids: %v, event: %+v", notifyRuleId, eventCopy)
notifyRule := e.notifyRuleCache.Get(notifyRuleId)
if notifyRule == nil {
continue
@@ -185,7 +185,7 @@ func (e *Dispatch) HandleEventWithNotifyRule(eventOrigin *models.AlertCurEvent)
eventCopy = HandleEventPipeline(notifyRule.PipelineConfigs, eventOrigin, eventCopy, e.eventProcessorCache, e.ctx, notifyRuleId, "notify_rule")
if ShouldSkipNotify(e.ctx, eventCopy, notifyRuleId) {
logger.Infof("notify_id: %d, event:%s, should skip notify", notifyRuleId, eventCopy.Hash)
logger.Infof("notify_id: %d, event:%+v, should skip notify", notifyRuleId, eventCopy)
continue
}
@@ -193,7 +193,7 @@ func (e *Dispatch) HandleEventWithNotifyRule(eventOrigin *models.AlertCurEvent)
for i := range notifyRule.NotifyConfigs {
err := NotifyRuleMatchCheck(&notifyRule.NotifyConfigs[i], eventCopy)
if err != nil {
logger.Errorf("notify_id: %d, event:%s, channel_id:%d, template_id: %d, notify_config:%+v, err:%v", notifyRuleId, eventCopy.Hash, notifyRule.NotifyConfigs[i].ChannelID, notifyRule.NotifyConfigs[i].TemplateID, notifyRule.NotifyConfigs[i], err)
logger.Errorf("notify_id: %d, event:%+v, channel_id:%d, template_id: %d, notify_config:%+v, err:%v", notifyRuleId, eventCopy, notifyRule.NotifyConfigs[i].ChannelID, notifyRule.NotifyConfigs[i].TemplateID, notifyRule.NotifyConfigs[i], err)
continue
}
@@ -201,12 +201,12 @@ func (e *Dispatch) HandleEventWithNotifyRule(eventOrigin *models.AlertCurEvent)
messageTemplate := e.messageTemplateCache.Get(notifyRule.NotifyConfigs[i].TemplateID)
if notifyChannel == nil {
sender.NotifyRecord(e.ctx, []*models.AlertCurEvent{eventCopy}, notifyRuleId, fmt.Sprintf("notify_channel_id:%d", notifyRule.NotifyConfigs[i].ChannelID), "", "", errors.New("notify_channel not found"))
logger.Warningf("notify_id: %d, event:%s, channel_id:%d, template_id: %d, notify_channel not found", notifyRuleId, eventCopy.Hash, notifyRule.NotifyConfigs[i].ChannelID, notifyRule.NotifyConfigs[i].TemplateID)
logger.Warningf("notify_id: %d, event:%+v, channel_id:%d, template_id: %d, notify_channel not found", notifyRuleId, eventCopy, notifyRule.NotifyConfigs[i].ChannelID, notifyRule.NotifyConfigs[i].TemplateID)
continue
}
if notifyChannel.RequestType != "flashduty" && notifyChannel.RequestType != "pagerduty" && messageTemplate == nil {
logger.Warningf("notify_id: %d, channel_name: %v, event:%s, template_id: %d, message_template not found", notifyRuleId, notifyChannel.Ident, eventCopy.Hash, notifyRule.NotifyConfigs[i].TemplateID)
logger.Warningf("notify_id: %d, channel_name: %v, event:%+v, template_id: %d, message_template not found", notifyRuleId, notifyChannel.Ident, eventCopy, notifyRule.NotifyConfigs[i].TemplateID)
sender.NotifyRecord(e.ctx, []*models.AlertCurEvent{eventCopy}, notifyRuleId, notifyChannel.Name, "", "", errors.New("message_template not found"))
continue
@@ -241,12 +241,12 @@ func HandleEventPipeline(pipelineConfigs []models.PipelineConfig, eventOrigin, e
eventPipeline := eventProcessorCache.Get(pipelineConfig.PipelineId)
if eventPipeline == nil {
logger.Warningf("processor_by_%s_id:%d pipeline_id:%d, event pipeline not found, event: %s", from, id, pipelineConfig.PipelineId, event.Hash)
logger.Warningf("processor_by_%s_id:%d pipeline_id:%d, event pipeline not found, event: %+v", from, id, pipelineConfig.PipelineId, event)
continue
}
if !PipelineApplicable(eventPipeline, event) {
logger.Debugf("processor_by_%s_id:%d pipeline_id:%d, event pipeline not applicable, event: %s", from, id, pipelineConfig.PipelineId, event.Hash)
logger.Debugf("processor_by_%s_id:%d pipeline_id:%d, event pipeline not applicable, event: %+v", from, id, pipelineConfig.PipelineId, event)
continue
}
@@ -263,7 +263,7 @@ func HandleEventPipeline(pipelineConfigs []models.PipelineConfig, eventOrigin, e
}
if resultEvent == nil {
logger.Infof("processor_by_%s_id:%d pipeline_id:%d, event dropped, event: %s", from, id, pipelineConfig.PipelineId, eventOrigin.Hash)
logger.Infof("processor_by_%s_id:%d pipeline_id:%d, event dropped, event: %+v", from, id, pipelineConfig.PipelineId, eventOrigin)
if from == "notify_rule" {
sender.NotifyRecord(ctx, []*models.AlertCurEvent{eventOrigin}, id, "", "", result.Message, fmt.Errorf("processor_by_%s_id:%d pipeline_id:%d, drop by pipeline", from, id, pipelineConfig.PipelineId))
}
@@ -301,7 +301,7 @@ func PipelineApplicable(pipeline *models.EventPipeline, event *models.AlertCurEv
tagFilters, err := models.ParseTagFilter(labelFiltersCopy)
if err != nil {
logger.Errorf("pipeline applicable failed to parse tag filter: %v event:%s pipeline:%+v", err, event.Hash, pipeline)
logger.Errorf("pipeline applicable failed to parse tag filter: %v event:%+v pipeline:%+v", err, event, pipeline)
return false
}
tagMatch = common.MatchTags(event.TagsMap, tagFilters)
@@ -315,7 +315,7 @@ func PipelineApplicable(pipeline *models.EventPipeline, event *models.AlertCurEv
tagFilters, err := models.ParseTagFilter(attrFiltersCopy)
if err != nil {
logger.Errorf("pipeline applicable failed to parse tag filter: %v event:%s pipeline:%+v err:%v", tagFilters, event.Hash, pipeline, err)
logger.Errorf("pipeline applicable failed to parse tag filter: %v event:%+v pipeline:%+v err:%v", tagFilters, event, pipeline, err)
return false
}
@@ -405,7 +405,7 @@ func NotifyRuleMatchCheck(notifyConfig *models.NotifyConfig, event *models.Alert
tagFilters, err := models.ParseTagFilter(labelKeysCopy)
if err != nil {
logger.Errorf("notify send failed to parse tag filter: %v event:%s notify_config:%+v", err, event.Hash, notifyConfig)
logger.Errorf("notify send failed to parse tag filter: %v event:%+v notify_config:%+v", err, event, notifyConfig)
return fmt.Errorf("failed to parse tag filter: %v", err)
}
tagMatch = common.MatchTags(event.TagsMap, tagFilters)
@@ -423,7 +423,7 @@ func NotifyRuleMatchCheck(notifyConfig *models.NotifyConfig, event *models.Alert
tagFilters, err := models.ParseTagFilter(attributesCopy)
if err != nil {
logger.Errorf("notify send failed to parse tag filter: %v event:%s notify_config:%+v err:%v", tagFilters, event.Hash, notifyConfig, err)
logger.Errorf("notify send failed to parse tag filter: %v event:%+v notify_config:%+v err:%v", tagFilters, event, notifyConfig, err)
return fmt.Errorf("failed to parse tag filter: %v", err)
}
@@ -434,7 +434,7 @@ func NotifyRuleMatchCheck(notifyConfig *models.NotifyConfig, event *models.Alert
return fmt.Errorf("event attributes not match attributes filter")
}
logger.Infof("notify send timeMatch:%v severityMatch:%v tagMatch:%v attributesMatch:%v event:%s notify_config:%+v", timeMatch, severityMatch, tagMatch, attributesMatch, event.Hash, notifyConfig)
logger.Infof("notify send timeMatch:%v severityMatch:%v tagMatch:%v attributesMatch:%v event:%+v notify_config:%+v", timeMatch, severityMatch, tagMatch, attributesMatch, event, notifyConfig)
return nil
}
@@ -547,7 +547,7 @@ func SendNotifyRuleMessage(ctx *ctx.Context, userCache *memsto.UserCacheType, us
start := time.Now()
respBody, err := notifyChannel.SendFlashDuty(events, flashDutyChannelIDs[i], notifyChannelCache.GetHttpClient(notifyChannel.ID))
respBody = fmt.Sprintf("send_time: %s duration: %d ms %s", time.Now().Format("2006-01-02 15:04:05"), time.Since(start).Milliseconds(), respBody)
logger.Infof("duty_sender notify_id: %d, channel_name: %v, event:%s, IntegrationUrl: %v dutychannel_id: %v, respBody: %v, err: %v", notifyRuleId, notifyChannel.Name, events[0].Hash, notifyChannel.RequestConfig.FlashDutyRequestConfig.IntegrationUrl, flashDutyChannelIDs[i], respBody, err)
logger.Infof("duty_sender notify_id: %d, channel_name: %v, event:%+v, IntegrationUrl: %v dutychannel_id: %v, respBody: %v, err: %v", notifyRuleId, notifyChannel.Name, events[0], notifyChannel.RequestConfig.FlashDutyRequestConfig.IntegrationUrl, flashDutyChannelIDs[i], respBody, err)
sender.NotifyRecord(ctx, events, notifyRuleId, notifyChannel.Name, strconv.FormatInt(flashDutyChannelIDs[i], 10), respBody, err)
}
@@ -556,7 +556,7 @@ func SendNotifyRuleMessage(ctx *ctx.Context, userCache *memsto.UserCacheType, us
start := time.Now()
respBody, err := notifyChannel.SendPagerDuty(events, routingKey, siteInfo.SiteUrl, notifyChannelCache.GetHttpClient(notifyChannel.ID))
respBody = fmt.Sprintf("send_time: %s duration: %d ms %s", time.Now().Format("2006-01-02 15:04:05"), time.Since(start).Milliseconds(), respBody)
logger.Infof("pagerduty_sender notify_id: %d, channel_name: %v, event:%s, respBody: %v, err: %v", notifyRuleId, notifyChannel.Name, events[0].Hash, respBody, err)
logger.Infof("pagerduty_sender notify_id: %d, channel_name: %v, event:%+v, respBody: %v, err: %v", notifyRuleId, notifyChannel.Name, events[0], respBody, err)
sender.NotifyRecord(ctx, events, notifyRuleId, notifyChannel.Name, "", respBody, err)
}
@@ -587,10 +587,10 @@ func SendNotifyRuleMessage(ctx *ctx.Context, userCache *memsto.UserCacheType, us
start := time.Now()
target, res, err := notifyChannel.SendScript(events, tplContent, customParams, sendtos)
res = fmt.Sprintf("send_time: %s duration: %d ms %s", time.Now().Format("2006-01-02 15:04:05"), time.Since(start).Milliseconds(), res)
logger.Infof("script_sender notify_id: %d, channel_name: %v, event:%s, tplContent:%s, customParams:%v, target:%s, res:%s, err:%v", notifyRuleId, notifyChannel.Name, events[0].Hash, tplContent, customParams, target, res, err)
logger.Infof("script_sender notify_id: %d, channel_name: %v, event:%+v, tplContent:%s, customParams:%v, target:%s, res:%s, err:%v", notifyRuleId, notifyChannel.Name, events[0], tplContent, customParams, target, res, err)
sender.NotifyRecord(ctx, events, notifyRuleId, notifyChannel.Name, target, res, err)
default:
logger.Warningf("notify_id: %d, channel_name: %v, event:%s send type not found", notifyRuleId, notifyChannel.Name, events[0].Hash)
logger.Warningf("notify_id: %d, channel_name: %v, event:%+v send type not found", notifyRuleId, notifyChannel.Name, events[0])
}
}
@@ -734,7 +734,7 @@ func (e *Dispatch) Send(rule *models.AlertRule, event *models.AlertCurEvent, not
event = msgCtx.Events[0]
}
logger.Debugf("send to channel:%s event:%s users:%+v", channel, event.Hash, msgCtx.Users)
logger.Debugf("send to channel:%s event:%+v users:%+v", channel, event, msgCtx.Users)
s.Send(msgCtx)
}
}

View File

@@ -18,11 +18,11 @@ func LogEvent(event *models.AlertCurEvent, location string, err ...error) {
}
logger.Infof(
"alert_eval_%d event(%s %s) %s: sub_id:%d notify_rule_ids:%v cluster:%s %v%s@%d last_eval_time:%d %s",
event.RuleId,
"event(%s %s) %s: rule_id=%d sub_id:%d notify_rule_ids:%v cluster:%s %v%s@%d last_eval_time:%d %s",
event.Hash,
status,
location,
event.RuleId,
event.SubRuleId,
event.NotifyRuleIds,
event.Cluster,

View File

@@ -101,17 +101,17 @@ func (s *Scheduler) syncAlertRules() {
}
ds := s.datasourceCache.GetById(dsId)
if ds == nil {
logger.Debugf("alert_eval_%d datasource %d not found", rule.Id, dsId)
logger.Debugf("datasource %d not found", dsId)
continue
}
if ds.PluginType != ruleType {
logger.Debugf("alert_eval_%d datasource %d category is %s not %s", rule.Id, dsId, ds.PluginType, ruleType)
logger.Debugf("datasource %d category is %s not %s", dsId, ds.PluginType, ruleType)
continue
}
if ds.Status != "enabled" {
logger.Debugf("alert_eval_%d datasource %d status is %s", rule.Id, dsId, ds.Status)
logger.Debugf("datasource %d status is %s", dsId, ds.Status)
continue
}
processor := process.NewProcessor(s.aconf.Heartbeat.EngineName, rule, dsId, s.alertRuleCache, s.targetCache, s.targetsOfAlertRuleCache, s.busiGroupCache, s.alertMuteCache, s.datasourceCache, s.ctx, s.stats)
@@ -134,12 +134,12 @@ func (s *Scheduler) syncAlertRules() {
for _, dsId := range dsIds {
ds := s.datasourceCache.GetById(dsId)
if ds == nil {
logger.Debugf("alert_eval_%d datasource %d not found", rule.Id, dsId)
logger.Debugf("datasource %d not found", dsId)
continue
}
if ds.Status != "enabled" {
logger.Debugf("alert_eval_%d datasource %d status is %s", rule.Id, dsId, ds.Status)
logger.Debugf("datasource %d status is %s", dsId, ds.Status)
continue
}
processor := process.NewProcessor(s.aconf.Heartbeat.EngineName, rule, dsId, s.alertRuleCache, s.targetCache, s.targetsOfAlertRuleCache, s.busiGroupCache, s.alertMuteCache, s.datasourceCache, s.ctx, s.stats)

View File

@@ -109,7 +109,7 @@ func NewAlertRuleWorker(rule *models.AlertRule, datasourceId int64, Processor *p
})
if err != nil {
logger.Errorf("alert_eval_%d datasource_%d add cron pattern error: %v", arw.Rule.Id, arw.DatasourceId, err)
logger.Errorf("alert rule %s add cron pattern error: %v", arw.Key(), err)
}
Processor.ScheduleEntry = arw.Scheduler.Entry(entryID)
@@ -152,9 +152,9 @@ func (arw *AlertRuleWorker) Eval() {
defer func() {
if len(message) == 0 {
logger.Infof("alert_eval_%d datasource_%d finished, duration:%v", arw.Rule.Id, arw.DatasourceId, time.Since(begin))
logger.Infof("rule_eval:%s finished, duration:%v", arw.Key(), time.Since(begin))
} else {
logger.Warningf("alert_eval_%d datasource_%d finished, duration:%v, message:%s", arw.Rule.Id, arw.DatasourceId, time.Since(begin), message)
logger.Warningf("rule_eval:%s finished, duration:%v, message:%s", arw.Key(), time.Since(begin), message)
}
}()
@@ -236,7 +236,7 @@ func (arw *AlertRuleWorker) Eval() {
}
func (arw *AlertRuleWorker) Stop() {
logger.Infof("alert_eval_%d datasource_%d stopped", arw.Rule.Id, arw.DatasourceId)
logger.Infof("rule_eval:%s stopped", arw.Key())
close(arw.Quit)
c := arw.Scheduler.Stop()
<-c.Done()
@@ -252,7 +252,7 @@ func (arw *AlertRuleWorker) GetPromAnomalyPoint(ruleConfig string) ([]models.Ano
var rule *models.PromRuleConfig
if err := json.Unmarshal([]byte(ruleConfig), &rule); err != nil {
logger.Errorf("alert_eval_%d datasource_%d rule_config:%s, error:%v", arw.Rule.Id, arw.DatasourceId, ruleConfig, err)
logger.Errorf("rule_eval:%s rule_config:%s, error:%v", arw.Key(), ruleConfig, err)
arw.Processor.Stats.CounterRuleEvalErrorTotal.WithLabelValues(fmt.Sprintf("%v", arw.Processor.DatasourceId()), GET_RULE_CONFIG, arw.Processor.BusiGroupCache.GetNameByBusiGroupId(arw.Rule.GroupId), fmt.Sprintf("%v", arw.Rule.Id)).Inc()
arw.Processor.Stats.GaugeQuerySeriesCount.WithLabelValues(
fmt.Sprintf("%v", arw.Rule.Id),
@@ -263,7 +263,7 @@ func (arw *AlertRuleWorker) GetPromAnomalyPoint(ruleConfig string) ([]models.Ano
}
if rule == nil {
logger.Errorf("alert_eval_%d datasource_%d rule_config:%s, error:rule is nil", arw.Rule.Id, arw.DatasourceId, ruleConfig)
logger.Errorf("rule_eval:%s rule_config:%s, error:rule is nil", arw.Key(), ruleConfig)
arw.Processor.Stats.CounterRuleEvalErrorTotal.WithLabelValues(fmt.Sprintf("%v", arw.Processor.DatasourceId()), GET_RULE_CONFIG, arw.Processor.BusiGroupCache.GetNameByBusiGroupId(arw.Rule.GroupId), fmt.Sprintf("%v", arw.Rule.Id)).Inc()
arw.Processor.Stats.GaugeQuerySeriesCount.WithLabelValues(
fmt.Sprintf("%v", arw.Rule.Id),
@@ -278,7 +278,7 @@ func (arw *AlertRuleWorker) GetPromAnomalyPoint(ruleConfig string) ([]models.Ano
readerClient := arw.PromClients.GetCli(arw.DatasourceId)
if readerClient == nil {
logger.Warningf("alert_eval_%d datasource_%d error reader client is nil", arw.Rule.Id, arw.DatasourceId)
logger.Warningf("rule_eval:%s error reader client is nil", arw.Key())
arw.Processor.Stats.CounterRuleEvalErrorTotal.WithLabelValues(fmt.Sprintf("%v", arw.Processor.DatasourceId()), GET_CLIENT, arw.Processor.BusiGroupCache.GetNameByBusiGroupId(arw.Rule.GroupId), fmt.Sprintf("%v", arw.Rule.Id)).Inc()
arw.Processor.Stats.GaugeQuerySeriesCount.WithLabelValues(
fmt.Sprintf("%v", arw.Rule.Id),
@@ -314,13 +314,13 @@ func (arw *AlertRuleWorker) GetPromAnomalyPoint(ruleConfig string) ([]models.Ano
// 无变量
promql := strings.TrimSpace(query.PromQl)
if promql == "" {
logger.Warningf("alert_eval_%d datasource_%d promql is blank", arw.Rule.Id, arw.DatasourceId)
logger.Warningf("rule_eval:%s promql is blank", arw.Key())
arw.Processor.Stats.CounterRuleEvalErrorTotal.WithLabelValues(fmt.Sprintf("%v", arw.Processor.DatasourceId()), CHECK_QUERY, arw.Processor.BusiGroupCache.GetNameByBusiGroupId(arw.Rule.GroupId), fmt.Sprintf("%v", arw.Rule.Id)).Inc()
continue
}
if arw.PromClients.IsNil(arw.DatasourceId) {
logger.Warningf("alert_eval_%d datasource_%d error reader client is nil", arw.Rule.Id, arw.DatasourceId)
logger.Warningf("rule_eval:%s error reader client is nil", arw.Key())
arw.Processor.Stats.CounterRuleEvalErrorTotal.WithLabelValues(fmt.Sprintf("%v", arw.Processor.DatasourceId()), GET_CLIENT, arw.Processor.BusiGroupCache.GetNameByBusiGroupId(arw.Rule.GroupId), fmt.Sprintf("%v", arw.Rule.Id)).Inc()
continue
}
@@ -329,7 +329,7 @@ func (arw *AlertRuleWorker) GetPromAnomalyPoint(ruleConfig string) ([]models.Ano
arw.Processor.Stats.CounterQueryDataTotal.WithLabelValues(fmt.Sprintf("%d", arw.DatasourceId), fmt.Sprintf("%d", arw.Rule.Id)).Inc()
value, warnings, err := readerClient.Query(context.Background(), promql, time.Now())
if err != nil {
logger.Errorf("alert_eval_%d datasource_%d promql:%s, error:%v", arw.Rule.Id, arw.DatasourceId, promql, err)
logger.Errorf("rule_eval:%s promql:%s, error:%v", arw.Key(), promql, err)
arw.Processor.Stats.CounterQueryDataErrorTotal.WithLabelValues(fmt.Sprintf("%d", arw.DatasourceId)).Inc()
arw.Processor.Stats.CounterRuleEvalErrorTotal.WithLabelValues(fmt.Sprintf("%v", arw.Processor.DatasourceId()), QUERY_DATA, arw.Processor.BusiGroupCache.GetNameByBusiGroupId(arw.Rule.GroupId), fmt.Sprintf("%v", arw.Rule.Id)).Inc()
arw.Processor.Stats.GaugeQuerySeriesCount.WithLabelValues(
@@ -341,12 +341,12 @@ func (arw *AlertRuleWorker) GetPromAnomalyPoint(ruleConfig string) ([]models.Ano
}
if len(warnings) > 0 {
logger.Errorf("alert_eval_%d datasource_%d promql:%s, warnings:%v", arw.Rule.Id, arw.DatasourceId, promql, warnings)
logger.Errorf("rule_eval:%s promql:%s, warnings:%v", arw.Key(), promql, warnings)
arw.Processor.Stats.CounterQueryDataErrorTotal.WithLabelValues(fmt.Sprintf("%d", arw.DatasourceId)).Inc()
arw.Processor.Stats.CounterRuleEvalErrorTotal.WithLabelValues(fmt.Sprintf("%v", arw.Processor.DatasourceId()), QUERY_DATA, arw.Processor.BusiGroupCache.GetNameByBusiGroupId(arw.Rule.GroupId), fmt.Sprintf("%v", arw.Rule.Id)).Inc()
}
logger.Infof("alert_eval_%d datasource_%d query:%+v, value:%v", arw.Rule.Id, arw.DatasourceId, query, value)
logger.Infof("rule_eval:%s query:%+v, value:%v", arw.Key(), query, value)
points := models.ConvertAnomalyPoints(value)
arw.Processor.Stats.GaugeQuerySeriesCount.WithLabelValues(
fmt.Sprintf("%v", arw.Rule.Id),
@@ -440,14 +440,14 @@ func (arw *AlertRuleWorker) VarFillingAfterQuery(query models.PromQuery, readerC
arw.Processor.Stats.CounterQueryDataTotal.WithLabelValues(fmt.Sprintf("%d", arw.DatasourceId), fmt.Sprintf("%d", arw.Rule.Id)).Inc()
value, _, err := readerClient.Query(context.Background(), curQuery, time.Now())
if err != nil {
logger.Errorf("alert_eval_%d datasource_%d promql:%s, error:%v", arw.Rule.Id, arw.DatasourceId, curQuery, err)
logger.Errorf("rule_eval:%s, promql:%s, error:%v", arw.Key(), curQuery, err)
continue
}
seqVals := getSamples(value)
// 得到参数变量的所有组合
paramPermutation, err := arw.getParamPermutation(param, ParamKeys, varToLabel, query.PromQl, readerClient)
if err != nil {
logger.Errorf("alert_eval_%d datasource_%d paramPermutation error:%v", arw.Rule.Id, arw.DatasourceId, err)
logger.Errorf("rule_eval:%s, paramPermutation error:%v", arw.Key(), err)
continue
}
// 判断哪些参数值符合条件
@@ -580,14 +580,14 @@ func (arw *AlertRuleWorker) getParamPermutation(paramVal map[string]models.Param
case "host":
hostIdents, err := arw.getHostIdents(paramQuery)
if err != nil {
logger.Errorf("alert_eval_%d datasource_%d fail to get host idents, error:%v", arw.Rule.Id, arw.DatasourceId, err)
logger.Errorf("rule_eval:%s, fail to get host idents, error:%v", arw.Key(), err)
break
}
params = hostIdents
case "device":
deviceIdents, err := arw.getDeviceIdents(paramQuery)
if err != nil {
logger.Errorf("alert_eval_%d datasource_%d fail to get device idents, error:%v", arw.Rule.Id, arw.DatasourceId, err)
logger.Errorf("rule_eval:%s, fail to get device idents, error:%v", arw.Key(), err)
break
}
params = deviceIdents
@@ -596,12 +596,12 @@ func (arw *AlertRuleWorker) getParamPermutation(paramVal map[string]models.Param
var query []string
err := json.Unmarshal(q, &query)
if err != nil {
logger.Errorf("alert_eval_%d datasource_%d query:%s fail to unmarshalling into string slice, error:%v", arw.Rule.Id, arw.DatasourceId, paramQuery.Query, err)
logger.Errorf("query:%s fail to unmarshalling into string slice, error:%v", paramQuery.Query, err)
}
if len(query) == 0 {
paramsKeyAllLabel, err := getParamKeyAllLabel(varToLabel[paramKey], originPromql, readerClient, arw.DatasourceId, arw.Rule.Id, arw.Processor.Stats)
if err != nil {
logger.Errorf("alert_eval_%d datasource_%d fail to getParamKeyAllLabel, error:%v query:%s", arw.Rule.Id, arw.DatasourceId, err, paramQuery.Query)
logger.Errorf("rule_eval:%s, fail to getParamKeyAllLabel, error:%v query:%s", arw.Key(), err, paramQuery.Query)
}
params = paramsKeyAllLabel
} else {
@@ -615,7 +615,7 @@ func (arw *AlertRuleWorker) getParamPermutation(paramVal map[string]models.Param
return nil, fmt.Errorf("param key: %s, params is empty", paramKey)
}
logger.Infof("alert_eval_%d datasource_%d paramKey: %s, params: %v", arw.Rule.Id, arw.DatasourceId, paramKey, params)
logger.Infof("rule_eval:%s paramKey: %s, params: %v", arw.Key(), paramKey, params)
paramMap[paramKey] = params
}
@@ -766,7 +766,7 @@ func (arw *AlertRuleWorker) GetHostAnomalyPoint(ruleConfig string) ([]models.Ano
var rule *models.HostRuleConfig
if err := json.Unmarshal([]byte(ruleConfig), &rule); err != nil {
logger.Errorf("alert_eval_%d datasource_%d rule_config:%s, error:%v", arw.Rule.Id, arw.DatasourceId, ruleConfig, err)
logger.Errorf("rule_eval:%s rule_config:%s, error:%v", arw.Key(), ruleConfig, err)
arw.Processor.Stats.CounterRuleEvalErrorTotal.WithLabelValues(fmt.Sprintf("%v", arw.Processor.DatasourceId()), GET_RULE_CONFIG, arw.Processor.BusiGroupCache.GetNameByBusiGroupId(arw.Rule.GroupId), fmt.Sprintf("%v", arw.Rule.Id)).Inc()
arw.Processor.Stats.GaugeQuerySeriesCount.WithLabelValues(
fmt.Sprintf("%v", arw.Rule.Id),
@@ -777,7 +777,7 @@ func (arw *AlertRuleWorker) GetHostAnomalyPoint(ruleConfig string) ([]models.Ano
}
if rule == nil {
logger.Errorf("alert_eval_%d datasource_%d rule_config:%s, error:rule is nil", arw.Rule.Id, arw.DatasourceId, ruleConfig)
logger.Errorf("rule_eval:%s rule_config:%s, error:rule is nil", arw.Key(), ruleConfig)
arw.Processor.Stats.CounterRuleEvalErrorTotal.WithLabelValues(fmt.Sprintf("%v", arw.Processor.DatasourceId()), GET_RULE_CONFIG, arw.Processor.BusiGroupCache.GetNameByBusiGroupId(arw.Rule.GroupId), fmt.Sprintf("%v", arw.Rule.Id)).Inc()
arw.Processor.Stats.GaugeQuerySeriesCount.WithLabelValues(
fmt.Sprintf("%v", arw.Rule.Id),
@@ -800,7 +800,7 @@ func (arw *AlertRuleWorker) GetHostAnomalyPoint(ruleConfig string) ([]models.Ano
// 如果是中心节点, 将不再上报数据的主机 engineName 为空的机器,也加入到 targets 中
missEngineIdents, exists = arw.Processor.TargetsOfAlertRuleCache.Get("", arw.Rule.Id)
if !exists {
logger.Debugf("alert_eval_%d datasource_%d targets not found engineName:%s", arw.Rule.Id, arw.DatasourceId, arw.Processor.EngineName)
logger.Debugf("rule_eval:%s targets not found engineName:%s", arw.Key(), arw.Processor.EngineName)
arw.Processor.Stats.CounterRuleEvalErrorTotal.WithLabelValues(fmt.Sprintf("%v", arw.Processor.DatasourceId()), QUERY_DATA, arw.Processor.BusiGroupCache.GetNameByBusiGroupId(arw.Rule.GroupId), fmt.Sprintf("%v", arw.Rule.Id)).Inc()
}
}
@@ -808,7 +808,7 @@ func (arw *AlertRuleWorker) GetHostAnomalyPoint(ruleConfig string) ([]models.Ano
engineIdents, exists = arw.Processor.TargetsOfAlertRuleCache.Get(arw.Processor.EngineName, arw.Rule.Id)
if !exists {
logger.Warningf("alert_eval_%d datasource_%d targets not found engineName:%s", arw.Rule.Id, arw.DatasourceId, arw.Processor.EngineName)
logger.Warningf("rule_eval:%s targets not found engineName:%s", arw.Key(), arw.Processor.EngineName)
arw.Processor.Stats.CounterRuleEvalErrorTotal.WithLabelValues(fmt.Sprintf("%v", arw.Processor.DatasourceId()), QUERY_DATA, arw.Processor.BusiGroupCache.GetNameByBusiGroupId(arw.Rule.GroupId), fmt.Sprintf("%v", arw.Rule.Id)).Inc()
}
idents = append(idents, engineIdents...)
@@ -835,7 +835,7 @@ func (arw *AlertRuleWorker) GetHostAnomalyPoint(ruleConfig string) ([]models.Ano
"",
).Set(float64(len(missTargets)))
logger.Debugf("alert_eval_%d datasource_%d missTargets:%v", arw.Rule.Id, arw.DatasourceId, missTargets)
logger.Debugf("rule_eval:%s missTargets:%v", arw.Key(), missTargets)
targets := arw.Processor.TargetCache.Gets(missTargets)
for _, target := range targets {
m := make(map[string]string)
@@ -844,7 +844,7 @@ func (arw *AlertRuleWorker) GetHostAnomalyPoint(ruleConfig string) ([]models.Ano
}
m["ident"] = target.Ident
lst = append(lst, models.NewAnomalyPoint(trigger.Type, m, now, float64(now-target.BeatTime), trigger.Severity))
lst = append(lst, models.NewAnomalyPoint(trigger.Type, m, now, float64(now-target.UpdateAt), trigger.Severity))
}
case "offset":
idents, exists := arw.Processor.TargetsOfAlertRuleCache.Get(arw.Processor.EngineName, arw.Rule.Id)
@@ -854,7 +854,7 @@ func (arw *AlertRuleWorker) GetHostAnomalyPoint(ruleConfig string) ([]models.Ano
fmt.Sprintf("%v", arw.Processor.DatasourceId()),
"",
).Set(0)
logger.Warningf("alert_eval_%d datasource_%d targets not found", arw.Rule.Id, arw.DatasourceId)
logger.Warningf("rule_eval:%s targets not found", arw.Key())
arw.Processor.Stats.CounterRuleEvalErrorTotal.WithLabelValues(fmt.Sprintf("%v", arw.Processor.DatasourceId()), QUERY_DATA, arw.Processor.BusiGroupCache.GetNameByBusiGroupId(arw.Rule.GroupId), fmt.Sprintf("%v", arw.Rule.Id)).Inc()
continue
}
@@ -873,7 +873,7 @@ func (arw *AlertRuleWorker) GetHostAnomalyPoint(ruleConfig string) ([]models.Ano
continue
}
if target, exists := targetMap[ident]; exists {
if now-target.BeatTime > 120 {
if now-target.UpdateAt > 120 {
// means this target is not a active host, do not check offset
continue
}
@@ -885,7 +885,7 @@ func (arw *AlertRuleWorker) GetHostAnomalyPoint(ruleConfig string) ([]models.Ano
}
}
logger.Debugf("alert_eval_%d datasource_%d offsetIdents:%v", arw.Rule.Id, arw.DatasourceId, offsetIdents)
logger.Debugf("rule_eval:%s offsetIdents:%v", arw.Key(), offsetIdents)
arw.Processor.Stats.GaugeQuerySeriesCount.WithLabelValues(
fmt.Sprintf("%v", arw.Rule.Id),
fmt.Sprintf("%v", arw.Processor.DatasourceId()),
@@ -912,7 +912,7 @@ func (arw *AlertRuleWorker) GetHostAnomalyPoint(ruleConfig string) ([]models.Ano
fmt.Sprintf("%v", arw.Processor.DatasourceId()),
"",
).Set(0)
logger.Warningf("alert_eval_%d datasource_%d targets not found", arw.Rule.Id, arw.DatasourceId)
logger.Warningf("rule_eval:%s targets not found", arw.Key())
arw.Processor.Stats.CounterRuleEvalErrorTotal.WithLabelValues(fmt.Sprintf("%v", arw.Processor.DatasourceId()), QUERY_DATA, arw.Processor.BusiGroupCache.GetNameByBusiGroupId(arw.Rule.GroupId), fmt.Sprintf("%v", arw.Rule.Id)).Inc()
continue
}
@@ -924,7 +924,7 @@ func (arw *AlertRuleWorker) GetHostAnomalyPoint(ruleConfig string) ([]models.Ano
missTargets = append(missTargets, ident)
}
}
logger.Debugf("alert_eval_%d datasource_%d missTargets:%v", arw.Rule.Id, arw.DatasourceId, missTargets)
logger.Debugf("rule_eval:%s missTargets:%v", arw.Key(), missTargets)
arw.Processor.Stats.GaugeQuerySeriesCount.WithLabelValues(
fmt.Sprintf("%v", arw.Rule.Id),
fmt.Sprintf("%v", arw.Processor.DatasourceId()),
@@ -1120,7 +1120,7 @@ func ProcessJoins(ruleId int64, trigger models.Trigger, seriesTagIndexes map[str
// 有 join 条件,按条件依次合并
if len(seriesTagIndexes) < len(trigger.Joins)+1 {
logger.Errorf("alert_eval_%d queries' count: %d not match join condition's count: %d", ruleId, len(seriesTagIndexes), len(trigger.Joins))
logger.Errorf("rule_eval rid:%d queries' count: %d not match join condition's count: %d", ruleId, len(seriesTagIndexes), len(trigger.Joins))
return nil
}
@@ -1156,7 +1156,7 @@ func ProcessJoins(ruleId int64, trigger models.Trigger, seriesTagIndexes map[str
lastRehashed = exclude(curRehashed, lastRehashed)
last = flatten(lastRehashed)
default:
logger.Warningf("alert_eval_%d join type:%s not support", ruleId, trigger.Joins[i].JoinType)
logger.Warningf("rule_eval rid:%d join type:%s not support", ruleId, trigger.Joins[i].JoinType)
}
}
return last
@@ -1276,7 +1276,7 @@ func (arw *AlertRuleWorker) VarFillingBeforeQuery(query models.PromQuery, reader
// 得到参数变量的所有组合
paramPermutation, err := arw.getParamPermutation(param, ParamKeys, varToLabel, query.PromQl, readerClient)
if err != nil {
logger.Errorf("alert_eval_%d datasource_%d paramPermutation error:%v", arw.Rule.Id, arw.DatasourceId, err)
logger.Errorf("rule_eval:%s, paramPermutation error:%v", arw.Key(), err)
continue
}
@@ -1304,10 +1304,10 @@ func (arw *AlertRuleWorker) VarFillingBeforeQuery(query models.PromQuery, reader
arw.Processor.Stats.CounterQueryDataTotal.WithLabelValues(fmt.Sprintf("%d", arw.DatasourceId), fmt.Sprintf("%d", arw.Rule.Id)).Inc()
value, _, err := readerClient.Query(context.Background(), promql, time.Now())
if err != nil {
logger.Errorf("alert_eval_%d datasource_%d promql:%s, error:%v", arw.Rule.Id, arw.DatasourceId, promql, err)
logger.Errorf("rule_eval:%s, promql:%s, error:%v", arw.Key(), promql, err)
return
}
logger.Infof("alert_eval_%d datasource_%d promql:%s, value:%+v", arw.Rule.Id, arw.DatasourceId, promql, value)
logger.Infof("rule_eval:%s, promql:%s, value:%+v", arw.Key(), promql, value)
points := models.ConvertAnomalyPoints(value)
if len(points) == 0 {
@@ -1446,7 +1446,7 @@ func (arw *AlertRuleWorker) GetAnomalyPoint(rule *models.AlertRule, dsId int64)
recoverPoints := []models.AnomalyPoint{}
ruleConfig := strings.TrimSpace(rule.RuleConfig)
if ruleConfig == "" {
logger.Warningf("alert_eval_%d datasource_%d ruleConfig is blank", rule.Id, dsId)
logger.Warningf("rule_eval:%d ruleConfig is blank", rule.Id)
arw.Processor.Stats.CounterRuleEvalErrorTotal.WithLabelValues(fmt.Sprintf("%v", arw.Processor.DatasourceId()), GET_RULE_CONFIG, arw.Processor.BusiGroupCache.GetNameByBusiGroupId(arw.Rule.GroupId), fmt.Sprintf("%v", arw.Rule.Id)).Inc()
arw.Processor.Stats.GaugeQuerySeriesCount.WithLabelValues(
fmt.Sprintf("%v", arw.Rule.Id),
@@ -1454,15 +1454,15 @@ func (arw *AlertRuleWorker) GetAnomalyPoint(rule *models.AlertRule, dsId int64)
"",
).Set(0)
return points, recoverPoints, fmt.Errorf("alert_eval_%d datasource_%d ruleConfig is blank", rule.Id, dsId)
return points, recoverPoints, fmt.Errorf("rule_eval:%d ruleConfig is blank", rule.Id)
}
var ruleQuery models.RuleQuery
err := json.Unmarshal([]byte(ruleConfig), &ruleQuery)
if err != nil {
logger.Warningf("alert_eval_%d datasource_%d promql parse error:%s", rule.Id, dsId, err.Error())
logger.Warningf("rule_eval:%d promql parse error:%s", rule.Id, err.Error())
arw.Processor.Stats.CounterRuleEvalErrorTotal.WithLabelValues(fmt.Sprintf("%v", arw.Processor.DatasourceId()), GET_RULE_CONFIG, arw.Processor.BusiGroupCache.GetNameByBusiGroupId(arw.Rule.GroupId), fmt.Sprintf("%v", arw.Rule.Id)).Inc()
return points, recoverPoints, fmt.Errorf("alert_eval_%d datasource_%d promql parse error:%s", rule.Id, dsId, err.Error())
return points, recoverPoints, fmt.Errorf("rule_eval:%d promql parse error:%s", rule.Id, err.Error())
}
arw.Inhibit = ruleQuery.Inhibit
@@ -1474,7 +1474,7 @@ func (arw *AlertRuleWorker) GetAnomalyPoint(rule *models.AlertRule, dsId int64)
plug, exists := dscache.DsCache.Get(rule.Cate, dsId)
if !exists {
logger.Warningf("alert_eval_%d datasource_%d not exists", rule.Id, dsId)
logger.Warningf("rule_eval rid:%d datasource:%d not exists", rule.Id, dsId)
arw.Processor.Stats.CounterRuleEvalErrorTotal.WithLabelValues(fmt.Sprintf("%v", arw.Processor.DatasourceId()), GET_CLIENT, arw.Processor.BusiGroupCache.GetNameByBusiGroupId(arw.Rule.GroupId), fmt.Sprintf("%v", arw.Rule.Id)).Inc()
arw.Processor.Stats.GaugeQuerySeriesCount.WithLabelValues(
@@ -1483,11 +1483,11 @@ func (arw *AlertRuleWorker) GetAnomalyPoint(rule *models.AlertRule, dsId int64)
fmt.Sprintf("%v", i),
).Set(-2)
return points, recoverPoints, fmt.Errorf("alert_eval_%d datasource_%d not exists", rule.Id, dsId)
return points, recoverPoints, fmt.Errorf("rule_eval:%d datasource:%d not exists", rule.Id, dsId)
}
if err = ExecuteQueryTemplate(rule.Cate, query, nil); err != nil {
logger.Warningf("alert_eval_%d datasource_%d execute query template error: %v", rule.Id, dsId, err)
logger.Warningf("rule_eval rid:%d execute query template error: %v", rule.Id, err)
arw.Processor.Stats.CounterRuleEvalErrorTotal.WithLabelValues(fmt.Sprintf("%v", arw.Processor.DatasourceId()), EXEC_TEMPLATE, arw.Processor.BusiGroupCache.GetNameByBusiGroupId(arw.Rule.GroupId), fmt.Sprintf("%v", arw.Rule.Id)).Inc()
arw.Processor.Stats.GaugeQuerySeriesCount.WithLabelValues(
fmt.Sprintf("%v", arw.Rule.Id),
@@ -1500,7 +1500,7 @@ func (arw *AlertRuleWorker) GetAnomalyPoint(rule *models.AlertRule, dsId int64)
series, err := plug.QueryData(ctx, query)
arw.Processor.Stats.CounterQueryDataTotal.WithLabelValues(fmt.Sprintf("%d", arw.DatasourceId), fmt.Sprintf("%d", rule.Id)).Inc()
if err != nil {
logger.Warningf("alert_eval_%d datasource_%d query data error: %v", rule.Id, dsId, err)
logger.Warningf("rule_eval rid:%d query data error: %v", rule.Id, err)
arw.Processor.Stats.CounterRuleEvalErrorTotal.WithLabelValues(fmt.Sprintf("%v", arw.Processor.DatasourceId()), GET_CLIENT, arw.Processor.BusiGroupCache.GetNameByBusiGroupId(arw.Rule.GroupId), fmt.Sprintf("%v", arw.Rule.Id)).Inc()
arw.Processor.Stats.GaugeQuerySeriesCount.WithLabelValues(
fmt.Sprintf("%v", arw.Rule.Id),
@@ -1508,7 +1508,7 @@ func (arw *AlertRuleWorker) GetAnomalyPoint(rule *models.AlertRule, dsId int64)
fmt.Sprintf("%v", i),
).Set(-1)
return points, recoverPoints, fmt.Errorf("alert_eval_%d datasource_%d query data error: %v", rule.Id, dsId, err)
return points, recoverPoints, fmt.Errorf("rule_eval:%d query data error: %v", rule.Id, err)
}
arw.Processor.Stats.GaugeQuerySeriesCount.WithLabelValues(
@@ -1518,7 +1518,7 @@ func (arw *AlertRuleWorker) GetAnomalyPoint(rule *models.AlertRule, dsId int64)
).Set(float64(len(series)))
// 此条日志很重要,是告警判断的现场值
logger.Infof("alert_eval_%d datasource_%d req:%+v resp:%v", rule.Id, dsId, query, series)
logger.Infof("rule_eval rid:%d req:%+v resp:%v", rule.Id, query, series)
for i := 0; i < len(series); i++ {
seriesHash := hash.GetHash(series[i].Metric, series[i].Ref)
tagHash := hash.GetTagHash(series[i].Metric)
@@ -1532,7 +1532,7 @@ func (arw *AlertRuleWorker) GetAnomalyPoint(rule *models.AlertRule, dsId int64)
}
ref, err := GetQueryRef(query)
if err != nil {
logger.Warningf("alert_eval_%d datasource_%d query:%+v get ref error:%s", rule.Id, dsId, query, err.Error())
logger.Warningf("rule_eval rid:%d query:%+v get ref error:%s", rule.Id, query, err.Error())
continue
}
seriesTagIndexes[ref] = seriesTagIndex
@@ -1542,7 +1542,7 @@ func (arw *AlertRuleWorker) GetAnomalyPoint(rule *models.AlertRule, dsId int64)
for _, query := range ruleQuery.Queries {
ref, unit, err := GetQueryRefAndUnit(query)
if err != nil {
logger.Warningf("alert_eval_%d datasource_%d query:%+v get ref and unit error:%s", rule.Id, dsId, query, err.Error())
logger.Warningf("rule_eval rid:%d query:%+v get ref and unit error:%s", rule.Id, query, err.Error())
continue
}
unitMap[ref] = unit
@@ -1565,12 +1565,12 @@ func (arw *AlertRuleWorker) GetAnomalyPoint(rule *models.AlertRule, dsId int64)
for _, seriesHash := range seriesHash {
series, exists := seriesStore[seriesHash]
if !exists {
logger.Warningf("alert_eval_%d datasource_%d series:%+v not found", rule.Id, dsId, series)
logger.Warningf("rule_eval rid:%d series:%+v not found", rule.Id, series)
continue
}
t, v, exists := series.Last()
if !exists {
logger.Warningf("alert_eval_%d datasource_%d series:%+v value not found", rule.Id, dsId, series)
logger.Warningf("rule_eval rid:%d series:%+v value not found", rule.Id, series)
continue
}
@@ -1601,12 +1601,12 @@ func (arw *AlertRuleWorker) GetAnomalyPoint(rule *models.AlertRule, dsId int64)
ts = int64(t)
sample = series
value = v
logger.Infof("alert_eval_%d datasource_%d origin series labels:%+v", rule.Id, dsId, series.Metric)
logger.Infof("rule_eval rid:%d origin series labels:%+v", rule.Id, series.Metric)
}
isTriggered := parser.CalcWithRid(trigger.Exp, m, rule.Id)
// 此条日志很重要,是告警判断的现场值
logger.Infof("alert_eval_%d datasource_%d trigger:%+v exp:%s res:%v m:%v", rule.Id, dsId, trigger, trigger.Exp, isTriggered, m)
logger.Infof("rule_eval rid:%d trigger:%+v exp:%s res:%v m:%v", rule.Id, trigger, trigger.Exp, isTriggered, m)
var values string
for k, v := range m {
@@ -1679,7 +1679,7 @@ func (arw *AlertRuleWorker) GetAnomalyPoint(rule *models.AlertRule, dsId int64)
// 检查是否超过 resolve_after 时间
if now-int64(lastTs) > int64(ruleQuery.NodataTrigger.ResolveAfter) {
logger.Infof("alert_eval_%d datasource_%d series:%+v resolve after %d seconds now:%d lastTs:%d", rule.Id, dsId, lastSeries, ruleQuery.NodataTrigger.ResolveAfter, now, int64(lastTs))
logger.Infof("rule_eval rid:%d series:%+v resolve after %d seconds now:%d lastTs:%d", rule.Id, lastSeries, ruleQuery.NodataTrigger.ResolveAfter, now, int64(lastTs))
delete(arw.LastSeriesStore, hash)
continue
}
@@ -1700,7 +1700,7 @@ func (arw *AlertRuleWorker) GetAnomalyPoint(rule *models.AlertRule, dsId int64)
TriggerType: models.TriggerTypeNodata,
}
points = append(points, point)
logger.Infof("alert_eval_%d datasource_%d nodata point:%+v", rule.Id, dsId, point)
logger.Infof("rule_eval rid:%d nodata point:%+v", rule.Id, point)
}
}

View File

@@ -41,28 +41,8 @@ func IsMuted(rule *models.AlertRule, event *models.AlertCurEvent, targetCache *m
// TimeSpanMuteStrategy 根据规则配置的告警生效时间段过滤,如果产生的告警不在规则配置的告警生效时间段内,则不告警,即被mute
// 时间范围左闭右开默认范围00:00-24:00
// 如果规则配置了时区,则在该时区下进行时间判断;如果时区为空,则使用系统时区
func TimeSpanMuteStrategy(rule *models.AlertRule, event *models.AlertCurEvent) bool {
// 确定使用的时区
var targetLoc *time.Location
var err error
timezone := rule.TimeZone
if timezone == "" {
// 如果时区为空,使用系统时区(保持原有逻辑)
targetLoc = time.Local
} else {
// 加载规则配置的时区
targetLoc, err = time.LoadLocation(timezone)
if err != nil {
// 如果时区加载失败,记录错误并使用系统时区
logger.Warningf("Failed to load timezone %s for rule %d, using system timezone: %v", timezone, rule.Id, err)
targetLoc = time.Local
}
}
// 将触发时间转换到目标时区
tm := time.Unix(event.TriggerTime, 0).In(targetLoc)
tm := time.Unix(event.TriggerTime, 0)
triggerTime := tm.Format("15:04")
triggerWeek := strconv.Itoa(int(tm.Weekday()))
@@ -122,7 +102,7 @@ func IdentNotExistsMuteStrategy(rule *models.AlertRule, event *models.AlertCurEv
// 如果是target_up的告警,且ident已经不存在了,直接过滤掉
// 这里的判断有点太粗暴了,但是目前没有更好的办法
if !exists && strings.Contains(rule.PromQl, "target_up") {
logger.Debugf("alert_eval_%d [IdentNotExistsMuteStrategy] mute: cluster:%s ident:%s", rule.Id, event.Cluster, ident)
logger.Debugf("[%s] mute: rule_eval:%d cluster:%s ident:%s", "IdentNotExistsMuteStrategy", rule.Id, event.Cluster, ident)
return true
}
return false
@@ -144,7 +124,7 @@ func BgNotMatchMuteStrategy(rule *models.AlertRule, event *models.AlertCurEvent,
// 对于包含ident的告警事件check一下ident所属bg和rule所属bg是否相同
// 如果告警规则选择了只在本BG生效那其他BG的机器就不能因此规则产生告警
if exists && !target.MatchGroupId(rule.GroupId) {
logger.Debugf("alert_eval_%d [BgNotMatchMuteStrategy] mute: cluster:%s", rule.Id, event.Cluster)
logger.Debugf("[%s] mute: rule_eval:%d cluster:%s", "BgNotMatchMuteStrategy", rule.Id, event.Cluster)
return true
}
return false

View File

@@ -55,7 +55,7 @@ func (c *EventDropConfig) Process(ctx *ctx.Context, wfCtx *models.WorkflowContex
logger.Infof("processor eventdrop result: %v", result)
if result == "true" {
wfCtx.Event = nil
logger.Infof("processor eventdrop drop event: %s", event.Hash)
logger.Infof("processor eventdrop drop event: %v", event)
return wfCtx, "drop event success", nil
}

View File

@@ -131,7 +131,7 @@ func (p *Processor) Handle(anomalyPoints []models.AnomalyPoint, from string, inh
p.inhibit = inhibit
cachedRule := p.alertRuleCache.Get(p.rule.Id)
if cachedRule == nil {
logger.Warningf("alert_eval_%d datasource_%d handle error: rule not found, maybe rule has been deleted, anomalyPoints:%+v", p.rule.Id, p.datasourceId, anomalyPoints)
logger.Warningf("process handle error: rule not found %+v rule_id:%d maybe rule has been deleted", anomalyPoints, p.rule.Id)
p.Stats.CounterRuleEvalErrorTotal.WithLabelValues(fmt.Sprintf("%v", p.DatasourceId()), "handle_event", p.BusiGroupCache.GetNameByBusiGroupId(p.rule.GroupId), fmt.Sprintf("%v", p.rule.Id)).Inc()
return
}
@@ -156,14 +156,14 @@ func (p *Processor) Handle(anomalyPoints []models.AnomalyPoint, from string, inh
eventCopy := event.DeepCopy()
event = dispatch.HandleEventPipeline(cachedRule.PipelineConfigs, eventCopy, event, dispatch.EventProcessorCache, p.ctx, cachedRule.Id, "alert_rule")
if event == nil {
logger.Infof("alert_eval_%d datasource_%d is muted drop by pipeline event:%s", p.rule.Id, p.datasourceId, eventCopy.Hash)
logger.Infof("rule_eval:%s is muted drop by pipeline event:%v", p.Key(), eventCopy)
continue
}
// event mute
isMuted, detail, muteId := mute.IsMuted(cachedRule, event, p.TargetCache, p.alertMuteCache)
if isMuted {
logger.Infof("alert_eval_%d datasource_%d is muted, detail:%s event:%s", p.rule.Id, p.datasourceId, detail, event.Hash)
logger.Infof("rule_eval:%s is muted, detail:%s event:%v", p.Key(), detail, event)
p.Stats.CounterMuteTotal.WithLabelValues(
fmt.Sprintf("%v", event.GroupName),
fmt.Sprintf("%v", p.rule.Id),
@@ -174,7 +174,7 @@ func (p *Processor) Handle(anomalyPoints []models.AnomalyPoint, from string, inh
}
if dispatch.EventMuteHook(event) {
logger.Infof("alert_eval_%d datasource_%d is muted by hook event:%s", p.rule.Id, p.datasourceId, event.Hash)
logger.Infof("rule_eval:%s is muted by hook event:%v", p.Key(), event)
p.Stats.CounterMuteTotal.WithLabelValues(
fmt.Sprintf("%v", event.GroupName),
fmt.Sprintf("%v", p.rule.Id),
@@ -247,7 +247,7 @@ func (p *Processor) BuildEvent(anomalyPoint models.AnomalyPoint, from string, no
if err := json.Unmarshal([]byte(p.rule.Annotations), &event.AnnotationsJSON); err != nil {
event.AnnotationsJSON = make(map[string]string) // 解析失败时使用空 map
logger.Warningf("alert_eval_%d datasource_%d unmarshal annotations json failed: %v", p.rule.Id, p.datasourceId, err)
logger.Warningf("unmarshal annotations json failed: %v, rule: %d", err, p.rule.Id)
}
if event.TriggerValues != "" && strings.Count(event.TriggerValues, "$") > 1 {
@@ -272,7 +272,7 @@ func (p *Processor) BuildEvent(anomalyPoint models.AnomalyPoint, from string, no
pt.GroupNames = p.BusiGroupCache.GetNamesByBusiGroupIds(pt.GroupIds)
event.Target = pt
} else {
logger.Infof("alert_eval_%d datasource_%d fill event target error, ident: %s doesn't exist in cache.", p.rule.Id, p.datasourceId, event.TargetIdent)
logger.Infof("fill event target error, ident: %s doesn't exist in cache.", event.TargetIdent)
}
}
@@ -371,19 +371,19 @@ func (p *Processor) RecoverSingle(byRecover bool, hash string, now int64, value
lastPendingEvent, has := p.pendingsUseByRecover.Get(hash)
if !has {
// 说明没有产生过异常点,就不需要恢复了
logger.Debugf("alert_eval_%d datasource_%d event:%s do not has pending event, not recover", p.rule.Id, p.datasourceId, event.Hash)
logger.Debugf("rule_eval:%s event:%v do not has pending event, not recover", p.Key(), event)
return
}
if now-lastPendingEvent.LastEvalTime < cachedRule.RecoverDuration {
logger.Debugf("alert_eval_%d datasource_%d event:%s not recover", p.rule.Id, p.datasourceId, event.Hash)
logger.Debugf("rule_eval:%s event:%v not recover", p.Key(), event)
return
}
}
// 如果设置了恢复条件,则不能在此处恢复,必须依靠 recoverPoint 来恢复
if event.RecoverConfig.JudgeType != models.Origin && !byRecover {
logger.Debugf("alert_eval_%d datasource_%d event:%s not recover", p.rule.Id, p.datasourceId, event.Hash)
logger.Debugf("rule_eval:%s event:%v not recover", p.Key(), event)
return
}
@@ -460,7 +460,7 @@ func (p *Processor) handleEvent(events []*models.AlertCurEvent) {
func (p *Processor) inhibitEvent(events []*models.AlertCurEvent, highSeverity int) {
for _, event := range events {
if p.inhibit && event.Severity > highSeverity {
logger.Debugf("alert_eval_%d datasource_%d event:%s inhibit highSeverity:%d", p.rule.Id, p.datasourceId, event.Hash, highSeverity)
logger.Debugf("rule_eval:%s event:%+v inhibit highSeverity:%d", p.Key(), event, highSeverity)
continue
}
p.fireEvent(event)
@@ -476,7 +476,7 @@ func (p *Processor) fireEvent(event *models.AlertCurEvent) {
message := "unknown"
defer func() {
logger.Infof("alert_eval_%d datasource_%d event-hash-%s %s", p.rule.Id, p.datasourceId, event.Hash, message)
logger.Infof("rule_eval:%s event-hash-%s %s", p.Key(), event.Hash, message)
}()
if fired, has := p.fires.Get(event.Hash); has {
@@ -527,7 +527,7 @@ func (p *Processor) pushEventToQueue(e *models.AlertCurEvent) {
dispatch.LogEvent(e, "push_queue")
if !queue.EventQueue.PushFront(e) {
logger.Warningf("alert_eval_%d datasource_%d event_push_queue: queue is full, event:%s", p.rule.Id, p.datasourceId, e.Hash)
logger.Warningf("event_push_queue: queue is full, event:%+v", e)
p.Stats.CounterRuleEvalErrorTotal.WithLabelValues(fmt.Sprintf("%v", p.DatasourceId()), "push_event_queue", p.BusiGroupCache.GetNameByBusiGroupId(p.rule.GroupId), fmt.Sprintf("%v", p.rule.Id)).Inc()
}
}
@@ -538,7 +538,7 @@ func (p *Processor) RecoverAlertCurEventFromDb() {
curEvents, err := models.AlertCurEventGetByRuleIdAndDsId(p.ctx, p.rule.Id, p.datasourceId)
if err != nil {
logger.Errorf("alert_eval_%d datasource_%d recover event from db failed, err:%s", p.rule.Id, p.datasourceId, err)
logger.Errorf("recover event from db for rule:%s failed, err:%s", p.Key(), err)
p.Stats.CounterRuleEvalErrorTotal.WithLabelValues(fmt.Sprintf("%v", p.DatasourceId()), "get_recover_event", p.BusiGroupCache.GetNameByBusiGroupId(p.rule.GroupId), fmt.Sprintf("%v", p.rule.Id)).Inc()
p.fires = NewAlertCurEventMap(nil)
return

View File

@@ -22,11 +22,10 @@ type Router struct {
AlertStats *astats.Stats
Ctx *ctx.Context
ExternalProcessors *process.ExternalProcessorsType
LogDir string
}
func New(httpConfig httpx.Config, alert aconf.Alert, amc *memsto.AlertMuteCacheType, tc *memsto.TargetCacheType, bgc *memsto.BusiGroupCacheType,
astats *astats.Stats, ctx *ctx.Context, externalProcessors *process.ExternalProcessorsType, logDir string) *Router {
astats *astats.Stats, ctx *ctx.Context, externalProcessors *process.ExternalProcessorsType) *Router {
return &Router{
HTTP: httpConfig,
Alert: alert,
@@ -36,7 +35,6 @@ func New(httpConfig httpx.Config, alert aconf.Alert, amc *memsto.AlertMuteCacheT
AlertStats: astats,
Ctx: ctx,
ExternalProcessors: externalProcessors,
LogDir: logDir,
}
}
@@ -52,9 +50,6 @@ func (rt *Router) Config(r *gin.Engine) {
service.POST("/event", rt.pushEventToQueue)
service.POST("/event-persist", rt.eventPersist)
service.POST("/make-event", rt.makeEvent)
service.GET("/event-detail/:hash", rt.eventDetail)
service.GET("/alert-eval-detail/:id", rt.alertEvalDetail)
service.GET("/trace-logs/:traceid", rt.traceLogs)
}
func Render(c *gin.Context, data, msg interface{}) {

View File

@@ -1,28 +0,0 @@
package router
import (
"fmt"
"github.com/ccfos/nightingale/v6/pkg/loggrep"
"github.com/ccfos/nightingale/v6/pkg/ginx"
"github.com/gin-gonic/gin"
)
func (rt *Router) alertEvalDetail(c *gin.Context) {
id := ginx.UrlParamStr(c, "id")
if !loggrep.IsValidRuleID(id) {
ginx.Bomb(200, "invalid rule id format")
}
instance := fmt.Sprintf("%s:%d", rt.Alert.Heartbeat.IP, rt.HTTP.Port)
keyword := fmt.Sprintf("alert_eval_%s", id)
logs, err := loggrep.GrepLogDir(rt.LogDir, keyword)
ginx.Dangerous(err)
ginx.NewRender(c).Data(loggrep.EventDetailResp{
Logs: logs,
Instance: instance,
}, nil)
}

View File

@@ -13,9 +13,9 @@ import (
"github.com/ccfos/nightingale/v6/alert/queue"
"github.com/ccfos/nightingale/v6/models"
"github.com/ccfos/nightingale/v6/pkg/poster"
"github.com/ccfos/nightingale/v6/pkg/ginx"
"github.com/gin-gonic/gin"
"github.com/toolkits/pkg/ginx"
"github.com/toolkits/pkg/logger"
)
@@ -75,7 +75,7 @@ func (rt *Router) pushEventToQueue(c *gin.Context) {
dispatch.LogEvent(event, "http_push_queue")
if !queue.EventQueue.PushFront(event) {
msg := fmt.Sprintf("event:%s push_queue err: queue is full", event.Hash)
msg := fmt.Sprintf("event:%+v push_queue err: queue is full", event)
ginx.Bomb(200, msg)
logger.Warningf(msg)
}
@@ -105,21 +105,21 @@ func (rt *Router) makeEvent(c *gin.Context) {
for i := 0; i < len(events); i++ {
node, err := naming.DatasourceHashRing.GetNode(strconv.FormatInt(events[i].DatasourceId, 10), fmt.Sprintf("%d", events[i].RuleId))
if err != nil {
logger.Warningf("event(rule_id=%d ds_id=%d) get node err:%v", events[i].RuleId, events[i].DatasourceId, err)
logger.Warningf("event:%+v get node err:%v", events[i], err)
ginx.Bomb(200, "event node not exists")
}
if node != rt.Alert.Heartbeat.Endpoint {
err := forwardEvent(events[i], node)
if err != nil {
logger.Warningf("event(rule_id=%d ds_id=%d) forward err:%v", events[i].RuleId, events[i].DatasourceId, err)
logger.Warningf("event:%+v forward err:%v", events[i], err)
ginx.Bomb(200, "event forward error")
}
continue
}
ruleWorker, exists := rt.ExternalProcessors.GetExternalAlertRule(events[i].DatasourceId, events[i].RuleId)
logger.Debugf("handle event(rule_id=%d ds_id=%d) exists:%v", events[i].RuleId, events[i].DatasourceId, exists)
logger.Debugf("handle event:%+v exists:%v", events[i], exists)
if !exists {
ginx.Bomb(200, "rule not exists")
}
@@ -143,6 +143,6 @@ func forwardEvent(event *eventForm, instance string) error {
if err != nil {
return err
}
logger.Infof("forward event: result=succ url=%s code=%d rule_id=%d response=%s", ur, code, event.RuleId, string(res))
logger.Infof("forward event: result=succ url=%s code=%d event:%v response=%s", ur, code, event, string(res))
return nil
}

View File

@@ -1,27 +0,0 @@
package router
import (
"fmt"
"github.com/ccfos/nightingale/v6/pkg/loggrep"
"github.com/ccfos/nightingale/v6/pkg/ginx"
"github.com/gin-gonic/gin"
)
func (rt *Router) eventDetail(c *gin.Context) {
hash := ginx.UrlParamStr(c, "hash")
if !loggrep.IsValidHash(hash) {
ginx.Bomb(200, "invalid hash format")
}
instance := fmt.Sprintf("%s:%d", rt.Alert.Heartbeat.IP, rt.HTTP.Port)
logs, err := loggrep.GrepLogDir(rt.LogDir, hash)
ginx.Dangerous(err)
ginx.NewRender(c).Data(loggrep.EventDetailResp{
Logs: logs,
Instance: instance,
}, nil)
}

View File

@@ -1,28 +0,0 @@
package router
import (
"fmt"
"github.com/ccfos/nightingale/v6/pkg/ginx"
"github.com/ccfos/nightingale/v6/pkg/loggrep"
"github.com/gin-gonic/gin"
)
func (rt *Router) traceLogs(c *gin.Context) {
traceId := ginx.UrlParamStr(c, "traceid")
if !loggrep.IsValidTraceID(traceId) {
ginx.Bomb(200, "invalid trace id format")
}
instance := fmt.Sprintf("%s:%d", rt.Alert.Heartbeat.IP, rt.HTTP.Port)
keyword := "trace_id=" + traceId
logs, err := loggrep.GrepLatestLogFiles(rt.LogDir, keyword)
ginx.Dangerous(err)
ginx.NewRender(c).Data(loggrep.EventDetailResp{
Logs: logs,
Instance: instance,
}, nil)
}

View File

@@ -205,6 +205,6 @@ func PushCallbackEvent(ctx *ctx.Context, webhook *models.Webhook, event *models.
succ := queue.eventQueue.Push(event)
if !succ {
logger.Warningf("Write channel(%s) full, current channel size: %d event:%s", webhook.Url, queue.eventQueue.Len(), event.Hash)
logger.Warningf("Write channel(%s) full, current channel size: %d event:%v", webhook.Url, queue.eventQueue.Len(), event)
}
}

View File

@@ -30,14 +30,14 @@ type IbexCallBacker struct {
func (c *IbexCallBacker) CallBack(ctx CallBackContext) {
if len(ctx.CallBackURL) == 0 || len(ctx.Events) == 0 {
logger.Warningf("event_callback_ibex: url or events is empty, url: %s", ctx.CallBackURL)
logger.Warningf("event_callback_ibex: url or events is empty, url: %s, events: %+v", ctx.CallBackURL, ctx.Events)
return
}
event := ctx.Events[0]
if event.IsRecovered {
logger.Infof("event_callback_ibex: event is recovered, event: %s", event.Hash)
logger.Infof("event_callback_ibex: event is recovered, event: %+v", event)
return
}
@@ -45,9 +45,9 @@ func (c *IbexCallBacker) CallBack(ctx CallBackContext) {
}
func (c *IbexCallBacker) handleIbex(ctx *ctx.Context, url string, event *models.AlertCurEvent) {
logger.Infof("event_callback_ibex: url: %s, event: %s", url, event.Hash)
logger.Infof("event_callback_ibex: url: %s, event: %+v", url, event)
if imodels.DB() == nil && ctx.IsCenter {
logger.Warningf("event_callback_ibex: db is nil, event: %s", event.Hash)
logger.Warningf("event_callback_ibex: db is nil, event: %+v", event)
return
}
@@ -66,7 +66,7 @@ func (c *IbexCallBacker) handleIbex(ctx *ctx.Context, url string, event *models.
id, err := strconv.ParseInt(idstr, 10, 64)
if err != nil {
logger.Errorf("event_callback_ibex: failed to parse url: %s event: %s", url, event.Hash)
logger.Errorf("event_callback_ibex: failed to parse url: %s event: %+v", url, event)
return
}
@@ -82,7 +82,7 @@ func (c *IbexCallBacker) handleIbex(ctx *ctx.Context, url string, event *models.
}
if host == "" {
logger.Errorf("event_callback_ibex: failed to get host, id: %d, event: %s", id, event.Hash)
logger.Errorf("event_callback_ibex: failed to get host, id: %d, event: %+v", id, event)
return
}
@@ -92,11 +92,11 @@ func (c *IbexCallBacker) handleIbex(ctx *ctx.Context, url string, event *models.
func CallIbex(ctx *ctx.Context, id int64, host string,
taskTplCache *memsto.TaskTplCache, targetCache *memsto.TargetCacheType,
userCache *memsto.UserCacheType, event *models.AlertCurEvent, args string) (int64, error) {
logger.Infof("event_callback_ibex: id: %d, host: %s, args: %s, event: %s", id, host, args, event.Hash)
logger.Infof("event_callback_ibex: id: %d, host: %s, args: %s, event: %+v", id, host, args, event)
tpl := taskTplCache.Get(id)
if tpl == nil {
err := fmt.Errorf("event_callback_ibex: no such tpl(%d), event: %s", id, event.Hash)
err := fmt.Errorf("event_callback_ibex: no such tpl(%d), event: %+v", id, event)
logger.Errorf("%s", err)
return 0, err
}
@@ -104,13 +104,13 @@ func CallIbex(ctx *ctx.Context, id int64, host string,
// tpl.GroupId - host - account 三元组校验权限
can, err := CanDoIbex(tpl.UpdateBy, tpl, host, targetCache, userCache)
if err != nil {
err = fmt.Errorf("event_callback_ibex: check perm fail: %v, event: %s", err, event.Hash)
err = fmt.Errorf("event_callback_ibex: check perm fail: %v, event: %+v", err, event)
logger.Errorf("%s", err)
return 0, err
}
if !can {
err = fmt.Errorf("event_callback_ibex: user(%s) no permission, event: %s", tpl.UpdateBy, event.Hash)
err = fmt.Errorf("event_callback_ibex: user(%s) no permission, event: %+v", tpl.UpdateBy, event)
logger.Errorf("%s", err)
return 0, err
}
@@ -136,7 +136,7 @@ func CallIbex(ctx *ctx.Context, id int64, host string,
tags, err := json.Marshal(tagsMap)
if err != nil {
err = fmt.Errorf("event_callback_ibex: failed to marshal tags to json: %v, event: %s", tagsMap, event.Hash)
err = fmt.Errorf("event_callback_ibex: failed to marshal tags to json: %v, event: %+v", tagsMap, event)
logger.Errorf("%s", err)
return 0, err
}
@@ -164,7 +164,7 @@ func CallIbex(ctx *ctx.Context, id int64, host string,
id, err = TaskAdd(in, tpl.UpdateBy, ctx.IsCenter)
if err != nil {
err = fmt.Errorf("event_callback_ibex: call ibex fail: %v, event: %s", err, event.Hash)
err = fmt.Errorf("event_callback_ibex: call ibex fail: %v, event: %+v", err, event)
logger.Errorf("%s", err)
return 0, err
}
@@ -187,7 +187,7 @@ func CallIbex(ctx *ctx.Context, id int64, host string,
}
if err = record.Add(ctx); err != nil {
err = fmt.Errorf("event_callback_ibex: persist task_record fail: %v, event: %s", err, event.Hash)
err = fmt.Errorf("event_callback_ibex: persist task_record fail: %v, event: %+v", err, event)
logger.Errorf("%s", err)
return id, err
}

View File

@@ -72,7 +72,7 @@ func sendWebhook(webhook *models.Webhook, event interface{}, stats *astats.Stats
}
bs, err := json.Marshal(event)
if err != nil {
logger.Errorf("%s alertingWebhook failed to marshal event err:%v", channel, err)
logger.Errorf("%s alertingWebhook failed to marshal event:%+v err:%v", channel, event, err)
return false, "", err
}
@@ -145,7 +145,7 @@ func SingleSendWebhooks(ctx *ctx.Context, webhooks map[string]*models.Webhook, e
func BatchSendWebhooks(ctx *ctx.Context, webhooks map[string]*models.Webhook, event *models.AlertCurEvent, stats *astats.Stats) {
for _, conf := range webhooks {
logger.Infof("push event:%s to queue:%v", event.Hash, conf)
logger.Infof("push event:%+v to queue:%v", event, conf)
PushEvent(ctx, conf, event, stats)
}
}
@@ -183,7 +183,7 @@ func PushEvent(ctx *ctx.Context, webhook *models.Webhook, event *models.AlertCur
succ := queue.eventQueue.Push(event)
if !succ {
stats.AlertNotifyErrorTotal.WithLabelValues("push_event_queue").Inc()
logger.Warningf("Write channel(%s) full, current channel size: %d event:%s", webhook.Url, queue.eventQueue.Len(), event.Hash)
logger.Warningf("Write channel(%s) full, current channel size: %d event:%v", webhook.Url, queue.eventQueue.Len(), event)
}
}

View File

@@ -136,10 +136,10 @@ func Initialize(configDir string, cryptoKey string) (func(), error) {
go cron.CleanNotifyRecord(ctx, config.Center.CleanNotifyRecordDay)
go cron.CleanPipelineExecution(ctx, config.Center.CleanPipelineExecutionDay)
alertrtRouter := alertrt.New(config.HTTP, config.Alert, alertMuteCache, targetCache, busiGroupCache, alertStats, ctx, externalProcessors, config.Log.Dir)
alertrtRouter := alertrt.New(config.HTTP, config.Alert, alertMuteCache, targetCache, busiGroupCache, alertStats, ctx, externalProcessors)
centerRouter := centerrt.New(config.HTTP, config.Center, config.Alert, config.Ibex,
cconf.Operations, dsCache, notifyConfigCache, promClients,
redis, sso, ctx, metas, idents, targetCache, userCache, userGroupCache, userTokenCache, config.Log.Dir)
redis, sso, ctx, metas, idents, targetCache, userCache, userGroupCache, userTokenCache)
pushgwRouter := pushgwrt.New(config.HTTP, config.Pushgw, config.Alert, targetCache, busiGroupCache, idents, metas, writers, ctx)
r := httpx.GinEngine(config.Global.RunMode, config.HTTP, configCvalCache.PrintBodyPaths, configCvalCache.PrintAccessLog)

View File

@@ -24,11 +24,11 @@ import (
"github.com/ccfos/nightingale/v6/prom"
"github.com/ccfos/nightingale/v6/pushgw/idents"
"github.com/ccfos/nightingale/v6/storage"
"github.com/ccfos/nightingale/v6/pkg/ginx"
"gorm.io/gorm"
"github.com/gin-gonic/gin"
"github.com/rakyll/statik/fs"
"github.com/toolkits/pkg/ginx"
"github.com/toolkits/pkg/logger"
"github.com/toolkits/pkg/runner"
)
@@ -51,7 +51,6 @@ type Router struct {
UserGroupCache *memsto.UserGroupCacheType
UserTokenCache *memsto.UserTokenCacheType
Ctx *ctx.Context
LogDir string
HeartbeatHook HeartbeatHookFunc
TargetDeleteHook models.TargetDeleteHookFunc
@@ -62,7 +61,7 @@ func New(httpConfig httpx.Config, center cconf.Center, alert aconf.Alert, ibex c
operations cconf.Operation, ds *memsto.DatasourceCacheType, ncc *memsto.NotifyConfigCacheType,
pc *prom.PromClientMap, redis storage.Redis,
sso *sso.SsoClient, ctx *ctx.Context, metaSet *metas.Set, idents *idents.Set,
tc *memsto.TargetCacheType, uc *memsto.UserCacheType, ugc *memsto.UserGroupCacheType, utc *memsto.UserTokenCacheType, logDir string) *Router {
tc *memsto.TargetCacheType, uc *memsto.UserCacheType, ugc *memsto.UserGroupCacheType, utc *memsto.UserTokenCacheType) *Router {
return &Router{
HTTP: httpConfig,
Center: center,
@@ -81,7 +80,6 @@ func New(httpConfig httpx.Config, center cconf.Center, alert aconf.Alert, ibex c
UserGroupCache: ugc,
UserTokenCache: utc,
Ctx: ctx,
LogDir: logDir,
HeartbeatHook: func(ident string) map[string]interface{} { return nil },
TargetDeleteHook: func(tx *gorm.DB, idents []string) error { return nil },
AlertRuleModifyHook: func(ar *models.AlertRule) {},
@@ -370,7 +368,6 @@ func (rt *Router) Config(r *gin.Engine) {
// pages.GET("/alert-rules/builtin/alerts-cates", rt.auth(), rt.user(), rt.builtinAlertCateGets)
// pages.GET("/alert-rules/builtin/list", rt.auth(), rt.user(), rt.builtinAlertRules)
pages.GET("/alert-rules/callbacks", rt.auth(), rt.user(), rt.alertRuleCallbacks)
pages.GET("/timezones", rt.auth(), rt.user(), rt.timezonesGet)
pages.GET("/busi-groups/alert-rules", rt.auth(), rt.user(), rt.perm("/alert-rules"), rt.alertRuleGetsByGids)
pages.GET("/busi-group/:id/alert-rules", rt.auth(), rt.user(), rt.perm("/alert-rules"), rt.alertRuleGets)
@@ -419,9 +416,6 @@ func (rt *Router) Config(r *gin.Engine) {
pages.GET("/alert-cur-event/:eid", rt.alertCurEventGet)
pages.GET("/alert-his-event/:eid", rt.alertHisEventGet)
pages.GET("/event-notify-records/:eid", rt.notificationRecordList)
pages.GET("/event-detail/:hash", rt.eventDetailPage)
pages.GET("/alert-eval-detail/:id", rt.alertEvalDetailPage)
pages.GET("/trace-logs/:traceid", rt.traceLogsPage)
// card logic
pages.GET("/alert-cur-events/list", rt.auth(), rt.user(), rt.alertCurEventsList)

View File

@@ -4,9 +4,9 @@ import (
"net/http"
"github.com/ccfos/nightingale/v6/models"
"github.com/ccfos/nightingale/v6/pkg/ginx"
"github.com/gin-gonic/gin"
"github.com/toolkits/pkg/ginx"
)
// no param

View File

@@ -10,9 +10,9 @@ import (
"github.com/ccfos/nightingale/v6/models"
"github.com/ccfos/nightingale/v6/pkg/ctx"
"github.com/ccfos/nightingale/v6/pkg/strx"
"github.com/ccfos/nightingale/v6/pkg/ginx"
"github.com/gin-gonic/gin"
"github.com/toolkits/pkg/ginx"
"github.com/toolkits/pkg/logger"
)

View File

@@ -1,168 +0,0 @@
package router
import (
"encoding/json"
"fmt"
"io"
"net/http"
"sort"
"strconv"
"strings"
"time"
"github.com/ccfos/nightingale/v6/models"
"github.com/ccfos/nightingale/v6/pkg/loggrep"
"github.com/ccfos/nightingale/v6/pkg/ginx"
"github.com/gin-gonic/gin"
)
// alertEvalDetailPage renders an HTML log viewer page for alert rule evaluation logs.
func (rt *Router) alertEvalDetailPage(c *gin.Context) {
id := ginx.UrlParamStr(c, "id")
if !loggrep.IsValidRuleID(id) {
c.String(http.StatusBadRequest, "invalid rule id format")
return
}
logs, instance, err := rt.getAlertEvalLogs(id)
if err != nil {
c.String(http.StatusInternalServerError, "Error: %v", err)
return
}
c.Header("Content-Type", "text/html; charset=utf-8")
err = loggrep.RenderAlertEvalHTML(c.Writer, loggrep.AlertEvalPageData{
RuleID: id,
Instance: instance,
Logs: logs,
Total: len(logs),
})
if err != nil {
c.String(http.StatusInternalServerError, "render error: %v", err)
}
}
// alertEvalDetailJSON returns JSON for alert rule evaluation logs.
func (rt *Router) alertEvalDetailJSON(c *gin.Context) {
id := ginx.UrlParamStr(c, "id")
if !loggrep.IsValidRuleID(id) {
ginx.Bomb(200, "invalid rule id format")
}
logs, instance, err := rt.getAlertEvalLogs(id)
ginx.Dangerous(err)
ginx.NewRender(c).Data(loggrep.EventDetailResp{
Logs: logs,
Instance: instance,
}, nil)
}
// getAlertEvalLogs resolves the target instance(s) and retrieves alert eval logs.
func (rt *Router) getAlertEvalLogs(id string) ([]string, string, error) {
ruleId, _ := strconv.ParseInt(id, 10, 64)
rule, err := models.AlertRuleGetById(rt.Ctx, ruleId)
if err != nil {
return nil, "", err
}
if rule == nil {
return nil, "", fmt.Errorf("no such alert rule")
}
instance := fmt.Sprintf("%s:%d", rt.Alert.Heartbeat.IP, rt.HTTP.Port)
keyword := fmt.Sprintf("alert_eval_%s", id)
// Get datasource IDs for this rule
dsIds := rt.DatasourceCache.GetIDsByDsCateAndQueries(rule.Cate, rule.DatasourceQueries)
if len(dsIds) == 0 {
// No datasources found (e.g. host rule), try local grep
logs, err := loggrep.GrepLogDir(rt.LogDir, keyword)
return logs, instance, err
}
// Find unique target nodes via hash ring, with DB fallback
nodeSet := make(map[string]struct{})
for _, dsId := range dsIds {
node, err := rt.getNodeForDatasource(dsId, id)
if err != nil {
continue
}
nodeSet[node] = struct{}{}
}
if len(nodeSet) == 0 {
// Hash ring not ready, grep locally
logs, err := loggrep.GrepLogDir(rt.LogDir, keyword)
return logs, instance, err
}
// Collect logs from all target nodes
var allLogs []string
var instances []string
for node := range nodeSet {
if node == instance {
logs, err := loggrep.GrepLogDir(rt.LogDir, keyword)
if err == nil {
allLogs = append(allLogs, logs...)
instances = append(instances, node)
}
} else {
logs, nodeAddr, err := rt.forwardAlertEvalDetail(node, id)
if err == nil {
allLogs = append(allLogs, logs...)
instances = append(instances, nodeAddr)
}
}
}
// Sort logs by timestamp descending
sort.Slice(allLogs, func(i, j int) bool {
return allLogs[i] > allLogs[j]
})
if len(allLogs) > loggrep.MaxLogLines {
allLogs = allLogs[:loggrep.MaxLogLines]
}
return allLogs, strings.Join(instances, ", "), nil
}
func (rt *Router) forwardAlertEvalDetail(node, id string) ([]string, string, error) {
url := fmt.Sprintf("http://%s/v1/n9e/alert-eval-detail/%s", node, id)
req, err := http.NewRequest("GET", url, nil)
if err != nil {
return nil, node, err
}
for user, pass := range rt.HTTP.APIForService.BasicAuth {
req.SetBasicAuth(user, pass)
break
}
client := &http.Client{Timeout: 15 * time.Second}
resp, err := client.Do(req)
if err != nil {
return nil, node, fmt.Errorf("forward to %s failed: %v", node, err)
}
defer resp.Body.Close()
body, err := io.ReadAll(io.LimitReader(resp.Body, 10*1024*1024)) // 10MB limit
if err != nil {
return nil, node, err
}
var result struct {
Dat loggrep.EventDetailResp `json:"dat"`
Err string `json:"err"`
}
if err := json.Unmarshal(body, &result); err != nil {
return nil, node, err
}
if result.Err != "" {
return nil, node, fmt.Errorf("%s", result.Err)
}
return result.Dat.Logs, result.Dat.Instance, nil
}

View File

@@ -8,9 +8,9 @@ import (
"github.com/ccfos/nightingale/v6/models"
"github.com/ccfos/nightingale/v6/pkg/ctx"
"github.com/ccfos/nightingale/v6/pkg/ginx"
"github.com/gin-gonic/gin"
"github.com/toolkits/pkg/ginx"
"github.com/toolkits/pkg/logger"
"golang.org/x/exp/slices"
)

View File

@@ -16,12 +16,12 @@ import (
"github.com/ccfos/nightingale/v6/pkg/strx"
"github.com/ccfos/nightingale/v6/pushgw/pconf"
"github.com/ccfos/nightingale/v6/pushgw/writer"
"github.com/ccfos/nightingale/v6/pkg/ginx"
"github.com/gin-gonic/gin"
"github.com/jinzhu/copier"
"github.com/pkg/errors"
"github.com/prometheus/prometheus/prompb"
"github.com/toolkits/pkg/ginx"
"github.com/toolkits/pkg/i18n"
)
@@ -882,27 +882,3 @@ func (rt *Router) batchAlertRuleClone(c *gin.Context) {
ginx.NewRender(c).Data(reterr, nil)
}
func (rt *Router) timezonesGet(c *gin.Context) {
// 返回常用时区列表(按时差去重,每个时差只保留一个代表性时区)
timezones := []string{
"UTC",
"Asia/Shanghai", // UTC+8 (代表 Asia/Hong_Kong, Asia/Singapore 等)
"Asia/Tokyo", // UTC+9 (代表 Asia/Seoul 等)
"Asia/Dubai", // UTC+4
"Asia/Kolkata", // UTC+5:30
"Asia/Bangkok", // UTC+7 (代表 Asia/Jakarta 等)
"Europe/London", // UTC+0 (代表 UTC)
"Europe/Paris", // UTC+1 (代表 Europe/Berlin, Europe/Rome, Europe/Madrid 等)
"Europe/Moscow", // UTC+3
"America/New_York", // UTC-5 (代表 America/Toronto 等)
"America/Chicago", // UTC-6 (代表 America/Mexico_City 等)
"America/Denver", // UTC-7
"America/Los_Angeles", // UTC-8
"America/Sao_Paulo", // UTC-3
"Australia/Sydney", // UTC+10 (代表 Australia/Melbourne 等)
"Pacific/Auckland", // UTC+12
}
ginx.NewRender(c).Data(timezones, nil)
}

View File

@@ -9,9 +9,9 @@ import (
"github.com/ccfos/nightingale/v6/alert/common"
"github.com/ccfos/nightingale/v6/models"
"github.com/ccfos/nightingale/v6/pkg/strx"
"github.com/ccfos/nightingale/v6/pkg/ginx"
"github.com/gin-gonic/gin"
"github.com/toolkits/pkg/ginx"
"github.com/toolkits/pkg/i18n"
)

View File

@@ -7,9 +7,9 @@ import (
"github.com/ccfos/nightingale/v6/models"
"github.com/ccfos/nightingale/v6/pkg/strx"
"github.com/ccfos/nightingale/v6/pkg/ginx"
"github.com/gin-gonic/gin"
"github.com/toolkits/pkg/ginx"
"github.com/toolkits/pkg/i18n"
)

View File

@@ -8,10 +8,10 @@ import (
"strings"
"github.com/ccfos/nightingale/v6/models"
"github.com/ccfos/nightingale/v6/pkg/ginx"
"github.com/gin-gonic/gin"
"github.com/toolkits/pkg/file"
"github.com/toolkits/pkg/ginx"
"github.com/toolkits/pkg/logger"
"github.com/toolkits/pkg/runner"
)

View File

@@ -5,9 +5,9 @@ import (
"github.com/ccfos/nightingale/v6/models"
"github.com/ccfos/nightingale/v6/pkg/ctx"
"github.com/ccfos/nightingale/v6/pkg/ginx"
"github.com/gin-gonic/gin"
"github.com/toolkits/pkg/ginx"
"gorm.io/gorm"
)

View File

@@ -3,8 +3,8 @@ package router
import (
"github.com/ccfos/nightingale/v6/models"
"github.com/ccfos/nightingale/v6/pkg/prom"
"github.com/ccfos/nightingale/v6/pkg/ginx"
"github.com/gin-gonic/gin"
"github.com/toolkits/pkg/ginx"
)
func (rt *Router) metricFilterGets(c *gin.Context) {

View File

@@ -7,9 +7,9 @@ import (
"github.com/ccfos/nightingale/v6/center/integration"
"github.com/ccfos/nightingale/v6/models"
"github.com/ccfos/nightingale/v6/pkg/ginx"
"github.com/gin-gonic/gin"
"github.com/toolkits/pkg/ginx"
"github.com/toolkits/pkg/i18n"
)

View File

@@ -9,8 +9,8 @@ import (
"github.com/BurntSushi/toml"
"github.com/ccfos/nightingale/v6/center/integration"
"github.com/ccfos/nightingale/v6/models"
"github.com/ccfos/nightingale/v6/pkg/ginx"
"github.com/gin-gonic/gin"
"github.com/toolkits/pkg/ginx"
"github.com/toolkits/pkg/i18n"
)

View File

@@ -5,9 +5,9 @@ import (
"github.com/ccfos/nightingale/v6/models"
"github.com/ccfos/nightingale/v6/pkg/strx"
"github.com/ccfos/nightingale/v6/pkg/ginx"
"github.com/gin-gonic/gin"
"github.com/toolkits/pkg/ginx"
"github.com/toolkits/pkg/logger"
)

View File

@@ -5,9 +5,9 @@ import (
"time"
"github.com/ccfos/nightingale/v6/storage"
"github.com/ccfos/nightingale/v6/pkg/ginx"
"github.com/gin-gonic/gin"
captcha "github.com/mojocn/base64Captcha"
"github.com/toolkits/pkg/ginx"
"github.com/toolkits/pkg/logger"
)

View File

@@ -5,9 +5,9 @@ import (
"github.com/ccfos/nightingale/v6/models"
"github.com/ccfos/nightingale/v6/pkg/strx"
"github.com/ccfos/nightingale/v6/pkg/ginx"
"github.com/gin-gonic/gin"
"github.com/toolkits/pkg/ginx"
)
func (rt *Router) chartShareGets(c *gin.Context) {

View File

@@ -4,9 +4,9 @@ import (
"encoding/json"
"github.com/ccfos/nightingale/v6/models"
"github.com/ccfos/nightingale/v6/pkg/ginx"
"github.com/gin-gonic/gin"
"github.com/toolkits/pkg/ginx"
)
func (rt *Router) notifyChannelsGets(c *gin.Context) {

View File

@@ -4,9 +4,9 @@ import (
"time"
"github.com/ccfos/nightingale/v6/models"
"github.com/ccfos/nightingale/v6/pkg/ginx"
"github.com/gin-gonic/gin"
"github.com/toolkits/pkg/ginx"
)
const EMBEDDEDDASHBOARD = "embedded-dashboards"

View File

@@ -2,9 +2,9 @@ package router
import (
"github.com/ccfos/nightingale/v6/pkg/secu"
"github.com/ccfos/nightingale/v6/pkg/ginx"
"github.com/gin-gonic/gin"
"github.com/toolkits/pkg/ginx"
)
type confPropCrypto struct {

View File

@@ -7,9 +7,9 @@ import (
"github.com/ccfos/nightingale/v6/models"
"github.com/ccfos/nightingale/v6/pkg/ctx"
"github.com/ccfos/nightingale/v6/pkg/ginx"
"github.com/gin-gonic/gin"
"github.com/toolkits/pkg/ginx"
)
func checkAnnotationPermission(c *gin.Context, ctx *ctx.Context, dashboardId int64) {

View File

@@ -15,8 +15,8 @@ import (
"github.com/ccfos/nightingale/v6/datasource/opensearch"
"github.com/ccfos/nightingale/v6/dskit/clickhouse"
"github.com/ccfos/nightingale/v6/models"
"github.com/ccfos/nightingale/v6/pkg/ginx"
"github.com/gin-gonic/gin"
"github.com/toolkits/pkg/ginx"
"github.com/toolkits/pkg/i18n"
"github.com/toolkits/pkg/logger"
)
@@ -276,7 +276,7 @@ func (rt *Router) datasourceUpsert(c *gin.Context) {
}
err = req.Add(rt.Ctx)
} else {
err = req.Update(rt.Ctx, "name", "identifier", "description", "cluster_name", "settings", "http", "auth", "updated_by", "updated_at", "is_default", "weight")
err = req.Update(rt.Ctx, "name", "identifier", "description", "cluster_name", "settings", "http", "auth", "updated_by", "updated_at", "is_default")
}
Render(c, nil, err)

View File

@@ -6,10 +6,10 @@ import (
"github.com/ccfos/nightingale/v6/dscache"
"github.com/ccfos/nightingale/v6/dskit/types"
"github.com/ccfos/nightingale/v6/models"
"github.com/ccfos/nightingale/v6/pkg/ginx"
"github.com/ccfos/nightingale/v6/pkg/logx"
"github.com/gin-gonic/gin"
"github.com/toolkits/pkg/ginx"
"github.com/toolkits/pkg/logger"
)
func (rt *Router) ShowDatabases(c *gin.Context) {
@@ -18,7 +18,7 @@ func (rt *Router) ShowDatabases(c *gin.Context) {
plug, exists := dscache.DsCache.Get(f.Cate, f.DatasourceId)
if !exists {
logx.Warningf(c.Request.Context(), "cluster:%d not exists", f.DatasourceId)
logger.Warningf("cluster:%d not exists", f.DatasourceId)
ginx.Bomb(200, "cluster not exists")
}
@@ -48,7 +48,7 @@ func (rt *Router) ShowTables(c *gin.Context) {
plug, exists := dscache.DsCache.Get(f.Cate, f.DatasourceId)
if !exists {
logx.Warningf(c.Request.Context(), "cluster:%d not exists", f.DatasourceId)
logger.Warningf("cluster:%d not exists", f.DatasourceId)
ginx.Bomb(200, "cluster not exists")
}
@@ -78,7 +78,7 @@ func (rt *Router) DescribeTable(c *gin.Context) {
plug, exists := dscache.DsCache.Get(f.Cate, f.DatasourceId)
if !exists {
logx.Warningf(c.Request.Context(), "cluster:%d not exists", f.DatasourceId)
logger.Warningf("cluster:%d not exists", f.DatasourceId)
ginx.Bomb(200, "cluster not exists")
}
// 只接受一个入参

View File

@@ -5,9 +5,9 @@ import (
"github.com/ccfos/nightingale/v6/models"
"github.com/ccfos/nightingale/v6/pkg/ctx"
"github.com/ccfos/nightingale/v6/pkg/ginx"
"github.com/gin-gonic/gin"
"github.com/toolkits/pkg/ginx"
)
func (rt *Router) embeddedProductGets(c *gin.Context) {

View File

@@ -3,10 +3,10 @@ package router
import (
"github.com/ccfos/nightingale/v6/datasource/es"
"github.com/ccfos/nightingale/v6/dscache"
"github.com/ccfos/nightingale/v6/pkg/ginx"
"github.com/ccfos/nightingale/v6/pkg/logx"
"github.com/gin-gonic/gin"
"github.com/toolkits/pkg/ginx"
"github.com/toolkits/pkg/logger"
)
type IndexReq struct {
@@ -34,7 +34,7 @@ func (rt *Router) QueryIndices(c *gin.Context) {
plug, exists := dscache.DsCache.Get(f.Cate, f.DatasourceId)
if !exists {
logx.Warningf(c.Request.Context(), "cluster:%d not exists", f.DatasourceId)
logger.Warningf("cluster:%d not exists", f.DatasourceId)
ginx.Bomb(200, "cluster not exists")
}
@@ -50,7 +50,7 @@ func (rt *Router) QueryFields(c *gin.Context) {
plug, exists := dscache.DsCache.Get(f.Cate, f.DatasourceId)
if !exists {
logx.Warningf(c.Request.Context(), "cluster:%d not exists", f.DatasourceId)
logger.Warningf("cluster:%d not exists", f.DatasourceId)
ginx.Bomb(200, "cluster not exists")
}
@@ -66,7 +66,7 @@ func (rt *Router) QueryESVariable(c *gin.Context) {
plug, exists := dscache.DsCache.Get(f.Cate, f.DatasourceId)
if !exists {
logx.Warningf(c.Request.Context(), "cluster:%d not exists", f.DatasourceId)
logger.Warningf("cluster:%d not exists", f.DatasourceId)
ginx.Bomb(200, "cluster not exists")
}

View File

@@ -5,8 +5,8 @@ import (
"time"
"github.com/ccfos/nightingale/v6/models"
"github.com/ccfos/nightingale/v6/pkg/ginx"
"github.com/gin-gonic/gin"
"github.com/toolkits/pkg/ginx"
)
// 创建 ES Index Pattern

View File

@@ -1,149 +0,0 @@
package router
import (
"encoding/json"
"fmt"
"io"
"net/http"
"strconv"
"time"
"github.com/ccfos/nightingale/v6/alert/naming"
"github.com/ccfos/nightingale/v6/models"
"github.com/ccfos/nightingale/v6/pkg/loggrep"
"github.com/ccfos/nightingale/v6/pkg/ginx"
"github.com/gin-gonic/gin"
)
// eventDetailPage renders an HTML log viewer page (for pages group).
func (rt *Router) eventDetailPage(c *gin.Context) {
hash := ginx.UrlParamStr(c, "hash")
if !loggrep.IsValidHash(hash) {
c.String(http.StatusBadRequest, "invalid hash format")
return
}
logs, instance, err := rt.getEventLogs(hash)
if err != nil {
c.String(http.StatusInternalServerError, "Error: %v", err)
return
}
c.Header("Content-Type", "text/html; charset=utf-8")
err = loggrep.RenderHTML(c.Writer, loggrep.PageData{
Hash: hash,
Instance: instance,
Logs: logs,
Total: len(logs),
})
if err != nil {
c.String(http.StatusInternalServerError, "render error: %v", err)
}
}
// eventDetailJSON returns JSON (for service group).
func (rt *Router) eventDetailJSON(c *gin.Context) {
hash := ginx.UrlParamStr(c, "hash")
if !loggrep.IsValidHash(hash) {
ginx.Bomb(200, "invalid hash format")
}
logs, instance, err := rt.getEventLogs(hash)
ginx.Dangerous(err)
ginx.NewRender(c).Data(loggrep.EventDetailResp{
Logs: logs,
Instance: instance,
}, nil)
}
// getNodeForDatasource returns the alert engine instance responsible for the given
// datasource and primary key. It first checks the local hashring, and falls back
// to querying the database for active instances if the hashring is empty
// (e.g. when the datasource belongs to another engine cluster).
func (rt *Router) getNodeForDatasource(datasourceId int64, pk string) (string, error) {
dsIdStr := strconv.FormatInt(datasourceId, 10)
node, err := naming.DatasourceHashRing.GetNode(dsIdStr, pk)
if err == nil {
return node, nil
}
// Hashring is empty for this datasource (likely belongs to another engine cluster).
// Query the DB for active instances.
servers, dbErr := models.AlertingEngineGetsInstances(rt.Ctx,
"datasource_id = ? and clock > ?",
datasourceId, time.Now().Unix()-30)
if dbErr != nil {
return "", dbErr
}
if len(servers) == 0 {
return "", fmt.Errorf("no active instances for datasource %d", datasourceId)
}
ring := naming.NewConsistentHashRing(int32(naming.NodeReplicas), servers)
return ring.Get(pk)
}
// getEventLogs resolves the target instance and retrieves logs.
func (rt *Router) getEventLogs(hash string) ([]string, string, error) {
event, err := models.AlertHisEventGetByHash(rt.Ctx, hash)
if err != nil {
return nil, "", err
}
if event == nil {
return nil, "", fmt.Errorf("no such alert event")
}
ruleId := strconv.FormatInt(event.RuleId, 10)
instance := fmt.Sprintf("%s:%d", rt.Alert.Heartbeat.IP, rt.HTTP.Port)
node, err := rt.getNodeForDatasource(event.DatasourceId, ruleId)
if err != nil || node == instance {
// hashring not ready or target is self, handle locally
logs, err := loggrep.GrepLogDir(rt.LogDir, hash)
return logs, instance, err
}
// forward to the target alert instance
return rt.forwardEventDetail(node, hash)
}
func (rt *Router) forwardEventDetail(node, hash string) ([]string, string, error) {
url := fmt.Sprintf("http://%s/v1/n9e/event-detail/%s", node, hash)
req, err := http.NewRequest("GET", url, nil)
if err != nil {
return nil, node, err
}
for user, pass := range rt.HTTP.APIForService.BasicAuth {
req.SetBasicAuth(user, pass)
break
}
client := &http.Client{Timeout: 15 * time.Second}
resp, err := client.Do(req)
if err != nil {
return nil, node, fmt.Errorf("forward to %s failed: %v", node, err)
}
defer resp.Body.Close()
body, err := io.ReadAll(io.LimitReader(resp.Body, 10*1024*1024)) // 10MB limit
if err != nil {
return nil, node, err
}
var result struct {
Dat loggrep.EventDetailResp `json:"dat"`
Err string `json:"err"`
}
if err := json.Unmarshal(body, &result); err != nil {
return nil, node, err
}
if result.Err != "" {
return nil, node, fmt.Errorf("%s", result.Err)
}
return result.Dat.Logs, result.Dat.Instance, nil
}

View File

@@ -8,10 +8,10 @@ import (
"github.com/ccfos/nightingale/v6/alert/pipeline/engine"
"github.com/ccfos/nightingale/v6/models"
"github.com/ccfos/nightingale/v6/pkg/ginx"
"github.com/gin-gonic/gin"
"github.com/google/uuid"
"github.com/toolkits/pkg/ginx"
"github.com/toolkits/pkg/i18n"
"github.com/toolkits/pkg/logger"
)

View File

@@ -7,9 +7,9 @@ import (
"github.com/ccfos/nightingale/v6/models"
"github.com/ccfos/nightingale/v6/pkg/ctx"
"github.com/ccfos/nightingale/v6/pkg/ginx"
"github.com/gin-gonic/gin"
"github.com/toolkits/pkg/ginx"
)
const defaultLimit = 300

View File

@@ -15,9 +15,9 @@ import (
"github.com/ccfos/nightingale/v6/models"
"github.com/ccfos/nightingale/v6/pkg/ctx"
"github.com/ccfos/nightingale/v6/pushgw/idents"
"github.com/ccfos/nightingale/v6/pkg/ginx"
"github.com/gin-gonic/gin"
"github.com/toolkits/pkg/ginx"
"github.com/toolkits/pkg/logger"
)

View File

@@ -14,16 +14,16 @@ import (
"github.com/ccfos/nightingale/v6/pkg/dingtalk"
"github.com/ccfos/nightingale/v6/pkg/feishu"
"github.com/ccfos/nightingale/v6/pkg/ldapx"
"github.com/ccfos/nightingale/v6/pkg/logx"
"github.com/ccfos/nightingale/v6/pkg/oauth2x"
"github.com/ccfos/nightingale/v6/pkg/oidcx"
"github.com/ccfos/nightingale/v6/pkg/secu"
"github.com/ccfos/nightingale/v6/pkg/ginx"
"github.com/dgrijalva/jwt-go"
"github.com/gin-gonic/gin"
"github.com/pelletier/go-toml/v2"
"github.com/pkg/errors"
"github.com/toolkits/pkg/ginx"
"github.com/toolkits/pkg/logger"
"gorm.io/gorm"
)
@@ -37,9 +37,7 @@ type loginForm struct {
func (rt *Router) loginPost(c *gin.Context) {
var f loginForm
ginx.BindJSON(c, &f)
rctx := c.Request.Context()
logx.Infof(rctx, "username:%s login from:%s", f.Username, c.ClientIP())
logger.Infof("username:%s login from:%s", f.Username, c.ClientIP())
if rt.HTTP.ShowCaptcha.Enable {
if !CaptchaVerify(f.Captchaid, f.Verifyvalue) {
@@ -52,25 +50,23 @@ func (rt *Router) loginPost(c *gin.Context) {
if rt.HTTP.RSA.OpenRSA {
decPassWord, err := secu.Decrypt(f.Password, rt.HTTP.RSA.RSAPrivateKey, rt.HTTP.RSA.RSAPassWord)
if err != nil {
logx.Errorf(rctx, "RSA Decrypt failed: %v username: %s", err, f.Username)
logger.Errorf("RSA Decrypt failed: %v username: %s", err, f.Username)
ginx.NewRender(c).Message(err)
return
}
authPassWord = decPassWord
}
reqCtx := rt.Ctx.WithContext(rctx)
var user *models.User
var err error
lc := rt.Sso.LDAP.Copy()
if lc.Enable {
user, err = ldapx.LdapLogin(reqCtx, f.Username, authPassWord, lc.DefaultRoles, lc.DefaultTeams, lc)
user, err = ldapx.LdapLogin(rt.Ctx, f.Username, authPassWord, lc.DefaultRoles, lc.DefaultTeams, lc)
if err != nil {
logx.Debugf(rctx, "ldap login failed: %v username: %s", err, f.Username)
logger.Debugf("ldap login failed: %v username: %s", err, f.Username)
var errLoginInN9e error
// to use n9e as the minimum guarantee for login
if user, errLoginInN9e = models.PassLogin(reqCtx, rt.Redis, f.Username, authPassWord); errLoginInN9e != nil {
if user, errLoginInN9e = models.PassLogin(rt.Ctx, rt.Redis, f.Username, authPassWord); errLoginInN9e != nil {
ginx.NewRender(c).Message("ldap login failed: %v; n9e login failed: %v", err, errLoginInN9e)
return
}
@@ -78,7 +74,7 @@ func (rt *Router) loginPost(c *gin.Context) {
user.RolesLst = strings.Fields(user.Roles)
}
} else {
user, err = models.PassLogin(reqCtx, rt.Redis, f.Username, authPassWord)
user, err = models.PassLogin(rt.Ctx, rt.Redis, f.Username, authPassWord)
ginx.Dangerous(err)
}
@@ -102,8 +98,7 @@ func (rt *Router) loginPost(c *gin.Context) {
}
func (rt *Router) logoutPost(c *gin.Context) {
rctx := c.Request.Context()
logx.Infof(rctx, "username:%s logout from:%s", c.GetString("username"), c.ClientIP())
logger.Infof("username:%s logout from:%s", c.GetString("username"), c.ClientIP())
metadata, err := rt.extractTokenMetadata(c.Request)
if err != nil {
ginx.NewRender(c, http.StatusBadRequest).Message("failed to parse jwt token")
@@ -122,7 +117,7 @@ func (rt *Router) logoutPost(c *gin.Context) {
// 获取用户的 id_token
idToken, err := rt.fetchIdToken(c.Request.Context(), user.Id)
if err != nil {
logx.Debugf(rctx, "fetch id_token failed: %v, user_id: %d", err, user.Id)
logger.Debugf("fetch id_token failed: %v, user_id: %d", err, user.Id)
idToken = "" // 如果获取失败,使用空字符串
}
@@ -225,7 +220,7 @@ func (rt *Router) refreshPost(c *gin.Context) {
// 注意:这里不会获取新的 id_token只是延长 Redis 中现有 id_token 的 TTL
if idToken, err := rt.fetchIdToken(c.Request.Context(), userid); err == nil && idToken != "" {
if err := rt.saveIdToken(c.Request.Context(), userid, idToken); err != nil {
logx.Debugf(c.Request.Context(), "refresh id_token ttl failed: %v, user_id: %d", err, userid)
logger.Debugf("refresh id_token ttl failed: %v, user_id: %d", err, userid)
}
}
@@ -276,13 +271,12 @@ type CallbackOutput struct {
}
func (rt *Router) loginCallback(c *gin.Context) {
rctx := c.Request.Context()
code := ginx.QueryStr(c, "code", "")
state := ginx.QueryStr(c, "state", "")
ret, err := rt.Sso.OIDC.Callback(rt.Redis, rctx, code, state)
ret, err := rt.Sso.OIDC.Callback(rt.Redis, c.Request.Context(), code, state)
if err != nil {
logx.Errorf(rctx, "sso_callback fail. code:%s, state:%s, get ret: %+v. error: %v", code, state, ret, err)
logger.Errorf("sso_callback fail. code:%s, state:%s, get ret: %+v. error: %v", code, state, ret, err)
ginx.NewRender(c).Data(CallbackOutput{}, err)
return
}
@@ -305,7 +299,7 @@ func (rt *Router) loginCallback(c *gin.Context) {
for _, gid := range rt.Sso.OIDC.DefaultTeams {
err = models.UserGroupMemberAdd(rt.Ctx, gid, user.Id)
if err != nil {
logx.Errorf(rctx, "user:%v UserGroupMemberAdd: %s", user, err)
logger.Errorf("user:%v UserGroupMemberAdd: %s", user, err)
}
}
}
@@ -315,12 +309,12 @@ func (rt *Router) loginCallback(c *gin.Context) {
userIdentity := fmt.Sprintf("%d-%s", user.Id, user.Username)
ts, err := rt.createTokens(rt.HTTP.JWTAuth.SigningKey, userIdentity)
ginx.Dangerous(err)
ginx.Dangerous(rt.createAuth(rctx, userIdentity, ts))
ginx.Dangerous(rt.createAuth(c.Request.Context(), userIdentity, ts))
// 保存 id_token 到 Redis用于登出时使用
if ret.IdToken != "" {
if err := rt.saveIdToken(rctx, user.Id, ret.IdToken); err != nil {
logx.Errorf(rctx, "save id_token failed: %v, user_id: %d", err, user.Id)
if err := rt.saveIdToken(c.Request.Context(), user.Id, ret.IdToken); err != nil {
logger.Errorf("save id_token failed: %v, user_id: %d", err, user.Id)
}
}
@@ -361,7 +355,7 @@ func (rt *Router) loginRedirectCas(c *gin.Context) {
}
if !rt.Sso.CAS.Enable {
logx.Errorf(c.Request.Context(), "cas is not enable")
logger.Error("cas is not enable")
ginx.NewRender(c).Data("", nil)
return
}
@@ -376,18 +370,17 @@ func (rt *Router) loginRedirectCas(c *gin.Context) {
}
func (rt *Router) loginCallbackCas(c *gin.Context) {
rctx := c.Request.Context()
ticket := ginx.QueryStr(c, "ticket", "")
state := ginx.QueryStr(c, "state", "")
ret, err := rt.Sso.CAS.ValidateServiceTicket(rctx, ticket, state, rt.Redis)
ret, err := rt.Sso.CAS.ValidateServiceTicket(c.Request.Context(), ticket, state, rt.Redis)
if err != nil {
logx.Errorf(rctx, "ValidateServiceTicket: %s", err)
logger.Errorf("ValidateServiceTicket: %s", err)
ginx.NewRender(c).Data("", err)
return
}
user, err := models.UserGet(rt.Ctx, "username=?", ret.Username)
if err != nil {
logx.Errorf(rctx, "UserGet: %s", err)
logger.Errorf("UserGet: %s", err)
}
ginx.Dangerous(err)
if user != nil {
@@ -406,10 +399,10 @@ func (rt *Router) loginCallbackCas(c *gin.Context) {
userIdentity := fmt.Sprintf("%d-%s", user.Id, user.Username)
ts, err := rt.createTokens(rt.HTTP.JWTAuth.SigningKey, userIdentity)
if err != nil {
logx.Errorf(rctx, "createTokens: %s", err)
logger.Errorf("createTokens: %s", err)
}
ginx.Dangerous(err)
ginx.Dangerous(rt.createAuth(rctx, userIdentity, ts))
ginx.Dangerous(rt.createAuth(c.Request.Context(), userIdentity, ts))
redirect := "/"
if ret.Redirect != "/login" {
@@ -482,13 +475,12 @@ func (rt *Router) loginRedirectDingTalk(c *gin.Context) {
}
func (rt *Router) loginCallbackDingTalk(c *gin.Context) {
rctx := c.Request.Context()
code := ginx.QueryStr(c, "code", "")
state := ginx.QueryStr(c, "state", "")
ret, err := rt.Sso.DingTalk.Callback(rt.Redis, rctx, code, state)
ret, err := rt.Sso.DingTalk.Callback(rt.Redis, c.Request.Context(), code, state)
if err != nil {
logx.Errorf(rctx, "sso_callback DingTalk fail. code:%s, state:%s, get ret: %+v. error: %v", code, state, ret, err)
logger.Errorf("sso_callback DingTalk fail. code:%s, state:%s, get ret: %+v. error: %v", code, state, ret, err)
ginx.NewRender(c).Data(CallbackOutput{}, err)
return
}
@@ -558,13 +550,12 @@ func (rt *Router) loginRedirectFeiShu(c *gin.Context) {
}
func (rt *Router) loginCallbackFeiShu(c *gin.Context) {
rctx := c.Request.Context()
code := ginx.QueryStr(c, "code", "")
state := ginx.QueryStr(c, "state", "")
ret, err := rt.Sso.FeiShu.Callback(rt.Redis, rctx, code, state)
ret, err := rt.Sso.FeiShu.Callback(rt.Redis, c.Request.Context(), code, state)
if err != nil {
logx.Errorf(rctx, "sso_callback FeiShu fail. code:%s, state:%s, get ret: %+v. error: %v", code, state, ret, err)
logger.Errorf("sso_callback FeiShu fail. code:%s, state:%s, get ret: %+v. error: %v", code, state, ret, err)
ginx.NewRender(c).Data(CallbackOutput{}, err)
return
}
@@ -592,7 +583,7 @@ func (rt *Router) loginCallbackFeiShu(c *gin.Context) {
if len(defaultUserGroups) > 0 {
err = user.AddToUserGroups(rt.Ctx, defaultUserGroups)
if err != nil {
logx.Errorf(rctx, "sso feishu add user group error %v %v", ret, err)
logger.Errorf("sso feishu add user group error %v", ret, err)
}
}
@@ -619,13 +610,12 @@ func (rt *Router) loginCallbackFeiShu(c *gin.Context) {
}
func (rt *Router) loginCallbackOAuth(c *gin.Context) {
rctx := c.Request.Context()
code := ginx.QueryStr(c, "code", "")
state := ginx.QueryStr(c, "state", "")
ret, err := rt.Sso.OAuth2.Callback(rt.Redis, rctx, code, state)
ret, err := rt.Sso.OAuth2.Callback(rt.Redis, c.Request.Context(), code, state)
if err != nil {
logx.Debugf(rctx, "sso.callback() get ret %+v error %v", ret, err)
logger.Debugf("sso.callback() get ret %+v error %v", ret, err)
ginx.NewRender(c).Data(CallbackOutput{}, err)
return
}

View File

@@ -12,10 +12,10 @@ import (
"github.com/ccfos/nightingale/v6/pkg/slice"
"github.com/ccfos/nightingale/v6/pkg/strx"
"github.com/ccfos/nightingale/v6/pkg/tplx"
"github.com/ccfos/nightingale/v6/pkg/ginx"
"github.com/gin-gonic/gin"
"github.com/google/uuid"
"github.com/toolkits/pkg/ginx"
)
func (rt *Router) messageTemplatesAdd(c *gin.Context) {

View File

@@ -2,9 +2,9 @@ package router
import (
"github.com/ccfos/nightingale/v6/center/cconf"
"github.com/ccfos/nightingale/v6/pkg/ginx"
"github.com/gin-gonic/gin"
"github.com/toolkits/pkg/ginx"
)
func (rt *Router) metricsDescGetFile(c *gin.Context) {

View File

@@ -4,9 +4,9 @@ import (
"net/http"
"github.com/ccfos/nightingale/v6/models"
"github.com/ccfos/nightingale/v6/pkg/ginx"
"github.com/gin-gonic/gin"
"github.com/toolkits/pkg/ginx"
)
// no param

View File

@@ -9,9 +9,9 @@ import (
"github.com/ccfos/nightingale/v6/alert/mute"
"github.com/ccfos/nightingale/v6/models"
"github.com/ccfos/nightingale/v6/pkg/strx"
"github.com/ccfos/nightingale/v6/pkg/ginx"
"github.com/gin-gonic/gin"
"github.com/toolkits/pkg/ginx"
"github.com/toolkits/pkg/i18n"
)

View File

@@ -11,11 +11,11 @@ import (
"github.com/ccfos/nightingale/v6/center/cstats"
"github.com/ccfos/nightingale/v6/models"
"github.com/ccfos/nightingale/v6/pkg/ginx"
"github.com/gin-gonic/gin"
"github.com/golang-jwt/jwt"
"github.com/google/uuid"
"github.com/toolkits/pkg/ginx"
)
const (

View File

@@ -6,9 +6,9 @@ import (
"github.com/ccfos/nightingale/v6/alert/sender"
"github.com/ccfos/nightingale/v6/models"
"github.com/ccfos/nightingale/v6/pkg/ctx"
"github.com/ccfos/nightingale/v6/pkg/ginx"
"github.com/gin-gonic/gin"
"github.com/toolkits/pkg/ginx"
"github.com/toolkits/pkg/logger"
)

View File

@@ -11,8 +11,8 @@ import (
"time"
"github.com/ccfos/nightingale/v6/models"
"github.com/ccfos/nightingale/v6/pkg/ginx"
"github.com/gin-gonic/gin"
"github.com/toolkits/pkg/ginx"
)
func (rt *Router) notifyChannelsAdd(c *gin.Context) {

View File

@@ -10,10 +10,10 @@ import (
"github.com/ccfos/nightingale/v6/memsto"
"github.com/ccfos/nightingale/v6/models"
"github.com/ccfos/nightingale/v6/pkg/tplx"
"github.com/ccfos/nightingale/v6/pkg/ginx"
"github.com/gin-gonic/gin"
"github.com/pelletier/go-toml/v2"
"github.com/toolkits/pkg/ginx"
"github.com/toolkits/pkg/str"
)

View File

@@ -10,9 +10,9 @@ import (
"github.com/ccfos/nightingale/v6/models"
"github.com/ccfos/nightingale/v6/pkg/ctx"
"github.com/ccfos/nightingale/v6/pkg/slice"
"github.com/ccfos/nightingale/v6/pkg/ginx"
"github.com/gin-gonic/gin"
"github.com/toolkits/pkg/ginx"
"github.com/toolkits/pkg/logger"
)
@@ -222,7 +222,7 @@ func SendNotifyChannelMessage(ctx *ctx.Context, userCache *memsto.UserCacheType,
return "", fmt.Errorf("failed to send flashduty notify: %v", err)
}
}
logger.Infof("channel_name: %v, event:%s, tplContent:%s, customParams:%v, respBody: %v, err: %v", notifyChannel.Name, events[0].Hash, tplContent, customParams, resp, err)
logger.Infof("channel_name: %v, event:%+v, tplContent:%s, customParams:%v, respBody: %v, err: %v", notifyChannel.Name, events[0], tplContent, customParams, resp, err)
return resp, nil
case "pagerduty":
client, err := models.GetHTTPClient(notifyChannel)
@@ -236,7 +236,7 @@ func SendNotifyChannelMessage(ctx *ctx.Context, userCache *memsto.UserCacheType,
return "", fmt.Errorf("failed to send pagerduty notify: %v", err)
}
}
logger.Infof("channel_name: %v, event:%s, tplContent:%s, customParams:%v, respBody: %v, err: %v", notifyChannel.Name, events[0].Hash, tplContent, customParams, resp, err)
logger.Infof("channel_name: %v, event:%+v, tplContent:%s, customParams:%v, respBody: %v, err: %v", notifyChannel.Name, events[0], tplContent, customParams, resp, err)
return resp, nil
case "http":
client, err := models.GetHTTPClient(notifyChannel)
@@ -254,7 +254,7 @@ func SendNotifyChannelMessage(ctx *ctx.Context, userCache *memsto.UserCacheType,
if dispatch.NeedBatchContacts(notifyChannel.RequestConfig.HTTPRequestConfig) || len(sendtos) == 0 {
resp, err = notifyChannel.SendHTTP(events, tplContent, customParams, sendtos, client)
logger.Infof("channel_name: %v, event:%s, sendtos:%+v, tplContent:%s, customParams:%v, respBody: %v, err: %v", notifyChannel.Name, events[0].Hash, sendtos, tplContent, customParams, resp, err)
logger.Infof("channel_name: %v, event:%+v, sendtos:%+v, tplContent:%s, customParams:%v, respBody: %v, err: %v", notifyChannel.Name, events[0], sendtos, tplContent, customParams, resp, err)
if err != nil {
return "", fmt.Errorf("failed to send http notify: %v", err)
}
@@ -262,7 +262,7 @@ func SendNotifyChannelMessage(ctx *ctx.Context, userCache *memsto.UserCacheType,
} else {
for i := range sendtos {
resp, err = notifyChannel.SendHTTP(events, tplContent, customParams, []string{sendtos[i]}, client)
logger.Infof("channel_name: %v, event:%s, tplContent:%s, customParams:%v, sendto:%+v, respBody: %v, err: %v", notifyChannel.Name, events[0].Hash, tplContent, customParams, sendtos[i], resp, err)
logger.Infof("channel_name: %v, event:%+v, tplContent:%s, customParams:%v, sendto:%+v, respBody: %v, err: %v", notifyChannel.Name, events[0], tplContent, customParams, sendtos[i], resp, err)
if err != nil {
return "", fmt.Errorf("failed to send http notify: %v", err)
}
@@ -281,7 +281,7 @@ func SendNotifyChannelMessage(ctx *ctx.Context, userCache *memsto.UserCacheType,
return resp, nil
case "script":
resp, _, err := notifyChannel.SendScript(events, tplContent, customParams, sendtos)
logger.Infof("channel_name: %v, event:%s, tplContent:%s, customParams:%v, respBody: %v, err: %v", notifyChannel.Name, events[0].Hash, tplContent, customParams, resp, err)
logger.Infof("channel_name: %v, event:%+v, tplContent:%s, customParams:%v, respBody: %v, err: %v", notifyChannel.Name, events[0], tplContent, customParams, resp, err)
return resp, err
default:
logger.Errorf("unsupported request type: %v", notifyChannel.RequestType)

View File

@@ -11,9 +11,9 @@ import (
"github.com/ccfos/nightingale/v6/center/cconf"
"github.com/ccfos/nightingale/v6/models"
"github.com/ccfos/nightingale/v6/pkg/tplx"
"github.com/ccfos/nightingale/v6/pkg/ginx"
"github.com/gin-gonic/gin"
"github.com/toolkits/pkg/ginx"
"github.com/toolkits/pkg/str"
)

View File

@@ -3,9 +3,9 @@ package router
import (
"github.com/ccfos/nightingale/v6/datasource/opensearch"
"github.com/ccfos/nightingale/v6/dscache"
"github.com/ccfos/nightingale/v6/pkg/ginx"
"github.com/gin-gonic/gin"
"github.com/toolkits/pkg/ginx"
"github.com/toolkits/pkg/logger"
)

View File

@@ -12,13 +12,12 @@ import (
"sync"
"time"
"github.com/ccfos/nightingale/v6/pkg/logx"
"github.com/ccfos/nightingale/v6/pkg/poster"
pkgprom "github.com/ccfos/nightingale/v6/pkg/prom"
"github.com/ccfos/nightingale/v6/prom"
"github.com/ccfos/nightingale/v6/pkg/ginx"
"github.com/gin-gonic/gin"
"github.com/prometheus/common/model"
"github.com/toolkits/pkg/ginx"
"github.com/toolkits/pkg/logger"
"github.com/toolkits/pkg/net/httplib"
)
@@ -39,16 +38,15 @@ func (rt *Router) promBatchQueryRange(c *gin.Context) {
var f BatchQueryForm
ginx.Dangerous(c.BindJSON(&f))
lst, err := PromBatchQueryRange(c.Request.Context(), rt.PromClients, f)
lst, err := PromBatchQueryRange(rt.PromClients, f)
ginx.NewRender(c).Data(lst, err)
}
func PromBatchQueryRange(ctx context.Context, pc *prom.PromClientMap, f BatchQueryForm) ([]model.Value, error) {
func PromBatchQueryRange(pc *prom.PromClientMap, f BatchQueryForm) ([]model.Value, error) {
var lst []model.Value
cli := pc.GetCli(f.DatasourceId)
if cli == nil {
logx.Warningf(ctx, "no such datasource id: %d", f.DatasourceId)
return lst, fmt.Errorf("no such datasource id: %d", f.DatasourceId)
}
@@ -59,9 +57,8 @@ func PromBatchQueryRange(ctx context.Context, pc *prom.PromClientMap, f BatchQue
Step: time.Duration(item.Step) * time.Second,
}
resp, _, err := cli.QueryRange(ctx, item.Query, r)
resp, _, err := cli.QueryRange(context.Background(), item.Query, r)
if err != nil {
logx.Warningf(ctx, "query range error: query:%s err:%v", item.Query, err)
return lst, err
}
@@ -84,23 +81,22 @@ func (rt *Router) promBatchQueryInstant(c *gin.Context) {
var f BatchInstantForm
ginx.Dangerous(c.BindJSON(&f))
lst, err := PromBatchQueryInstant(c.Request.Context(), rt.PromClients, f)
lst, err := PromBatchQueryInstant(rt.PromClients, f)
ginx.NewRender(c).Data(lst, err)
}
func PromBatchQueryInstant(ctx context.Context, pc *prom.PromClientMap, f BatchInstantForm) ([]model.Value, error) {
func PromBatchQueryInstant(pc *prom.PromClientMap, f BatchInstantForm) ([]model.Value, error) {
var lst []model.Value
cli := pc.GetCli(f.DatasourceId)
if cli == nil {
logx.Warningf(ctx, "no such datasource id: %d", f.DatasourceId)
logger.Warningf("no such datasource id: %d", f.DatasourceId)
return lst, fmt.Errorf("no such datasource id: %d", f.DatasourceId)
}
for _, item := range f.Queries {
resp, _, err := cli.Query(ctx, item.Query, time.Unix(item.Time, 0))
resp, _, err := cli.Query(context.Background(), item.Query, time.Unix(item.Time, 0))
if err != nil {
logx.Warningf(ctx, "query instant error: query:%s err:%v", item.Query, err)
return lst, err
}
@@ -193,7 +189,7 @@ func (rt *Router) dsProxy(c *gin.Context) {
modifyResponse := func(r *http.Response) error {
if r.StatusCode == http.StatusUnauthorized {
logx.Warningf(c.Request.Context(), "proxy path:%s unauthorized access ", c.Request.URL.Path)
logger.Warningf("proxy path:%s unauthorized access ", c.Request.URL.Path)
return fmt.Errorf("unauthorized access")
}

View File

@@ -8,9 +8,9 @@ import (
"github.com/ccfos/nightingale/v6/alert/eval"
"github.com/ccfos/nightingale/v6/dscache"
"github.com/ccfos/nightingale/v6/models"
"github.com/ccfos/nightingale/v6/pkg/logx"
"github.com/ccfos/nightingale/v6/pkg/ginx"
"github.com/gin-gonic/gin"
"github.com/toolkits/pkg/ginx"
"github.com/toolkits/pkg/logger"
)
type CheckDsPermFunc func(c *gin.Context, dsId int64, cate string, q interface{}) bool
@@ -47,7 +47,6 @@ func QueryLogBatchConcurrently(anonymousAccess bool, ctx *gin.Context, f QueryFr
var mu sync.Mutex
var wg sync.WaitGroup
var errs []error
rctx := ctx.Request.Context()
for _, q := range f.Queries {
if !anonymousAccess && !CheckDsPerm(ctx, q.Did, q.DsCate, q) {
@@ -56,14 +55,14 @@ func QueryLogBatchConcurrently(anonymousAccess bool, ctx *gin.Context, f QueryFr
plug, exists := dscache.DsCache.Get(q.DsCate, q.Did)
if !exists {
logx.Warningf(rctx, "cluster:%d not exists query:%+v", q.Did, q)
logger.Warningf("cluster:%d not exists query:%+v", q.Did, q)
return LogResp{}, fmt.Errorf("cluster not exists")
}
// 根据数据源类型对 Query 进行模板渲染处理
err := eval.ExecuteQueryTemplate(q.DsCate, q.Query, nil)
if err != nil {
logx.Warningf(rctx, "query template execute error: %v", err)
logger.Warningf("query template execute error: %v", err)
return LogResp{}, fmt.Errorf("query template execute error: %v", err)
}
@@ -71,12 +70,12 @@ func QueryLogBatchConcurrently(anonymousAccess bool, ctx *gin.Context, f QueryFr
go func(query Query) {
defer wg.Done()
data, total, err := plug.QueryLog(rctx, query.Query)
data, total, err := plug.QueryLog(ctx.Request.Context(), query.Query)
mu.Lock()
defer mu.Unlock()
if err != nil {
errMsg := fmt.Sprintf("query data error: %v query:%v\n ", err, query)
logx.Warningf(rctx, "%s", errMsg)
logger.Warningf(errMsg)
errs = append(errs, err)
return
}
@@ -122,7 +121,6 @@ func QueryDataConcurrently(anonymousAccess bool, ctx *gin.Context, f models.Quer
var mu sync.Mutex
var wg sync.WaitGroup
var errs []error
rctx := ctx.Request.Context()
for _, q := range f.Queries {
if !anonymousAccess && !CheckDsPerm(ctx, f.DatasourceId, f.Cate, q) {
@@ -131,7 +129,7 @@ func QueryDataConcurrently(anonymousAccess bool, ctx *gin.Context, f models.Quer
plug, exists := dscache.DsCache.Get(f.Cate, f.DatasourceId)
if !exists {
logx.Warningf(rctx, "cluster:%d not exists", f.DatasourceId)
logger.Warningf("cluster:%d not exists", f.DatasourceId)
return nil, fmt.Errorf("cluster not exists")
}
@@ -139,16 +137,16 @@ func QueryDataConcurrently(anonymousAccess bool, ctx *gin.Context, f models.Quer
go func(query interface{}) {
defer wg.Done()
data, err := plug.QueryData(rctx, query)
data, err := plug.QueryData(ctx.Request.Context(), query)
if err != nil {
logx.Warningf(rctx, "query data error: req:%+v err:%v", query, err)
logger.Warningf("query data error: req:%+v err:%v", query, err)
mu.Lock()
errs = append(errs, err)
mu.Unlock()
return
}
logx.Debugf(rctx, "query data: req:%+v resp:%+v", query, data)
logger.Debugf("query data: req:%+v resp:%+v", query, data)
mu.Lock()
resp = append(resp, data...)
mu.Unlock()
@@ -194,7 +192,6 @@ func QueryLogConcurrently(anonymousAccess bool, ctx *gin.Context, f models.Query
var mu sync.Mutex
var wg sync.WaitGroup
var errs []error
rctx := ctx.Request.Context()
for _, q := range f.Queries {
if !anonymousAccess && !CheckDsPerm(ctx, f.DatasourceId, f.Cate, q) {
@@ -203,7 +200,7 @@ func QueryLogConcurrently(anonymousAccess bool, ctx *gin.Context, f models.Query
plug, exists := dscache.DsCache.Get(f.Cate, f.DatasourceId)
if !exists {
logx.Warningf(rctx, "cluster:%d not exists query:%+v", f.DatasourceId, f)
logger.Warningf("cluster:%d not exists query:%+v", f.DatasourceId, f)
return LogResp{}, fmt.Errorf("cluster not exists")
}
@@ -211,11 +208,11 @@ func QueryLogConcurrently(anonymousAccess bool, ctx *gin.Context, f models.Query
go func(query interface{}) {
defer wg.Done()
data, total, err := plug.QueryLog(rctx, query)
logx.Debugf(rctx, "query log: req:%+v resp:%+v", query, data)
data, total, err := plug.QueryLog(ctx.Request.Context(), query)
logger.Debugf("query log: req:%+v resp:%+v", query, data)
if err != nil {
errMsg := fmt.Sprintf("query data error: %v query:%v\n ", err, query)
logx.Warningf(rctx, "%s", errMsg)
logger.Warningf(errMsg)
mu.Lock()
errs = append(errs, err)
mu.Unlock()
@@ -253,7 +250,6 @@ func (rt *Router) QueryLogV2(c *gin.Context) {
func (rt *Router) QueryLog(c *gin.Context) {
var f models.QueryParam
ginx.BindJSON(c, &f)
rctx := c.Request.Context()
var resp []interface{}
for _, q := range f.Queries {
@@ -263,13 +259,13 @@ func (rt *Router) QueryLog(c *gin.Context) {
plug, exists := dscache.DsCache.Get("elasticsearch", f.DatasourceId)
if !exists {
logx.Warningf(rctx, "cluster:%d not exists", f.DatasourceId)
logger.Warningf("cluster:%d not exists", f.DatasourceId)
ginx.Bomb(200, "cluster not exists")
}
data, _, err := plug.QueryLog(rctx, q)
data, _, err := plug.QueryLog(c.Request.Context(), q)
if err != nil {
logx.Warningf(rctx, "query data error: %v", err)
logger.Warningf("query data error: %v", err)
ginx.Bomb(200, "err:%v", err)
continue
}

View File

@@ -7,9 +7,9 @@ import (
"github.com/ccfos/nightingale/v6/models"
"github.com/ccfos/nightingale/v6/pkg/strx"
"github.com/ccfos/nightingale/v6/pkg/ginx"
"github.com/gin-gonic/gin"
"github.com/toolkits/pkg/ginx"
)
func (rt *Router) recordingRuleGets(c *gin.Context) {

View File

@@ -6,9 +6,9 @@ import (
"github.com/ccfos/nightingale/v6/center/cconf"
"github.com/ccfos/nightingale/v6/models"
"github.com/ccfos/nightingale/v6/pkg/ginx"
"github.com/gin-gonic/gin"
"github.com/toolkits/pkg/ginx"
)
func (rt *Router) rolesGets(c *gin.Context) {

View File

@@ -5,8 +5,8 @@ import (
"github.com/ccfos/nightingale/v6/center/cconf"
"github.com/ccfos/nightingale/v6/models"
"github.com/ccfos/nightingale/v6/pkg/ginx"
"github.com/gin-gonic/gin"
"github.com/toolkits/pkg/ginx"
"github.com/toolkits/pkg/i18n"
)

View File

@@ -5,9 +5,9 @@ import (
"github.com/ccfos/nightingale/v6/models"
"github.com/ccfos/nightingale/v6/pkg/slice"
"github.com/ccfos/nightingale/v6/pkg/ginx"
"github.com/gin-gonic/gin"
"github.com/toolkits/pkg/ginx"
)
func (rt *Router) savedViewGets(c *gin.Context) {

View File

@@ -5,10 +5,10 @@ import (
"github.com/ccfos/nightingale/v6/pkg/flashduty"
"github.com/ccfos/nightingale/v6/pkg/ormx"
"github.com/ccfos/nightingale/v6/pkg/secu"
"github.com/ccfos/nightingale/v6/pkg/ginx"
"github.com/google/uuid"
"github.com/gin-gonic/gin"
"github.com/toolkits/pkg/ginx"
"github.com/toolkits/pkg/logger"
)

View File

@@ -4,9 +4,9 @@ import (
"time"
"github.com/ccfos/nightingale/v6/models"
"github.com/ccfos/nightingale/v6/pkg/ginx"
"github.com/gin-gonic/gin"
"github.com/toolkits/pkg/ginx"
)
func (rt *Router) serversGet(c *gin.Context) {

View File

@@ -5,10 +5,10 @@ import (
"time"
"github.com/ccfos/nightingale/v6/models"
"github.com/ccfos/nightingale/v6/pkg/ginx"
"github.com/google/uuid"
"github.com/gin-gonic/gin"
"github.com/toolkits/pkg/ginx"
)
// sourceTokenAdd 生成新的源令牌

View File

@@ -13,10 +13,10 @@ import (
"github.com/ccfos/nightingale/v6/pkg/strx"
"github.com/ccfos/nightingale/v6/pushgw/idents"
"github.com/ccfos/nightingale/v6/storage"
"github.com/ccfos/nightingale/v6/pkg/ginx"
"github.com/gin-gonic/gin"
"github.com/prometheus/common/model"
"github.com/toolkits/pkg/ginx"
"github.com/toolkits/pkg/logger"
)
@@ -38,16 +38,6 @@ func (rt *Router) targetGetsByHostFilter(c *gin.Context) {
total, err := models.TargetCountByFilter(rt.Ctx, query)
ginx.Dangerous(err)
models.FillTargetsBeatTime(rt.Redis, hosts)
now := time.Now().Unix()
for i := 0; i < len(hosts); i++ {
if now-hosts[i].BeatTime < 60 {
hosts[i].TargetUp = 2
} else if now-hosts[i].BeatTime < 180 {
hosts[i].TargetUp = 1
}
}
ginx.NewRender(c).Data(gin.H{
"list": hosts,
"total": total,
@@ -91,24 +81,9 @@ func (rt *Router) targetGets(c *gin.Context) {
models.BuildTargetWhereWithBgids(bgids),
models.BuildTargetWhereWithDsIds(dsIds),
models.BuildTargetWhereWithQuery(query),
models.BuildTargetWhereWithDowntime(downtime),
models.BuildTargetWhereWithHosts(hosts),
}
// downtime 筛选:从缓存获取心跳时间,选择较小的集合用 IN 或 NOT IN 过滤
if downtime != 0 {
downtimeOpt, hasMatch := rt.downtimeFilter(downtime)
if !hasMatch {
ginx.NewRender(c).Data(gin.H{
"list": []*models.Target{},
"total": 0,
}, nil)
return
}
if downtimeOpt != nil {
options = append(options, downtimeOpt)
}
}
total, err := models.TargetTotal(rt.Ctx, options...)
ginx.Dangerous(err)
@@ -127,17 +102,14 @@ func (rt *Router) targetGets(c *gin.Context) {
now := time.Now()
cache := make(map[int64]*models.BusiGroup)
// 从 Redis 补全 BeatTime
models.FillTargetsBeatTime(rt.Redis, list)
var keys []string
for i := 0; i < len(list); i++ {
ginx.Dangerous(list[i].FillGroup(rt.Ctx, cache))
keys = append(keys, models.WrapIdent(list[i].Ident))
if now.Unix()-list[i].BeatTime < 60 {
if now.Unix()-list[i].UpdateAt < 60 {
list[i].TargetUp = 2
} else if now.Unix()-list[i].BeatTime < 180 {
} else if now.Unix()-list[i].UpdateAt < 180 {
list[i].TargetUp = 1
}
}
@@ -176,43 +148,6 @@ func (rt *Router) targetGets(c *gin.Context) {
}, nil)
}
// downtimeFilter 从缓存获取心跳时间,生成 downtime 筛选条件
// 选择匹配集和非匹配集中较小的一方,用 IN 或 NOT IN 来减少 SQL 参数量
// 返回值:
// - option: 筛选条件nil 表示所有 target 都符合条件(无需过滤)
// - hasMatch: 是否有符合条件的 targetfalse 表示无匹配应返回空结果
func (rt *Router) downtimeFilter(downtime int64) (option models.BuildTargetWhereOption, hasMatch bool) {
now := time.Now().Unix()
targets := rt.TargetCache.GetAll()
var matchIdents, nonMatchIdents []string
for _, target := range targets {
matched := false
if downtime > 0 {
matched = target.BeatTime < now-downtime
} else if downtime < 0 {
matched = target.BeatTime > now+downtime
}
if matched {
matchIdents = append(matchIdents, target.Ident)
} else {
nonMatchIdents = append(nonMatchIdents, target.Ident)
}
}
if len(matchIdents) == 0 {
return nil, false
}
if len(nonMatchIdents) == 0 {
return nil, true
}
if len(matchIdents) <= len(nonMatchIdents) {
return models.BuildTargetWhereWithIdents(matchIdents), true
}
return models.BuildTargetWhereExcludeIdents(nonMatchIdents), true
}
func (rt *Router) targetExtendInfoByIdent(c *gin.Context) {
ident := ginx.QueryStr(c, "ident", "")
key := models.WrapExtendIdent(ident)

View File

@@ -7,9 +7,9 @@ import (
"github.com/ccfos/nightingale/v6/alert/sender"
"github.com/ccfos/nightingale/v6/models"
"github.com/ccfos/nightingale/v6/pkg/strx"
"github.com/ccfos/nightingale/v6/pkg/ginx"
"github.com/gin-gonic/gin"
"github.com/toolkits/pkg/ginx"
"github.com/toolkits/pkg/i18n"
)

View File

@@ -8,9 +8,9 @@ import (
"github.com/ccfos/nightingale/v6/models"
"github.com/ccfos/nightingale/v6/pkg/strx"
"github.com/ccfos/nightingale/v6/pkg/ginx"
"github.com/gin-gonic/gin"
"github.com/toolkits/pkg/ginx"
"github.com/toolkits/pkg/i18n"
"github.com/toolkits/pkg/str"
)

View File

@@ -8,8 +8,8 @@ import (
"github.com/ccfos/nightingale/v6/datasource/tdengine"
"github.com/ccfos/nightingale/v6/dscache"
"github.com/ccfos/nightingale/v6/models"
"github.com/ccfos/nightingale/v6/pkg/ginx"
"github.com/gin-gonic/gin"
"github.com/toolkits/pkg/ginx"
)
type databasesQueryForm struct {

View File

@@ -1,136 +0,0 @@
package router
import (
"encoding/json"
"fmt"
"io"
"net/http"
"time"
"github.com/ccfos/nightingale/v6/models"
"github.com/ccfos/nightingale/v6/pkg/ginx"
"github.com/ccfos/nightingale/v6/pkg/loggrep"
"github.com/toolkits/pkg/logger"
"github.com/gin-gonic/gin"
)
// traceLogsPage serves an HTML viewer page for the logs belonging to a
// single trace id. Invalid ids are rejected with 400 before any lookup.
func (rt *Router) traceLogsPage(c *gin.Context) {
	tid := ginx.UrlParamStr(c, "traceid")
	if !loggrep.IsValidTraceID(tid) {
		c.String(http.StatusBadRequest, "invalid trace id format")
		return
	}

	entries, inst, err := rt.getTraceLogs(tid)
	if err != nil {
		c.String(http.StatusInternalServerError, "Error: %v", err)
		return
	}

	c.Header("Content-Type", "text/html; charset=utf-8")
	page := loggrep.TraceLogsPageData{
		TraceID:  tid,
		Instance: inst,
		Logs:     entries,
		Total:    len(entries),
	}
	if err := loggrep.RenderTraceLogsHTML(c.Writer, page); err != nil {
		c.String(http.StatusInternalServerError, "render error: %v", err)
	}
}
// traceLogsJSON is the JSON counterpart of traceLogsPage: it validates
// the trace id and renders the matching logs plus owning instance.
func (rt *Router) traceLogsJSON(c *gin.Context) {
	tid := ginx.UrlParamStr(c, "traceid")
	if !loggrep.IsValidTraceID(tid) {
		ginx.Bomb(200, "invalid trace id format")
	}

	entries, inst, err := rt.getTraceLogs(tid)
	ginx.Dangerous(err)

	ginx.NewRender(c).Data(loggrep.EventDetailResp{
		Logs:     entries,
		Instance: inst,
	}, nil)
}
// getTraceLogs locates the logs for traceId. It greps the local log
// directory first (best effort: a local grep error just falls through),
// then asks every other live instance of the same engine cluster until
// one returns matches — trace logs live on exactly one instance.
//
// Returns the log lines, the instance they were found on, and any error
// from the instance lookup.
func (rt *Router) getTraceLogs(traceId string) ([]string, string, error) {
	needle := "trace_id=" + traceId
	self := fmt.Sprintf("%s:%d", rt.Alert.Heartbeat.IP, rt.HTTP.Port)

	// local attempt first
	if found, err := loggrep.GrepLatestLogFiles(rt.LogDir, needle); err == nil && len(found) > 0 {
		return found, self, nil
	}

	// peers in the same engine cluster that heartbeated within 30s
	peers, err := models.AlertingEngineGetsInstances(rt.Ctx,
		"engine_cluster = ? and clock > ?",
		rt.Alert.Heartbeat.EngineName, time.Now().Unix()-30)
	if err != nil {
		return nil, "", err
	}

	for _, peer := range peers {
		if peer == self {
			continue // already tried local
		}
		found, addr, ferr := rt.forwardTraceLogs(peer, traceId)
		if ferr != nil {
			logger.Errorf("forwardTraceLogs failed: %v", ferr)
			continue
		}
		if len(found) > 0 {
			return found, addr, nil
		}
	}
	return nil, self, nil
}
// forwardTraceLogs asks a peer instance for the logs of traceId by
// calling its /v1/n9e/trace-logs endpoint with service basic auth.
//
// Returns the logs, the instance address they came from, and any
// transport, HTTP-status, or decoding error.
func (rt *Router) forwardTraceLogs(node, traceId string) ([]string, string, error) {
	url := fmt.Sprintf("http://%s/v1/n9e/trace-logs/%s", node, traceId)
	req, err := http.NewRequest("GET", url, nil)
	if err != nil {
		return nil, node, err
	}

	// BasicAuth is a user->pass map; any single pair is accepted.
	for user, pass := range rt.HTTP.APIForService.BasicAuth {
		req.SetBasicAuth(user, pass)
		break
	}

	client := &http.Client{Timeout: 15 * time.Second}
	resp, err := client.Do(req)
	if err != nil {
		return nil, node, fmt.Errorf("forward to %s failed: %v", node, err)
	}
	defer resp.Body.Close()

	// cap the response at 10 MiB to bound memory use
	body, err := io.ReadAll(io.LimitReader(resp.Body, 10*1024*1024))
	if err != nil {
		return nil, node, err
	}

	// surface non-200 responses explicitly instead of failing later with
	// an opaque JSON decode error on an HTML/plain-text error page
	if resp.StatusCode != http.StatusOK {
		return nil, node, fmt.Errorf("forward to %s failed: status %d", node, resp.StatusCode)
	}

	var result struct {
		Dat loggrep.EventDetailResp `json:"dat"`
		Err string                  `json:"err"`
	}
	if err := json.Unmarshal(body, &result); err != nil {
		return nil, node, err
	}
	if result.Err != "" {
		return nil, node, fmt.Errorf("%s", result.Err)
	}
	return result.Dat.Logs, result.Dat.Instance, nil
}

View File

@@ -9,9 +9,9 @@ import (
"github.com/ccfos/nightingale/v6/pkg/flashduty"
"github.com/ccfos/nightingale/v6/pkg/ormx"
"github.com/ccfos/nightingale/v6/pkg/secu"
"github.com/ccfos/nightingale/v6/pkg/ginx"
"github.com/gin-gonic/gin"
"github.com/toolkits/pkg/ginx"
"github.com/toolkits/pkg/logger"
"gorm.io/gorm"
)

View File

@@ -7,9 +7,9 @@ import (
"github.com/ccfos/nightingale/v6/models"
"github.com/ccfos/nightingale/v6/pkg/flashduty"
"github.com/ccfos/nightingale/v6/pkg/strx"
"github.com/ccfos/nightingale/v6/pkg/ginx"
"github.com/gin-gonic/gin"
"github.com/toolkits/pkg/ginx"
"github.com/toolkits/pkg/logger"
)

View File

@@ -5,9 +5,9 @@ import (
"time"
"github.com/ccfos/nightingale/v6/models"
"github.com/ccfos/nightingale/v6/pkg/ginx"
"github.com/gin-gonic/gin"
"github.com/toolkits/pkg/ginx"
)
func (rt *Router) userVariableConfigGets(context *gin.Context) {

View File

@@ -71,10 +71,7 @@ CREATE TABLE `datasource`
`updated_at` bigint not null default 0,
`updated_by` varchar(64) not null default '',
PRIMARY KEY (`id`)
) ENGINE = InnoDB DEFAULT CHARSET = utf8mb4;
-- datasource add weight field
alter table `datasource` add `weight` int not null default 0;
) ENGINE = InnoDB DEFAULT CHARSET = utf8mb4;
CREATE TABLE `builtin_cate` (
`id` bigint unsigned not null auto_increment,

View File

@@ -87,7 +87,7 @@ func Initialize(configDir string, cryptoKey string) (func(), error) {
alert.Start(config.Alert, config.Pushgw, syncStats, alertStats, externalProcessors, targetCache, busiGroupCache, alertMuteCache,
alertRuleCache, notifyConfigCache, taskTplsCache, dsCache, ctx, promClients, userCache, userGroupCache, notifyRuleCache, notifyChannelCache, messageTemplateCache, configCvalCache)
alertrtRouter := alertrt.New(config.HTTP, config.Alert, alertMuteCache, targetCache, busiGroupCache, alertStats, ctx, externalProcessors, config.Log.Dir)
alertrtRouter := alertrt.New(config.HTTP, config.Alert, alertMuteCache, targetCache, busiGroupCache, alertStats, ctx, externalProcessors)
alertrtRouter.Config(r)

View File

@@ -12,8 +12,6 @@ import (
"github.com/mitchellh/mapstructure"
"github.com/toolkits/pkg/logger"
"github.com/ccfos/nightingale/v6/pkg/logx"
)
const (
@@ -180,9 +178,14 @@ func (c *Clickhouse) QueryData(ctx context.Context, query interface{}) ([]models
rows, err := c.QueryTimeseries(ctx, ckQueryParam)
if err != nil {
logx.Warningf(ctx, "query:%+v get data err:%v", ckQueryParam, err)
logger.Warningf("query:%+v get data err:%v", ckQueryParam, err)
return nil, err
}
if err != nil {
logger.Warningf("query:%+v get data err:%v", ckQueryParam, err)
return []models.DataResp{}, err
}
data := make([]models.DataResp, 0)
for i := range rows {
data = append(data, models.DataResp{
@@ -211,7 +214,7 @@ func (c *Clickhouse) QueryLog(ctx context.Context, query interface{}) ([]interfa
rows, err := c.Query(ctx, ckQueryParam)
if err != nil {
logx.Warningf(ctx, "query:%+v get data err:%v", ckQueryParam, err)
logger.Warningf("query:%+v get data err:%v", ckQueryParam, err)
return nil, 0, err
}

View File

@@ -17,7 +17,6 @@ import (
"github.com/ccfos/nightingale/v6/memsto"
"github.com/ccfos/nightingale/v6/models"
"github.com/ccfos/nightingale/v6/pkg/logx"
)
type FixedField string
@@ -391,8 +390,8 @@ func QueryData(ctx context.Context, queryParam interface{}, cliTimeout int64, ve
now := time.Now().Unix()
var start, end int64
if param.End != 0 && param.Start != 0 {
end = param.End
start = param.Start
end = param.End - param.End%param.Interval
start = param.Start - param.Start%param.Interval
} else {
end = now
start = end - param.Interval
@@ -544,7 +543,7 @@ func QueryData(ctx context.Context, queryParam interface{}, cliTimeout int64, ve
source, _ := queryString.Source()
b, _ := json.Marshal(source)
logx.Debugf(ctx, "query_data q:%+v indexArr:%+v tsAggr:%+v query_string:%s", param, indexArr, tsAggr, string(b))
logger.Debugf("query_data q:%+v indexArr:%+v tsAggr:%+v query_string:%s", param, indexArr, tsAggr, string(b))
searchSource := elastic.NewSearchSource().
Query(queryString).
@@ -552,29 +551,21 @@ func QueryData(ctx context.Context, queryParam interface{}, cliTimeout int64, ve
searchSourceString, err := searchSource.Source()
if err != nil {
logx.Warningf(ctx, "query_data searchSource:%s to string error:%v", searchSourceString, err)
logger.Warningf("query_data searchSource:%s to string error:%v", searchSourceString, err)
}
jsonSearchSource, err := json.Marshal(searchSourceString)
if err != nil {
logx.Warningf(ctx, "query_data searchSource:%s to json error:%v", searchSourceString, err)
logger.Warningf("query_data searchSource:%s to json error:%v", searchSourceString, err)
}
result, err := search(ctx, indexArr, searchSource, param.Timeout, param.MaxShard)
if err != nil {
logx.Warningf(ctx, "query_data searchSource:%s query_data error:%v", searchSourceString, err)
logger.Warningf("query_data searchSource:%s query_data error:%v", searchSourceString, err)
return nil, err
}
// 检查是否有 shard failures有部分数据时仅记录警告继续处理
if shardErr := checkShardFailures(ctx, result.Shards, "query_data", searchSourceString); shardErr != nil {
if len(result.Aggregations["ts"]) == 0 {
return nil, shardErr
}
// 有部分数据checkShardFailures 已记录警告,继续处理
}
logx.Infof(ctx, "query_data searchSource:%s resp:%s", string(jsonSearchSource), string(result.Aggregations["ts"]))
logger.Debugf("query_data searchSource:%s resp:%s", string(jsonSearchSource), string(result.Aggregations["ts"]))
js, err := simplejson.NewJson(result.Aggregations["ts"])
if err != nil {
@@ -611,40 +602,6 @@ func QueryData(ctx context.Context, queryParam interface{}, cliTimeout int64, ve
return items, nil
}
// checkShardFailures 检查 ES 查询结果中的 shard failures返回格式化的错误信息
func checkShardFailures(ctx context.Context, shards *elastic.ShardsInfo, logPrefix string, queryContext interface{}) error {
if shards == nil || shards.Failed == 0 || len(shards.Failures) == 0 {
return nil
}
var failureReasons []string
for _, failure := range shards.Failures {
reason := ""
if failure.Reason != nil {
if reasonType, ok := failure.Reason["type"].(string); ok {
reason = reasonType
}
if reasonMsg, ok := failure.Reason["reason"].(string); ok {
if reason != "" {
reason += ": " + reasonMsg
} else {
reason = reasonMsg
}
}
}
if reason != "" {
failureReasons = append(failureReasons, fmt.Sprintf("index=%s shard=%d: %s", failure.Index, failure.Shard, reason))
}
}
if len(failureReasons) > 0 {
errMsg := fmt.Sprintf("elasticsearch shard failures (%d/%d failed): %s", shards.Failed, shards.Total, strings.Join(failureReasons, "; "))
logx.Warningf(ctx, "%s query:%v %s", logPrefix, queryContext, errMsg)
return fmt.Errorf("%s", errMsg)
}
return nil
}
func HitFilter(typ string) bool {
switch typ {
case "keyword", "date", "long", "integer", "short", "byte", "double", "float", "half_float", "scaled_float", "unsigned_long":
@@ -721,34 +678,28 @@ func QueryLog(ctx context.Context, queryParam interface{}, timeout int64, versio
} else {
source = source.From(param.P).Sort(param.DateField, param.Ascending)
}
sourceBytes, _ := json.Marshal(source)
result, err := search(ctx, indexArr, source, param.Timeout, param.MaxShard)
if err != nil {
logx.Warningf(ctx, "query_log source:%s error:%v", string(sourceBytes), err)
logger.Warningf("query data error:%v", err)
return nil, 0, err
}
// 检查是否有 shard failures有部分数据时仅记录警告继续处理
if shardErr := checkShardFailures(ctx, result.Shards, "query_log", string(sourceBytes)); shardErr != nil {
if len(result.Hits.Hits) == 0 {
return nil, 0, shardErr
}
// 有部分数据checkShardFailures 已记录警告,继续处理
}
total := result.TotalHits()
var ret []interface{}
logx.Debugf(ctx, "query_log source:%s len:%d total:%d", string(sourceBytes), len(result.Hits.Hits), total)
b, _ := json.Marshal(source)
logger.Debugf("query data result query source:%s len:%d total:%d", string(b), len(result.Hits.Hits), total)
resultBytes, _ := json.Marshal(result)
logx.Debugf(ctx, "query_log source:%s result:%s", string(sourceBytes), string(resultBytes))
logger.Debugf("query data result query source:%s result:%s", string(b), string(resultBytes))
if strings.HasPrefix(version, "6") {
for i := 0; i < len(result.Hits.Hits); i++ {
var x map[string]interface{}
err := json.Unmarshal(result.Hits.Hits[i].Source, &x)
if err != nil {
logx.Warningf(ctx, "Unmarshal source error:%v", err)
logger.Warningf("Unmarshal source error:%v", err)
continue
}

View File

@@ -133,5 +133,4 @@ type DatasourceInfo struct {
CreatedAt int64 `json:"created_at"`
UpdatedAt int64 `json:"updated_at"`
IsDefault bool `json:"is_default"`
Weight int `json:"weight"`
}

View File

@@ -14,8 +14,6 @@ import (
"github.com/mitchellh/mapstructure"
"github.com/toolkits/pkg/logger"
"github.com/ccfos/nightingale/v6/pkg/logx"
)
const (
@@ -150,7 +148,7 @@ func (d *Doris) QueryData(ctx context.Context, query interface{}) ([]models.Data
}
}
items, err := d.QueryTimeseries(ctx, &doris.QueryParam{
items, err := d.QueryTimeseries(context.TODO(), &doris.QueryParam{
Database: dorisQueryParam.Database,
Sql: dorisQueryParam.SQL,
Keys: types.Keys{
@@ -161,7 +159,7 @@ func (d *Doris) QueryData(ctx context.Context, query interface{}) ([]models.Data
},
})
if err != nil {
logx.Warningf(ctx, "query:%+v get data err:%v", dorisQueryParam, err)
logger.Warningf("query:%+v get data err:%v", dorisQueryParam, err)
return []models.DataResp{}, err
}
data := make([]models.DataResp, 0)
@@ -174,7 +172,7 @@ func (d *Doris) QueryData(ctx context.Context, query interface{}) ([]models.Data
}
// parse resp to time series data
logx.Infof(ctx, "req:%+v keys:%+v \n data:%v", dorisQueryParam, dorisQueryParam.Keys, data)
logger.Infof("req:%+v keys:%+v \n data:%v", dorisQueryParam, dorisQueryParam.Keys, data)
return data, nil
}
@@ -210,7 +208,7 @@ func (d *Doris) QueryLog(ctx context.Context, query interface{}) ([]interface{},
Sql: dorisQueryParam.SQL,
})
if err != nil {
logx.Warningf(ctx, "query:%+v get data err:%v", dorisQueryParam, err)
logger.Warningf("query:%+v get data err:%v", dorisQueryParam, err)
return []interface{}{}, 0, err
}
logs := make([]interface{}, 0)

View File

@@ -19,8 +19,7 @@ import (
"github.com/mitchellh/mapstructure"
"github.com/olivere/elastic/v7"
"github.com/ccfos/nightingale/v6/pkg/logx"
"github.com/toolkits/pkg/logger"
)
const (
@@ -381,14 +380,14 @@ func (e *Elasticsearch) QueryMapData(ctx context.Context, query interface{}) ([]
var result []map[string]string
for _, item := range res {
logx.Debugf(ctx, "query:%v item:%v", query, item)
logger.Debugf("query:%v item:%v", query, item)
if itemMap, ok := item.(*elastic.SearchHit); ok {
mItem := make(map[string]string)
// 遍历 fields 字段的每个键值对
sourceMap := make(map[string]interface{})
err := json.Unmarshal(itemMap.Source, &sourceMap)
if err != nil {
logx.Warningf(ctx, "unmarshal source%s error:%v", string(itemMap.Source), err)
logger.Warningf("unmarshal source%s error:%v", string(itemMap.Source), err)
continue
}

View File

@@ -15,8 +15,6 @@ import (
"github.com/mitchellh/mapstructure"
"github.com/toolkits/pkg/logger"
"github.com/ccfos/nightingale/v6/pkg/logx"
)
const (
@@ -167,7 +165,7 @@ func (m *MySQL) QueryData(ctx context.Context, query interface{}) ([]models.Data
})
if err != nil {
logx.Warningf(ctx, "query:%+v get data err:%v", mysqlQueryParam, err)
logger.Warningf("query:%+v get data err:%v", mysqlQueryParam, err)
return []models.DataResp{}, err
}
data := make([]models.DataResp, 0)
@@ -209,7 +207,7 @@ func (m *MySQL) QueryLog(ctx context.Context, query interface{}) ([]interface{},
})
if err != nil {
logx.Warningf(ctx, "query:%+v get data err:%v", mysqlQueryParam, err)
logger.Warningf("query:%+v get data err:%v", mysqlQueryParam, err)
return []interface{}{}, 0, err
}
logs := make([]interface{}, 0)

View File

@@ -16,8 +16,6 @@ import (
"github.com/ccfos/nightingale/v6/models"
"github.com/mitchellh/mapstructure"
"github.com/toolkits/pkg/logger"
"github.com/ccfos/nightingale/v6/pkg/logx"
)
const (
@@ -199,7 +197,7 @@ func (p *PostgreSQL) QueryData(ctx context.Context, query interface{}) ([]models
})
if err != nil {
logx.Warningf(ctx, "query:%+v get data err:%v", postgresqlQueryParam, err)
logger.Warningf("query:%+v get data err:%v", postgresqlQueryParam, err)
return []models.DataResp{}, err
}
data := make([]models.DataResp, 0)
@@ -212,7 +210,7 @@ func (p *PostgreSQL) QueryData(ctx context.Context, query interface{}) ([]models
}
// parse resp to time series data
logx.Infof(ctx, "req:%+v keys:%+v \n data:%v", postgresqlQueryParam, postgresqlQueryParam.Keys, data)
logger.Infof("req:%+v keys:%+v \n data:%v", postgresqlQueryParam, postgresqlQueryParam.Keys, data)
return data, nil
}
@@ -251,7 +249,7 @@ func (p *PostgreSQL) QueryLog(ctx context.Context, query interface{}) ([]interfa
Sql: postgresqlQueryParam.SQL,
})
if err != nil {
logx.Warningf(ctx, "query:%+v get data err:%v", postgresqlQueryParam, err)
logger.Warningf("query:%+v get data err:%v", postgresqlQueryParam, err)
return []interface{}{}, 0, err
}
logs := make([]interface{}, 0)

View File

@@ -12,8 +12,6 @@ import (
"github.com/prometheus/common/model"
"github.com/toolkits/pkg/logger"
"github.com/ccfos/nightingale/v6/pkg/logx"
"github.com/ccfos/nightingale/v6/datasource"
td "github.com/ccfos/nightingale/v6/dskit/tdengine"
"github.com/ccfos/nightingale/v6/models"
@@ -120,7 +118,7 @@ func (td *TDengine) MakeTSQuery(ctx context.Context, query interface{}, eventTag
}
func (td *TDengine) QueryData(ctx context.Context, queryParam interface{}) ([]models.DataResp, error) {
return td.Query(ctx, queryParam, 0)
return td.Query(queryParam, 0)
}
func (td *TDengine) QueryLog(ctx context.Context, queryParam interface{}) ([]interface{}, int64, error) {
@@ -172,7 +170,7 @@ func (td *TDengine) QueryMapData(ctx context.Context, query interface{}) ([]map[
return nil, nil
}
func (td *TDengine) Query(ctx context.Context, query interface{}, delay ...int) ([]models.DataResp, error) {
func (td *TDengine) Query(query interface{}, delay ...int) ([]models.DataResp, error) {
b, err := json.Marshal(query)
if err != nil {
return nil, err
@@ -214,7 +212,7 @@ func (td *TDengine) Query(ctx context.Context, query interface{}, delay ...int)
if err != nil {
return nil, err
}
logx.Debugf(ctx, "tdengine query:%s result: %+v", q.Query, data)
logger.Debugf("tdengine query:%s result: %+v", q.Query, data)
return ConvertToTStData(data, q.Keys, q.Ref)
}

View File

@@ -321,7 +321,6 @@ CREATE TABLE alert_rule (
create_by varchar(64) not null default '',
update_at bigint not null default 0,
update_by varchar(64) not null default '',
time_zone varchar(64) not null default '',
PRIMARY KEY (id)
) ;
CREATE INDEX alert_rule_group_id_idx ON alert_rule (group_id);
@@ -739,7 +738,6 @@ CREATE TABLE datasource
http varchar(4096) not null default '',
auth varchar(8192) not null default '',
is_default boolean not null default false,
weight int not null default 0,
created_at bigint not null default 0,
created_by varchar(64) not null default '',
updated_at bigint not null default 0,

View File

@@ -293,7 +293,6 @@ CREATE TABLE `alert_rule` (
`update_at` bigint not null default 0,
`update_by` varchar(64) not null default '',
`cron_pattern` varchar(64),
`time_zone` varchar(64) not null default '',
`datasource_queries` text,
PRIMARY KEY (`id`),
KEY (`group_id`),
@@ -656,7 +655,6 @@ CREATE TABLE `datasource`
`http` varchar(4096) not null default '',
`auth` varchar(8192) not null default '',
`is_default` boolean COMMENT 'is default datasource',
`weight` int not null default 0,
`created_at` bigint not null default 0,
`created_by` varchar(64) not null default '',
`updated_at` bigint not null default 0,

View File

@@ -331,36 +331,3 @@ CREATE TABLE `event_pipeline_execution` (
ALTER TABLE `builtin_metrics` ADD COLUMN `expression_type` varchar(32) NOT NULL DEFAULT 'promql' COMMENT 'expression type: metric_name or promql';
ALTER TABLE `builtin_metrics` ADD COLUMN `metric_type` varchar(191) NOT NULL DEFAULT '' COMMENT 'metric type like counter/gauge';
ALTER TABLE `builtin_metrics` ADD COLUMN `extra_fields` text COMMENT 'custom extra fields';
/* v9 2026-01-16 saved_view */
CREATE TABLE `saved_view` (
`id` bigint NOT NULL AUTO_INCREMENT,
`name` varchar(255) NOT NULL COMMENT 'view name',
`page` varchar(64) NOT NULL COMMENT 'page identifier',
`filter` text COMMENT 'filter config (JSON)',
`public_cate` int NOT NULL DEFAULT 0 COMMENT 'public category: 0-self, 1-team, 2-all',
`gids` text COMMENT 'team group ids (JSON)',
`create_at` bigint NOT NULL DEFAULT 0 COMMENT 'create timestamp',
`create_by` varchar(64) NOT NULL DEFAULT '' COMMENT 'creator',
`update_at` bigint NOT NULL DEFAULT 0 COMMENT 'update timestamp',
`update_by` varchar(64) NOT NULL DEFAULT '' COMMENT 'updater',
PRIMARY KEY (`id`),
KEY `idx_page` (`page`),
KEY `idx_create_by` (`create_by`)
) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COMMENT='saved views for pages';
CREATE TABLE `user_view_favorite` (
`id` bigint NOT NULL AUTO_INCREMENT,
`view_id` bigint NOT NULL COMMENT 'saved view id',
`user_id` bigint NOT NULL COMMENT 'user id',
`create_at` bigint NOT NULL DEFAULT 0 COMMENT 'create timestamp',
PRIMARY KEY (`id`),
KEY `idx_view_id` (`view_id`),
KEY `idx_user_id` (`user_id`)
) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COMMENT='user favorite views';
/* v9 2026-01-20 datasource weight */
ALTER TABLE `datasource` ADD COLUMN `weight` int not null default 0 COMMENT 'weight for sorting';
/* v9 2026-01-20 alert_rule time_zone support */
ALTER TABLE `alert_rule` ADD COLUMN `time_zone` varchar(64) not null default '';

View File

@@ -253,7 +253,6 @@ CREATE TABLE `alert_rule` (
`update_at` bigint not null default 0,
`update_by` varchar(64) not null default '',
`cron_pattern` varchar(64),
`time_zone` varchar(64) not null default '',
`datasource_queries` text
);
CREATE INDEX `idx_alert_rule_group_id` ON `alert_rule` (`group_id` asc);
@@ -590,7 +589,6 @@ CREATE TABLE `datasource`
`http` varchar(4096) not null default '',
`auth` varchar(8192) not null default '',
`is_default` tinyint not null default 0,
`weight` int not null default 0,
`created_at` bigint not null default 0,
`created_by` varchar(64) not null default '',
`updated_at` bigint not null default 0,

View File

@@ -90,7 +90,7 @@ func getDatasourcesFromDBLoop(ctx *ctx.Context, fromAPI bool) {
foundDefaultDatasource = true
}
// logger.Debugf("get datasource: %+v", item)
logger.Debugf("get datasource: %+v", item)
ds := datasource.DatasourceInfo{
Id: item.Id,
Name: item.Name,
@@ -104,7 +104,6 @@ func getDatasourcesFromDBLoop(ctx *ctx.Context, fromAPI bool) {
AuthJson: item.AuthJson,
Status: item.Status,
IsDefault: item.IsDefault,
Weight: item.Weight,
}
if item.PluginType == "elasticsearch" {
@@ -237,5 +236,5 @@ func PutDatasources(items []datasource.DatasourceInfo) {
}
}
// logger.Debugf("get plugin by type success Ids:%v", ids)
logger.Debugf("get plugin by type success Ids:%v", ids)
}

View File

@@ -140,7 +140,7 @@ func (ncc *NotifyChannelCacheType) addOrUpdateChannels(newChannels map[int64]*mo
logger.Infof("updating channel %d (new: %t)", chID, !exists)
ncc.stopChannelResources(chID)
} else {
logger.Debugf("channel %d config not changed", chID)
logger.Infof("channel %d config not changed", chID)
continue
}
}
@@ -284,8 +284,8 @@ func (ncc *NotifyChannelCacheType) processNotifyTask(task *NotifyTask) {
start := time.Now()
resp, err := task.NotifyChannel.SendHTTP(task.Events, task.TplContent, task.CustomParams, task.Sendtos, httpClient)
resp = fmt.Sprintf("send_time: %s duration: %d ms %s", time.Now().Format("2006-01-02 15:04:05"), time.Since(start).Milliseconds(), resp)
logger.Infof("http_sendernotify_id: %d, channel_name: %v, event:%s, tplContent:%v, customParams:%v, userInfo:%+v, respBody: %v, err: %v",
task.NotifyRuleId, task.NotifyChannel.Name, task.Events[0].Hash, task.TplContent, task.CustomParams, task.Sendtos, resp, err)
logger.Infof("http_sendernotify_id: %d, channel_name: %v, event:%+v, tplContent:%v, customParams:%v, userInfo:%+v, respBody: %v, err: %v",
task.NotifyRuleId, task.NotifyChannel.Name, task.Events[0], task.TplContent, task.CustomParams, task.Sendtos, resp, err)
// 调用通知记录回调函数
if ncc.notifyRecordFunc != nil {
@@ -296,8 +296,8 @@ func (ncc *NotifyChannelCacheType) processNotifyTask(task *NotifyTask) {
start := time.Now()
resp, err := task.NotifyChannel.SendHTTP(task.Events, task.TplContent, task.CustomParams, []string{task.Sendtos[i]}, httpClient)
resp = fmt.Sprintf("send_time: %s duration: %d ms %s", time.Now().Format("2006-01-02 15:04:05"), time.Since(start).Milliseconds(), resp)
logger.Infof("http_sender notify_id: %d, channel_name: %v, event:%s, tplContent:%v, customParams:%v, userInfo:%+v, respBody: %v, err: %v",
task.NotifyRuleId, task.NotifyChannel.Name, task.Events[0].Hash, task.TplContent, task.CustomParams, task.Sendtos[i], resp, err)
logger.Infof("http_sender notify_id: %d, channel_name: %v, event:%+v, tplContent:%v, customParams:%v, userInfo:%+v, respBody: %v, err: %v",
task.NotifyRuleId, task.NotifyChannel.Name, task.Events[0], task.TplContent, task.CustomParams, task.Sendtos[i], resp, err)
// 调用通知记录回调函数
if ncc.notifyRecordFunc != nil {

View File

@@ -27,8 +27,7 @@ type TargetCacheType struct {
redis storage.Redis
sync.RWMutex
targets map[string]*models.Target // key: ident
targetsIndex map[string][]string // key: ip, value: ident list
targets map[string]*models.Target // key: ident
}
func NewTargetCache(ctx *ctx.Context, stats *Stats, redis storage.Redis) *TargetCacheType {
@@ -39,7 +38,6 @@ func NewTargetCache(ctx *ctx.Context, stats *Stats, redis storage.Redis) *Target
stats: stats,
redis: redis,
targets: make(map[string]*models.Target),
targetsIndex: make(map[string][]string),
}
tc.SyncTargets()
@@ -53,7 +51,6 @@ func (tc *TargetCacheType) Reset() {
tc.statTotal = -1
tc.statLastUpdated = -1
tc.targets = make(map[string]*models.Target)
tc.targetsIndex = make(map[string][]string)
}
func (tc *TargetCacheType) StatChanged(total, lastUpdated int64) bool {
@@ -65,17 +62,8 @@ func (tc *TargetCacheType) StatChanged(total, lastUpdated int64) bool {
}
func (tc *TargetCacheType) Set(m map[string]*models.Target, total, lastUpdated int64) {
idx := make(map[string][]string, len(m))
for ident, target := range m {
if _, ok := idx[target.HostIp]; !ok {
idx[target.HostIp] = []string{}
}
idx[target.HostIp] = append(idx[target.HostIp], ident)
}
tc.Lock()
tc.targets = m
tc.targetsIndex = idx
tc.Unlock()
// only one goroutine used, so no need lock
@@ -90,75 +78,6 @@ func (tc *TargetCacheType) Get(ident string) (*models.Target, bool) {
return val, has
}
// GetByIp resolves all cached targets whose host IP matches ip, using the
// ip -> ident index. The returned bool is true only when at least one
// target was found.
func (tc *TargetCacheType) GetByIp(ip string) ([]*models.Target, bool) {
	tc.RLock()
	defer tc.RUnlock()

	identList, ok := tc.targetsIndex[ip]
	if !ok {
		return nil, false
	}

	result := make([]*models.Target, 0, len(identList))
	for _, id := range identList {
		if t, found := tc.targets[id]; found {
			result = append(result, t)
		}
	}
	return result, len(result) > 0
}
// GetAll returns a flat slice containing every target currently held in
// the cache.
func (tc *TargetCacheType) GetAll() []*models.Target {
	tc.RLock()
	defer tc.RUnlock()

	all := make([]*models.Target, 0, len(tc.targets))
	for _, t := range tc.targets {
		all = append(all, t)
	}
	return all
}
// GetAllBeatTime returns a snapshot of every cached target's heartbeat
// time, keyed by ident.
func (tc *TargetCacheType) GetAllBeatTime() map[string]int64 {
	tc.RLock()
	defer tc.RUnlock()

	out := make(map[string]int64, len(tc.targets))
	for ident, t := range tc.targets {
		out[ident] = t.BeatTime
	}
	return out
}
// refreshBeatTime reloads the BeatTime of every cached target from Redis.
// It snapshots the ident list under the read lock, performs the Redis
// fetch without holding any lock, then applies the results under the
// write lock. It is a no-op when Redis is not configured.
func (tc *TargetCacheType) refreshBeatTime() {
	if tc.redis == nil {
		return
	}

	// snapshot idents so the Redis round-trip happens lock-free
	tc.RLock()
	idents := make([]string, 0, len(tc.targets))
	for ident := range tc.targets {
		idents = append(idents, ident)
	}
	tc.RUnlock()

	if len(idents) == 0 {
		return
	}

	beatTimes := models.FetchBeatTimesFromRedis(tc.redis, idents)
	if len(beatTimes) == 0 {
		return
	}

	tc.Lock()
	defer tc.Unlock()
	for ident, ts := range beatTimes {
		if t, ok := tc.targets[ident]; ok {
			t.BeatTime = ts
		}
	}
}
func (tc *TargetCacheType) Gets(idents []string) []*models.Target {
tc.RLock()
defer tc.RUnlock()
@@ -186,7 +105,7 @@ func (tc *TargetCacheType) GetOffsetHost(targets []*models.Target, now, offset i
continue
}
if now-target.BeatTime > 120 {
if now-target.UpdateAt > 120 {
// means this target is not a active host, do not check offset
continue
}
@@ -228,7 +147,6 @@ func (tc *TargetCacheType) syncTargets() error {
}
if !tc.StatChanged(stat.Total, stat.LastUpdated) {
tc.refreshBeatTime()
tc.stats.GaugeCronDuration.WithLabelValues("sync_targets").Set(0)
tc.stats.GaugeSyncNumber.WithLabelValues("sync_targets").Set(0)
dumper.PutSyncRecord("targets", start.Unix(), -1, -1, "not changed")
@@ -252,9 +170,6 @@ func (tc *TargetCacheType) syncTargets() error {
}
}
// 从 Redis 批量获取心跳时间填充 BeatTime
models.FillTargetsBeatTime(tc.redis, lst)
for i := 0; i < len(lst); i++ {
m[lst[i].Ident] = lst[i]
}
@@ -271,18 +186,57 @@ func (tc *TargetCacheType) syncTargets() error {
// get host update time
func (tc *TargetCacheType) GetHostUpdateTime(targets []string) map[string]int64 {
metaMap := make(map[string]int64)
if tc.redis == nil {
return make(map[string]int64)
return metaMap
}
metaMap := models.FetchBeatTimesFromRedis(tc.redis, targets)
num := 0
var keys []string
for i := 0; i < len(targets); i++ {
keys = append(keys, models.WrapIdentUpdateTime(targets[i]))
num++
if num == 100 {
vals := storage.MGet(context.Background(), tc.redis, keys)
for _, value := range vals {
var hostUpdateTime models.HostUpdateTime
if value == nil {
continue
}
err := json.Unmarshal(value, &hostUpdateTime)
if err != nil {
logger.Errorf("failed to unmarshal host meta: %s value:%v", err, value)
continue
}
metaMap[hostUpdateTime.Ident] = hostUpdateTime.UpdateTime
}
keys = keys[:0]
num = 0
}
}
vals := storage.MGet(context.Background(), tc.redis, keys)
for _, value := range vals {
var hostUpdateTime models.HostUpdateTime
if value == nil {
continue
}
err := json.Unmarshal(value, &hostUpdateTime)
if err != nil {
logger.Warningf("failed to unmarshal host err:%v value:%s", err, string(value))
continue
}
metaMap[hostUpdateTime.Ident] = hostUpdateTime.UpdateTime
}
for _, ident := range targets {
if _, ok := metaMap[ident]; !ok {
// if not exists, get from cache
target, exists := tc.Get(ident)
if exists {
metaMap[ident] = target.BeatTime
metaMap[ident] = target.UpdateAt
}
}
}

View File

@@ -15,8 +15,8 @@ import (
"github.com/ccfos/nightingale/v6/pkg/poster"
"github.com/ccfos/nightingale/v6/pkg/tplx"
"github.com/ccfos/nightingale/v6/pkg/unit"
"github.com/ccfos/nightingale/v6/pkg/ginx"
"github.com/toolkits/pkg/ginx"
"github.com/toolkits/pkg/logger"
)

View File

@@ -251,18 +251,6 @@ func AlertHisEventGetById(ctx *ctx.Context, id int64) (*AlertHisEvent, error) {
return AlertHisEventGet(ctx, "id=?", id)
}
// AlertHisEventGetByHash returns the most recent historical alert event
// matching hash, ordered by trigger_time descending. A nil event together
// with a nil error means no matching record exists.
func AlertHisEventGetByHash(ctx *ctx.Context, hash string) (*AlertHisEvent, error) {
	var events []*AlertHisEvent
	session := DB(ctx).Where("hash = ?", hash).Order("trigger_time desc").Limit(1)
	if err := session.Find(&events).Error; err != nil {
		return nil, err
	}
	if len(events) == 0 {
		// not found is not treated as an error
		return nil, nil
	}
	return events[0], nil
}
func AlertHisEventBatchDelete(ctx *ctx.Context, timestamp int64, severities []int, limit int) (int64, error) {
db := DB(ctx).Where("last_eval_time < ?", timestamp)
if len(severities) > 0 {

View File

@@ -119,7 +119,6 @@ type AlertRule struct {
CurEventCount int64 `json:"cur_event_count" gorm:"-"`
UpdateByNickname string `json:"update_by_nickname" gorm:"-"` // for fe
CronPattern string `json:"cron_pattern"`
TimeZone string `json:"time_zone" gorm:"default:''"` // timezone for alert rule, e.g. "Asia/Shanghai", "UTC", empty for default
NotifyRuleIds []int64 `json:"notify_rule_ids" gorm:"serializer:json"`
PipelineConfigs []PipelineConfig `json:"pipeline_configs" gorm:"serializer:json"`
NotifyVersion int `json:"notify_version"` // 0: old, 1: new
@@ -483,13 +482,6 @@ func (ar *AlertRule) Verify() error {
return errors.New("name is blank")
}
if ar.TimeZone != "" {
_, err := time.LoadLocation(ar.TimeZone)
if err != nil {
return fmt.Errorf("invalid timezone: %s", ar.TimeZone)
}
}
if str.Dangerous(ar.Name) {
return errors.New("Name has invalid characters")
}
@@ -517,16 +509,10 @@ func (ar *AlertRule) Verify() error {
ar.AppendTags = strings.TrimSpace(ar.AppendTags)
arr := strings.Fields(ar.AppendTags)
appendTagKeys := make(map[string]struct{})
for i := 0; i < len(arr); i++ {
if !strings.Contains(arr[i], "=") {
return fmt.Errorf("AppendTags(%s) invalid", arr[i])
}
pair := strings.SplitN(arr[i], "=", 2)
if _, exists := appendTagKeys[pair[0]]; exists {
return fmt.Errorf("AppendTags has duplicate key: %s", pair[0])
}
appendTagKeys[pair[0]] = struct{}{}
}
gids := strings.Fields(ar.NotifyGroups)

View File

@@ -45,7 +45,6 @@ type Datasource struct {
CreatedBy string `json:"created_by"`
UpdatedBy string `json:"updated_by"`
IsDefault bool `json:"is_default"`
Weight int `json:"weight"`
Transport *http.Transport `json:"-" gorm:"-"`
ForceSave bool `json:"force_save" gorm:"-"`
}

View File

@@ -174,7 +174,6 @@ func columnHasIndex(db *gorm.DB, dst interface{}, indexColumn string) bool {
type AlertRule struct {
ExtraConfig string `gorm:"type:text;column:extra_config"`
CronPattern string `gorm:"type:varchar(64);column:cron_pattern"`
TimeZone string `gorm:"type:varchar(64);column:time_zone;not null;default:''"`
DatasourceQueries []models.DatasourceQuery `gorm:"datasource_queries;type:text;serializer:json"` // datasource queries
NotifyRuleIds []int64 `gorm:"column:notify_rule_ids;type:varchar(1024)"`
NotifyVersion int `gorm:"column:notify_version;type:int;default:0"`
@@ -235,7 +234,6 @@ type Target struct {
type Datasource struct {
IsDefault bool `gorm:"column:is_default;type:boolean;comment:is default datasource"`
Identifier string `gorm:"column:identifier;type:varchar(255);default:'';comment:identifier"`
Weight int `gorm:"column:weight;type:int;default:0;comment:weight for sorting"`
}
type Configs struct {

Some files were not shown because too many files have changed in this diff Show More