mirror of
https://github.com/ccfos/nightingale.git
synced 2026-03-13 19:38:59 +00:00
Compare commits
34 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
fa49449588 | ||
|
|
876f1d1084 | ||
|
|
678830be37 | ||
|
|
5e30f3a00d | ||
|
|
7f1eefd033 | ||
|
|
c8dd26ca4c | ||
|
|
37c57e66ea | ||
|
|
878e940325 | ||
|
|
cbc715305d | ||
|
|
5011766c70 | ||
|
|
b3ed8a1e8c | ||
|
|
814ded90b6 | ||
|
|
43e89040eb | ||
|
|
3d339fe03c | ||
|
|
7618858912 | ||
|
|
15b4ef8611 | ||
|
|
5083a5cc96 | ||
|
|
d51e83d7d4 | ||
|
|
601d4f0c95 | ||
|
|
90fac12953 | ||
|
|
19d76824d9 | ||
|
|
1341554bbc | ||
|
|
fd3ce338cb | ||
|
|
b8f36ce3cb | ||
|
|
037112a9e6 | ||
|
|
c6e75d31a1 | ||
|
|
bd24f5b056 | ||
|
|
89551c8edb | ||
|
|
042b44940d | ||
|
|
8cd8674848 | ||
|
|
7bb6ac8a03 | ||
|
|
76b35276af | ||
|
|
439a21b784 | ||
|
|
47e70a2dba |
22
.github/workflows/issue-translator.yml
vendored
Normal file
22
.github/workflows/issue-translator.yml
vendored
Normal file
@@ -0,0 +1,22 @@
|
||||
name: 'Issue Translator'
|
||||
|
||||
on:
|
||||
issues:
|
||||
types: [opened]
|
||||
|
||||
jobs:
|
||||
translate:
|
||||
runs-on: ubuntu-latest
|
||||
permissions:
|
||||
issues: write
|
||||
contents: read
|
||||
steps:
|
||||
- name: Translate Issues
|
||||
uses: usthe/issues-translate-action@v2.7
|
||||
with:
|
||||
# 是否翻译 issue 标题
|
||||
IS_MODIFY_TITLE: true
|
||||
# GitHub Token
|
||||
BOT_GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
|
||||
# 自定义翻译标注(可选)
|
||||
# CUSTOM_BOT_NOTE: "Translation by bot"
|
||||
10
README.md
10
README.md
@@ -47,7 +47,7 @@ Nightingale itself does not provide monitoring data collection capabilities. We
|
||||
|
||||
For certain edge data centers with poor network connectivity to the central Nightingale server, we offer a distributed deployment mode for the alerting engine. In this mode, even if the network is disconnected, the alerting functionality remains unaffected.
|
||||
|
||||

|
||||

|
||||
|
||||
> In the above diagram, Data Center A has a good network with the central data center, so it uses the Nightingale process in the central data center as the alerting engine. Data Center B has a poor network with the central data center, so it deploys `n9e-edge` as the alerting engine to handle alerting for its own data sources.
|
||||
|
||||
@@ -68,7 +68,7 @@ Then Nightingale is not suitable. It is recommended that you choose on-call prod
|
||||
|
||||
## 🔑 Key Features
|
||||
|
||||

|
||||

|
||||
|
||||
- Nightingale supports alerting rules, mute rules, subscription rules, and notification rules. It natively supports 20 types of notification media and allows customization of message templates.
|
||||
- It supports event pipelines for Pipeline processing of alarms, facilitating automated integration with in-house systems. For example, it can append metadata to alarms or perform relabeling on events.
|
||||
@@ -76,19 +76,19 @@ Then Nightingale is not suitable. It is recommended that you choose on-call prod
|
||||
- Many databases and middleware come with built-in alert rules that can be directly imported and used. It also supports direct import of Prometheus alerting rules.
|
||||
- It supports alerting self-healing, which automatically triggers a script to execute predefined logic after an alarm is generated—such as cleaning up disk space or capturing the current system state.
|
||||
|
||||

|
||||

|
||||
|
||||
- Nightingale archives historical alarms and supports multi-dimensional query and statistics.
|
||||
- It supports flexible aggregation grouping, allowing a clear view of the distribution of alarms across the company.
|
||||
|
||||

|
||||

|
||||
|
||||
- Nightingale has built-in metric descriptions, dashboards, and alerting rules for common operating systems, middleware, and databases, which are contributed by the community with varying quality.
|
||||
- It directly receives data via multiple protocols such as Remote Write, OpenTSDB, Datadog, and Falcon, integrates with various Agents.
|
||||
- It supports data sources like Prometheus, ElasticSearch, Loki, ClickHouse, MySQL, Postgres, allowing alerting based on data from these sources.
|
||||
- Nightingale can be easily embedded into internal enterprise systems (e.g. Grafana, CMDB), and even supports configuring menu visibility for these embedded systems.
|
||||
|
||||

|
||||

|
||||
|
||||
- Nightingale supports dashboard functionality, including common chart types, and comes with pre-built dashboards. The image above is a screenshot of one of these dashboards.
|
||||
- If you are already accustomed to Grafana, it is recommended to continue using Grafana for visualization, as Grafana has deeper expertise in this area.
|
||||
|
||||
@@ -118,7 +118,7 @@ func Start(alertc aconf.Alert, pushgwc pconf.Pushgw, syncStats *memsto.Stats, al
|
||||
eventProcessorCache := memsto.NewEventProcessorCache(ctx, syncStats)
|
||||
|
||||
dp := dispatch.NewDispatch(alertRuleCache, userCache, userGroupCache, alertSubscribeCache, targetCache, notifyConfigCache, taskTplsCache, notifyRuleCache, notifyChannelCache, messageTemplateCache, eventProcessorCache, alertc.Alerting, ctx, alertStats)
|
||||
consumer := dispatch.NewConsumer(alertc.Alerting, ctx, dp, promClients)
|
||||
consumer := dispatch.NewConsumer(alertc.Alerting, ctx, dp, promClients, alertMuteCache)
|
||||
|
||||
notifyRecordComsumer := sender.NewNotifyRecordConsumer(ctx)
|
||||
|
||||
|
||||
@@ -10,6 +10,7 @@ import (
|
||||
"github.com/ccfos/nightingale/v6/alert/aconf"
|
||||
"github.com/ccfos/nightingale/v6/alert/common"
|
||||
"github.com/ccfos/nightingale/v6/alert/queue"
|
||||
"github.com/ccfos/nightingale/v6/memsto"
|
||||
"github.com/ccfos/nightingale/v6/models"
|
||||
"github.com/ccfos/nightingale/v6/pkg/ctx"
|
||||
"github.com/ccfos/nightingale/v6/pkg/poster"
|
||||
@@ -26,10 +27,15 @@ type Consumer struct {
|
||||
alerting aconf.Alerting
|
||||
ctx *ctx.Context
|
||||
|
||||
dispatch *Dispatch
|
||||
promClients *prom.PromClientMap
|
||||
dispatch *Dispatch
|
||||
promClients *prom.PromClientMap
|
||||
alertMuteCache *memsto.AlertMuteCacheType
|
||||
}
|
||||
|
||||
type EventMuteHookFunc func(event *models.AlertCurEvent) bool
|
||||
|
||||
var EventMuteHook EventMuteHookFunc = func(event *models.AlertCurEvent) bool { return false }
|
||||
|
||||
func InitRegisterQueryFunc(promClients *prom.PromClientMap) {
|
||||
tplx.RegisterQueryFunc(func(datasourceID int64, promql string) model.Value {
|
||||
if promClients.IsNil(datasourceID) {
|
||||
@@ -43,12 +49,14 @@ func InitRegisterQueryFunc(promClients *prom.PromClientMap) {
|
||||
}
|
||||
|
||||
// 创建一个 Consumer 实例
|
||||
func NewConsumer(alerting aconf.Alerting, ctx *ctx.Context, dispatch *Dispatch, promClients *prom.PromClientMap) *Consumer {
|
||||
func NewConsumer(alerting aconf.Alerting, ctx *ctx.Context, dispatch *Dispatch, promClients *prom.PromClientMap, alertMuteCache *memsto.AlertMuteCacheType) *Consumer {
|
||||
return &Consumer{
|
||||
alerting: alerting,
|
||||
ctx: ctx,
|
||||
dispatch: dispatch,
|
||||
promClients: promClients,
|
||||
|
||||
alertMuteCache: alertMuteCache,
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -28,6 +28,8 @@ var ShouldSkipNotify func(*ctx.Context, *models.AlertCurEvent, int64) bool
|
||||
var SendByNotifyRule func(*ctx.Context, *memsto.UserCacheType, *memsto.UserGroupCacheType, *memsto.NotifyChannelCacheType,
|
||||
[]*models.AlertCurEvent, int64, *models.NotifyConfig, *models.NotifyChannelConfig, *models.MessageTemplate)
|
||||
|
||||
var EventProcessorCache *memsto.EventProcessorCacheType
|
||||
|
||||
func init() {
|
||||
ShouldSkipNotify = shouldSkipNotify
|
||||
SendByNotifyRule = SendNotifyRuleMessage
|
||||
@@ -90,6 +92,7 @@ func NewDispatch(alertRuleCache *memsto.AlertRuleCacheType, userCache *memsto.Us
|
||||
}
|
||||
|
||||
pipeline.Init()
|
||||
EventProcessorCache = eventProcessorCache
|
||||
|
||||
// 设置通知记录回调函数
|
||||
notifyChannelCache.SetNotifyRecordFunc(sender.NotifyRecord)
|
||||
@@ -177,39 +180,7 @@ func (e *Dispatch) HandleEventWithNotifyRule(eventOrigin *models.AlertCurEvent)
|
||||
eventCopy.NotifyRuleId = notifyRuleId
|
||||
eventCopy.NotifyRuleName = notifyRule.Name
|
||||
|
||||
var processors []models.Processor
|
||||
for _, pipelineConfig := range notifyRule.PipelineConfigs {
|
||||
if !pipelineConfig.Enable {
|
||||
continue
|
||||
}
|
||||
|
||||
eventPipeline := e.eventProcessorCache.Get(pipelineConfig.PipelineId)
|
||||
if eventPipeline == nil {
|
||||
logger.Warningf("notify_id: %d, event:%+v, processor not found", notifyRuleId, eventCopy)
|
||||
continue
|
||||
}
|
||||
|
||||
if !pipelineApplicable(eventPipeline, eventCopy) {
|
||||
logger.Debugf("notify_id: %d, event:%+v, pipeline_id: %d, not applicable", notifyRuleId, eventCopy, pipelineConfig.PipelineId)
|
||||
continue
|
||||
}
|
||||
|
||||
processors = append(processors, e.eventProcessorCache.GetProcessorsById(pipelineConfig.PipelineId)...)
|
||||
}
|
||||
|
||||
for _, processor := range processors {
|
||||
var res string
|
||||
var err error
|
||||
logger.Infof("before processor notify_id: %d, event:%+v, processor:%+v", notifyRuleId, eventCopy, processor)
|
||||
eventCopy, res, err = processor.Process(e.ctx, eventCopy)
|
||||
if eventCopy == nil {
|
||||
logger.Warningf("after processor notify_id: %d, event:%+v, processor:%+v, event is nil", notifyRuleId, eventCopy, processor)
|
||||
sender.NotifyRecord(e.ctx, []*models.AlertCurEvent{eventOrigin}, notifyRuleId, "", "", res, errors.New("drop by processor"))
|
||||
break
|
||||
}
|
||||
logger.Infof("after processor notify_id: %d, event:%+v, processor:%+v, res:%v, err:%v", notifyRuleId, eventCopy, processor, res, err)
|
||||
}
|
||||
|
||||
eventCopy = HandleEventPipeline(notifyRule.PipelineConfigs, eventOrigin, eventCopy, e.eventProcessorCache, e.ctx, notifyRuleId, "notify_rule")
|
||||
if ShouldSkipNotify(e.ctx, eventCopy, notifyRuleId) {
|
||||
logger.Infof("notify_id: %d, event:%+v, should skip notify", notifyRuleId, eventCopy)
|
||||
continue
|
||||
@@ -257,7 +228,48 @@ func shouldSkipNotify(ctx *ctx.Context, event *models.AlertCurEvent, notifyRuleI
|
||||
return false
|
||||
}
|
||||
|
||||
func pipelineApplicable(pipeline *models.EventPipeline, event *models.AlertCurEvent) bool {
|
||||
func HandleEventPipeline(pipelineConfigs []models.PipelineConfig, eventOrigin, event *models.AlertCurEvent, eventProcessorCache *memsto.EventProcessorCacheType, ctx *ctx.Context, id int64, from string) *models.AlertCurEvent {
|
||||
for _, pipelineConfig := range pipelineConfigs {
|
||||
if !pipelineConfig.Enable {
|
||||
continue
|
||||
}
|
||||
|
||||
eventPipeline := eventProcessorCache.Get(pipelineConfig.PipelineId)
|
||||
if eventPipeline == nil {
|
||||
logger.Warningf("processor_by_%s_id:%d pipeline_id:%d, event pipeline not found, event: %+v", from, id, pipelineConfig.PipelineId, event)
|
||||
continue
|
||||
}
|
||||
|
||||
if !PipelineApplicable(eventPipeline, event) {
|
||||
logger.Debugf("processor_by_%s_id:%d pipeline_id:%d, event pipeline not applicable, event: %+v", from, id, pipelineConfig.PipelineId, event)
|
||||
continue
|
||||
}
|
||||
|
||||
processors := eventProcessorCache.GetProcessorsById(pipelineConfig.PipelineId)
|
||||
for _, processor := range processors {
|
||||
var res string
|
||||
var err error
|
||||
logger.Infof("processor_by_%s_id:%d pipeline_id:%d, before processor:%+v, event: %+v", from, id, pipelineConfig.PipelineId, processor, event)
|
||||
event, res, err = processor.Process(ctx, event)
|
||||
if event == nil {
|
||||
logger.Infof("processor_by_%s_id:%d pipeline_id:%d, event dropped, after processor:%+v, event: %+v", from, id, pipelineConfig.PipelineId, processor, eventOrigin)
|
||||
|
||||
if from == "notify_rule" {
|
||||
// alert_rule 获取不到 eventId 记录没有意义
|
||||
sender.NotifyRecord(ctx, []*models.AlertCurEvent{eventOrigin}, id, "", "", res, fmt.Errorf("processor_by_%s_id:%d pipeline_id:%d, drop by processor", from, id, pipelineConfig.PipelineId))
|
||||
}
|
||||
return nil
|
||||
}
|
||||
logger.Infof("processor_by_%s_id:%d pipeline_id:%d, after processor:%+v, event: %+v, res:%v, err:%v", from, id, pipelineConfig.PipelineId, processor, event, res, err)
|
||||
}
|
||||
}
|
||||
|
||||
event.FE2DB()
|
||||
event.FillTagsMap()
|
||||
return event
|
||||
}
|
||||
|
||||
func PipelineApplicable(pipeline *models.EventPipeline, event *models.AlertCurEvent) bool {
|
||||
if pipeline == nil {
|
||||
return true
|
||||
}
|
||||
@@ -496,7 +508,7 @@ func SendNotifyRuleMessage(ctx *ctx.Context, userCache *memsto.UserCacheType, us
|
||||
start := time.Now()
|
||||
respBody, err := notifyChannel.SendFlashDuty(events, flashDutyChannelIDs[i], notifyChannelCache.GetHttpClient(notifyChannel.ID))
|
||||
respBody = fmt.Sprintf("duration: %d ms %s", time.Since(start).Milliseconds(), respBody)
|
||||
logger.Infof("notify_id: %d, channel_name: %v, event:%+v, IntegrationUrl: %v dutychannel_id: %v, respBody: %v, err: %v", notifyRuleId, notifyChannel.Name, events[0], notifyChannel.RequestConfig.FlashDutyRequestConfig.IntegrationUrl, flashDutyChannelIDs[i], respBody, err)
|
||||
logger.Infof("duty_sender notify_id: %d, channel_name: %v, event:%+v, IntegrationUrl: %v dutychannel_id: %v, respBody: %v, err: %v", notifyRuleId, notifyChannel.Name, events[0], notifyChannel.RequestConfig.FlashDutyRequestConfig.IntegrationUrl, flashDutyChannelIDs[i], respBody, err)
|
||||
sender.NotifyRecord(ctx, events, notifyRuleId, notifyChannel.Name, strconv.FormatInt(flashDutyChannelIDs[i], 10), respBody, err)
|
||||
}
|
||||
|
||||
@@ -527,7 +539,7 @@ func SendNotifyRuleMessage(ctx *ctx.Context, userCache *memsto.UserCacheType, us
|
||||
start := time.Now()
|
||||
target, res, err := notifyChannel.SendScript(events, tplContent, customParams, sendtos)
|
||||
res = fmt.Sprintf("duration: %d ms %s", time.Since(start).Milliseconds(), res)
|
||||
logger.Infof("notify_id: %d, channel_name: %v, event:%+v, tplContent:%s, customParams:%v, target:%s, res:%s, err:%v", notifyRuleId, notifyChannel.Name, events[0], tplContent, customParams, target, res, err)
|
||||
logger.Infof("script_sender notify_id: %d, channel_name: %v, event:%+v, tplContent:%s, customParams:%v, target:%s, res:%s, err:%v", notifyRuleId, notifyChannel.Name, events[0], tplContent, customParams, target, res, err)
|
||||
sender.NotifyRecord(ctx, events, notifyRuleId, notifyChannel.Name, target, res, err)
|
||||
default:
|
||||
logger.Warningf("notify_id: %d, channel_name: %v, event:%+v send type not found", notifyRuleId, notifyChannel.Name, events[0])
|
||||
|
||||
@@ -286,7 +286,7 @@ func (arw *AlertRuleWorker) GetPromAnomalyPoint(ruleConfig string) ([]models.Ano
|
||||
continue
|
||||
}
|
||||
|
||||
if query.VarEnabled {
|
||||
if query.VarEnabled && strings.Contains(query.PromQl, "$") {
|
||||
var anomalyPoints []models.AnomalyPoint
|
||||
if hasLabelLossAggregator(query) || notExactMatch(query) {
|
||||
// 若有聚合函数或非精确匹配则需要先填充变量然后查询,这个方式效率较低
|
||||
|
||||
@@ -1,6 +1,7 @@
|
||||
package mute
|
||||
|
||||
import (
|
||||
"slices"
|
||||
"strconv"
|
||||
"strings"
|
||||
"time"
|
||||
@@ -153,13 +154,7 @@ func MatchMute(event *models.AlertCurEvent, mute *models.AlertMute, clock ...int
|
||||
|
||||
// 如果不是全局的,判断 匹配的 datasource id
|
||||
if len(mute.DatasourceIdsJson) != 0 && mute.DatasourceIdsJson[0] != 0 && event.DatasourceId != 0 {
|
||||
idm := make(map[int64]struct{}, len(mute.DatasourceIdsJson))
|
||||
for i := 0; i < len(mute.DatasourceIdsJson); i++ {
|
||||
idm[mute.DatasourceIdsJson[i]] = struct{}{}
|
||||
}
|
||||
|
||||
// 判断 event.datasourceId 是否包含在 idm 中
|
||||
if _, has := idm[event.DatasourceId]; !has {
|
||||
if !slices.Contains(mute.DatasourceIdsJson, event.DatasourceId) {
|
||||
return false, errors.New("datasource id not match")
|
||||
}
|
||||
}
|
||||
@@ -198,7 +193,7 @@ func MatchMute(event *models.AlertCurEvent, mute *models.AlertMute, clock ...int
|
||||
return false, errors.New("event severity not match mute severity")
|
||||
}
|
||||
|
||||
if mute.ITags == nil || len(mute.ITags) == 0 {
|
||||
if len(mute.ITags) == 0 {
|
||||
return true, nil
|
||||
}
|
||||
if !common.MatchTags(event.TagsMap, mute.ITags) {
|
||||
|
||||
@@ -26,8 +26,6 @@ import (
|
||||
"github.com/toolkits/pkg/str"
|
||||
)
|
||||
|
||||
type EventMuteHookFunc func(event *models.AlertCurEvent) bool
|
||||
|
||||
type ExternalProcessorsType struct {
|
||||
ExternalLock sync.RWMutex
|
||||
Processors map[string]*Processor
|
||||
@@ -76,7 +74,6 @@ type Processor struct {
|
||||
|
||||
HandleFireEventHook HandleEventFunc
|
||||
HandleRecoverEventHook HandleEventFunc
|
||||
EventMuteHook EventMuteHookFunc
|
||||
|
||||
ScheduleEntry cron.Entry
|
||||
PromEvalInterval int
|
||||
@@ -121,7 +118,6 @@ func NewProcessor(engineName string, rule *models.AlertRule, datasourceId int64,
|
||||
|
||||
HandleFireEventHook: func(event *models.AlertCurEvent) {},
|
||||
HandleRecoverEventHook: func(event *models.AlertCurEvent) {},
|
||||
EventMuteHook: func(event *models.AlertCurEvent) bool { return false },
|
||||
}
|
||||
|
||||
p.mayHandleGroup()
|
||||
@@ -155,9 +151,19 @@ func (p *Processor) Handle(anomalyPoints []models.AnomalyPoint, from string, inh
|
||||
// 如果 event 被 mute 了,本质也是 fire 的状态,这里无论如何都添加到 alertingKeys 中,防止 fire 的事件自动恢复了
|
||||
hash := event.Hash
|
||||
alertingKeys[hash] = struct{}{}
|
||||
|
||||
// event processor
|
||||
eventCopy := event.DeepCopy()
|
||||
event = dispatch.HandleEventPipeline(cachedRule.PipelineConfigs, eventCopy, event, dispatch.EventProcessorCache, p.ctx, cachedRule.Id, "alert_rule")
|
||||
if event == nil {
|
||||
logger.Infof("rule_eval:%s is muted drop by pipeline event:%v", p.Key(), eventCopy)
|
||||
continue
|
||||
}
|
||||
|
||||
// event mute
|
||||
isMuted, detail, muteId := mute.IsMuted(cachedRule, event, p.TargetCache, p.alertMuteCache)
|
||||
if isMuted {
|
||||
logger.Debugf("rule_eval:%s event:%v is muted, detail:%s", p.Key(), event, detail)
|
||||
logger.Infof("rule_eval:%s is muted, detail:%s event:%v", p.Key(), detail, event)
|
||||
p.Stats.CounterMuteTotal.WithLabelValues(
|
||||
fmt.Sprintf("%v", event.GroupName),
|
||||
fmt.Sprintf("%v", p.rule.Id),
|
||||
@@ -167,8 +173,8 @@ func (p *Processor) Handle(anomalyPoints []models.AnomalyPoint, from string, inh
|
||||
continue
|
||||
}
|
||||
|
||||
if p.EventMuteHook(event) {
|
||||
logger.Debugf("rule_eval:%s event:%v is muted by hook", p.Key(), event)
|
||||
if dispatch.EventMuteHook(event) {
|
||||
logger.Infof("rule_eval:%s is muted by hook event:%v", p.Key(), event)
|
||||
p.Stats.CounterMuteTotal.WithLabelValues(
|
||||
fmt.Sprintf("%v", event.GroupName),
|
||||
fmt.Sprintf("%v", p.rule.Id),
|
||||
|
||||
@@ -25,6 +25,7 @@ func (rt *Router) pushEventToQueue(c *gin.Context) {
|
||||
if event.RuleId == 0 {
|
||||
ginx.Bomb(200, "event is illegal")
|
||||
}
|
||||
event.FE2DB()
|
||||
|
||||
event.TagsMap = make(map[string]string)
|
||||
for i := 0; i < len(event.TagsJSON); i++ {
|
||||
@@ -40,7 +41,7 @@ func (rt *Router) pushEventToQueue(c *gin.Context) {
|
||||
|
||||
event.TagsMap[arr[0]] = arr[1]
|
||||
}
|
||||
hit, _ := mute.EventMuteStrategy(event, rt.AlertMuteCache)
|
||||
hit, _ := mute.EventMuteStrategy(event, rt.AlertMuteCache)
|
||||
if hit {
|
||||
logger.Infof("event_muted: rule_id=%d %s", event.RuleId, event.Hash)
|
||||
ginx.NewRender(c).Message(nil)
|
||||
|
||||
@@ -331,6 +331,30 @@ func (b *BuiltinPayloadInFileType) AddBuiltinPayload(bp *models.BuiltinPayload)
|
||||
b.IndexData[bp.UUID] = bp
|
||||
}
|
||||
|
||||
func (b *BuiltinPayloadInFileType) GetComponentIdentByCate(typ, cate string) string {
|
||||
|
||||
for _, source := range b.Data {
|
||||
if source == nil {
|
||||
continue
|
||||
}
|
||||
|
||||
typeMap, exists := source[typ]
|
||||
if !exists {
|
||||
continue
|
||||
}
|
||||
|
||||
payloads, exists := typeMap[cate]
|
||||
if !exists {
|
||||
continue
|
||||
}
|
||||
|
||||
if len(payloads) > 0 {
|
||||
return payloads[0].Component
|
||||
}
|
||||
}
|
||||
return ""
|
||||
}
|
||||
|
||||
func (b *BuiltinPayloadInFileType) GetBuiltinPayload(typ, cate, query string, componentId uint64) ([]*models.BuiltinPayload, error) {
|
||||
|
||||
var result []*models.BuiltinPayload
|
||||
|
||||
@@ -628,6 +628,7 @@ func (rt *Router) Config(r *gin.Engine) {
|
||||
service.GET("/recording-rules", rt.recordingRuleGetsByService)
|
||||
|
||||
service.GET("/alert-mutes", rt.alertMuteGets)
|
||||
service.GET("/active-alert-mutes", rt.activeAlertMuteGets)
|
||||
service.POST("/alert-mutes", rt.alertMuteAddByService)
|
||||
service.DELETE("/alert-mutes", rt.alertMuteDel)
|
||||
|
||||
@@ -680,6 +681,10 @@ func (rt *Router) Config(r *gin.Engine) {
|
||||
// 手机号加密存储配置接口
|
||||
service.POST("/users/phone/encrypt", rt.usersPhoneEncrypt)
|
||||
service.POST("/users/phone/decrypt", rt.usersPhoneDecrypt)
|
||||
service.POST("/users/phone/refresh-encryption-config", rt.usersPhoneDecryptRefresh)
|
||||
|
||||
service.GET("/builtin-components", rt.builtinComponentsGets)
|
||||
service.GET("/builtin-payloads", rt.builtinPayloadsGets)
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -107,9 +107,20 @@ func (rt *Router) logoutPost(c *gin.Context) {
|
||||
|
||||
var logoutAddr string
|
||||
user := c.MustGet("user").(*models.User)
|
||||
|
||||
// 获取用户的 id_token
|
||||
idToken, err := rt.fetchIdToken(c.Request.Context(), user.Id)
|
||||
if err != nil {
|
||||
logger.Debugf("fetch id_token failed: %v, user_id: %d", err, user.Id)
|
||||
idToken = "" // 如果获取失败,使用空字符串
|
||||
}
|
||||
|
||||
// 删除 id_token
|
||||
rt.deleteIdToken(c.Request.Context(), user.Id)
|
||||
|
||||
switch user.Belong {
|
||||
case "oidc":
|
||||
logoutAddr = rt.Sso.OIDC.GetSsoLogoutAddr()
|
||||
logoutAddr = rt.Sso.OIDC.GetSsoLogoutAddr(idToken)
|
||||
case "cas":
|
||||
logoutAddr = rt.Sso.CAS.GetSsoLogoutAddr()
|
||||
case "oauth2":
|
||||
@@ -199,6 +210,14 @@ func (rt *Router) refreshPost(c *gin.Context) {
|
||||
ginx.Dangerous(err)
|
||||
ginx.Dangerous(rt.createAuth(c.Request.Context(), userIdentity, ts))
|
||||
|
||||
// 延长 id_token 的过期时间,使其与新的 refresh token 生命周期保持一致
|
||||
// 注意:这里不会获取新的 id_token,只是延长 Redis 中现有 id_token 的 TTL
|
||||
if idToken, err := rt.fetchIdToken(c.Request.Context(), userid); err == nil && idToken != "" {
|
||||
if err := rt.saveIdToken(c.Request.Context(), userid, idToken); err != nil {
|
||||
logger.Debugf("refresh id_token ttl failed: %v, user_id: %d", err, userid)
|
||||
}
|
||||
}
|
||||
|
||||
ginx.NewRender(c).Data(gin.H{
|
||||
"access_token": ts.AccessToken,
|
||||
"refresh_token": ts.RefreshToken,
|
||||
@@ -286,6 +305,13 @@ func (rt *Router) loginCallback(c *gin.Context) {
|
||||
ginx.Dangerous(err)
|
||||
ginx.Dangerous(rt.createAuth(c.Request.Context(), userIdentity, ts))
|
||||
|
||||
// 保存 id_token 到 Redis,用于登出时使用
|
||||
if ret.IdToken != "" {
|
||||
if err := rt.saveIdToken(c.Request.Context(), user.Id, ret.IdToken); err != nil {
|
||||
logger.Errorf("save id_token failed: %v, user_id: %d", err, user.Id)
|
||||
}
|
||||
}
|
||||
|
||||
redirect := "/"
|
||||
if ret.Redirect != "/login" {
|
||||
redirect = ret.Redirect
|
||||
|
||||
@@ -20,7 +20,8 @@ func (rt *Router) alertMuteGetsByBG(c *gin.Context) {
|
||||
bgid := ginx.UrlParamInt64(c, "id")
|
||||
prods := strings.Fields(ginx.QueryStr(c, "prods", ""))
|
||||
query := ginx.QueryStr(c, "query", "")
|
||||
lst, err := models.AlertMuteGets(rt.Ctx, prods, bgid, -1, query)
|
||||
expired := ginx.QueryInt(c, "expired", -1)
|
||||
lst, err := models.AlertMuteGets(rt.Ctx, prods, bgid, -1, expired, query)
|
||||
|
||||
ginx.NewRender(c).Data(lst, err)
|
||||
}
|
||||
@@ -55,11 +56,17 @@ func (rt *Router) alertMuteGets(c *gin.Context) {
|
||||
bgid := ginx.QueryInt64(c, "bgid", -1)
|
||||
query := ginx.QueryStr(c, "query", "")
|
||||
disabled := ginx.QueryInt(c, "disabled", -1)
|
||||
lst, err := models.AlertMuteGets(rt.Ctx, prods, bgid, disabled, query)
|
||||
expired := ginx.QueryInt(c, "expired", -1)
|
||||
lst, err := models.AlertMuteGets(rt.Ctx, prods, bgid, disabled, expired, query)
|
||||
|
||||
ginx.NewRender(c).Data(lst, err)
|
||||
}
|
||||
|
||||
func (rt *Router) activeAlertMuteGets(c *gin.Context) {
|
||||
lst, err := models.AlertMuteGetsAll(rt.Ctx)
|
||||
ginx.NewRender(c).Data(lst, err)
|
||||
}
|
||||
|
||||
func (rt *Router) alertMuteAdd(c *gin.Context) {
|
||||
|
||||
var f models.AlertMute
|
||||
@@ -69,7 +76,9 @@ func (rt *Router) alertMuteAdd(c *gin.Context) {
|
||||
f.CreateBy = username
|
||||
f.UpdateBy = username
|
||||
f.GroupId = ginx.UrlParamInt64(c, "id")
|
||||
ginx.NewRender(c).Message(f.Add(rt.Ctx))
|
||||
|
||||
ginx.Dangerous(f.Add(rt.Ctx))
|
||||
ginx.NewRender(c).Data(f.Id, nil)
|
||||
}
|
||||
|
||||
type MuteTestForm struct {
|
||||
|
||||
@@ -453,6 +453,30 @@ func (rt *Router) wrapJwtKey(key string) string {
|
||||
return rt.HTTP.JWTAuth.RedisKeyPrefix + key
|
||||
}
|
||||
|
||||
func (rt *Router) wrapIdTokenKey(userId int64) string {
|
||||
return fmt.Sprintf("n9e_id_token_%d", userId)
|
||||
}
|
||||
|
||||
// saveIdToken 保存用户的 id_token 到 Redis
|
||||
func (rt *Router) saveIdToken(ctx context.Context, userId int64, idToken string) error {
|
||||
if idToken == "" {
|
||||
return nil
|
||||
}
|
||||
// id_token 的过期时间应该与 RefreshToken 保持一致,确保在整个会话期间都可用于登出
|
||||
expiration := time.Minute * time.Duration(rt.HTTP.JWTAuth.RefreshExpired)
|
||||
return rt.Redis.Set(ctx, rt.wrapIdTokenKey(userId), idToken, expiration).Err()
|
||||
}
|
||||
|
||||
// fetchIdToken 从 Redis 获取用户的 id_token
|
||||
func (rt *Router) fetchIdToken(ctx context.Context, userId int64) (string, error) {
|
||||
return rt.Redis.Get(ctx, rt.wrapIdTokenKey(userId)).Result()
|
||||
}
|
||||
|
||||
// deleteIdToken 从 Redis 删除用户的 id_token
|
||||
func (rt *Router) deleteIdToken(ctx context.Context, userId int64) error {
|
||||
return rt.Redis.Del(ctx, rt.wrapIdTokenKey(userId)).Err()
|
||||
}
|
||||
|
||||
type TokenDetails struct {
|
||||
AccessToken string
|
||||
RefreshToken string
|
||||
|
||||
@@ -235,6 +235,16 @@ func (rt *Router) userDel(c *gin.Context) {
|
||||
return
|
||||
}
|
||||
|
||||
// 如果要删除的用户是 admin 角色,检查是否是最后一个 admin
|
||||
if target.IsAdmin() {
|
||||
adminCount, err := models.CountAdminUsers(rt.Ctx)
|
||||
ginx.Dangerous(err)
|
||||
|
||||
if adminCount <= 1 {
|
||||
ginx.Bomb(http.StatusBadRequest, "Cannot delete the last admin user")
|
||||
}
|
||||
}
|
||||
|
||||
ginx.NewRender(c).Message(target.Del(rt.Ctx))
|
||||
}
|
||||
|
||||
@@ -345,6 +355,16 @@ func (rt *Router) usersPhoneEncrypt(c *gin.Context) {
|
||||
}, nil)
|
||||
}
|
||||
|
||||
func (rt *Router) usersPhoneDecryptRefresh(c *gin.Context) {
|
||||
err := models.RefreshPhoneEncryptionCache(rt.Ctx)
|
||||
if err != nil {
|
||||
ginx.NewRender(c).Message(fmt.Errorf("refresh phone encryption cache failed: %v", err))
|
||||
return
|
||||
}
|
||||
|
||||
ginx.NewRender(c).Message(nil)
|
||||
}
|
||||
|
||||
// usersPhoneDecrypt 统一手机号解密
|
||||
func (rt *Router) usersPhoneDecrypt(c *gin.Context) {
|
||||
// 先关闭手机号加密功能
|
||||
|
||||
@@ -27,6 +27,40 @@ func (rt *Router) userGroupGets(c *gin.Context) {
|
||||
|
||||
me := c.MustGet("user").(*models.User)
|
||||
lst, err := me.UserGroups(rt.Ctx, limit, query)
|
||||
if err != nil {
|
||||
ginx.Dangerous(err)
|
||||
}
|
||||
|
||||
allBusiGroups, err := models.BusiGroupGetAll(rt.Ctx)
|
||||
if err != nil {
|
||||
ginx.Dangerous(err)
|
||||
}
|
||||
|
||||
var allMembers []models.BusiGroupMember
|
||||
if err = models.DB(rt.Ctx).Model(&models.BusiGroupMember{}).Find(&allMembers).Error; err != nil {
|
||||
ginx.Dangerous(err)
|
||||
}
|
||||
|
||||
busiGroupMap := make(map[int64]*models.BusiGroup)
|
||||
for _, bg := range allBusiGroups {
|
||||
busiGroupMap[bg.Id] = bg
|
||||
}
|
||||
|
||||
userGroupToBusiGroupsMap := make(map[int64][]*models.BusiGroupRes)
|
||||
for _, member := range allMembers {
|
||||
if bg, ok := busiGroupMap[member.BusiGroupId]; ok {
|
||||
userGroupToBusiGroupsMap[member.UserGroupId] = append(userGroupToBusiGroupsMap[member.UserGroupId], &models.BusiGroupRes{
|
||||
Id: bg.Id,
|
||||
Name: bg.Name,
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
for i := 0; i < len(lst); i++ {
|
||||
if busiGroups, ok := userGroupToBusiGroupsMap[lst[i].Id]; ok {
|
||||
lst[i].BusiGroupsRes = busiGroups
|
||||
}
|
||||
}
|
||||
|
||||
ginx.NewRender(c).Data(lst, err)
|
||||
}
|
||||
|
||||
@@ -441,10 +441,32 @@ func QueryData(ctx context.Context, queryParam interface{}, cliTimeout int64, ve
|
||||
Field(param.DateField).
|
||||
MinDocCount(1)
|
||||
|
||||
if strings.HasPrefix(version, "7") {
|
||||
versionParts := strings.Split(version, ".")
|
||||
major := 0
|
||||
if len(versionParts) > 0 {
|
||||
if m, err := strconv.Atoi(versionParts[0]); err == nil {
|
||||
major = m
|
||||
}
|
||||
}
|
||||
minor := 0
|
||||
if len(versionParts) > 1 {
|
||||
if m, err := strconv.Atoi(versionParts[1]); err == nil {
|
||||
minor = m
|
||||
}
|
||||
}
|
||||
|
||||
if major >= 7 {
|
||||
// 添加偏移量,使第一个分桶bucket的左边界对齐为 start 时间
|
||||
offset := (start % param.Interval) - param.Interval
|
||||
tsAggr.FixedInterval(fmt.Sprintf("%ds", param.Interval)).Offset(fmt.Sprintf("%ds", offset))
|
||||
|
||||
// 使用 fixed_interval 的条件:ES 7.2+ 或者任何 major > 7(例如 ES8)
|
||||
if (major > 7) || (major == 7 && minor >= 2) {
|
||||
// ES 7.2+ 以及 ES8+ 使用 fixed_interval
|
||||
tsAggr.FixedInterval(fmt.Sprintf("%ds", param.Interval)).Offset(fmt.Sprintf("%ds", offset))
|
||||
} else {
|
||||
// 7.0-7.1 使用 interval(带 offset)
|
||||
tsAggr.Interval(fmt.Sprintf("%ds", param.Interval)).Offset(fmt.Sprintf("%ds", offset))
|
||||
}
|
||||
} else {
|
||||
// 兼容 7.0 以下的版本
|
||||
// OpenSearch 也使用这个字段
|
||||
|
||||
@@ -106,6 +106,29 @@ func (e *Elasticsearch) InitClient() error {
|
||||
options = append(options, elastic.SetHealthcheck(false))
|
||||
|
||||
e.Client, err = elastic.NewClient(options...)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
if e.Client != nil {
|
||||
for _, addr := range e.Nodes {
|
||||
if addr == "" {
|
||||
continue
|
||||
}
|
||||
if ver, verr := e.Client.ElasticsearchVersion(addr); verr == nil {
|
||||
logger.Infof("detected elasticsearch version from %s: %s", addr, ver)
|
||||
e.Version = ver
|
||||
e.Addr = addr
|
||||
break
|
||||
} else {
|
||||
logger.Debugf("detect version failed from %s: %v", addr, verr)
|
||||
}
|
||||
}
|
||||
if e.Version == "" {
|
||||
logger.Warning("failed to detect elasticsearch version from configured nodes, keep configured version")
|
||||
}
|
||||
}
|
||||
|
||||
return err
|
||||
}
|
||||
|
||||
@@ -183,7 +206,6 @@ func (e *Elasticsearch) MakeTSQuery(ctx context.Context, query interface{}, even
|
||||
}
|
||||
|
||||
func (e *Elasticsearch) QueryData(ctx context.Context, queryParam interface{}) ([]models.DataResp, error) {
|
||||
|
||||
search := func(ctx context.Context, indices []string, source interface{}, timeout int, maxShard int) (*elastic.SearchResult, error) {
|
||||
return e.Client.Search().
|
||||
Index(indices...).
|
||||
@@ -193,7 +215,6 @@ func (e *Elasticsearch) QueryData(ctx context.Context, queryParam interface{}) (
|
||||
MaxConcurrentShardRequests(maxShard).
|
||||
Do(ctx)
|
||||
}
|
||||
|
||||
return eslike.QueryData(ctx, queryParam, e.Timeout, e.Version, search)
|
||||
}
|
||||
|
||||
|
||||
BIN
doc/img/readme/active-events-en.png
Normal file
BIN
doc/img/readme/active-events-en.png
Normal file
Binary file not shown.
|
After Width: | Height: | Size: 481 KiB |
BIN
doc/img/readme/alerting-rules-en.png
Normal file
BIN
doc/img/readme/alerting-rules-en.png
Normal file
Binary file not shown.
|
After Width: | Height: | Size: 508 KiB |
BIN
doc/img/readme/dashboard-en.png
Normal file
BIN
doc/img/readme/dashboard-en.png
Normal file
Binary file not shown.
|
After Width: | Height: | Size: 386 KiB |
BIN
doc/img/readme/integration-components-en.png
Normal file
BIN
doc/img/readme/integration-components-en.png
Normal file
Binary file not shown.
|
After Width: | Height: | Size: 424 KiB |
BIN
doc/img/readme/multi-region-arch.png
Normal file
BIN
doc/img/readme/multi-region-arch.png
Normal file
Binary file not shown.
|
After Width: | Height: | Size: 78 KiB |
@@ -89,8 +89,6 @@ MaxLifetime = 7200
|
||||
MaxOpenConns = 150
|
||||
# max idle connections
|
||||
MaxIdleConns = 50
|
||||
# enable auto migrate or not
|
||||
# EnableAutoMigrate = false
|
||||
|
||||
[Redis]
|
||||
# address, ip:port or ip1:port,ip2:port for cluster and sentinel(SentinelAddrs)
|
||||
|
||||
@@ -86,8 +86,6 @@ MaxLifetime = 7200
|
||||
MaxOpenConns = 150
|
||||
# max idle connections
|
||||
MaxIdleConns = 50
|
||||
# enable auto migrate or not
|
||||
# EnableAutoMigrate = false
|
||||
|
||||
[Redis]
|
||||
# address, ip:port or ip1:port,ip2:port for cluster and sentinel(SentinelAddrs)
|
||||
|
||||
@@ -86,8 +86,6 @@ MaxLifetime = 7200
|
||||
MaxOpenConns = 150
|
||||
# max idle connections
|
||||
MaxIdleConns = 50
|
||||
# enable auto migrate or not
|
||||
# EnableAutoMigrate = false
|
||||
|
||||
[Redis]
|
||||
# address, ip:port or ip1:port,ip2:port for cluster and sentinel(SentinelAddrs)
|
||||
|
||||
@@ -90,8 +90,6 @@ MaxLifetime = 7200
|
||||
MaxOpenConns = 150
|
||||
# max idle connections
|
||||
MaxIdleConns = 50
|
||||
# enable auto migrate or not
|
||||
# EnableAutoMigrate = false
|
||||
|
||||
[Redis]
|
||||
# address, ip:port or ip1:port,ip2:port for cluster and sentinel(SentinelAddrs)
|
||||
|
||||
@@ -272,8 +272,6 @@ CREATE TABLE `source_token` (
|
||||
) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4;
|
||||
|
||||
|
||||
/* Add translation column for builtin metrics */
|
||||
ALTER TABLE `builtin_metrics` ADD COLUMN `translation` TEXT COMMENT 'translation of metric' AFTER `lang`;
|
||||
|
||||
/* v8.0.0-beta.12 2025-06-03 */
|
||||
ALTER TABLE `alert_his_event` ADD COLUMN `notify_rule_ids` text COMMENT 'notify rule ids';
|
||||
@@ -283,3 +281,11 @@ ALTER TABLE `alert_cur_event` ADD COLUMN `notify_rule_ids` text COMMENT 'notify
|
||||
-- 删除 builtin_metrics 表的 idx_collector_typ_name 唯一索引
|
||||
DROP INDEX IF EXISTS `idx_collector_typ_name` ON `builtin_metrics`;
|
||||
|
||||
/* v8.0.0 2025-07-03 */
|
||||
ALTER TABLE `builtin_metrics` ADD COLUMN `translation` TEXT COMMENT 'translation of metric' AFTER `lang`;
|
||||
|
||||
/* v8.4.0 2025-10-15 */
|
||||
ALTER TABLE `notify_rule` ADD COLUMN `extra_config` text COMMENT 'extra config';
|
||||
|
||||
/* v8.4.1 2025-11-10 */
|
||||
ALTER TABLE `alert_rule` ADD COLUMN `pipeline_configs` text COMMENT 'pipeline configs';
|
||||
@@ -7,6 +7,7 @@ import (
|
||||
"encoding/json"
|
||||
"errors"
|
||||
"fmt"
|
||||
"net/url"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
@@ -117,7 +118,8 @@ func (p *PostgreSQL) NewConn(ctx context.Context, database string) (*gorm.DB, er
|
||||
}()
|
||||
|
||||
// Simplified connection logic for PostgreSQL
|
||||
dsn := fmt.Sprintf("postgres://%s:%s@%s/%s?sslmode=disable&TimeZone=Asia/Shanghai", p.Shard.User, p.Shard.Password, p.Shard.Addr, database)
|
||||
dsn := fmt.Sprintf("postgres://%s:%s@%s/%s?sslmode=disable&TimeZone=Asia/Shanghai", url.QueryEscape(p.Shard.User), url.QueryEscape(p.Shard.Password), p.Shard.Addr, database)
|
||||
|
||||
db, err = sqlbase.NewDB(
|
||||
ctx,
|
||||
postgres.Open(dsn),
|
||||
|
||||
@@ -90,8 +90,6 @@ MaxLifetime = 7200
|
||||
MaxOpenConns = 150
|
||||
# max idle connections
|
||||
MaxIdleConns = 50
|
||||
# enable auto migrate or not
|
||||
# EnableAutoMigrate = false
|
||||
|
||||
[Redis]
|
||||
# address, ip:port or ip1:port,ip2:port for cluster and sentinel(SentinelAddrs)
|
||||
|
||||
2
go.mod
2
go.mod
@@ -161,4 +161,6 @@ require (
|
||||
|
||||
replace golang.org/x/exp v0.0.0-20231006140011-7918f672742d => golang.org/x/exp v0.0.0-20230713183714-613f0c0eb8a1
|
||||
|
||||
replace github.com/olivere/elastic/v7 => github.com/n9e/elastic/v7 v7.0.33-0.20251031061708-f480a2dfcfa7
|
||||
|
||||
// replace github.com/flashcatcloud/ibex => ../github.com/flashcatcloud/ibex
|
||||
|
||||
4
go.sum
4
go.sum
@@ -259,10 +259,10 @@ github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 h1:C3w9PqII01/Oq
|
||||
github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822/go.mod h1:+n7T8mK8HuQTcFwEeznm/DIxMOiR9yIdICNftLE1DvQ=
|
||||
github.com/mwitkow/go-conntrack v0.0.0-20190716064945-2f068394615f h1:KUppIJq7/+SVif2QVs3tOP0zanoHgBEVAwHxUSIzRqU=
|
||||
github.com/mwitkow/go-conntrack v0.0.0-20190716064945-2f068394615f/go.mod h1:qRWi+5nqEBWmkhHvq77mSJWrCKwh8bxhgT7d/eI7P4U=
|
||||
github.com/n9e/elastic/v7 v7.0.33-0.20251031061708-f480a2dfcfa7 h1:fPs1GClmnQZ6E/nzrJCieQKJNM46eqMkHaBg3SoHcgY=
|
||||
github.com/n9e/elastic/v7 v7.0.33-0.20251031061708-f480a2dfcfa7/go.mod h1:/kVskIy0Pd8nAiKtPtcI4XnzOM+pM6MWQ+zP6YqPVFI=
|
||||
github.com/oklog/ulid v1.3.1 h1:EGfNDEx6MqHz8B3uNV6QAib1UR2Lm97sHi3ocA6ESJ4=
|
||||
github.com/oklog/ulid v1.3.1/go.mod h1:CirwcVhetQ6Lv90oh/F+FBtV6XMibvdAFo93nm5qn4U=
|
||||
github.com/olivere/elastic/v7 v7.0.32 h1:R7CXvbu8Eq+WlsLgxmKVKPox0oOwAE/2T9Si5BnvK6E=
|
||||
github.com/olivere/elastic/v7 v7.0.32/go.mod h1:c7PVmLe3Fxq77PIfY/bZmxY/TAamBhCzZ8xDOE09a9k=
|
||||
github.com/opensearch-project/opensearch-go/v2 v2.3.0 h1:nQIEMr+A92CkhHrZgUhcfsrZjibvB3APXf2a1VwCmMQ=
|
||||
github.com/opensearch-project/opensearch-go/v2 v2.3.0/go.mod h1:8LDr9FCgUTVoT+5ESjc2+iaZuldqE+23Iq0r1XeNue8=
|
||||
github.com/patrickmn/go-cache v2.1.0+incompatible h1:HRMgzkcYKYpi3C8ajMPV8OFXaaRUnok+kx1WdO15EQc=
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
@@ -1,4 +1,5 @@
|
||||
{
|
||||
[
|
||||
{
|
||||
"name": "JMX - Kubernetes",
|
||||
"tags": "Prometheus JMX Kubernetes",
|
||||
"configs": {
|
||||
@@ -1870,4 +1871,5 @@
|
||||
"version": "3.0.0"
|
||||
},
|
||||
"uuid": 1755595969673000
|
||||
}
|
||||
}
|
||||
]
|
||||
@@ -42,6 +42,11 @@ func (c *CvalCache) initSyncConfigs() {
|
||||
log.Fatalln("failed to sync configs:", err)
|
||||
}
|
||||
|
||||
err = models.RefreshPhoneEncryptionCache(c.ctx)
|
||||
if err != nil {
|
||||
logger.Errorf("failed to refresh phone encryption cache: %v", err)
|
||||
}
|
||||
|
||||
go c.loopSyncConfigs()
|
||||
}
|
||||
|
||||
|
||||
@@ -284,7 +284,7 @@ func (ncc *NotifyChannelCacheType) processNotifyTask(task *NotifyTask) {
|
||||
start := time.Now()
|
||||
resp, err := task.NotifyChannel.SendHTTP(task.Events, task.TplContent, task.CustomParams, task.Sendtos, httpClient)
|
||||
resp = fmt.Sprintf("duration: %d ms %s", time.Since(start).Milliseconds(), resp)
|
||||
logger.Infof("notify_id: %d, channel_name: %v, event:%+v, tplContent:%v, customParams:%v, userInfo:%+v, respBody: %v, err: %v",
|
||||
logger.Infof("http_sendernotify_id: %d, channel_name: %v, event:%+v, tplContent:%v, customParams:%v, userInfo:%+v, respBody: %v, err: %v",
|
||||
task.NotifyRuleId, task.NotifyChannel.Name, task.Events[0], task.TplContent, task.CustomParams, task.Sendtos, resp, err)
|
||||
|
||||
// 调用通知记录回调函数
|
||||
@@ -296,7 +296,7 @@ func (ncc *NotifyChannelCacheType) processNotifyTask(task *NotifyTask) {
|
||||
start := time.Now()
|
||||
resp, err := task.NotifyChannel.SendHTTP(task.Events, task.TplContent, task.CustomParams, []string{task.Sendtos[i]}, httpClient)
|
||||
resp = fmt.Sprintf("send_time: %s duration: %d ms %s", time.Now().Format("2006-01-02 15:04:05"), time.Since(start).Milliseconds(), resp)
|
||||
logger.Infof("notify_id: %d, channel_name: %v, event:%+v, tplContent:%v, customParams:%v, userInfo:%+v, respBody: %v, err: %v",
|
||||
logger.Infof("http_sender notify_id: %d, channel_name: %v, event:%+v, tplContent:%v, customParams:%v, userInfo:%+v, respBody: %v, err: %v",
|
||||
task.NotifyRuleId, task.NotifyChannel.Name, task.Events[0], task.TplContent, task.CustomParams, task.Sendtos[i], resp, err)
|
||||
|
||||
// 调用通知记录回调函数
|
||||
|
||||
@@ -498,6 +498,23 @@ func (e *AlertCurEvent) FE2DB() {
|
||||
|
||||
}
|
||||
|
||||
func (e *AlertCurEvent) FillTagsMap() {
|
||||
e.TagsMap = make(map[string]string)
|
||||
for i := 0; i < len(e.TagsJSON); i++ {
|
||||
pair := strings.TrimSpace(e.TagsJSON[i])
|
||||
if pair == "" {
|
||||
continue
|
||||
}
|
||||
|
||||
arr := strings.SplitN(pair, "=", 2)
|
||||
if len(arr) != 2 {
|
||||
continue
|
||||
}
|
||||
|
||||
e.TagsMap[arr[0]] = arr[1]
|
||||
}
|
||||
}
|
||||
|
||||
func (e *AlertCurEvent) DB2Mem() {
|
||||
e.IsRecovered = false
|
||||
e.NotifyGroupsJSON = strings.Fields(e.NotifyGroups)
|
||||
|
||||
@@ -230,7 +230,7 @@ func AlertMuteGet(ctx *ctx.Context, where string, args ...interface{}) (*AlertMu
|
||||
return lst[0], err
|
||||
}
|
||||
|
||||
func AlertMuteGets(ctx *ctx.Context, prods []string, bgid int64, disabled int, query string) (lst []AlertMute, err error) {
|
||||
func AlertMuteGets(ctx *ctx.Context, prods []string, bgid int64, disabled int, expired int, query string) (lst []AlertMute, err error) {
|
||||
session := DB(ctx)
|
||||
|
||||
if bgid != -1 {
|
||||
@@ -249,6 +249,15 @@ func AlertMuteGets(ctx *ctx.Context, prods []string, bgid int64, disabled int, q
|
||||
}
|
||||
}
|
||||
|
||||
if expired != -1 {
|
||||
now := time.Now().Unix()
|
||||
if expired == 1 {
|
||||
session = session.Where("mute_time_type = ? AND etime < ?", TimeRange, now)
|
||||
} else {
|
||||
session = session.Where("(mute_time_type = ? AND etime >= ?) OR mute_time_type = ?", TimeRange, now, Periodic)
|
||||
}
|
||||
}
|
||||
|
||||
if query != "" {
|
||||
arr := strings.Fields(query)
|
||||
for i := 0; i < len(arr); i++ {
|
||||
@@ -478,7 +487,7 @@ func AlertMuteGetsAll(ctx *ctx.Context) ([]*AlertMute, error) {
|
||||
// get my cluster's mutes
|
||||
var lst []*AlertMute
|
||||
if !ctx.IsCenter {
|
||||
lst, err := poster.GetByUrls[[]*AlertMute](ctx, "/v1/n9e/alert-mutes?disabled=0")
|
||||
lst, err := poster.GetByUrls[[]*AlertMute](ctx, "/v1/n9e/active-alert-mutes")
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
@@ -490,6 +499,10 @@ func AlertMuteGetsAll(ctx *ctx.Context) ([]*AlertMute, error) {
|
||||
|
||||
session := DB(ctx).Model(&AlertMute{}).Where("disabled = 0")
|
||||
|
||||
// 只筛选在生效时间内的屏蔽规则, 这里 btime < now+10 是为了避免同步期间有规则满足了生效时间条件
|
||||
now := time.Now().Unix()
|
||||
session = session.Where("(mute_time_type = ? AND btime <= ? AND etime >= ?) OR mute_time_type = ?", TimeRange, now+10, now, Periodic)
|
||||
|
||||
err := session.Find(&lst).Error
|
||||
if err != nil {
|
||||
return nil, err
|
||||
|
||||
@@ -119,6 +119,7 @@ type AlertRule struct {
|
||||
UpdateByNickname string `json:"update_by_nickname" gorm:"-"` // for fe
|
||||
CronPattern string `json:"cron_pattern"`
|
||||
NotifyRuleIds []int64 `json:"notify_rule_ids" gorm:"serializer:json"`
|
||||
PipelineConfigs []PipelineConfig `json:"pipeline_configs" gorm:"serializer:json"`
|
||||
NotifyVersion int `json:"notify_version"` // 0: old, 1: new
|
||||
}
|
||||
|
||||
|
||||
@@ -172,6 +172,7 @@ type AlertRule struct {
|
||||
DatasourceQueries []models.DatasourceQuery `gorm:"datasource_queries;type:text;serializer:json"` // datasource queries
|
||||
NotifyRuleIds []int64 `gorm:"column:notify_rule_ids;type:varchar(1024)"`
|
||||
NotifyVersion int `gorm:"column:notify_version;type:int;default:0"`
|
||||
PipelineConfigs []models.PipelineConfig `gorm:"column:pipeline_configs;type:text;serializer:json"`
|
||||
}
|
||||
|
||||
type AlertSubscribe struct {
|
||||
@@ -259,7 +260,7 @@ type BoardBusigroup struct {
|
||||
}
|
||||
|
||||
type Users struct {
|
||||
Belong string `gorm:"column:belong;varchar(16);default:'';comment:belong"`
|
||||
Belong string `gorm:"column:belong;type:varchar(16);default:'';comment:belong"`
|
||||
LastActiveTime int64 `gorm:"column:last_active_time;type:int;default:0;comment:last_active_time"`
|
||||
Phone string `gorm:"column:phone;type:varchar(1024);not null;default:''"`
|
||||
}
|
||||
|
||||
@@ -545,7 +545,7 @@ func (ncc *NotifyChannelConfig) SendHTTP(events []*AlertCurEvent, tpl map[string
|
||||
if err != nil {
|
||||
logger.Errorf("send_http: failed to send http notify. url=%s request_body=%s error=%v", url, string(body), err)
|
||||
lastErrorMessage = err.Error()
|
||||
time.Sleep(time.Duration(httpConfig.RetryInterval) * time.Second)
|
||||
time.Sleep(time.Duration(httpConfig.RetryInterval) * time.Millisecond)
|
||||
continue
|
||||
}
|
||||
defer resp.Body.Close()
|
||||
|
||||
@@ -49,11 +49,11 @@ func ConvertAlert(rule PromRule, interval string, datasouceQueries []DatasourceQ
|
||||
appendTags = append(appendTags, fmt.Sprintf("%s=%s", strings.ReplaceAll(k, " ", ""), strings.ReplaceAll(v, " ", "")))
|
||||
} else {
|
||||
switch v {
|
||||
case "critical":
|
||||
case "critical", "Critical", "CRITICAL", "error", "Error", "ERROR", "fatal", "Fatal", "FATAL", "page", "Page", "PAGE", "sev1", "SEV1", "Severity1", "severity1", "SEVERITY1":
|
||||
severity = 1
|
||||
case "warning":
|
||||
case "warning", "Warning", "WARNING", "warn", "Warn", "WARN", "sev2", "SEV2", "Severity2", "severity2", "SEVERITY2":
|
||||
severity = 2
|
||||
case "info":
|
||||
case "info", "Info", "INFO", "notice", "Notice", "NOTICE", "sev3", "SEV3", "Severity3", "severity3", "SEVERITY3":
|
||||
severity = 3
|
||||
}
|
||||
ruleName += "-" + v
|
||||
|
||||
@@ -369,6 +369,12 @@ func UserGetById(ctx *ctx.Context, id int64) (*User, error) {
|
||||
return UserGet(ctx, "id=?", id)
|
||||
}
|
||||
|
||||
func CountAdminUsers(ctx *ctx.Context) (int64, error) {
|
||||
var count int64
|
||||
err := DB(ctx).Model(&User{}).Where("roles LIKE ?", "%"+AdminRole+"%").Count(&count).Error
|
||||
return count, err
|
||||
}
|
||||
|
||||
func UsersGetByGroupIds(ctx *ctx.Context, groupIds []int64) ([]User, error) {
|
||||
if len(groupIds) == 0 {
|
||||
return nil, nil
|
||||
|
||||
@@ -12,15 +12,16 @@ import (
|
||||
)
|
||||
|
||||
type UserGroup struct {
|
||||
Id int64 `json:"id" gorm:"primaryKey"`
|
||||
Name string `json:"name"`
|
||||
Note string `json:"note"`
|
||||
CreateAt int64 `json:"create_at"`
|
||||
CreateBy string `json:"create_by"`
|
||||
UpdateAt int64 `json:"update_at"`
|
||||
UpdateBy string `json:"update_by"`
|
||||
UserIds []int64 `json:"-" gorm:"-"`
|
||||
Users []User `json:"users" gorm:"-"`
|
||||
Id int64 `json:"id" gorm:"primaryKey"`
|
||||
Name string `json:"name"`
|
||||
Note string `json:"note"`
|
||||
CreateAt int64 `json:"create_at"`
|
||||
CreateBy string `json:"create_by"`
|
||||
UpdateAt int64 `json:"update_at"`
|
||||
UpdateBy string `json:"update_by"`
|
||||
UserIds []int64 `json:"-" gorm:"-"`
|
||||
Users []User `json:"users" gorm:"-"`
|
||||
BusiGroupsRes []*BusiGroupRes `json:"busi_groups" gorm:"-"`
|
||||
}
|
||||
|
||||
func (ug *UserGroup) TableName() string {
|
||||
|
||||
@@ -192,6 +192,15 @@ var I18N = `{
|
||||
"View Alerting Engines": "查看告警引擎列表",
|
||||
"View Product Version": "查看产品版本",
|
||||
|
||||
"Some alert rules still in the BusiGroup": "业务组中仍有告警规则",
|
||||
"Some alert mutes still in the BusiGroup": "业务组中仍有屏蔽规则",
|
||||
"Some alert subscribes still in the BusiGroup": "业务组中仍有订阅规则",
|
||||
"Some Board still in the BusiGroup": "业务组中仍有仪表盘",
|
||||
"Some targets still in the BusiGroup": "业务组中仍有监控对象",
|
||||
"Some recording rules still in the BusiGroup": "业务组中仍有记录规则",
|
||||
"Some recovery scripts still in the BusiGroup": "业务组中仍有自愈脚本",
|
||||
"Some target busigroups still in the BusiGroup": "业务组中仍有监控对象",
|
||||
|
||||
"---------zh_CN--------": "---------zh_CN--------"
|
||||
},
|
||||
"zh_HK": {
|
||||
@@ -387,6 +396,15 @@ var I18N = `{
|
||||
"View Alerting Engines": "查看告警引擎列表",
|
||||
"View Product Version": "查看產品版本",
|
||||
|
||||
"Some alert rules still in the BusiGroup": "業務組中仍有告警規則",
|
||||
"Some alert mutes still in the BusiGroup": "業務組中仍有屏蔽規則",
|
||||
"Some alert subscribes still in the BusiGroup": "業務組中仍有訂閱規則",
|
||||
"Some Board still in the BusiGroup": "業務組中仍有儀表板",
|
||||
"Some targets still in the BusiGroup": "業務組中仍有監控對象",
|
||||
"Some recording rules still in the BusiGroup": "業務組中仍有記錄規則",
|
||||
"Some recovery scripts still in the BusiGroup": "業務組中仍有自愈腳本",
|
||||
"Some target busigroups still in the BusiGroup": "業務組中仍有監控對象",
|
||||
|
||||
"---------zh_HK--------": "---------zh_HK--------"
|
||||
},
|
||||
"ja_JP": {
|
||||
@@ -579,6 +597,15 @@ var I18N = `{
|
||||
"View Alerting Engines": "アラートエンジンの表示",
|
||||
"View Product Version": "製品のバージョンを見る",
|
||||
|
||||
"Some alert rules still in the BusiGroup": "ビジネスグループにまだアラートルールがあります",
|
||||
"Some alert mutes still in the BusiGroup": "ビジネスグループにまだミュートルールがあります",
|
||||
"Some alert subscribes still in the BusiGroup": "ビジネスグループにまだサブスクライブルールがあります",
|
||||
"Some Board still in the BusiGroup": "ビジネスグループにまだダッシュボードがあります",
|
||||
"Some targets still in the BusiGroup": "ビジネスグループにまだ監視対象があります",
|
||||
"Some recording rules still in the BusiGroup": "ビジネスグループにまだ記録ルールがあります",
|
||||
"Some recovery scripts still in the BusiGroup": "ビジネスグループにまだ自己回復スクリプトがあります",
|
||||
"Some target busigroups still in the BusiGroup": "ビジネスグループにまだ監視対象があります",
|
||||
|
||||
"---------ja_JP--------": "---------ja_JP--------"
|
||||
},
|
||||
"ru_RU": {
|
||||
@@ -771,6 +798,15 @@ var I18N = `{
|
||||
"View Alerting Engines": "Просмотр списка алертинг-инженеров",
|
||||
"View Product Version": "Просмотр версии продукта",
|
||||
|
||||
"Some alert rules still in the BusiGroup": "В бизнес-группе еще есть правила оповещений",
|
||||
"Some alert mutes still in the BusiGroup": "В бизнес-группе еще есть правила отключения оповещений",
|
||||
"Some alert subscribes still in the BusiGroup": "В бизнес-группе еще есть правила подписки",
|
||||
"Some Board still in the BusiGroup": "В бизнес-группе еще есть панели мониторинга",
|
||||
"Some targets still in the BusiGroup": "В бизнес-группе еще есть объекты мониторинга",
|
||||
"Some recording rules still in the BusiGroup": "В бизнес-группе еще есть правила записи",
|
||||
"Some recovery scripts still in the BusiGroup": "В бизнес-группе еще есть скрипты самоисцеления",
|
||||
"Some target busigroups still in the BusiGroup": "В бизнес-группе еще есть объекты мониторинга",
|
||||
|
||||
"---------ru_RU--------": "---------ru_RU--------"
|
||||
}
|
||||
}`
|
||||
|
||||
@@ -5,6 +5,7 @@ import (
|
||||
"crypto/tls"
|
||||
"fmt"
|
||||
"net/http"
|
||||
"strings"
|
||||
"sync"
|
||||
"time"
|
||||
|
||||
@@ -140,14 +141,22 @@ func (s *SsoClient) GetDisplayName() string {
|
||||
return s.DisplayName
|
||||
}
|
||||
|
||||
func (s *SsoClient) GetSsoLogoutAddr() string {
|
||||
func (s *SsoClient) GetSsoLogoutAddr(idToken string) string {
|
||||
s.RLock()
|
||||
defer s.RUnlock()
|
||||
if !s.Enable {
|
||||
return ""
|
||||
}
|
||||
|
||||
return s.SsoLogoutAddr
|
||||
return s.replaceIdTokenTemplate(s.SsoLogoutAddr, idToken)
|
||||
}
|
||||
|
||||
// replaceIdTokenTemplate 替换登出 URL 中的 {{$__id_token__}} 模板变量
|
||||
func (s *SsoClient) replaceIdTokenTemplate(logoutAddr, idToken string) string {
|
||||
if idToken == "" {
|
||||
return logoutAddr
|
||||
}
|
||||
return strings.ReplaceAll(logoutAddr, "{{$__id_token__}}", idToken)
|
||||
}
|
||||
|
||||
func wrapStateKey(key string) string {
|
||||
@@ -201,6 +210,7 @@ type CallbackOutput struct {
|
||||
Redirect string `json:"redirect"`
|
||||
Msg string `json:"msg"`
|
||||
AccessToken string `json:"accessToken"`
|
||||
IdToken string `json:"idToken"`
|
||||
Username string `json:"username"`
|
||||
Nickname string `json:"nickname"`
|
||||
Phone string `yaml:"phone"`
|
||||
@@ -245,6 +255,7 @@ func (s *SsoClient) exchangeUser(code string) (*CallbackOutput, error) {
|
||||
|
||||
output := &CallbackOutput{
|
||||
AccessToken: oauth2Token.AccessToken,
|
||||
IdToken: rawIDToken,
|
||||
Username: extractClaim(data, s.Attributes.Username),
|
||||
Nickname: extractClaim(data, s.Attributes.Nickname),
|
||||
Phone: extractClaim(data, s.Attributes.Phone),
|
||||
|
||||
@@ -52,6 +52,10 @@ func (po *PromOption) Equal(target PromOption) bool {
|
||||
return false
|
||||
}
|
||||
|
||||
if po.InsecureSkipVerify != target.InsecureSkipVerify {
|
||||
return false
|
||||
}
|
||||
|
||||
if len(po.Headers) != len(target.Headers) {
|
||||
return false
|
||||
}
|
||||
|
||||
@@ -181,7 +181,30 @@ func (w WriterType) Post(req []byte, headers ...map[string]string) error {
|
||||
}
|
||||
|
||||
if resp.StatusCode >= 400 && resp.StatusCode < 500 {
|
||||
logger.Warningf("push data with remote write:%s request got status code: %v, response body: %s", url, resp.StatusCode, string(body))
|
||||
// 解码并解析 req 以便打印指标信息
|
||||
decoded, decodeErr := snappy.Decode(nil, req)
|
||||
metricsInfo := "failed to decode request"
|
||||
if decodeErr == nil {
|
||||
var writeReq prompb.WriteRequest
|
||||
if unmarshalErr := proto.Unmarshal(decoded, &writeReq); unmarshalErr == nil {
|
||||
metricsInfo = fmt.Sprintf("timeseries count: %d", len(writeReq.Timeseries))
|
||||
logger.Warningf("push data with remote write:%s request got status code: %v, response body: %s, %s", url, resp.StatusCode, string(body), metricsInfo)
|
||||
// 只打印前几条样本,避免日志泛滥
|
||||
sampleCount := 5
|
||||
if sampleCount > len(writeReq.Timeseries) {
|
||||
sampleCount = len(writeReq.Timeseries)
|
||||
}
|
||||
for i := 0; i < sampleCount; i++ {
|
||||
logger.Warningf("push data with remote write:%s timeseries: [%d] %s", url, i, writeReq.Timeseries[i].String())
|
||||
}
|
||||
} else {
|
||||
metricsInfo = fmt.Sprintf("failed to unmarshal: %v", unmarshalErr)
|
||||
logger.Warningf("push data with remote write:%s request got status code: %v, response body: %s, metrics: %s", url, resp.StatusCode, string(body), metricsInfo)
|
||||
}
|
||||
} else {
|
||||
metricsInfo = fmt.Sprintf("failed to decode: %v", decodeErr)
|
||||
logger.Warningf("push data with remote write:%s request got status code: %v, response body: %s, metrics: %s", url, resp.StatusCode, string(body), metricsInfo)
|
||||
}
|
||||
continue
|
||||
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user