Compare commits

..

34 Commits

Author SHA1 Message Date
ning
fa49449588 fix: user phone encrypt 2025-11-11 16:47:49 +08:00
ning
876f1d1084 fix prom datasource update 2025-11-10 10:51:44 +08:00
ning
678830be37 refactor: add /builtin-component service api 2025-11-10 10:29:14 +08:00
ning
5e30f3a00d Merge branch 'main' of github.com:ccfos/nightingale 2025-11-06 01:39:58 +08:00
ning
7f1eefd033 fix: dsn 2025-11-05 15:08:08 +08:00
Wenyu Su
c8dd26ca4c fix(notify): http callback retry interval in millisecond (#2928) 2025-11-04 18:11:30 +08:00
ning
37c57e66ea refactor: optimize db dsn 2025-11-04 15:09:03 +08:00
ning
878e940325 docs: update migrate.sql 2025-11-03 19:16:00 +08:00
Ulric Qin
cbc715305d Merge branch 'main' of https://github.com/ccfos/nightingale 2025-10-31 18:35:39 +08:00
Ulric Qin
5011766c70 delete single config dir 2025-10-31 18:35:32 +08:00
Snowykami
b3ed8a1e8c fix: elasticsearch v9 range query compatibility (#2692) (#2922) 2025-10-31 15:05:45 +08:00
Ulric Qin
814ded90b6 remove no use configuration: EnableAutoMigrate and add etc/single/config.toml 2025-10-31 11:38:17 +08:00
ning
43e89040eb refactor: update oidc logout 2025-10-31 10:26:41 +08:00
710leo
3d339fe03c update issue-translator 2025-10-30 22:50:53 +08:00
710leo
7618858912 update issue-translator 2025-10-30 22:24:49 +08:00
710leo
15b4ef8611 update issue-translator 2025-10-30 22:19:20 +08:00
710leo
5083a5cc96 add issue-translator 2025-10-30 22:10:09 +08:00
Yening Qin
d51e83d7d4 feat: alert rule add event process (#2921) 2025-10-28 16:18:12 +08:00
ning
601d4f0c95 update built in 2025-10-28 15:42:37 +08:00
Ulric Qin
90fac12953 update readme 2025-10-26 22:25:16 +08:00
ning
19d76824d9 update user group api 2025-10-25 16:45:31 +08:00
ning
1341554bbc refactor: optimize push data log 2025-10-19 12:12:06 +08:00
Ulric Qin
fd3ce338cb Merge branch 'main' of https://github.com/ccfos/nightingale 2025-10-15 17:21:58 +08:00
Ulric Qin
b8f36ce3cb add more prometheus rule severity choices 2025-10-15 17:21:52 +08:00
ning
037112a9e6 refactor: update alert mute api 2025-10-15 15:19:01 +08:00
zjxpsetp
c6e75d31a1 update jmx dashboard to support multi gc mode 2025-10-15 12:41:01 +08:00
zjxpsetp
bd24f5b056 Merge remote-tracking branch 'origin/main'
# Conflicts:
#	integrations/Java/dashboards/jmx_by_kubernetes.json
2025-10-15 12:36:14 +08:00
zjxpsetp
89551c8edb update jmx dashboard to support multi gc mode 2025-10-15 12:29:11 +08:00
ning
042b44940d docs: update i18n 2025-10-14 17:38:25 +08:00
ning
8cd8674848 refactor: optimize alert mute match 2025-10-14 16:59:57 +08:00
ning
7bb6ac8a03 refactor: update alert mute sync 2025-10-14 16:45:15 +08:00
ning
76b35276af refactor: update event api 2025-10-13 20:24:52 +08:00
SaladDay
439a21b784 feat: add expired filter for alert mute rules (#2907) 2025-10-13 14:04:32 +08:00
Yening Qin
47e70a2dba fix: sql order (#2908) 2025-10-13 12:18:16 +08:00
48 changed files with 1857 additions and 1526 deletions

22
.github/workflows/issue-translator.yml vendored Normal file
View File

@@ -0,0 +1,22 @@
name: 'Issue Translator'
on:
issues:
types: [opened]
jobs:
translate:
runs-on: ubuntu-latest
permissions:
issues: write
contents: read
steps:
- name: Translate Issues
uses: usthe/issues-translate-action@v2.7
with:
# 是否翻译 issue 标题
IS_MODIFY_TITLE: true
# GitHub Token
BOT_GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
# 自定义翻译标注(可选)
# CUSTOM_BOT_NOTE: "Translation by bot"

View File

@@ -47,7 +47,7 @@ Nightingale itself does not provide monitoring data collection capabilities. We
For certain edge data centers with poor network connectivity to the central Nightingale server, we offer a distributed deployment mode for the alerting engine. In this mode, even if the network is disconnected, the alerting functionality remains unaffected.
![Edge Deployment Mode](doc/img/readme/20240222102119.png)
![Edge Deployment Mode](doc/img/readme/multi-region-arch.png)
> In the above diagram, Data Center A has a good network with the central data center, so it uses the Nightingale process in the central data center as the alerting engine. Data Center B has a poor network with the central data center, so it deploys `n9e-edge` as the alerting engine to handle alerting for its own data sources.
@@ -68,7 +68,7 @@ Then Nightingale is not suitable. It is recommended that you choose on-call prod
## 🔑 Key Features
![Nightingale Alerting rules](doc/img/readme/2025-05-23_18-43-37.png)
![Nightingale Alerting rules](doc/img/readme/alerting-rules-en.png)
- Nightingale supports alerting rules, mute rules, subscription rules, and notification rules. It natively supports 20 types of notification media and allows customization of message templates.
- It supports event pipelines for Pipeline processing of alarms, facilitating automated integration with in-house systems. For example, it can append metadata to alarms or perform relabeling on events.
@@ -76,19 +76,19 @@ Then Nightingale is not suitable. It is recommended that you choose on-call prod
- Many databases and middleware come with built-in alert rules that can be directly imported and used. It also supports direct import of Prometheus alerting rules.
- It supports alerting self-healing, which automatically triggers a script to execute predefined logic after an alarm is generated—such as cleaning up disk space or capturing the current system state.
![Nightingale Alarm Dashboard](doc/img/readme/2025-05-30_08-49-28.png)
![Nightingale Alarm Dashboard](doc/img/readme/active-events-en.png)
- Nightingale archives historical alarms and supports multi-dimensional query and statistics.
- It supports flexible aggregation grouping, allowing a clear view of the distribution of alarms across the company.
![Nightingale Integration Center](doc/img/readme/2025-05-23_18-46-06.png)
![Nightingale Integration Center](doc/img/readme/integration-components-en.png)
- Nightingale has built-in metric descriptions, dashboards, and alerting rules for common operating systems, middleware, and databases, which are contributed by the community with varying quality.
- It directly receives data via multiple protocols such as Remote Write, OpenTSDB, Datadog, and Falcon, integrates with various Agents.
- It supports data sources like Prometheus, ElasticSearch, Loki, ClickHouse, MySQL, Postgres, allowing alerting based on data from these sources.
- Nightingale can be easily embedded into internal enterprise systems (e.g. Grafana, CMDB), and even supports configuring menu visibility for these embedded systems.
![Nightingale dashboards](doc/img/readme/2025-05-23_18-49-02.png)
![Nightingale dashboards](doc/img/readme/dashboard-en.png)
- Nightingale supports dashboard functionality, including common chart types, and comes with pre-built dashboards. The image above is a screenshot of one of these dashboards.
- If you are already accustomed to Grafana, it is recommended to continue using Grafana for visualization, as Grafana has deeper expertise in this area.

View File

@@ -118,7 +118,7 @@ func Start(alertc aconf.Alert, pushgwc pconf.Pushgw, syncStats *memsto.Stats, al
eventProcessorCache := memsto.NewEventProcessorCache(ctx, syncStats)
dp := dispatch.NewDispatch(alertRuleCache, userCache, userGroupCache, alertSubscribeCache, targetCache, notifyConfigCache, taskTplsCache, notifyRuleCache, notifyChannelCache, messageTemplateCache, eventProcessorCache, alertc.Alerting, ctx, alertStats)
consumer := dispatch.NewConsumer(alertc.Alerting, ctx, dp, promClients)
consumer := dispatch.NewConsumer(alertc.Alerting, ctx, dp, promClients, alertMuteCache)
notifyRecordComsumer := sender.NewNotifyRecordConsumer(ctx)

View File

@@ -10,6 +10,7 @@ import (
"github.com/ccfos/nightingale/v6/alert/aconf"
"github.com/ccfos/nightingale/v6/alert/common"
"github.com/ccfos/nightingale/v6/alert/queue"
"github.com/ccfos/nightingale/v6/memsto"
"github.com/ccfos/nightingale/v6/models"
"github.com/ccfos/nightingale/v6/pkg/ctx"
"github.com/ccfos/nightingale/v6/pkg/poster"
@@ -26,10 +27,15 @@ type Consumer struct {
alerting aconf.Alerting
ctx *ctx.Context
dispatch *Dispatch
promClients *prom.PromClientMap
dispatch *Dispatch
promClients *prom.PromClientMap
alertMuteCache *memsto.AlertMuteCacheType
}
type EventMuteHookFunc func(event *models.AlertCurEvent) bool
var EventMuteHook EventMuteHookFunc = func(event *models.AlertCurEvent) bool { return false }
func InitRegisterQueryFunc(promClients *prom.PromClientMap) {
tplx.RegisterQueryFunc(func(datasourceID int64, promql string) model.Value {
if promClients.IsNil(datasourceID) {
@@ -43,12 +49,14 @@ func InitRegisterQueryFunc(promClients *prom.PromClientMap) {
}
// 创建一个 Consumer 实例
func NewConsumer(alerting aconf.Alerting, ctx *ctx.Context, dispatch *Dispatch, promClients *prom.PromClientMap) *Consumer {
func NewConsumer(alerting aconf.Alerting, ctx *ctx.Context, dispatch *Dispatch, promClients *prom.PromClientMap, alertMuteCache *memsto.AlertMuteCacheType) *Consumer {
return &Consumer{
alerting: alerting,
ctx: ctx,
dispatch: dispatch,
promClients: promClients,
alertMuteCache: alertMuteCache,
}
}

View File

@@ -28,6 +28,8 @@ var ShouldSkipNotify func(*ctx.Context, *models.AlertCurEvent, int64) bool
var SendByNotifyRule func(*ctx.Context, *memsto.UserCacheType, *memsto.UserGroupCacheType, *memsto.NotifyChannelCacheType,
[]*models.AlertCurEvent, int64, *models.NotifyConfig, *models.NotifyChannelConfig, *models.MessageTemplate)
var EventProcessorCache *memsto.EventProcessorCacheType
func init() {
ShouldSkipNotify = shouldSkipNotify
SendByNotifyRule = SendNotifyRuleMessage
@@ -90,6 +92,7 @@ func NewDispatch(alertRuleCache *memsto.AlertRuleCacheType, userCache *memsto.Us
}
pipeline.Init()
EventProcessorCache = eventProcessorCache
// 设置通知记录回调函数
notifyChannelCache.SetNotifyRecordFunc(sender.NotifyRecord)
@@ -177,39 +180,7 @@ func (e *Dispatch) HandleEventWithNotifyRule(eventOrigin *models.AlertCurEvent)
eventCopy.NotifyRuleId = notifyRuleId
eventCopy.NotifyRuleName = notifyRule.Name
var processors []models.Processor
for _, pipelineConfig := range notifyRule.PipelineConfigs {
if !pipelineConfig.Enable {
continue
}
eventPipeline := e.eventProcessorCache.Get(pipelineConfig.PipelineId)
if eventPipeline == nil {
logger.Warningf("notify_id: %d, event:%+v, processor not found", notifyRuleId, eventCopy)
continue
}
if !pipelineApplicable(eventPipeline, eventCopy) {
logger.Debugf("notify_id: %d, event:%+v, pipeline_id: %d, not applicable", notifyRuleId, eventCopy, pipelineConfig.PipelineId)
continue
}
processors = append(processors, e.eventProcessorCache.GetProcessorsById(pipelineConfig.PipelineId)...)
}
for _, processor := range processors {
var res string
var err error
logger.Infof("before processor notify_id: %d, event:%+v, processor:%+v", notifyRuleId, eventCopy, processor)
eventCopy, res, err = processor.Process(e.ctx, eventCopy)
if eventCopy == nil {
logger.Warningf("after processor notify_id: %d, event:%+v, processor:%+v, event is nil", notifyRuleId, eventCopy, processor)
sender.NotifyRecord(e.ctx, []*models.AlertCurEvent{eventOrigin}, notifyRuleId, "", "", res, errors.New("drop by processor"))
break
}
logger.Infof("after processor notify_id: %d, event:%+v, processor:%+v, res:%v, err:%v", notifyRuleId, eventCopy, processor, res, err)
}
eventCopy = HandleEventPipeline(notifyRule.PipelineConfigs, eventOrigin, eventCopy, e.eventProcessorCache, e.ctx, notifyRuleId, "notify_rule")
if ShouldSkipNotify(e.ctx, eventCopy, notifyRuleId) {
logger.Infof("notify_id: %d, event:%+v, should skip notify", notifyRuleId, eventCopy)
continue
@@ -257,7 +228,48 @@ func shouldSkipNotify(ctx *ctx.Context, event *models.AlertCurEvent, notifyRuleI
return false
}
func pipelineApplicable(pipeline *models.EventPipeline, event *models.AlertCurEvent) bool {
func HandleEventPipeline(pipelineConfigs []models.PipelineConfig, eventOrigin, event *models.AlertCurEvent, eventProcessorCache *memsto.EventProcessorCacheType, ctx *ctx.Context, id int64, from string) *models.AlertCurEvent {
for _, pipelineConfig := range pipelineConfigs {
if !pipelineConfig.Enable {
continue
}
eventPipeline := eventProcessorCache.Get(pipelineConfig.PipelineId)
if eventPipeline == nil {
logger.Warningf("processor_by_%s_id:%d pipeline_id:%d, event pipeline not found, event: %+v", from, id, pipelineConfig.PipelineId, event)
continue
}
if !PipelineApplicable(eventPipeline, event) {
logger.Debugf("processor_by_%s_id:%d pipeline_id:%d, event pipeline not applicable, event: %+v", from, id, pipelineConfig.PipelineId, event)
continue
}
processors := eventProcessorCache.GetProcessorsById(pipelineConfig.PipelineId)
for _, processor := range processors {
var res string
var err error
logger.Infof("processor_by_%s_id:%d pipeline_id:%d, before processor:%+v, event: %+v", from, id, pipelineConfig.PipelineId, processor, event)
event, res, err = processor.Process(ctx, event)
if event == nil {
logger.Infof("processor_by_%s_id:%d pipeline_id:%d, event dropped, after processor:%+v, event: %+v", from, id, pipelineConfig.PipelineId, processor, eventOrigin)
if from == "notify_rule" {
// alert_rule 获取不到 eventId 记录没有意义
sender.NotifyRecord(ctx, []*models.AlertCurEvent{eventOrigin}, id, "", "", res, fmt.Errorf("processor_by_%s_id:%d pipeline_id:%d, drop by processor", from, id, pipelineConfig.PipelineId))
}
return nil
}
logger.Infof("processor_by_%s_id:%d pipeline_id:%d, after processor:%+v, event: %+v, res:%v, err:%v", from, id, pipelineConfig.PipelineId, processor, event, res, err)
}
}
event.FE2DB()
event.FillTagsMap()
return event
}
func PipelineApplicable(pipeline *models.EventPipeline, event *models.AlertCurEvent) bool {
if pipeline == nil {
return true
}
@@ -496,7 +508,7 @@ func SendNotifyRuleMessage(ctx *ctx.Context, userCache *memsto.UserCacheType, us
start := time.Now()
respBody, err := notifyChannel.SendFlashDuty(events, flashDutyChannelIDs[i], notifyChannelCache.GetHttpClient(notifyChannel.ID))
respBody = fmt.Sprintf("duration: %d ms %s", time.Since(start).Milliseconds(), respBody)
logger.Infof("notify_id: %d, channel_name: %v, event:%+v, IntegrationUrl: %v dutychannel_id: %v, respBody: %v, err: %v", notifyRuleId, notifyChannel.Name, events[0], notifyChannel.RequestConfig.FlashDutyRequestConfig.IntegrationUrl, flashDutyChannelIDs[i], respBody, err)
logger.Infof("duty_sender notify_id: %d, channel_name: %v, event:%+v, IntegrationUrl: %v dutychannel_id: %v, respBody: %v, err: %v", notifyRuleId, notifyChannel.Name, events[0], notifyChannel.RequestConfig.FlashDutyRequestConfig.IntegrationUrl, flashDutyChannelIDs[i], respBody, err)
sender.NotifyRecord(ctx, events, notifyRuleId, notifyChannel.Name, strconv.FormatInt(flashDutyChannelIDs[i], 10), respBody, err)
}
@@ -527,7 +539,7 @@ func SendNotifyRuleMessage(ctx *ctx.Context, userCache *memsto.UserCacheType, us
start := time.Now()
target, res, err := notifyChannel.SendScript(events, tplContent, customParams, sendtos)
res = fmt.Sprintf("duration: %d ms %s", time.Since(start).Milliseconds(), res)
logger.Infof("notify_id: %d, channel_name: %v, event:%+v, tplContent:%s, customParams:%v, target:%s, res:%s, err:%v", notifyRuleId, notifyChannel.Name, events[0], tplContent, customParams, target, res, err)
logger.Infof("script_sender notify_id: %d, channel_name: %v, event:%+v, tplContent:%s, customParams:%v, target:%s, res:%s, err:%v", notifyRuleId, notifyChannel.Name, events[0], tplContent, customParams, target, res, err)
sender.NotifyRecord(ctx, events, notifyRuleId, notifyChannel.Name, target, res, err)
default:
logger.Warningf("notify_id: %d, channel_name: %v, event:%+v send type not found", notifyRuleId, notifyChannel.Name, events[0])

View File

@@ -286,7 +286,7 @@ func (arw *AlertRuleWorker) GetPromAnomalyPoint(ruleConfig string) ([]models.Ano
continue
}
if query.VarEnabled {
if query.VarEnabled && strings.Contains(query.PromQl, "$") {
var anomalyPoints []models.AnomalyPoint
if hasLabelLossAggregator(query) || notExactMatch(query) {
// 若有聚合函数或非精确匹配则需要先填充变量然后查询,这个方式效率较低

View File

@@ -1,6 +1,7 @@
package mute
import (
"slices"
"strconv"
"strings"
"time"
@@ -153,13 +154,7 @@ func MatchMute(event *models.AlertCurEvent, mute *models.AlertMute, clock ...int
// 如果不是全局的,判断 匹配的 datasource id
if len(mute.DatasourceIdsJson) != 0 && mute.DatasourceIdsJson[0] != 0 && event.DatasourceId != 0 {
idm := make(map[int64]struct{}, len(mute.DatasourceIdsJson))
for i := 0; i < len(mute.DatasourceIdsJson); i++ {
idm[mute.DatasourceIdsJson[i]] = struct{}{}
}
// 判断 event.datasourceId 是否包含在 idm 中
if _, has := idm[event.DatasourceId]; !has {
if !slices.Contains(mute.DatasourceIdsJson, event.DatasourceId) {
return false, errors.New("datasource id not match")
}
}
@@ -198,7 +193,7 @@ func MatchMute(event *models.AlertCurEvent, mute *models.AlertMute, clock ...int
return false, errors.New("event severity not match mute severity")
}
if mute.ITags == nil || len(mute.ITags) == 0 {
if len(mute.ITags) == 0 {
return true, nil
}
if !common.MatchTags(event.TagsMap, mute.ITags) {

View File

@@ -26,8 +26,6 @@ import (
"github.com/toolkits/pkg/str"
)
type EventMuteHookFunc func(event *models.AlertCurEvent) bool
type ExternalProcessorsType struct {
ExternalLock sync.RWMutex
Processors map[string]*Processor
@@ -76,7 +74,6 @@ type Processor struct {
HandleFireEventHook HandleEventFunc
HandleRecoverEventHook HandleEventFunc
EventMuteHook EventMuteHookFunc
ScheduleEntry cron.Entry
PromEvalInterval int
@@ -121,7 +118,6 @@ func NewProcessor(engineName string, rule *models.AlertRule, datasourceId int64,
HandleFireEventHook: func(event *models.AlertCurEvent) {},
HandleRecoverEventHook: func(event *models.AlertCurEvent) {},
EventMuteHook: func(event *models.AlertCurEvent) bool { return false },
}
p.mayHandleGroup()
@@ -155,9 +151,19 @@ func (p *Processor) Handle(anomalyPoints []models.AnomalyPoint, from string, inh
// 如果 event 被 mute 了,本质也是 fire 的状态,这里无论如何都添加到 alertingKeys 中,防止 fire 的事件自动恢复了
hash := event.Hash
alertingKeys[hash] = struct{}{}
// event processor
eventCopy := event.DeepCopy()
event = dispatch.HandleEventPipeline(cachedRule.PipelineConfigs, eventCopy, event, dispatch.EventProcessorCache, p.ctx, cachedRule.Id, "alert_rule")
if event == nil {
logger.Infof("rule_eval:%s is muted drop by pipeline event:%v", p.Key(), eventCopy)
continue
}
// event mute
isMuted, detail, muteId := mute.IsMuted(cachedRule, event, p.TargetCache, p.alertMuteCache)
if isMuted {
logger.Debugf("rule_eval:%s event:%v is muted, detail:%s", p.Key(), event, detail)
logger.Infof("rule_eval:%s is muted, detail:%s event:%v", p.Key(), detail, event)
p.Stats.CounterMuteTotal.WithLabelValues(
fmt.Sprintf("%v", event.GroupName),
fmt.Sprintf("%v", p.rule.Id),
@@ -167,8 +173,8 @@ func (p *Processor) Handle(anomalyPoints []models.AnomalyPoint, from string, inh
continue
}
if p.EventMuteHook(event) {
logger.Debugf("rule_eval:%s event:%v is muted by hook", p.Key(), event)
if dispatch.EventMuteHook(event) {
logger.Infof("rule_eval:%s is muted by hook event:%v", p.Key(), event)
p.Stats.CounterMuteTotal.WithLabelValues(
fmt.Sprintf("%v", event.GroupName),
fmt.Sprintf("%v", p.rule.Id),

View File

@@ -25,6 +25,7 @@ func (rt *Router) pushEventToQueue(c *gin.Context) {
if event.RuleId == 0 {
ginx.Bomb(200, "event is illegal")
}
event.FE2DB()
event.TagsMap = make(map[string]string)
for i := 0; i < len(event.TagsJSON); i++ {
@@ -40,7 +41,7 @@ func (rt *Router) pushEventToQueue(c *gin.Context) {
event.TagsMap[arr[0]] = arr[1]
}
hit, _ := mute.EventMuteStrategy(event, rt.AlertMuteCache)
hit, _ := mute.EventMuteStrategy(event, rt.AlertMuteCache)
if hit {
logger.Infof("event_muted: rule_id=%d %s", event.RuleId, event.Hash)
ginx.NewRender(c).Message(nil)

View File

@@ -331,6 +331,30 @@ func (b *BuiltinPayloadInFileType) AddBuiltinPayload(bp *models.BuiltinPayload)
b.IndexData[bp.UUID] = bp
}
func (b *BuiltinPayloadInFileType) GetComponentIdentByCate(typ, cate string) string {
for _, source := range b.Data {
if source == nil {
continue
}
typeMap, exists := source[typ]
if !exists {
continue
}
payloads, exists := typeMap[cate]
if !exists {
continue
}
if len(payloads) > 0 {
return payloads[0].Component
}
}
return ""
}
func (b *BuiltinPayloadInFileType) GetBuiltinPayload(typ, cate, query string, componentId uint64) ([]*models.BuiltinPayload, error) {
var result []*models.BuiltinPayload

View File

@@ -628,6 +628,7 @@ func (rt *Router) Config(r *gin.Engine) {
service.GET("/recording-rules", rt.recordingRuleGetsByService)
service.GET("/alert-mutes", rt.alertMuteGets)
service.GET("/active-alert-mutes", rt.activeAlertMuteGets)
service.POST("/alert-mutes", rt.alertMuteAddByService)
service.DELETE("/alert-mutes", rt.alertMuteDel)
@@ -680,6 +681,10 @@ func (rt *Router) Config(r *gin.Engine) {
// 手机号加密存储配置接口
service.POST("/users/phone/encrypt", rt.usersPhoneEncrypt)
service.POST("/users/phone/decrypt", rt.usersPhoneDecrypt)
service.POST("/users/phone/refresh-encryption-config", rt.usersPhoneDecryptRefresh)
service.GET("/builtin-components", rt.builtinComponentsGets)
service.GET("/builtin-payloads", rt.builtinPayloadsGets)
}
}

View File

@@ -107,9 +107,20 @@ func (rt *Router) logoutPost(c *gin.Context) {
var logoutAddr string
user := c.MustGet("user").(*models.User)
// 获取用户的 id_token
idToken, err := rt.fetchIdToken(c.Request.Context(), user.Id)
if err != nil {
logger.Debugf("fetch id_token failed: %v, user_id: %d", err, user.Id)
idToken = "" // 如果获取失败,使用空字符串
}
// 删除 id_token
rt.deleteIdToken(c.Request.Context(), user.Id)
switch user.Belong {
case "oidc":
logoutAddr = rt.Sso.OIDC.GetSsoLogoutAddr()
logoutAddr = rt.Sso.OIDC.GetSsoLogoutAddr(idToken)
case "cas":
logoutAddr = rt.Sso.CAS.GetSsoLogoutAddr()
case "oauth2":
@@ -199,6 +210,14 @@ func (rt *Router) refreshPost(c *gin.Context) {
ginx.Dangerous(err)
ginx.Dangerous(rt.createAuth(c.Request.Context(), userIdentity, ts))
// 延长 id_token 的过期时间,使其与新的 refresh token 生命周期保持一致
// 注意:这里不会获取新的 id_token只是延长 Redis 中现有 id_token 的 TTL
if idToken, err := rt.fetchIdToken(c.Request.Context(), userid); err == nil && idToken != "" {
if err := rt.saveIdToken(c.Request.Context(), userid, idToken); err != nil {
logger.Debugf("refresh id_token ttl failed: %v, user_id: %d", err, userid)
}
}
ginx.NewRender(c).Data(gin.H{
"access_token": ts.AccessToken,
"refresh_token": ts.RefreshToken,
@@ -286,6 +305,13 @@ func (rt *Router) loginCallback(c *gin.Context) {
ginx.Dangerous(err)
ginx.Dangerous(rt.createAuth(c.Request.Context(), userIdentity, ts))
// 保存 id_token 到 Redis用于登出时使用
if ret.IdToken != "" {
if err := rt.saveIdToken(c.Request.Context(), user.Id, ret.IdToken); err != nil {
logger.Errorf("save id_token failed: %v, user_id: %d", err, user.Id)
}
}
redirect := "/"
if ret.Redirect != "/login" {
redirect = ret.Redirect

View File

@@ -20,7 +20,8 @@ func (rt *Router) alertMuteGetsByBG(c *gin.Context) {
bgid := ginx.UrlParamInt64(c, "id")
prods := strings.Fields(ginx.QueryStr(c, "prods", ""))
query := ginx.QueryStr(c, "query", "")
lst, err := models.AlertMuteGets(rt.Ctx, prods, bgid, -1, query)
expired := ginx.QueryInt(c, "expired", -1)
lst, err := models.AlertMuteGets(rt.Ctx, prods, bgid, -1, expired, query)
ginx.NewRender(c).Data(lst, err)
}
@@ -55,11 +56,17 @@ func (rt *Router) alertMuteGets(c *gin.Context) {
bgid := ginx.QueryInt64(c, "bgid", -1)
query := ginx.QueryStr(c, "query", "")
disabled := ginx.QueryInt(c, "disabled", -1)
lst, err := models.AlertMuteGets(rt.Ctx, prods, bgid, disabled, query)
expired := ginx.QueryInt(c, "expired", -1)
lst, err := models.AlertMuteGets(rt.Ctx, prods, bgid, disabled, expired, query)
ginx.NewRender(c).Data(lst, err)
}
func (rt *Router) activeAlertMuteGets(c *gin.Context) {
lst, err := models.AlertMuteGetsAll(rt.Ctx)
ginx.NewRender(c).Data(lst, err)
}
func (rt *Router) alertMuteAdd(c *gin.Context) {
var f models.AlertMute
@@ -69,7 +76,9 @@ func (rt *Router) alertMuteAdd(c *gin.Context) {
f.CreateBy = username
f.UpdateBy = username
f.GroupId = ginx.UrlParamInt64(c, "id")
ginx.NewRender(c).Message(f.Add(rt.Ctx))
ginx.Dangerous(f.Add(rt.Ctx))
ginx.NewRender(c).Data(f.Id, nil)
}
type MuteTestForm struct {

View File

@@ -453,6 +453,30 @@ func (rt *Router) wrapJwtKey(key string) string {
return rt.HTTP.JWTAuth.RedisKeyPrefix + key
}
func (rt *Router) wrapIdTokenKey(userId int64) string {
return fmt.Sprintf("n9e_id_token_%d", userId)
}
// saveIdToken 保存用户的 id_token 到 Redis
func (rt *Router) saveIdToken(ctx context.Context, userId int64, idToken string) error {
if idToken == "" {
return nil
}
// id_token 的过期时间应该与 RefreshToken 保持一致,确保在整个会话期间都可用于登出
expiration := time.Minute * time.Duration(rt.HTTP.JWTAuth.RefreshExpired)
return rt.Redis.Set(ctx, rt.wrapIdTokenKey(userId), idToken, expiration).Err()
}
// fetchIdToken 从 Redis 获取用户的 id_token
func (rt *Router) fetchIdToken(ctx context.Context, userId int64) (string, error) {
return rt.Redis.Get(ctx, rt.wrapIdTokenKey(userId)).Result()
}
// deleteIdToken 从 Redis 删除用户的 id_token
func (rt *Router) deleteIdToken(ctx context.Context, userId int64) error {
return rt.Redis.Del(ctx, rt.wrapIdTokenKey(userId)).Err()
}
type TokenDetails struct {
AccessToken string
RefreshToken string

View File

@@ -235,6 +235,16 @@ func (rt *Router) userDel(c *gin.Context) {
return
}
// 如果要删除的用户是 admin 角色,检查是否是最后一个 admin
if target.IsAdmin() {
adminCount, err := models.CountAdminUsers(rt.Ctx)
ginx.Dangerous(err)
if adminCount <= 1 {
ginx.Bomb(http.StatusBadRequest, "Cannot delete the last admin user")
}
}
ginx.NewRender(c).Message(target.Del(rt.Ctx))
}
@@ -345,6 +355,16 @@ func (rt *Router) usersPhoneEncrypt(c *gin.Context) {
}, nil)
}
func (rt *Router) usersPhoneDecryptRefresh(c *gin.Context) {
err := models.RefreshPhoneEncryptionCache(rt.Ctx)
if err != nil {
ginx.NewRender(c).Message(fmt.Errorf("refresh phone encryption cache failed: %v", err))
return
}
ginx.NewRender(c).Message(nil)
}
// usersPhoneDecrypt 统一手机号解密
func (rt *Router) usersPhoneDecrypt(c *gin.Context) {
// 先关闭手机号加密功能

View File

@@ -27,6 +27,40 @@ func (rt *Router) userGroupGets(c *gin.Context) {
me := c.MustGet("user").(*models.User)
lst, err := me.UserGroups(rt.Ctx, limit, query)
if err != nil {
ginx.Dangerous(err)
}
allBusiGroups, err := models.BusiGroupGetAll(rt.Ctx)
if err != nil {
ginx.Dangerous(err)
}
var allMembers []models.BusiGroupMember
if err = models.DB(rt.Ctx).Model(&models.BusiGroupMember{}).Find(&allMembers).Error; err != nil {
ginx.Dangerous(err)
}
busiGroupMap := make(map[int64]*models.BusiGroup)
for _, bg := range allBusiGroups {
busiGroupMap[bg.Id] = bg
}
userGroupToBusiGroupsMap := make(map[int64][]*models.BusiGroupRes)
for _, member := range allMembers {
if bg, ok := busiGroupMap[member.BusiGroupId]; ok {
userGroupToBusiGroupsMap[member.UserGroupId] = append(userGroupToBusiGroupsMap[member.UserGroupId], &models.BusiGroupRes{
Id: bg.Id,
Name: bg.Name,
})
}
}
for i := 0; i < len(lst); i++ {
if busiGroups, ok := userGroupToBusiGroupsMap[lst[i].Id]; ok {
lst[i].BusiGroupsRes = busiGroups
}
}
ginx.NewRender(c).Data(lst, err)
}

View File

@@ -441,10 +441,32 @@ func QueryData(ctx context.Context, queryParam interface{}, cliTimeout int64, ve
Field(param.DateField).
MinDocCount(1)
if strings.HasPrefix(version, "7") {
versionParts := strings.Split(version, ".")
major := 0
if len(versionParts) > 0 {
if m, err := strconv.Atoi(versionParts[0]); err == nil {
major = m
}
}
minor := 0
if len(versionParts) > 1 {
if m, err := strconv.Atoi(versionParts[1]); err == nil {
minor = m
}
}
if major >= 7 {
// 添加偏移量使第一个分桶bucket的左边界对齐为 start 时间
offset := (start % param.Interval) - param.Interval
tsAggr.FixedInterval(fmt.Sprintf("%ds", param.Interval)).Offset(fmt.Sprintf("%ds", offset))
// 使用 fixed_interval 的条件ES 7.2+ 或者任何 major > 7例如 ES8
if (major > 7) || (major == 7 && minor >= 2) {
// ES 7.2+ 以及 ES8+ 使用 fixed_interval
tsAggr.FixedInterval(fmt.Sprintf("%ds", param.Interval)).Offset(fmt.Sprintf("%ds", offset))
} else {
// 7.0-7.1 使用 interval带 offset
tsAggr.Interval(fmt.Sprintf("%ds", param.Interval)).Offset(fmt.Sprintf("%ds", offset))
}
} else {
// 兼容 7.0 以下的版本
// OpenSearch 也使用这个字段

View File

@@ -106,6 +106,29 @@ func (e *Elasticsearch) InitClient() error {
options = append(options, elastic.SetHealthcheck(false))
e.Client, err = elastic.NewClient(options...)
if err != nil {
return err
}
if e.Client != nil {
for _, addr := range e.Nodes {
if addr == "" {
continue
}
if ver, verr := e.Client.ElasticsearchVersion(addr); verr == nil {
logger.Infof("detected elasticsearch version from %s: %s", addr, ver)
e.Version = ver
e.Addr = addr
break
} else {
logger.Debugf("detect version failed from %s: %v", addr, verr)
}
}
if e.Version == "" {
logger.Warning("failed to detect elasticsearch version from configured nodes, keep configured version")
}
}
return err
}
@@ -183,7 +206,6 @@ func (e *Elasticsearch) MakeTSQuery(ctx context.Context, query interface{}, even
}
func (e *Elasticsearch) QueryData(ctx context.Context, queryParam interface{}) ([]models.DataResp, error) {
search := func(ctx context.Context, indices []string, source interface{}, timeout int, maxShard int) (*elastic.SearchResult, error) {
return e.Client.Search().
Index(indices...).
@@ -193,7 +215,6 @@ func (e *Elasticsearch) QueryData(ctx context.Context, queryParam interface{}) (
MaxConcurrentShardRequests(maxShard).
Do(ctx)
}
return eslike.QueryData(ctx, queryParam, e.Timeout, e.Version, search)
}

Binary file not shown.

After

Width:  |  Height:  |  Size: 481 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 508 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 386 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 424 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 78 KiB

View File

@@ -89,8 +89,6 @@ MaxLifetime = 7200
MaxOpenConns = 150
# max idle connections
MaxIdleConns = 50
# enable auto migrate or not
# EnableAutoMigrate = false
[Redis]
# address, ip:port or ip1:port,ip2:port for cluster and sentinel(SentinelAddrs)

View File

@@ -86,8 +86,6 @@ MaxLifetime = 7200
MaxOpenConns = 150
# max idle connections
MaxIdleConns = 50
# enable auto migrate or not
# EnableAutoMigrate = false
[Redis]
# address, ip:port or ip1:port,ip2:port for cluster and sentinel(SentinelAddrs)

View File

@@ -86,8 +86,6 @@ MaxLifetime = 7200
MaxOpenConns = 150
# max idle connections
MaxIdleConns = 50
# enable auto migrate or not
# EnableAutoMigrate = false
[Redis]
# address, ip:port or ip1:port,ip2:port for cluster and sentinel(SentinelAddrs)

View File

@@ -90,8 +90,6 @@ MaxLifetime = 7200
MaxOpenConns = 150
# max idle connections
MaxIdleConns = 50
# enable auto migrate or not
# EnableAutoMigrate = false
[Redis]
# address, ip:port or ip1:port,ip2:port for cluster and sentinel(SentinelAddrs)

View File

@@ -272,8 +272,6 @@ CREATE TABLE `source_token` (
) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4;
/* Add translation column for builtin metrics */
ALTER TABLE `builtin_metrics` ADD COLUMN `translation` TEXT COMMENT 'translation of metric' AFTER `lang`;
/* v8.0.0-beta.12 2025-06-03 */
ALTER TABLE `alert_his_event` ADD COLUMN `notify_rule_ids` text COMMENT 'notify rule ids';
@@ -283,3 +281,11 @@ ALTER TABLE `alert_cur_event` ADD COLUMN `notify_rule_ids` text COMMENT 'notify
-- 删除 builtin_metrics 表的 idx_collector_typ_name 唯一索引
DROP INDEX IF EXISTS `idx_collector_typ_name` ON `builtin_metrics`;
/* v8.0.0 2025-07-03 */
ALTER TABLE `builtin_metrics` ADD COLUMN `translation` TEXT COMMENT 'translation of metric' AFTER `lang`;
/* v8.4.0 2025-10-15 */
ALTER TABLE `notify_rule` ADD COLUMN `extra_config` text COMMENT 'extra config';
/* v8.4.1 2025-11-10 */
ALTER TABLE `alert_rule` ADD COLUMN `pipeline_configs` text COMMENT 'pipeline configs';

View File

@@ -7,6 +7,7 @@ import (
"encoding/json"
"errors"
"fmt"
"net/url"
"strings"
"time"
@@ -117,7 +118,8 @@ func (p *PostgreSQL) NewConn(ctx context.Context, database string) (*gorm.DB, er
}()
// Simplified connection logic for PostgreSQL
dsn := fmt.Sprintf("postgres://%s:%s@%s/%s?sslmode=disable&TimeZone=Asia/Shanghai", p.Shard.User, p.Shard.Password, p.Shard.Addr, database)
dsn := fmt.Sprintf("postgres://%s:%s@%s/%s?sslmode=disable&TimeZone=Asia/Shanghai", url.QueryEscape(p.Shard.User), url.QueryEscape(p.Shard.Password), p.Shard.Addr, database)
db, err = sqlbase.NewDB(
ctx,
postgres.Open(dsn),

View File

@@ -90,8 +90,6 @@ MaxLifetime = 7200
MaxOpenConns = 150
# max idle connections
MaxIdleConns = 50
# enable auto migrate or not
# EnableAutoMigrate = false
[Redis]
# address, ip:port or ip1:port,ip2:port for cluster and sentinel(SentinelAddrs)

2
go.mod
View File

@@ -161,4 +161,6 @@ require (
replace golang.org/x/exp v0.0.0-20231006140011-7918f672742d => golang.org/x/exp v0.0.0-20230713183714-613f0c0eb8a1
replace github.com/olivere/elastic/v7 => github.com/n9e/elastic/v7 v7.0.33-0.20251031061708-f480a2dfcfa7
// replace github.com/flashcatcloud/ibex => ../github.com/flashcatcloud/ibex

4
go.sum
View File

@@ -259,10 +259,10 @@ github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 h1:C3w9PqII01/Oq
github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822/go.mod h1:+n7T8mK8HuQTcFwEeznm/DIxMOiR9yIdICNftLE1DvQ=
github.com/mwitkow/go-conntrack v0.0.0-20190716064945-2f068394615f h1:KUppIJq7/+SVif2QVs3tOP0zanoHgBEVAwHxUSIzRqU=
github.com/mwitkow/go-conntrack v0.0.0-20190716064945-2f068394615f/go.mod h1:qRWi+5nqEBWmkhHvq77mSJWrCKwh8bxhgT7d/eI7P4U=
github.com/n9e/elastic/v7 v7.0.33-0.20251031061708-f480a2dfcfa7 h1:fPs1GClmnQZ6E/nzrJCieQKJNM46eqMkHaBg3SoHcgY=
github.com/n9e/elastic/v7 v7.0.33-0.20251031061708-f480a2dfcfa7/go.mod h1:/kVskIy0Pd8nAiKtPtcI4XnzOM+pM6MWQ+zP6YqPVFI=
github.com/oklog/ulid v1.3.1 h1:EGfNDEx6MqHz8B3uNV6QAib1UR2Lm97sHi3ocA6ESJ4=
github.com/oklog/ulid v1.3.1/go.mod h1:CirwcVhetQ6Lv90oh/F+FBtV6XMibvdAFo93nm5qn4U=
github.com/olivere/elastic/v7 v7.0.32 h1:R7CXvbu8Eq+WlsLgxmKVKPox0oOwAE/2T9Si5BnvK6E=
github.com/olivere/elastic/v7 v7.0.32/go.mod h1:c7PVmLe3Fxq77PIfY/bZmxY/TAamBhCzZ8xDOE09a9k=
github.com/opensearch-project/opensearch-go/v2 v2.3.0 h1:nQIEMr+A92CkhHrZgUhcfsrZjibvB3APXf2a1VwCmMQ=
github.com/opensearch-project/opensearch-go/v2 v2.3.0/go.mod h1:8LDr9FCgUTVoT+5ESjc2+iaZuldqE+23Iq0r1XeNue8=
github.com/patrickmn/go-cache v2.1.0+incompatible h1:HRMgzkcYKYpi3C8ajMPV8OFXaaRUnok+kx1WdO15EQc=

File diff suppressed because it is too large Load Diff

View File

@@ -1,4 +1,5 @@
{
[
{
"name": "JMX - Kubernetes",
"tags": "Prometheus JMX Kubernetes",
"configs": {
@@ -1870,4 +1871,5 @@
"version": "3.0.0"
},
"uuid": 1755595969673000
}
}
]

View File

@@ -42,6 +42,11 @@ func (c *CvalCache) initSyncConfigs() {
log.Fatalln("failed to sync configs:", err)
}
err = models.RefreshPhoneEncryptionCache(c.ctx)
if err != nil {
logger.Errorf("failed to refresh phone encryption cache: %v", err)
}
go c.loopSyncConfigs()
}

View File

@@ -284,7 +284,7 @@ func (ncc *NotifyChannelCacheType) processNotifyTask(task *NotifyTask) {
start := time.Now()
resp, err := task.NotifyChannel.SendHTTP(task.Events, task.TplContent, task.CustomParams, task.Sendtos, httpClient)
resp = fmt.Sprintf("duration: %d ms %s", time.Since(start).Milliseconds(), resp)
logger.Infof("notify_id: %d, channel_name: %v, event:%+v, tplContent:%v, customParams:%v, userInfo:%+v, respBody: %v, err: %v",
logger.Infof("http_sendernotify_id: %d, channel_name: %v, event:%+v, tplContent:%v, customParams:%v, userInfo:%+v, respBody: %v, err: %v",
task.NotifyRuleId, task.NotifyChannel.Name, task.Events[0], task.TplContent, task.CustomParams, task.Sendtos, resp, err)
// 调用通知记录回调函数
@@ -296,7 +296,7 @@ func (ncc *NotifyChannelCacheType) processNotifyTask(task *NotifyTask) {
start := time.Now()
resp, err := task.NotifyChannel.SendHTTP(task.Events, task.TplContent, task.CustomParams, []string{task.Sendtos[i]}, httpClient)
resp = fmt.Sprintf("send_time: %s duration: %d ms %s", time.Now().Format("2006-01-02 15:04:05"), time.Since(start).Milliseconds(), resp)
logger.Infof("notify_id: %d, channel_name: %v, event:%+v, tplContent:%v, customParams:%v, userInfo:%+v, respBody: %v, err: %v",
logger.Infof("http_sender notify_id: %d, channel_name: %v, event:%+v, tplContent:%v, customParams:%v, userInfo:%+v, respBody: %v, err: %v",
task.NotifyRuleId, task.NotifyChannel.Name, task.Events[0], task.TplContent, task.CustomParams, task.Sendtos[i], resp, err)
// 调用通知记录回调函数

View File

@@ -498,6 +498,23 @@ func (e *AlertCurEvent) FE2DB() {
}
func (e *AlertCurEvent) FillTagsMap() {
e.TagsMap = make(map[string]string)
for i := 0; i < len(e.TagsJSON); i++ {
pair := strings.TrimSpace(e.TagsJSON[i])
if pair == "" {
continue
}
arr := strings.SplitN(pair, "=", 2)
if len(arr) != 2 {
continue
}
e.TagsMap[arr[0]] = arr[1]
}
}
func (e *AlertCurEvent) DB2Mem() {
e.IsRecovered = false
e.NotifyGroupsJSON = strings.Fields(e.NotifyGroups)

View File

@@ -230,7 +230,7 @@ func AlertMuteGet(ctx *ctx.Context, where string, args ...interface{}) (*AlertMu
return lst[0], err
}
func AlertMuteGets(ctx *ctx.Context, prods []string, bgid int64, disabled int, query string) (lst []AlertMute, err error) {
func AlertMuteGets(ctx *ctx.Context, prods []string, bgid int64, disabled int, expired int, query string) (lst []AlertMute, err error) {
session := DB(ctx)
if bgid != -1 {
@@ -249,6 +249,15 @@ func AlertMuteGets(ctx *ctx.Context, prods []string, bgid int64, disabled int, q
}
}
if expired != -1 {
now := time.Now().Unix()
if expired == 1 {
session = session.Where("mute_time_type = ? AND etime < ?", TimeRange, now)
} else {
session = session.Where("(mute_time_type = ? AND etime >= ?) OR mute_time_type = ?", TimeRange, now, Periodic)
}
}
if query != "" {
arr := strings.Fields(query)
for i := 0; i < len(arr); i++ {
@@ -478,7 +487,7 @@ func AlertMuteGetsAll(ctx *ctx.Context) ([]*AlertMute, error) {
// get my cluster's mutes
var lst []*AlertMute
if !ctx.IsCenter {
lst, err := poster.GetByUrls[[]*AlertMute](ctx, "/v1/n9e/alert-mutes?disabled=0")
lst, err := poster.GetByUrls[[]*AlertMute](ctx, "/v1/n9e/active-alert-mutes")
if err != nil {
return nil, err
}
@@ -490,6 +499,10 @@ func AlertMuteGetsAll(ctx *ctx.Context) ([]*AlertMute, error) {
session := DB(ctx).Model(&AlertMute{}).Where("disabled = 0")
// 只筛选在生效时间内的屏蔽规则, 这里 btime < now+10 是为了避免同步期间有规则满足了生效时间条件
now := time.Now().Unix()
session = session.Where("(mute_time_type = ? AND btime <= ? AND etime >= ?) OR mute_time_type = ?", TimeRange, now+10, now, Periodic)
err := session.Find(&lst).Error
if err != nil {
return nil, err

View File

@@ -119,6 +119,7 @@ type AlertRule struct {
UpdateByNickname string `json:"update_by_nickname" gorm:"-"` // for fe
CronPattern string `json:"cron_pattern"`
NotifyRuleIds []int64 `json:"notify_rule_ids" gorm:"serializer:json"`
PipelineConfigs []PipelineConfig `json:"pipeline_configs" gorm:"serializer:json"`
NotifyVersion int `json:"notify_version"` // 0: old, 1: new
}

View File

@@ -172,6 +172,7 @@ type AlertRule struct {
DatasourceQueries []models.DatasourceQuery `gorm:"datasource_queries;type:text;serializer:json"` // datasource queries
NotifyRuleIds []int64 `gorm:"column:notify_rule_ids;type:varchar(1024)"`
NotifyVersion int `gorm:"column:notify_version;type:int;default:0"`
PipelineConfigs []models.PipelineConfig `gorm:"column:pipeline_configs;type:text;serializer:json"`
}
type AlertSubscribe struct {
@@ -259,7 +260,7 @@ type BoardBusigroup struct {
}
type Users struct {
Belong string `gorm:"column:belong;varchar(16);default:'';comment:belong"`
Belong string `gorm:"column:belong;type:varchar(16);default:'';comment:belong"`
LastActiveTime int64 `gorm:"column:last_active_time;type:int;default:0;comment:last_active_time"`
Phone string `gorm:"column:phone;type:varchar(1024);not null;default:''"`
}

View File

@@ -545,7 +545,7 @@ func (ncc *NotifyChannelConfig) SendHTTP(events []*AlertCurEvent, tpl map[string
if err != nil {
logger.Errorf("send_http: failed to send http notify. url=%s request_body=%s error=%v", url, string(body), err)
lastErrorMessage = err.Error()
time.Sleep(time.Duration(httpConfig.RetryInterval) * time.Second)
time.Sleep(time.Duration(httpConfig.RetryInterval) * time.Millisecond)
continue
}
defer resp.Body.Close()

View File

@@ -49,11 +49,11 @@ func ConvertAlert(rule PromRule, interval string, datasouceQueries []DatasourceQ
appendTags = append(appendTags, fmt.Sprintf("%s=%s", strings.ReplaceAll(k, " ", ""), strings.ReplaceAll(v, " ", "")))
} else {
switch v {
case "critical":
case "critical", "Critical", "CRITICAL", "error", "Error", "ERROR", "fatal", "Fatal", "FATAL", "page", "Page", "PAGE", "sev1", "SEV1", "Severity1", "severity1", "SEVERITY1":
severity = 1
case "warning":
case "warning", "Warning", "WARNING", "warn", "Warn", "WARN", "sev2", "SEV2", "Severity2", "severity2", "SEVERITY2":
severity = 2
case "info":
case "info", "Info", "INFO", "notice", "Notice", "NOTICE", "sev3", "SEV3", "Severity3", "severity3", "SEVERITY3":
severity = 3
}
ruleName += "-" + v

View File

@@ -369,6 +369,12 @@ func UserGetById(ctx *ctx.Context, id int64) (*User, error) {
return UserGet(ctx, "id=?", id)
}
func CountAdminUsers(ctx *ctx.Context) (int64, error) {
var count int64
err := DB(ctx).Model(&User{}).Where("roles LIKE ?", "%"+AdminRole+"%").Count(&count).Error
return count, err
}
func UsersGetByGroupIds(ctx *ctx.Context, groupIds []int64) ([]User, error) {
if len(groupIds) == 0 {
return nil, nil

View File

@@ -12,15 +12,16 @@ import (
)
type UserGroup struct {
Id int64 `json:"id" gorm:"primaryKey"`
Name string `json:"name"`
Note string `json:"note"`
CreateAt int64 `json:"create_at"`
CreateBy string `json:"create_by"`
UpdateAt int64 `json:"update_at"`
UpdateBy string `json:"update_by"`
UserIds []int64 `json:"-" gorm:"-"`
Users []User `json:"users" gorm:"-"`
Id int64 `json:"id" gorm:"primaryKey"`
Name string `json:"name"`
Note string `json:"note"`
CreateAt int64 `json:"create_at"`
CreateBy string `json:"create_by"`
UpdateAt int64 `json:"update_at"`
UpdateBy string `json:"update_by"`
UserIds []int64 `json:"-" gorm:"-"`
Users []User `json:"users" gorm:"-"`
BusiGroupsRes []*BusiGroupRes `json:"busi_groups" gorm:"-"`
}
func (ug *UserGroup) TableName() string {

View File

@@ -192,6 +192,15 @@ var I18N = `{
"View Alerting Engines": "查看告警引擎列表",
"View Product Version": "查看产品版本",
"Some alert rules still in the BusiGroup": "业务组中仍有告警规则",
"Some alert mutes still in the BusiGroup": "业务组中仍有屏蔽规则",
"Some alert subscribes still in the BusiGroup": "业务组中仍有订阅规则",
"Some Board still in the BusiGroup": "业务组中仍有仪表盘",
"Some targets still in the BusiGroup": "业务组中仍有监控对象",
"Some recording rules still in the BusiGroup": "业务组中仍有记录规则",
"Some recovery scripts still in the BusiGroup": "业务组中仍有自愈脚本",
"Some target busigroups still in the BusiGroup": "业务组中仍有监控对象",
"---------zh_CN--------": "---------zh_CN--------"
},
"zh_HK": {
@@ -387,6 +396,15 @@ var I18N = `{
"View Alerting Engines": "查看告警引擎列表",
"View Product Version": "查看產品版本",
"Some alert rules still in the BusiGroup": "業務組中仍有告警規則",
"Some alert mutes still in the BusiGroup": "業務組中仍有屏蔽規則",
"Some alert subscribes still in the BusiGroup": "業務組中仍有訂閱規則",
"Some Board still in the BusiGroup": "業務組中仍有儀表板",
"Some targets still in the BusiGroup": "業務組中仍有監控對象",
"Some recording rules still in the BusiGroup": "業務組中仍有記錄規則",
"Some recovery scripts still in the BusiGroup": "業務組中仍有自愈腳本",
"Some target busigroups still in the BusiGroup": "業務組中仍有監控對象",
"---------zh_HK--------": "---------zh_HK--------"
},
"ja_JP": {
@@ -579,6 +597,15 @@ var I18N = `{
"View Alerting Engines": "アラートエンジンの表示",
"View Product Version": "製品のバージョンを見る",
"Some alert rules still in the BusiGroup": "ビジネスグループにまだアラートルールがあります",
"Some alert mutes still in the BusiGroup": "ビジネスグループにまだミュートルールがあります",
"Some alert subscribes still in the BusiGroup": "ビジネスグループにまだサブスクライブルールがあります",
"Some Board still in the BusiGroup": "ビジネスグループにまだダッシュボードがあります",
"Some targets still in the BusiGroup": "ビジネスグループにまだ監視対象があります",
"Some recording rules still in the BusiGroup": "ビジネスグループにまだ記録ルールがあります",
"Some recovery scripts still in the BusiGroup": "ビジネスグループにまだ自己回復スクリプトがあります",
"Some target busigroups still in the BusiGroup": "ビジネスグループにまだ監視対象があります",
"---------ja_JP--------": "---------ja_JP--------"
},
"ru_RU": {
@@ -771,6 +798,15 @@ var I18N = `{
"View Alerting Engines": "Просмотр списка алертинг-инженеров",
"View Product Version": "Просмотр версии продукта",
"Some alert rules still in the BusiGroup": "В бизнес-группе еще есть правила оповещений",
"Some alert mutes still in the BusiGroup": "В бизнес-группе еще есть правила отключения оповещений",
"Some alert subscribes still in the BusiGroup": "В бизнес-группе еще есть правила подписки",
"Some Board still in the BusiGroup": "В бизнес-группе еще есть панели мониторинга",
"Some targets still in the BusiGroup": "В бизнес-группе еще есть объекты мониторинга",
"Some recording rules still in the BusiGroup": "В бизнес-группе еще есть правила записи",
"Some recovery scripts still in the BusiGroup": "В бизнес-группе еще есть скрипты самоисцеления",
"Some target busigroups still in the BusiGroup": "В бизнес-группе еще есть объекты мониторинга",
"---------ru_RU--------": "---------ru_RU--------"
}
}`

View File

@@ -5,6 +5,7 @@ import (
"crypto/tls"
"fmt"
"net/http"
"strings"
"sync"
"time"
@@ -140,14 +141,22 @@ func (s *SsoClient) GetDisplayName() string {
return s.DisplayName
}
func (s *SsoClient) GetSsoLogoutAddr() string {
func (s *SsoClient) GetSsoLogoutAddr(idToken string) string {
s.RLock()
defer s.RUnlock()
if !s.Enable {
return ""
}
return s.SsoLogoutAddr
return s.replaceIdTokenTemplate(s.SsoLogoutAddr, idToken)
}
// replaceIdTokenTemplate 替换登出 URL 中的 {{$__id_token__}} 模板变量
func (s *SsoClient) replaceIdTokenTemplate(logoutAddr, idToken string) string {
if idToken == "" {
return logoutAddr
}
return strings.ReplaceAll(logoutAddr, "{{$__id_token__}}", idToken)
}
func wrapStateKey(key string) string {
@@ -201,6 +210,7 @@ type CallbackOutput struct {
Redirect string `json:"redirect"`
Msg string `json:"msg"`
AccessToken string `json:"accessToken"`
IdToken string `json:"idToken"`
Username string `json:"username"`
Nickname string `json:"nickname"`
Phone string `yaml:"phone"`
@@ -245,6 +255,7 @@ func (s *SsoClient) exchangeUser(code string) (*CallbackOutput, error) {
output := &CallbackOutput{
AccessToken: oauth2Token.AccessToken,
IdToken: rawIDToken,
Username: extractClaim(data, s.Attributes.Username),
Nickname: extractClaim(data, s.Attributes.Nickname),
Phone: extractClaim(data, s.Attributes.Phone),

View File

@@ -52,6 +52,10 @@ func (po *PromOption) Equal(target PromOption) bool {
return false
}
if po.InsecureSkipVerify != target.InsecureSkipVerify {
return false
}
if len(po.Headers) != len(target.Headers) {
return false
}

View File

@@ -181,7 +181,30 @@ func (w WriterType) Post(req []byte, headers ...map[string]string) error {
}
if resp.StatusCode >= 400 && resp.StatusCode < 500 {
logger.Warningf("push data with remote write:%s request got status code: %v, response body: %s", url, resp.StatusCode, string(body))
// 解码并解析 req 以便打印指标信息
decoded, decodeErr := snappy.Decode(nil, req)
metricsInfo := "failed to decode request"
if decodeErr == nil {
var writeReq prompb.WriteRequest
if unmarshalErr := proto.Unmarshal(decoded, &writeReq); unmarshalErr == nil {
metricsInfo = fmt.Sprintf("timeseries count: %d", len(writeReq.Timeseries))
logger.Warningf("push data with remote write:%s request got status code: %v, response body: %s, %s", url, resp.StatusCode, string(body), metricsInfo)
// 只打印前几条样本,避免日志泛滥
sampleCount := 5
if sampleCount > len(writeReq.Timeseries) {
sampleCount = len(writeReq.Timeseries)
}
for i := 0; i < sampleCount; i++ {
logger.Warningf("push data with remote write:%s timeseries: [%d] %s", url, i, writeReq.Timeseries[i].String())
}
} else {
metricsInfo = fmt.Sprintf("failed to unmarshal: %v", unmarshalErr)
logger.Warningf("push data with remote write:%s request got status code: %v, response body: %s, metrics: %s", url, resp.StatusCode, string(body), metricsInfo)
}
} else {
metricsInfo = fmt.Sprintf("failed to decode: %v", decodeErr)
logger.Warningf("push data with remote write:%s request got status code: %v, response body: %s, metrics: %s", url, resp.StatusCode, string(body), metricsInfo)
}
continue
}