Compare commits

...

61 Commits

Author SHA1 Message Date
ning
96f3cfa065 add all perms api 2023-03-16 11:40:00 +08:00
ning
144f0ad795 update pkg version 2023-03-08 17:59:03 +08:00
ning
1375ff1435 delete log 2023-03-01 17:43:34 +08:00
ning
26dc03146b update rule enable time 2023-03-01 16:42:08 +08:00
ning
e8378c6858 update rule enable time 2023-03-01 16:14:16 +08:00
ning
da182f1b05 add log 2023-03-01 15:48:02 +08:00
ning
cad0d3cf0f add log 2023-03-01 15:35:42 +08:00
ning
fec1e686f4 add log 2023-03-01 15:25:55 +08:00
ning
a357f11164 add log 2023-03-01 14:59:07 +08:00
ning
d03ba4c4d0 fix: query https api 2023-01-30 16:44:51 +08:00
kongfei605
d531178c9b convert tplx.Funcmap to text.funcmap (#1352) 2023-01-10 21:02:20 +08:00
ning
174df1495c refactor: change some log level 2023-01-10 19:59:41 +08:00
ning
ffe423148d fix: push event api 2023-01-10 19:08:39 +08:00
ning
926559c9a7 refactor: motify log print 2023-01-10 15:54:55 +08:00
Yening Qin
136642f126 optimize handle external event (#1350)
* optimize handle external event
2023-01-10 13:30:45 +08:00
Ulric Qin
a054828fcc Merge branch 'main' of github.com:ccfos/nightingale 2023-01-06 23:39:49 +08:00
Ulric Qin
e46e946689 code refactor 2023-01-06 23:39:37 +08:00
ning
cf083c543b fix: alert mute sync 2023-01-06 16:22:41 +08:00
xiaoziv
2e1508fdd3 feat: rule engine rewrite (#1340)
* feat: rule engine rewrite

* rename filter to muteStrategy

* rename file

* fix bg strategy match bug

* fix deadlock

* Update mute_strategy.go

* Update rule_helper.go

* use rule from cache

* add comment

* add IdentDeletedMuteStrategy

* rename strategy

* rename eventTags to tagsMap

Co-authored-by: ulricqin <ulricqin@qq.com>
2023-01-06 16:16:22 +08:00
kongfei605
954543a5b2 Parse rules without html escaper (#1345) 2023-01-06 11:02:36 +08:00
Ulric Qin
71a402c33c code refactor 2023-01-06 10:51:33 +08:00
Ulric Qin
e30a5a316f code refactor 2023-01-06 10:50:18 +08:00
jsp-kld
0c9b7de391 Dashboard for VMware (#1331)
by [vsphere-monitor](https://github.com/jsp-kld/vsphere-monitor)
2022-12-20 21:00:59 +08:00
Yening Qin
063b6f63df fix load prom options from database and add more log (#1330)
* add more log
* fix PromOptions set
2022-12-20 11:59:01 +08:00
lsy1990
44b780093a support fetch user info based on query type (#1326)
* support fetch user info based on query type

* refector on type
2022-12-17 17:36:40 +08:00
710leo
780ad19dd9 fix: alert mute 2022-12-17 12:37:06 +08:00
710leo
c6d133772a fix: sync alert mute and subscribe when cluster is blank 2022-12-17 10:44:27 +08:00
Allen Zhou
c5bb8a4a13 target tags can rewrite labels deined in categraf config file (#1321)
Co-authored-by: allenz <godloveallen@foxmail.com>
2022-12-14 11:20:51 +08:00
Ulric Qin
06c1664577 rename Writer.Name to Writer.ClusterName 2022-12-13 22:52:04 +08:00
710leo
96a4c1ebfa delete GaugeCronDuration cluster label 2022-12-13 16:50:17 +08:00
Yening Qin
b0c05368f7 n9e server support multi cluster alert (#1318)
* support multi

* refactor

* code refactor

* refactor

* code refactor

* fix run mult cluster rule

* code refactor

* add alerting_engine api

* add alerting_engine api

* update sql

* refactor recording push

* refactor

* refactor

* delete useless cluster

* split to fields

* change stats

* change stats
2022-12-13 16:24:23 +08:00
Ulric Qin
eebf2cff49 add api: userFindAll 2022-12-12 12:55:08 +08:00
Ulric Qin
30d021bc19 Merge branch 'main' of github.com:ccfos/nightingale 2022-12-12 11:20:37 +08:00
Ulric Qin
b4ea395fe3 update README 2022-12-12 11:20:06 +08:00
zhousbo
9f4d1a1ea7 fix: support redis sentinel password (#1315) 2022-12-09 22:55:25 +08:00
lsy1990
ed06da90d9 support fetch user group by user name (#1311) 2022-12-07 20:50:17 +08:00
hubo
9461b549d2 replace lable host to ident (#1302) 2022-11-30 20:12:38 +08:00
lunuan
3b1b595461 update dashboard template for mongodb (#1293)
Co-authored-by: LiuHX <huaxingliu@fintopia.tech>
2022-11-30 18:33:24 +08:00
Windy
4257de69fd fix: webapi conf sso section typo (#1298) 2022-11-30 14:27:47 +08:00
Mystery0 M
ddc86f20ee feat: add telegram notify support (#1295)
* feat: add telegram notify support
2022-11-30 14:20:21 +08:00
Ulric Qin
bf27162a9b modify default settings of DisableUsageReport 2022-11-28 20:57:42 +08:00
Ulric Qin
f8ac0a9b4a refactor forwarding logic 2022-11-23 20:40:18 +08:00
Yening Qin
7a190b152c feat: add timeseries sample log filter (#1281)
feat: add timeseries sample log filter
2022-11-22 21:53:34 +08:00
Ulric Qin
99fbdae121 refactor boardPutConfigs 2022-11-11 12:11:39 +08:00
kongfei605
aa26ddfb48 Merge pull request #1263 from ccfos/router_easyjson
regenerate easyjson file for router_opentsdb
2022-11-10 12:50:22 +08:00
kongfei
ba5aba9cdf sync main branch code 2022-11-10 12:47:46 +08:00
kongfei
3400803672 regenerate easyjosn file for router_opentsdb 2022-11-10 12:41:16 +08:00
kongfei605
f11377b289 replace json with easyjson for router (#1261) 2022-11-10 11:11:20 +08:00
kongfei
1165312532 replace json with easyjson for router 2022-11-10 11:00:38 +08:00
JellyTony
8a145d5ba2 feat: 报警脚本超时时间改为可配置 (#1253)
* Update docker-compose.yaml

* Update docker-compose.yaml

* feat: 报警脚本超时时间改为可配置

* feat: docker 镜像Alerting 增加 超时时间

Co-authored-by: ulricqin <ulricqin@qq.com>
Co-authored-by: JeffreyBool <zhanggaoyuan@mediatrack.cn>
2022-11-04 15:18:36 +08:00
47
352415662a feat:CAS and OAuth2 login (#1236)
* Feat(cas login):Add CAS login

Signed-off-by: root <foursevenlove@gmail.com>

* Fix(CAS login):1.print logs of CAS Authentication Response's Attributes 2.modify fileds of ssoClient and CAS config.

Signed-off-by: root <foursevenlove@gmail.com>

* Fix(CAS login):Fields modifing

Signed-off-by: root <foursevenlove@gmail.com>

* Feat(OAuth Login):1.Add OAuth2 login 2.Add display name

Signed-off-by: root <foursevenlove@gmail.com>

* Fix(webapi.conf):Add example

Signed-off-by: root <foursevenlove@gmail.com>

* fix(webapi.conf):Modify default value of username in OAuth2

Signed-off-by: root <foursevenlove@gmail.com>

* Fix:Error handling

Signed-off-by: root <foursevenlove@gmail.com>

Signed-off-by: root <foursevenlove@gmail.com>
2022-11-02 14:31:59 +08:00
Ulric Qin
65d8f80637 Merge branch 'main' of github.com:ccfos/nightingale 2022-11-02 08:35:06 +08:00
Ulric Qin
b3700c7251 add Headers configuration demo 2022-11-02 08:34:49 +08:00
chenginger
106a8e490a alert mute cannot refresh the bug (#1242)
bugfix:mute cannot be refreshed after being modified
2022-11-01 15:41:01 +08:00
Ulric Qin
5332f797a6 add alert duration in wecom.tpl 2022-10-30 17:11:51 +08:00
Ulric Qin
aff0dbfea1 use json-iterator/go instead encoding/json 2022-10-28 10:22:04 +08:00
Ulric Qin
da5dd683d6 bugfix 2022-10-25 09:46:32 +08:00
zheng
15892d6e57 规则名称支持变量 (#1217)
* 规则名称支持变量

* parse rule_name
2022-10-20 20:18:15 +08:00
xtan
fbff60eefb docs: pg init sql (#1210)
Co-authored-by: tanxiao <tanxiao@asiainfo.com>
2022-10-20 12:32:13 +08:00
xtan
62867ddbf2 feat: conf file password supports ciphertext (#1207)
Co-authored-by: tanxiao <tanxiao@asiainfo.com>
2022-10-20 12:31:48 +08:00
Ulric Qin
5d4acb6cc3 update sql 2022-10-19 12:25:50 +08:00
91 changed files with 7875 additions and 1453 deletions

View File

@@ -59,9 +59,8 @@
## Getting Started
- [快速安装](https://mp.weixin.qq.com/s/iEC4pfL1TgjMDOWYh8H-FA)
- [详细文档](https://n9e.github.io/)
- [社区分享](https://n9e.github.io/docs/prologue/share/)
- [国外文档](https://n9e.github.io/)
- [国内文档](http://n9e.flashcat.cloud/)
## Screenshots

View File

@@ -165,6 +165,7 @@ CREATE TABLE `board` (
`id` bigint unsigned not null auto_increment,
`group_id` bigint not null default 0 comment 'busi group id',
`name` varchar(191) not null,
`ident` varchar(200) not null default '',
`tags` varchar(255) not null comment 'split by space',
`public` tinyint(1) not null default 0 comment '0:false 1:true',
`create_at` bigint not null default 0,
@@ -172,7 +173,8 @@ CREATE TABLE `board` (
`update_at` bigint not null default 0,
`update_by` varchar(64) not null default '',
PRIMARY KEY (`id`),
UNIQUE KEY (`group_id`, `name`)
UNIQUE KEY (`group_id`, `name`),
KEY(`ident`)
) ENGINE = InnoDB DEFAULT CHARSET = utf8mb4;
-- for dashboard new version
@@ -523,6 +525,5 @@ CREATE TABLE `alerting_engines`
`instance` varchar(128) not null default '' comment 'instance identification, e.g. 10.9.0.9:9090',
`cluster` varchar(128) not null default '' comment 'target reader cluster',
`clock` bigint not null,
PRIMARY KEY (`id`),
UNIQUE KEY (`instance`)
) ENGINE = InnoDB DEFAULT CHARSET = utf8mb4;
PRIMARY KEY (`id`)
) ENGINE = InnoDB DEFAULT CHARSET = utf8mb4;

View File

@@ -198,7 +198,9 @@ CREATE TABLE board (
id bigserial not null ,
group_id bigint not null default 0 ,
name varchar(191) not null,
ident varchar(200) not null default '',
tags varchar(255) not null ,
public smallint not null default 0,
create_at bigint not null default 0,
create_by varchar(64) not null default '',
update_at bigint not null default 0,
@@ -208,6 +210,8 @@ ALTER TABLE board ADD CONSTRAINT board_pk PRIMARY KEY (id);
ALTER TABLE board ADD CONSTRAINT board_un UNIQUE (group_id,"name");
COMMENT ON COLUMN board.group_id IS 'busi group id';
COMMENT ON COLUMN board.tags IS 'split by space';
COMMENT ON COLUMN board.public IS '0:false 1:true';
CREATE INDEX board_ident_idx ON board (ident);
-- for dashboard new version
CREATE TABLE board_payload (
@@ -606,6 +610,5 @@ CREATE TABLE alerting_engines
clock bigint not null
) ;
ALTER TABLE alerting_engines ADD CONSTRAINT alerting_engines_pk PRIMARY KEY (id);
ALTER TABLE alerting_engines ADD CONSTRAINT alerting_engines_un UNIQUE (instance);
COMMENT ON COLUMN alerting_engines.instance IS 'instance identification, e.g. 10.9.0.9:9090';
COMMENT ON COLUMN alerting_engines.cluster IS 'target reader cluster';

View File

@@ -29,6 +29,7 @@ func (n *N9EPlugin) Notify(bs []byte) {
"dingtalk_robot_token",
"wecom_robot_token",
"feishu_robot_token",
"telegram_robot_token",
}
for _, ch := range channels {
if ret := gjson.GetBytes(bs, ch); ret.Exists() {

View File

@@ -11,13 +11,11 @@ BusiGroupLabelKey = "busigroup"
# sleep x seconds, then start judge engine
EngineDelay = 60
DisableUsageReport = false
DisableUsageReport = true
# config | database
ReaderFrom = "config"
ForceUseServerTS = true
[Log]
# log write dir
Dir = "logs"
@@ -75,10 +73,12 @@ InsecureSkipVerify = true
Batch = 5
[Alerting]
# timeout settings, unit: ms, default: 30000ms
Timeout=30000
TemplatesDir = "./etc/template"
NotifyConcurrency = 10
# use builtin go code notify
NotifyBuiltinChannels = ["email", "dingtalk", "wecom", "feishu", "mm"]
NotifyBuiltinChannels = ["email", "dingtalk", "wecom", "feishu", "mm", "telegram"]
[Alerting.CallScript]
# built in sending capability in go code
@@ -160,11 +160,13 @@ MaxIdleConnsPerHost = 100
[WriterOpt]
# queue channel count
QueueCount = 100
QueueCount = 1000
# queue max size
QueueMaxSize = 200000
QueueMaxSize = 1000000
# once pop samples number from queue
QueuePopSize = 2000
QueuePopSize = 1000
# metric or ident
ShardingKey = "ident"
[[Writers]]
Url = "http://prometheus:9090/api/v1/write"

View File

@@ -0,0 +1,7 @@
**级别状态**: {{if .IsRecovered}}<font color="info">S{{.Severity}} Recovered</font>{{else}}<font color="warning">S{{.Severity}} Triggered</font>{{end}}
**规则标题**: {{.RuleName}}{{if .RuleNote}}
**规则备注**: {{.RuleNote}}{{end}}
**监控指标**: {{.TagsJSON}}
{{if .IsRecovered}}**恢复时间**{{timeformat .LastEvalTime}}{{else}}**触发时间**: {{timeformat .TriggerTime}}
**触发时值**: {{.TriggerValue}}{{end}}
**发送时间**: {{timestamp}}

View File

@@ -44,6 +44,11 @@ Label = "mm bot"
# do not change Key
Key = "mm"
[[NotifyChannels]]
Label = "telegram机器人"
# do not change Key
Key = "telegram"
[[ContactKeys]]
Label = "Wecom Robot Token"
# do not change Key
@@ -64,6 +69,11 @@ Label = "MatterMost Webhook URL"
# do not change Key
Key = "mm_webhook_url"
[[ContactKeys]]
Label = "Telegram Robot Token"
# do not change Key
Key = "telegram_robot_token"
[Log]
# log write dir
Dir = "logs"

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@@ -23,6 +23,7 @@ func (n *N9EPlugin) Notify(bs []byte) {
"dingtalk_robot_token",
"wecom_robot_token",
"feishu_robot_token",
"telegram_robot_token",
}
for _, ch := range channels {
if ret := gjson.GetBytes(bs, ch); ret.Exists() {

View File

@@ -9,14 +9,15 @@ ClusterName = "Default"
BusiGroupLabelKey = "busigroup"
# sleep x seconds, then start judge engine
EngineDelay = 60
EngineDelay = 30
DisableUsageReport = false
DisableUsageReport = true
# config | database
ReaderFrom = "config"
ForceUseServerTS = true
# if true, target tags can rewrite labels defined in categraf config file
LabelRewrite = false
[Log]
# log write dir
@@ -75,10 +76,12 @@ InsecureSkipVerify = true
Batch = 5
[Alerting]
# timeout settings, unit: ms, default: 30000ms
Timeout=30000
TemplatesDir = "./etc/template"
NotifyConcurrency = 10
# use builtin go code notify
NotifyBuiltinChannels = ["email", "dingtalk", "wecom", "feishu", "mm"]
NotifyBuiltinChannels = ["email", "dingtalk", "wecom", "feishu", "mm", "telegram"]
[Alerting.CallScript]
# built in sending capability in go code
@@ -132,6 +135,8 @@ Address = "127.0.0.1:6379"
RedisType = "standalone"
# Mastername for sentinel type
# MasterName = "mymaster"
# SentinelUsername = ""
# SentinelPassword = ""
[DB]
# postgres: host=%s port=%s user=%s dbname=%s password=%s sslmode=%s
@@ -163,13 +168,28 @@ Timeout = 30000
DialTimeout = 3000
MaxIdleConnsPerHost = 100
# [[Readers]]
# ClusterName = "Default"
# prometheus base url
# Url = "http://127.0.0.1:9090"
# Basic auth username
# BasicAuthUser = ""
# Basic auth password
# BasicAuthPass = ""
# timeout settings, unit: ms
# Timeout = 30000
# DialTimeout = 3000
# MaxIdleConnsPerHost = 100
[WriterOpt]
# queue channel count
QueueCount = 100
QueueCount = 1000
# queue max size
QueueMaxSize = 200000
QueueMaxSize = 1000000
# once pop samples number from queue
QueuePopSize = 2000
QueuePopSize = 1000
# metric or ident
ShardingKey = "ident"
[[Writers]]
Url = "http://127.0.0.1:9090/api/v1/write"
@@ -178,6 +198,7 @@ BasicAuthUser = ""
# Basic auth password
BasicAuthPass = ""
# timeout settings, unit: ms
Headers = ["X-From", "n9e"]
Timeout = 10000
DialTimeout = 3000
TLSHandshakeTimeout = 30000

View File

@@ -0,0 +1,9 @@
**级别状态**: {{if .IsRecovered}}<font color="info">S{{.Severity}} Recovered</font>{{else}}<font color="warning">S{{.Severity}} Triggered</font>{{end}}
**规则标题**: {{.RuleName}}{{if .RuleNote}}
**规则备注**: {{.RuleNote}}{{end}}{{if .TargetIdent}}
**监控对象**: {{.TargetIdent}}{{end}}
**监控指标**: {{.TagsJSON}}{{if not .IsRecovered}}
**触发时值**: {{.TriggerValue}}{{end}}
{{if .IsRecovered}}**恢复时间**: {{timeformat .LastEvalTime}}{{else}}**首次触发时间**: {{timeformat .FirstTriggerTime}}{{end}}
{{$time_duration := sub now.Unix .FirstTriggerTime }}{{if .IsRecovered}}{{$time_duration = sub .LastEvalTime .FirstTriggerTime }}{{end}}**持续时长**: {{humanizeDurationInterface $time_duration}}
**发送时间**: {{timestamp}}

View File

@@ -1,7 +1,9 @@
**级别状态**: {{if .IsRecovered}}<font color="info">S{{.Severity}} Recovered</font>{{else}}<font color="warning">S{{.Severity}} Triggered</font>{{end}}
**规则标题**: {{.RuleName}}{{if .RuleNote}}
**规则备注**: {{.RuleNote}}{{end}}
**监控指标**: {{.TagsJSON}}
{{if .IsRecovered}}**恢复时间**{{timeformat .LastEvalTime}}{{else}}**触发时间**: {{timeformat .TriggerTime}}
**规则备注**: {{.RuleNote}}{{end}}{{if .TargetIdent}}
**监控对象**: {{.TargetIdent}}{{end}}
**监控指标**: {{.TagsJSON}}{{if not .IsRecovered}}
**触发时值**: {{.TriggerValue}}{{end}}
{{if .IsRecovered}}**恢复时间**: {{timeformat .LastEvalTime}}{{else}}**首次触发时间**: {{timeformat .FirstTriggerTime}}{{end}}
{{$time_duration := sub now.Unix .FirstTriggerTime }}{{if .IsRecovered}}{{$time_duration = sub .LastEvalTime .FirstTriggerTime }}{{end}}**持续时长**: {{humanizeDurationInterface $time_duration}}
**发送时间**: {{timestamp}}

View File

@@ -44,6 +44,11 @@ Label = "mm bot"
# do not change Key
Key = "mm"
[[NotifyChannels]]
Label = "telegram机器人"
# do not change Key
Key = "telegram"
[[ContactKeys]]
Label = "Wecom Robot Token"
# do not change Key
@@ -64,6 +69,11 @@ Label = "MatterMost Webhook URL"
# do not change Key
Key = "mm_webhook_url"
[[ContactKeys]]
Label = "Telegram Robot Token"
# do not change Key
Key = "telegram_robot_token"
[Log]
# log write dir
Dir = "logs"
@@ -149,6 +159,7 @@ Email = "mail"
[OIDC]
Enable = false
DisplayName = "OIDC登录"
RedirectURL = "http://n9e.com/callback"
SsoAddr = "http://sso.example.org"
ClientId = ""
@@ -161,6 +172,54 @@ Nickname = "nickname"
Phone = "phone_number"
Email = "email"
[CAS]
Enable = false
DisplayName = "CAS登录"
SsoAddr = "https://cas.example.com/cas/"
RedirectURL = "http://127.0.0.1:18000/callback/cas"
CoverAttributes = false
# cas user default roles
DefaultRoles = ["Standard"]
[CAS.Attributes]
Nickname = "nickname"
Phone = "phone_number"
Email = "email"
[OAuth]
Enable = false
DisplayName = "OAuth2登录"
RedirectURL = "http://127.0.0.1:18000/callback/oauth"
SsoAddr = "https://sso.example.com/oauth2/authorize"
TokenAddr = "https://sso.example.com/oauth2/token"
UserInfoAddr = "https://api.example.com/api/v1/user/info"
# "header" "querystring" "formdata"
TranTokenMethod = "header"
ClientId = ""
ClientSecret = ""
CoverAttributes = true
DefaultRoles = ["Standard"]
UserinfoIsArray = false
UserinfoPrefix = "data"
Scopes = ["profile", "email", "phone"]
[OAuth.Attributes]
# Username must be defined
Username = "username"
Nickname = "nickname"
Phone = "phone_number"
Email = "email"
# example
# # nested : UserinfoIsArray=false, UserinfoPrefix="data"
# # {"data":{"username":"123456","nickname":"姓名"},"code":0,"message":"ok"}
# # nested and array : UserinfoIsArray=true, UserinfoPrefix="data"
# # {"data":[{"username":"123456","nickname":"姓名"}],"code":0,"message":"ok"}
# # flat : UserinfoIsArray=false, UserinfoPrefix=""
# # {"username":"123456","nickname":"姓名"}
# # flat and array : UserinfoIsArray=true, UserinfoPrefix=""
# # [{"username":"123456","nickname":"姓名"}]
[Redis]
# address, ip:port or ip1:port,ip2:port for cluster and sentinel(SentinelAddrs)
Address = "127.0.0.1:6379"
@@ -173,6 +232,8 @@ Address = "127.0.0.1:6379"
RedisType = "standalone"
# Mastername for sentinel type
# MasterName = "mymaster"
# SentinelUsername = ""
# SentinelPassword = ""
[DB]
DSN="root:1234@tcp(127.0.0.1:3306)/n9e_v5?charset=utf8mb4&parseTime=True&loc=Local&allowNativePasswords=true"
@@ -204,6 +265,7 @@ BasicAuthPass = ""
Timeout = 30000
DialTimeout = 3000
MaxIdleConnsPerHost = 100
Headers = ["X-From", "n9e"]
[Ibex]
Address = "http://127.0.0.1:10090"

16
go.mod
View File

@@ -6,9 +6,9 @@ require (
github.com/coreos/go-oidc v2.2.1+incompatible
github.com/dgrijalva/jwt-go v3.2.0+incompatible
github.com/gin-contrib/pprof v1.3.0
github.com/gin-gonic/gin v1.7.4
github.com/gin-gonic/gin v1.7.7
github.com/go-ldap/ldap/v3 v3.4.1
github.com/go-redis/redis/v9 v9.0.0-beta.2
github.com/go-redis/redis/v9 v9.0.0-rc.1
github.com/gogo/protobuf v1.3.2
github.com/golang-jwt/jwt v3.2.2+incompatible
github.com/golang/protobuf v1.5.2
@@ -16,6 +16,7 @@ require (
github.com/google/uuid v1.3.0
github.com/json-iterator/go v1.1.12
github.com/koding/multiconfig v0.0.0-20171124222453-69c27309b2d7
github.com/mailru/easyjson v0.7.7
github.com/mattn/go-isatty v0.0.12
github.com/orcaman/concurrent-map v0.0.0-20210501183033-44dafcb38ecc
github.com/pkg/errors v0.9.1
@@ -23,7 +24,7 @@ require (
github.com/prometheus/common v0.32.1
github.com/prometheus/prometheus v2.5.0+incompatible
github.com/tidwall/gjson v1.14.0
github.com/toolkits/pkg v1.2.9
github.com/toolkits/pkg v1.3.3
github.com/urfave/cli/v2 v2.3.0
golang.org/x/oauth2 v0.0.0-20210514164344-f6687ab2804c
gopkg.in/gomail.v2 v2.0.0-20160411212932-81ebce5c23df
@@ -58,6 +59,7 @@ require (
github.com/jackc/pgx/v4 v4.13.0 // indirect
github.com/jinzhu/inflection v1.0.0 // indirect
github.com/jinzhu/now v1.1.2 // indirect
github.com/josharian/intern v1.0.0 // indirect
github.com/leodido/go-urn v1.2.0 // indirect
github.com/matttproud/golang_protobuf_extensions v1.0.1 // indirect
github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd // indirect
@@ -72,10 +74,10 @@ require (
github.com/tidwall/pretty v1.2.0 // indirect
github.com/ugorji/go/codec v1.1.7 // indirect
go.uber.org/automaxprocs v1.4.0 // indirect
golang.org/x/crypto v0.0.0-20210817164053-32db794688a5 // indirect
golang.org/x/net v0.0.0-20220425223048-2871e0cb64e4 // indirect
golang.org/x/sys v0.0.0-20220422013727-9388b58f7150 // indirect
golang.org/x/text v0.3.7 // indirect
golang.org/x/crypto v0.0.0-20210921155107-089bfa567519 // indirect
golang.org/x/net v0.0.0-20220722155237-a158d28d115b // indirect
golang.org/x/sys v0.0.0-20220722155257-8c9f86f7a55f // indirect
golang.org/x/text v0.3.8 // indirect
google.golang.org/appengine v1.6.6 // indirect
google.golang.org/genproto v0.0.0-20211007155348-82e027067bd4 // indirect
google.golang.org/grpc v1.41.0 // indirect

40
go.sum
View File

@@ -97,8 +97,8 @@ github.com/gin-contrib/pprof v1.3.0/go.mod h1:waMjT1H9b179t3CxuG1cV3DHpga6ybizwf
github.com/gin-contrib/sse v0.1.0 h1:Y/yl/+YNO8GZSjAhjMsSuLt29uWRFHdHYUb5lYOV9qE=
github.com/gin-contrib/sse v0.1.0/go.mod h1:RHrZQHXnP2xjPF+u1gW/2HnVO7nvIa9PG3Gm+fLHvGI=
github.com/gin-gonic/gin v1.6.2/go.mod h1:75u5sXoLsGZoRN5Sgbi1eraJ4GU3++wFwWzhwvtwp4M=
github.com/gin-gonic/gin v1.7.4 h1:QmUZXrvJ9qZ3GfWvQ+2wnW/1ePrTEJqPKMYEU3lD/DM=
github.com/gin-gonic/gin v1.7.4/go.mod h1:jD2toBW3GZUr5UMcdrwQA10I7RuaFOl/SGeDjXkfUtY=
github.com/gin-gonic/gin v1.7.7 h1:3DoBmSbJbZAWqXJC3SLjAPfutPJJRN1U5pALB7EeTTs=
github.com/gin-gonic/gin v1.7.7/go.mod h1:axIBovoeJpVj8S3BwE0uPMTeReE4+AfFtqpqaZ1qq1U=
github.com/go-asn1-ber/asn1-ber v1.5.1 h1:pDbRAunXzIUXfx4CB2QJFv5IuPiuoW+sWvr/Us009o8=
github.com/go-asn1-ber/asn1-ber v1.5.1/go.mod h1:hEBeB/ic+5LoWskz+yKT7vGhhPYkProFKoKdwZRWMe0=
github.com/go-gl/glfw v0.0.0-20190409004039-e6da0acd62b1/go.mod h1:vR7hzQXu2zJy9AVAgeJqvqgH9Q5CA+iKCZ2gyEVpxRU=
@@ -121,8 +121,8 @@ github.com/go-playground/universal-translator v0.17.0/go.mod h1:UkSxE5sNxxRwHyU+
github.com/go-playground/validator/v10 v10.2.0/go.mod h1:uOYAAleCW8F/7oMFd6aG0GOhaH6EGOAJShg8Id5JGkI=
github.com/go-playground/validator/v10 v10.4.1 h1:pH2c5ADXtd66mxoE0Zm9SUhxE20r7aM3F26W0hOn+GE=
github.com/go-playground/validator/v10 v10.4.1/go.mod h1:nlOn6nFhuKACm19sB/8EGNn9GlaMV7XkbRSipzJ0Ii4=
github.com/go-redis/redis/v9 v9.0.0-beta.2 h1:ZSr84TsnQyKMAg8gnV+oawuQezeJR11/09THcWCQzr4=
github.com/go-redis/redis/v9 v9.0.0-beta.2/go.mod h1:Bldcd/M/bm9HbnNPi/LUtYBSD8ttcZYBMupwMXhdU0o=
github.com/go-redis/redis/v9 v9.0.0-rc.1 h1:/+bS+yeUnanqAbuD3QwlejzQZ+4eqgfUtFTG4b+QnXs=
github.com/go-redis/redis/v9 v9.0.0-rc.1/go.mod h1:8et+z03j0l8N+DvsVnclzjf3Dl/pFHgRk+2Ct1qw66A=
github.com/go-sql-driver/mysql v1.6.0 h1:BCTh4TKNUYmOmMUcQ3IipzF5prigylS7XXjEkfCHuOE=
github.com/go-sql-driver/mysql v1.6.0/go.mod h1:DCzpHaOWr8IXmIStZouvnhqoel9Qv2LBy8hT2VhHyBg=
github.com/go-stack/stack v1.8.0/go.mod h1:v0f6uXyyMGvRgIKkXu+yp6POWl0qKG85gN/melR3HDY=
@@ -245,6 +245,8 @@ github.com/jinzhu/inflection v1.0.0 h1:K317FqzuhWc8YvSVlFMCCUb36O/S9MCKRDI7QkRKD
github.com/jinzhu/inflection v1.0.0/go.mod h1:h+uFLlag+Qp1Va5pdKtLDYj+kHp5pxUVkryuEj+Srlc=
github.com/jinzhu/now v1.1.2 h1:eVKgfIdy9b6zbWBMgFpfDPoAMifwSZagU9HmEU6zgiI=
github.com/jinzhu/now v1.1.2/go.mod h1:d3SSVoowX0Lcu0IBviAWJpolVfI5UJVZZ7cO71lE/z8=
github.com/josharian/intern v1.0.0 h1:vlS4z54oSdjm0bgjRigI+G1HpF+tI+9rE5LLzOg8HmY=
github.com/josharian/intern v1.0.0/go.mod h1:5DoeVV0s6jJacbCEi61lwdGj/aVlrQvzHFFd8Hwg//Y=
github.com/jpillora/backoff v1.0.0/go.mod h1:J/6gKK9jxlEcS3zixgDgUAsiuZ7yrSoa/FX5e0EB2j4=
github.com/json-iterator/go v1.1.6/go.mod h1:+SdeFBvtyEkXs7REEP0seUULqWtbJapLOCVDaaPEHmU=
github.com/json-iterator/go v1.1.9/go.mod h1:KdQUCv79m/52Kvf8AW2vK1V8akMuk1QjK/uOdHXbAo4=
@@ -277,6 +279,8 @@ github.com/lib/pq v1.1.0/go.mod h1:5WUZQaWbwv1U+lTReE5YruASi9Al49XbQIvNi/34Woo=
github.com/lib/pq v1.2.0/go.mod h1:5WUZQaWbwv1U+lTReE5YruASi9Al49XbQIvNi/34Woo=
github.com/lib/pq v1.10.2 h1:AqzbZs4ZoCBp+GtejcpCpcxM3zlSMx29dXbUSeVtJb8=
github.com/lib/pq v1.10.2/go.mod h1:AlVN5x4E4T544tWzH6hKfbfQvm3HdbOxrmggDNAPY9o=
github.com/mailru/easyjson v0.7.7 h1:UGYAvKxe3sBsEDzO8ZeWOSlIQfWFlxbzLZe7hwFURr0=
github.com/mailru/easyjson v0.7.7/go.mod h1:xzfreul335JAWq5oZzymOObrkdz5UnU4kGfJJLY9Nlc=
github.com/mattn/go-colorable v0.1.1/go.mod h1:FuOcm+DKB9mbwrcAfNl7/TZVBZ6rcnceauSikq3lYCQ=
github.com/mattn/go-colorable v0.1.6/go.mod h1:u6P/XSegPjTcexA+o6vUJrdnUu04hMope9wVRipJSqc=
github.com/mattn/go-isatty v0.0.5/go.mod h1:Iq45c/XA43vh69/j3iqttzPXn0bhXyGjM0Hdxcsrc5s=
@@ -296,7 +300,7 @@ github.com/mwitkow/go-conntrack v0.0.0-20161129095857-cc309e4a2223/go.mod h1:qRW
github.com/mwitkow/go-conntrack v0.0.0-20190716064945-2f068394615f/go.mod h1:qRWi+5nqEBWmkhHvq77mSJWrCKwh8bxhgT7d/eI7P4U=
github.com/nxadm/tail v1.4.8 h1:nPr65rt6Y5JFSKQO7qToXr7pePgD6Gwiw05lkbyAQTE=
github.com/onsi/ginkgo v1.16.5 h1:8xi0RTUf59SOSfEtZMvwTvXYMzG4gV23XVHOZiXNtnE=
github.com/onsi/gomega v1.20.0 h1:8W0cWlwFkflGPLltQvLRB7ZVD5HuP6ng320w2IS245Q=
github.com/onsi/gomega v1.21.1 h1:OB/euWYIExnPBohllTicTHmGTrMaqJ67nIu80j0/uEM=
github.com/orcaman/concurrent-map v0.0.0-20210501183033-44dafcb38ecc h1:Ak86L+yDSOzKFa7WM5bf5itSOo1e3Xh8bm5YCMUXIjQ=
github.com/orcaman/concurrent-map v0.0.0-20210501183033-44dafcb38ecc/go.mod h1:Lu3tH6HLW3feq74c2GC+jIMS/K2CFcDWnWD9XkenwhI=
github.com/pkg/errors v0.8.0/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0=
@@ -360,16 +364,16 @@ github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UV
github.com/stretchr/testify v1.4.0/go.mod h1:j7eGeouHqKxXV5pUuKE4zz7dFj8WfuZ+81PSLYec5m4=
github.com/stretchr/testify v1.5.1/go.mod h1:5W2xD1RspED5o8YsWQXVCued0rvSQ+mT+I5cxcmMvtA=
github.com/stretchr/testify v1.6.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg=
github.com/stretchr/testify v1.7.0 h1:nwc3DEeHmmLAfoZucVR881uASk0Mfjw8xYJ99tb5CcY=
github.com/stretchr/testify v1.7.0/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg=
github.com/stretchr/testify v1.8.0 h1:pSgiaMZlXftHpm5L7V1+rVB+AZJydKsMxsQBIJw4PKk=
github.com/tidwall/gjson v1.14.0 h1:6aeJ0bzojgWLa82gDQHcx3S0Lr/O51I9bJ5nv6JFx5w=
github.com/tidwall/gjson v1.14.0/go.mod h1:/wbyibRr2FHMks5tjHJ5F8dMZh3AcwJEMf5vlfC0lxk=
github.com/tidwall/match v1.1.1 h1:+Ho715JplO36QYgwN9PGYNhgZvoUSc9X2c80KVTi+GA=
github.com/tidwall/match v1.1.1/go.mod h1:eRSPERbgtNPcGhD8UCthc6PmLEQXEWd3PRB5JTxsfmM=
github.com/tidwall/pretty v1.2.0 h1:RWIZEg2iJ8/g6fDDYzMpobmaoGh5OLl4AXtGUGPcqCs=
github.com/tidwall/pretty v1.2.0/go.mod h1:ITEVvHYasfjBbM0u2Pg8T2nJnzm8xPwvNhhsoaGGjNU=
github.com/toolkits/pkg v1.2.9 h1:zGlrJDl+2sMBoxBRIoMtAwvKmW5wctuji2+qHCecMKk=
github.com/toolkits/pkg v1.2.9/go.mod h1:ZUsQAOoaR99PSbes+RXSirvwmtd6+XIUvizCmrjfUYc=
github.com/toolkits/pkg v1.3.3 h1:qpQAQ18Jr47dv4NcBALlH0ad7L2PuqSh5K+nJKNg5lU=
github.com/toolkits/pkg v1.3.3/go.mod h1:USXArTJlz1f1DCnQHNPYugO8GPkr1NRhP4eYQZQVshk=
github.com/ugorji/go v1.1.7/go.mod h1:kZn38zHttfInRq0xu/PH0az30d+z6vm202qpg1oXVMw=
github.com/ugorji/go/codec v1.1.7 h1:2SvQaVZ1ouYrrKKwoSk2pzd4A9evlKJb9oTL+OaLUSs=
github.com/ugorji/go/codec v1.1.7/go.mod h1:Ax+UKWsSmolVDwsd+7N3ZtXu+yMGCf907BLYF3GoBXY=
@@ -379,6 +383,7 @@ github.com/yuin/goldmark v1.1.25/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9de
github.com/yuin/goldmark v1.1.27/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74=
github.com/yuin/goldmark v1.1.32/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74=
github.com/yuin/goldmark v1.2.1/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74=
github.com/yuin/goldmark v1.4.13/go.mod h1:6yULJ656Px+3vBD8DxQVa3kxgyrAnzto9xy5taEt/CY=
github.com/zenazn/goji v0.9.0/go.mod h1:7S9M489iMyHBNxwZnk9/EHS098H4/F6TATF2mIxtB1Q=
go.opencensus.io v0.21.0/go.mod h1:mSImk1erAIZhrmZN+AvHh14ztQfjbGwt4TtuofqLduU=
go.opencensus.io v0.22.0/go.mod h1:+kGneAE2xo2IficOXnaByMWTGM9T73dGwxeWcUqIpI8=
@@ -411,8 +416,9 @@ golang.org/x/crypto v0.0.0-20200622213623-75b288015ac9/go.mod h1:LzIPMQfyMNhhGPh
golang.org/x/crypto v0.0.0-20201203163018-be400aefbc4c/go.mod h1:jdWPYTVW3xRLrWPugEBEK3UY2ZEsg3UU495nc5E+M+I=
golang.org/x/crypto v0.0.0-20210616213533-5ff15b29337e/go.mod h1:GvvjBRRGRdwPK5ydBHafDWAxML/pGHZbMvKqRZ5+Abc=
golang.org/x/crypto v0.0.0-20210711020723-a769d52b0f97/go.mod h1:GvvjBRRGRdwPK5ydBHafDWAxML/pGHZbMvKqRZ5+Abc=
golang.org/x/crypto v0.0.0-20210817164053-32db794688a5 h1:HWj/xjIHfjYU5nVXpTM0s39J9CbLn7Cc5a7IC5rwsMQ=
golang.org/x/crypto v0.0.0-20210817164053-32db794688a5/go.mod h1:GvvjBRRGRdwPK5ydBHafDWAxML/pGHZbMvKqRZ5+Abc=
golang.org/x/crypto v0.0.0-20210921155107-089bfa567519 h1:7I4JAnoQBe7ZtJcBaYHi5UtiO8tQHbUSXxL+pnGRANg=
golang.org/x/crypto v0.0.0-20210921155107-089bfa567519/go.mod h1:GvvjBRRGRdwPK5ydBHafDWAxML/pGHZbMvKqRZ5+Abc=
golang.org/x/exp v0.0.0-20190121172915-509febef88a4/go.mod h1:CJ0aWSM057203Lf6IL+f9T1iT9GByDxfZKAQTCR3kQA=
golang.org/x/exp v0.0.0-20190306152737-a1d7652674e8/go.mod h1:CJ0aWSM057203Lf6IL+f9T1iT9GByDxfZKAQTCR3kQA=
golang.org/x/exp v0.0.0-20190510132918-efd6b22b2522/go.mod h1:ZjyILWgesfNpC6sMxTJOJm9Kp84zZh5NQWvqDGG3Qr8=
@@ -443,6 +449,7 @@ golang.org/x/mod v0.1.1-0.20191105210325-c90efee705ee/go.mod h1:QqPTAvyqsEbceGzB
golang.org/x/mod v0.1.1-0.20191107180719-034126e5016b/go.mod h1:QqPTAvyqsEbceGzBzNggFXnrqF1CaUcvgkdR5Ot7KZg=
golang.org/x/mod v0.2.0/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA=
golang.org/x/mod v0.3.0/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA=
golang.org/x/mod v0.6.0-dev.0.20220419223038-86c51ed26bb4/go.mod h1:jJ57K6gSWd91VN4djpZkiMVwK6gcyfeH4XE8wZrZaV4=
golang.org/x/net v0.0.0-20180724234803-3673e40ba225/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4=
golang.org/x/net v0.0.0-20180826012351-8a410e7b638d/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4=
golang.org/x/net v0.0.0-20181114220301-adae6a3d119a/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4=
@@ -476,8 +483,8 @@ golang.org/x/net v0.0.0-20201021035429-f5854403a974/go.mod h1:sp8m0HH+o8qH0wwXwY
golang.org/x/net v0.0.0-20210226172049-e18ecbb05110/go.mod h1:m0MpNAwzfU5UDzcl9v0D8zg8gWTRqZa9RBIspLL5mdg=
golang.org/x/net v0.0.0-20210405180319-a5a99cb37ef4/go.mod h1:p54w0d4576C0XHj96bSt6lcn1PtDYWL6XObtHCRCNQM=
golang.org/x/net v0.0.0-20210525063256-abc453219eb5/go.mod h1:9nx3DQGgdP8bBQD5qxJ1jj9UTztislL4KSBs9R2vV5Y=
golang.org/x/net v0.0.0-20220425223048-2871e0cb64e4 h1:HVyaeDAYux4pnY+D/SiwmLOR36ewZ4iGQIIrtnuCjFA=
golang.org/x/net v0.0.0-20220425223048-2871e0cb64e4/go.mod h1:CfG3xpIq0wQ8r1q4Su4UZFWDARRcnwPjda9FqA0JpMk=
golang.org/x/net v0.0.0-20220722155237-a158d28d115b h1:PxfKdU9lEEDYjdIzOtC4qFWgkU2rGHdKlKowJSMN9h0=
golang.org/x/net v0.0.0-20220722155237-a158d28d115b/go.mod h1:XRhObCWvk6IyKnWLug+ECip1KBveYUHfp+8e9klMJ9c=
golang.org/x/oauth2 v0.0.0-20180821212333-d2e6202438be/go.mod h1:N/0e6XlmueqKjAGxoOufVs8QHGRruUQn6yWY3a++T0U=
golang.org/x/oauth2 v0.0.0-20190226205417-e64efc72b421/go.mod h1:gOpvHmFTYa4IltrdGE7lF6nIHvwfUNPOp7c8zoXwtLw=
golang.org/x/oauth2 v0.0.0-20190604053449-0f29369cfe45/go.mod h1:gOpvHmFTYa4IltrdGE7lF6nIHvwfUNPOp7c8zoXwtLw=
@@ -495,6 +502,7 @@ golang.org/x/sync v0.0.0-20200317015054-43a5402ce75a/go.mod h1:RxMgew5VJxzue5/jJ
golang.org/x/sync v0.0.0-20200625203802-6e8e738ad208/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
golang.org/x/sync v0.0.0-20201020160332-67f06af15bc9/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
golang.org/x/sync v0.0.0-20201207232520-09787c993a3a/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
golang.org/x/sync v0.0.0-20220722155255-886fb9371eb4/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
golang.org/x/sys v0.0.0-20180830151530-49385e6e1522/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
golang.org/x/sys v0.0.0-20180905080454-ebe1bf3edb33/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
golang.org/x/sys v0.0.0-20181116152217-5ac8a444bdc5/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
@@ -540,10 +548,12 @@ golang.org/x/sys v0.0.0-20210510120138-977fb7262007/go.mod h1:oPkhp1MJrh7nUepCBc
golang.org/x/sys v0.0.0-20210603081109-ebe580a85c40/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.0.0-20210615035016-665e8c7367d1/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.0.0-20220114195835-da31bd327af9/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.0.0-20220422013727-9388b58f7150 h1:xHms4gcpe1YE7A3yIllJXP16CMAGuqwO2lX1mTyyRRc=
golang.org/x/sys v0.0.0-20220422013727-9388b58f7150/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.0.0-20220520151302-bc2c85ada10a/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.0.0-20220722155257-8c9f86f7a55f h1:v4INt8xihDGvnrfjMDVXGxw9wrfxYyCjk0KbXjhR55s=
golang.org/x/sys v0.0.0-20220722155257-8c9f86f7a55f/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/term v0.0.0-20201117132131-f5c789dd3221/go.mod h1:Nr5EML6q2oocZ2LXRh80K7BxOlk5/8JxuGnuhpl+muw=
golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo=
golang.org/x/term v0.0.0-20210927222741-03fcf44c2211/go.mod h1:jbD1KX2456YbFQfuXm/mYQcufACuNUgVhRMnK/tPxf8=
golang.org/x/text v0.0.0-20170915032832-14c0d48ead0c/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ=
golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ=
golang.org/x/text v0.3.1-0.20180807135948-17ff2d5776d2/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ=
@@ -552,8 +562,9 @@ golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ=
golang.org/x/text v0.3.4/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ=
golang.org/x/text v0.3.5/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ=
golang.org/x/text v0.3.6/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ=
golang.org/x/text v0.3.7 h1:olpwvP2KacW1ZWvsR7uQhoyTYvKAupfQrRGBFM352Gk=
golang.org/x/text v0.3.7/go.mod h1:u+2+/6zg+i71rQMx5EYifcz6MCKuco9NR6JIITiCfzQ=
golang.org/x/text v0.3.8 h1:nAL+RVCQ9uMn3vJZbV+MRnydTJFPf8qqY42YiA6MrqY=
golang.org/x/text v0.3.8/go.mod h1:E6s5w1FMmriuDzIBO73fBruAKo1PCIq6d2Q6DHfQ8WQ=
golang.org/x/time v0.0.0-20181108054448-85acf8d2951c/go.mod h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ=
golang.org/x/time v0.0.0-20190308202827-9d24e82272b4/go.mod h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ=
golang.org/x/time v0.0.0-20191024005414-555d28b269f0/go.mod h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ=
@@ -604,6 +615,7 @@ golang.org/x/tools v0.0.0-20200729194436-6467de6f59a7/go.mod h1:njjCfa9FT2d7l9Bc
golang.org/x/tools v0.0.0-20200804011535-6c149bb5ef0d/go.mod h1:njjCfa9FT2d7l9Bc6FUM5FLjQPp3cFF28FI3qnDFljA=
golang.org/x/tools v0.0.0-20200825202427-b303f430e36d/go.mod h1:njjCfa9FT2d7l9Bc6FUM5FLjQPp3cFF28FI3qnDFljA=
golang.org/x/tools v0.0.0-20210106214847-113979e3529a/go.mod h1:emZCQorbCU4vsT4fOWvOPXz4eW1wZW4PmDk9uLelYpA=
golang.org/x/tools v0.1.12/go.mod h1:hNGJHUnrk76NpqgfD5Aqm5Crs+Hm0VOH/i9J2+nxYbc=
golang.org/x/xerrors v0.0.0-20190410155217-1f06c39b4373/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
golang.org/x/xerrors v0.0.0-20190513163551-3ee3066db522/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=

View File

@@ -34,6 +34,11 @@ func newWebapiCmd() *cli.Command {
Aliases: []string{"c"},
Usage: "specify configuration file(.json,.yaml,.toml)",
},
&cli.StringFlag{
Name: "key",
Aliases: []string{"k"},
Usage: "specify the secret key for configuration file field encryption",
},
},
Action: func(c *cli.Context) error {
printEnv()
@@ -43,6 +48,9 @@ func newWebapiCmd() *cli.Command {
opts = append(opts, webapi.SetConfigFile(c.String("conf")))
}
opts = append(opts, webapi.SetVersion(version.VERSION))
if c.String("key") != "" {
opts = append(opts, webapi.SetKey(c.String("key")))
}
webapi.Run(opts...)
return nil
@@ -60,6 +68,11 @@ func newServerCmd() *cli.Command {
Aliases: []string{"c"},
Usage: "specify configuration file(.json,.yaml,.toml)",
},
&cli.StringFlag{
Name: "key",
Aliases: []string{"k"},
Usage: "specify the secret key for configuration file field encryption",
},
},
Action: func(c *cli.Context) error {
printEnv()
@@ -69,6 +82,9 @@ func newServerCmd() *cli.Command {
opts = append(opts, server.SetConfigFile(c.String("conf")))
}
opts = append(opts, server.SetVersion(version.VERSION))
if c.String("key") != "" {
opts = append(opts, server.SetKey(c.String("key")))
}
server.Run(opts...)
return nil

View File

@@ -3,9 +3,9 @@ package models
import (
"bytes"
"fmt"
"html/template"
"strconv"
"strings"
"text/template"
"github.com/didi/nightingale/v5/src/pkg/tplx"
)
@@ -63,10 +63,11 @@ type AggrRule struct {
Value string
}
func (e *AlertCurEvent) ParseRuleNote() error {
e.RuleNote = strings.TrimSpace(e.RuleNote)
func (e *AlertCurEvent) ParseRule(field string) error {
f := e.GetField(field)
f = strings.TrimSpace(f)
if e.RuleNote == "" {
if f == "" {
return nil
}
@@ -75,8 +76,8 @@ func (e *AlertCurEvent) ParseRuleNote() error {
"{{$value := .TriggerValue}}",
}
text := strings.Join(append(defs, e.RuleNote), "")
t, err := template.New(fmt.Sprint(e.RuleId)).Funcs(tplx.TemplateFuncMap).Parse(text)
text := strings.Join(append(defs, f), "")
t, err := template.New(fmt.Sprint(e.RuleId)).Funcs(template.FuncMap(tplx.TemplateFuncMap)).Parse(text)
if err != nil {
return err
}
@@ -87,7 +88,13 @@ func (e *AlertCurEvent) ParseRuleNote() error {
return err
}
e.RuleNote = body.String()
if field == "rule_name" {
e.RuleName = body.String()
}
if field == "rule_note" {
e.RuleNote = body.String()
}
return nil
}
@@ -133,6 +140,8 @@ func (e *AlertCurEvent) GetField(field string) string {
return fmt.Sprint(e.RuleId)
case "rule_name":
return e.RuleName
case "rule_note":
return e.RuleNote
case "severity":
return fmt.Sprint(e.Severity)
case "runbook_url":
@@ -411,9 +420,9 @@ func AlertCurEventGetByIds(ids []int64) ([]*AlertCurEvent, error) {
return lst, err
}
func AlertCurEventGetByRule(ruleId int64) ([]*AlertCurEvent, error) {
func AlertCurEventGetByRuleIdAndCluster(ruleId int64, cluster string) ([]*AlertCurEvent, error) {
var lst []*AlertCurEvent
err := DB().Where("rule_id=?", ruleId).Find(&lst).Error
err := DB().Where("rule_id=? and cluster=?", ruleId, cluster).Find(&lst).Error
return lst, err
}

View File

@@ -136,7 +136,9 @@ func (m *AlertMute) Add() error {
if err := m.Verify(); err != nil {
return err
}
m.CreateAt = time.Now().Unix()
now := time.Now().Unix()
m.CreateAt = now
m.UpdateAt = now
return Insert(m)
}
@@ -174,7 +176,7 @@ func AlertMuteStatistics(cluster string) (*Statistics, error) {
return nil, err
}
session := DB().Model(&AlertMute{}).Select("count(*) as total", "max(create_at) as last_updated")
session := DB().Model(&AlertMute{}).Select("count(*) as total", "max(update_at) as last_updated")
if cluster != "" {
session = session.Where("(cluster like ? or cluster = ?)", "%"+cluster+"%", ClusterAll)
}
@@ -201,6 +203,11 @@ func AlertMuteGetsByCluster(cluster string) ([]*AlertMute, error) {
if err != nil {
return nil, err
}
if cluster == "" {
return lst, nil
}
for _, m := range lst {
if MatchCluster(m.Cluster, cluster) {
mlst = append(mlst, m)

View File

@@ -14,45 +14,50 @@ import (
)
type AlertRule struct {
Id int64 `json:"id" gorm:"primaryKey"`
GroupId int64 `json:"group_id"` // busi group id
Cate string `json:"cate"` // alert rule cate (prometheus|elasticsearch)
Cluster string `json:"cluster"` // take effect by clusters, seperated by space
Name string `json:"name"` // rule name
Note string `json:"note"` // will sent in notify
Prod string `json:"prod"` // product empty means n9e
Algorithm string `json:"algorithm"` // algorithm (''|holtwinters), empty means threshold
AlgoParams string `json:"-" gorm:"algo_params"` // params algorithm need
AlgoParamsJson interface{} `json:"algo_params" gorm:"-"` //
Delay int `json:"delay"` // Time (in seconds) to delay evaluation
Severity int `json:"severity"` // 1: Emergency 2: Warning 3: Notice
Disabled int `json:"disabled"` // 0: enabled, 1: disabled
PromForDuration int `json:"prom_for_duration"` // prometheus for, unit:s
PromQl string `json:"prom_ql"` // just one ql
PromEvalInterval int `json:"prom_eval_interval"` // unit:s
EnableStime string `json:"enable_stime"` // e.g. 00:00
EnableEtime string `json:"enable_etime"` // e.g. 23:59
EnableDaysOfWeek string `json:"-"` // split by space: 0 1 2 3 4 5 6
EnableDaysOfWeekJSON []string `json:"enable_days_of_week" gorm:"-"` // for fe
EnableInBG int `json:"enable_in_bg"` // 0: global 1: enable one busi-group
NotifyRecovered int `json:"notify_recovered"` // whether notify when recovery
NotifyChannels string `json:"-"` // split by space: sms voice email dingtalk wecom
NotifyChannelsJSON []string `json:"notify_channels" gorm:"-"` // for fe
NotifyGroups string `json:"-"` // split by space: 233 43
NotifyGroupsObj []UserGroup `json:"notify_groups_obj" gorm:"-"` // for fe
NotifyGroupsJSON []string `json:"notify_groups" gorm:"-"` // for fe
NotifyRepeatStep int `json:"notify_repeat_step"` // notify repeat interval, unit: min
NotifyMaxNumber int `json:"notify_max_number"` // notify: max number
RecoverDuration int64 `json:"recover_duration"` // unit: s
Callbacks string `json:"-"` // split by space: http://a.com/api/x http://a.com/api/y'
CallbacksJSON []string `json:"callbacks" gorm:"-"` // for fe
RunbookUrl string `json:"runbook_url"` // sop url
AppendTags string `json:"-"` // split by space: service=n9e mod=api
AppendTagsJSON []string `json:"append_tags" gorm:"-"` // for fe
CreateAt int64 `json:"create_at"`
CreateBy string `json:"create_by"`
UpdateAt int64 `json:"update_at"`
UpdateBy string `json:"update_by"`
Id int64 `json:"id" gorm:"primaryKey"`
GroupId int64 `json:"group_id"` // busi group id
Cate string `json:"cate"` // alert rule cate (prometheus|elasticsearch)
Cluster string `json:"cluster"` // take effect by clusters, seperated by space
Name string `json:"name"` // rule name
Note string `json:"note"` // will sent in notify
Prod string `json:"prod"` // product empty means n9e
Algorithm string `json:"algorithm"` // algorithm (''|holtwinters), empty means threshold
AlgoParams string `json:"-" gorm:"algo_params"` // params algorithm need
AlgoParamsJson interface{} `json:"algo_params" gorm:"-"` //
Delay int `json:"delay"` // Time (in seconds) to delay evaluation
Severity int `json:"severity"` // 1: Emergency 2: Warning 3: Notice
Disabled int `json:"disabled"` // 0: enabled, 1: disabled
PromForDuration int `json:"prom_for_duration"` // prometheus for, unit:s
PromQl string `json:"prom_ql"` // just one ql
PromEvalInterval int `json:"prom_eval_interval"` // unit:s
EnableStime string `json:"-"` // split by space: "00:00 10:00 12:00"
EnableStimeJSON string `json:"enable_stime" gorm:"-"` // for fe
EnableStimesJSON []string `json:"enable_stimes" gorm:"-"` // for fe
EnableEtime string `json:"-"` // split by space: "00:00 10:00 12:00"
EnableEtimeJSON string `json:"enable_etime" gorm:"-"` // for fe
EnableEtimesJSON []string `json:"enable_etimes" gorm:"-"` // for fe
EnableDaysOfWeek string `json:"-"` // eg: "0 1 2 3 4 5 6 ; 0 1 2"
EnableDaysOfWeekJSON []string `json:"enable_days_of_week" gorm:"-"` // for fe
EnableDaysOfWeeksJSON [][]string `json:"enable_days_of_weeks" gorm:"-"` // for fe
EnableInBG int `json:"enable_in_bg"` // 0: global 1: enable one busi-group
NotifyRecovered int `json:"notify_recovered"` // whether notify when recovery
NotifyChannels string `json:"-"` // split by space: sms voice email dingtalk wecom
NotifyChannelsJSON []string `json:"notify_channels" gorm:"-"` // for fe
NotifyGroups string `json:"-"` // split by space: 233 43
NotifyGroupsObj []UserGroup `json:"notify_groups_obj" gorm:"-"` // for fe
NotifyGroupsJSON []string `json:"notify_groups" gorm:"-"` // for fe
NotifyRepeatStep int `json:"notify_repeat_step"` // notify repeat interval, unit: min
NotifyMaxNumber int `json:"notify_max_number"` // notify: max number
RecoverDuration int64 `json:"recover_duration"` // unit: s
Callbacks string `json:"-"` // split by space: http://a.com/api/x http://a.com/api/y'
CallbacksJSON []string `json:"callbacks" gorm:"-"` // for fe
RunbookUrl string `json:"runbook_url"` // sop url
AppendTags string `json:"-"` // split by space: service=n9e mod=api
AppendTagsJSON []string `json:"append_tags" gorm:"-"` // for fe
CreateAt int64 `json:"create_at"`
CreateBy string `json:"create_by"`
UpdateAt int64 `json:"update_at"`
UpdateBy string `json:"update_by"`
}
func (ar *AlertRule) TableName() string {
@@ -224,7 +229,29 @@ func (ar *AlertRule) FillNotifyGroups(cache map[int64]*UserGroup) error {
}
func (ar *AlertRule) FE2DB() error {
ar.EnableDaysOfWeek = strings.Join(ar.EnableDaysOfWeekJSON, " ")
if len(ar.EnableStimesJSON) > 0 {
ar.EnableStime = strings.Join(ar.EnableStimesJSON, " ")
ar.EnableEtime = strings.Join(ar.EnableEtimesJSON, " ")
} else {
ar.EnableStime = ar.EnableStimeJSON
ar.EnableEtime = ar.EnableEtimeJSON
}
if len(ar.EnableDaysOfWeeksJSON) > 0 {
for i := 0; i < len(ar.EnableDaysOfWeeksJSON); i++ {
if len(ar.EnableDaysOfWeeksJSON) == 1 {
ar.EnableDaysOfWeek = strings.Join(ar.EnableDaysOfWeeksJSON[i], " ")
} else {
if i == len(ar.EnableDaysOfWeeksJSON)-1 {
ar.EnableDaysOfWeek += strings.Join(ar.EnableDaysOfWeeksJSON[i], " ")
} else {
ar.EnableDaysOfWeek += strings.Join(ar.EnableDaysOfWeeksJSON[i], " ") + ";"
}
}
}
} else {
ar.EnableDaysOfWeek = strings.Join(ar.EnableDaysOfWeekJSON, " ")
}
ar.NotifyChannels = strings.Join(ar.NotifyChannelsJSON, " ")
ar.NotifyGroups = strings.Join(ar.NotifyGroupsJSON, " ")
ar.Callbacks = strings.Join(ar.CallbacksJSON, " ")
@@ -239,7 +266,21 @@ func (ar *AlertRule) FE2DB() error {
}
func (ar *AlertRule) DB2FE() {
ar.EnableDaysOfWeekJSON = strings.Fields(ar.EnableDaysOfWeek)
ar.EnableStimesJSON = strings.Fields(ar.EnableStime)
ar.EnableEtimesJSON = strings.Fields(ar.EnableEtime)
if len(ar.EnableEtimesJSON) > 0 {
ar.EnableStimeJSON = ar.EnableStimesJSON[0]
ar.EnableEtimeJSON = ar.EnableEtimesJSON[0]
}
cache := strings.Split(ar.EnableDaysOfWeek, ";")
for i := 0; i < len(cache); i++ {
ar.EnableDaysOfWeeksJSON = append(ar.EnableDaysOfWeeksJSON, strings.Fields(cache[i]))
}
if len(ar.EnableDaysOfWeeksJSON) > 0 {
ar.EnableDaysOfWeekJSON = ar.EnableDaysOfWeeksJSON[0]
}
ar.NotifyChannelsJSON = strings.Fields(ar.NotifyChannels)
ar.NotifyGroupsJSON = strings.Fields(ar.NotifyGroups)
ar.CallbacksJSON = strings.Fields(ar.Callbacks)
@@ -425,3 +466,38 @@ func AlertRuleStatistics(cluster string) (*Statistics, error) {
return stats[0], nil
}
func (ar *AlertRule) IsPrometheusRule() bool {
return ar.Algorithm == "" && (ar.Cate == "" || strings.ToLower(ar.Cate) == "prometheus")
}
func (ar *AlertRule) GenerateNewEvent() *AlertCurEvent {
event := &AlertCurEvent{}
ar.UpdateEvent(event)
return event
}
func (ar *AlertRule) UpdateEvent(event *AlertCurEvent) {
if event == nil {
return
}
event.GroupId = ar.GroupId
event.Cate = ar.Cate
event.RuleId = ar.Id
event.RuleName = ar.Name
event.RuleNote = ar.Note
event.RuleProd = ar.Prod
event.RuleAlgo = ar.Algorithm
event.Severity = ar.Severity
event.PromForDuration = ar.PromForDuration
event.PromQl = ar.PromQl
event.PromEvalInterval = ar.PromEvalInterval
event.Callbacks = ar.Callbacks
event.CallbacksJSON = ar.CallbacksJSON
event.RunbookUrl = ar.RunbookUrl
event.NotifyRecovered = ar.NotifyRecovered
event.NotifyChannels = ar.NotifyChannels
event.NotifyChannelsJSON = ar.NotifyChannelsJSON
event.NotifyGroups = ar.NotifyGroups
event.NotifyGroupsJSON = ar.NotifyGroupsJSON
}

View File

@@ -238,6 +238,11 @@ func AlertSubscribeGetsByCluster(cluster string) ([]*AlertSubscribe, error) {
if err != nil {
return nil, err
}
if cluster == "" {
return lst, nil
}
for _, s := range lst {
if MatchCluster(s.Cluster, cluster) {
slst = append(slst, s)

View File

@@ -1,6 +1,9 @@
package models
import "time"
import (
"fmt"
"time"
)
type AlertingEngines struct {
Id int64 `json:"id" gorm:"primaryKey"`
@@ -15,23 +18,62 @@ func (e *AlertingEngines) TableName() string {
// UpdateCluster 页面上用户会给各个n9e-server分配要关联的目标集群是什么
func (e *AlertingEngines) UpdateCluster(c string) error {
count, err := Count(DB().Model(&AlertingEngines{}).Where("id<>? and instance=? and cluster=?", e.Id, e.Instance, c))
if err != nil {
return err
}
if count > 0 {
return fmt.Errorf("instance %s and cluster %s already exists", e.Instance, c)
}
e.Cluster = c
return DB().Model(e).Select("cluster").Updates(e).Error
}
func AlertingEngineAdd(instance, cluster string) error {
count, err := Count(DB().Model(&AlertingEngines{}).Where("instance=? and cluster=?", instance, cluster))
if err != nil {
return err
}
if count > 0 {
return fmt.Errorf("instance %s and cluster %s already exists", instance, cluster)
}
err = DB().Create(&AlertingEngines{
Instance: instance,
Cluster: cluster,
Clock: time.Now().Unix(),
}).Error
return err
}
func AlertingEngineDel(ids []int64) error {
if len(ids) == 0 {
return nil
}
return DB().Where("id in ?", ids).Delete(new(AlertingEngines)).Error
}
// AlertingEngineGetCluster 根据实例名获取对应的集群名字
func AlertingEngineGetCluster(instance string) (string, error) {
func AlertingEngineGetClusters(instance string) ([]string, error) {
var objs []AlertingEngines
err := DB().Where("instance=?", instance).Find(&objs).Error
if err != nil {
return "", err
return []string{}, err
}
if len(objs) == 0 {
return "", nil
return []string{}, nil
}
var clusters []string
for i := 0; i < len(objs); i++ {
clusters = append(clusters, objs[i].Cluster)
}
return objs[0].Cluster, nil
return clusters, nil
}
// AlertingEngineGets 拉取列表数据,用户要在页面上看到所有 n9e-server 实例列表,然后为其分配 cluster
@@ -72,9 +114,9 @@ func AlertingEngineGetsInstances(where string, args ...interface{}) ([]string, e
return arr, err
}
func AlertingEngineHeartbeat(instance, cluster string) error {
func AlertingEngineHeartbeatWithCluster(instance, cluster string) error {
var total int64
err := DB().Model(new(AlertingEngines)).Where("instance=?", instance).Count(&total).Error
err := DB().Model(new(AlertingEngines)).Where("instance=? and cluster=?", instance, cluster).Count(&total).Error
if err != nil {
return err
}
@@ -88,9 +130,15 @@ func AlertingEngineHeartbeat(instance, cluster string) error {
}).Error
} else {
// updates
fields := map[string]interface{}{"clock": time.Now().Unix(), "cluster": cluster}
err = DB().Model(new(AlertingEngines)).Where("instance=?", instance).Updates(fields).Error
fields := map[string]interface{}{"clock": time.Now().Unix()}
err = DB().Model(new(AlertingEngines)).Where("instance=? and cluster=?", instance, cluster).Updates(fields).Error
}
return err
}
func AlertingEngineHeartbeat(instance string) error {
fields := map[string]interface{}{"clock": time.Now().Unix()}
err := DB().Model(new(AlertingEngines)).Where("instance=?", instance).Updates(fields).Error
return err
}

View File

@@ -44,7 +44,7 @@ func (b *Board) CanRenameIdent(ident string) (bool, error) {
return true, nil
}
cnt, err := Count(DB().Model(b).Where("ident=? and id <> ?", b.Ident, b.Id))
cnt, err := Count(DB().Model(b).Where("ident=? and id <> ?", ident, b.Id))
if err != nil {
return false, err
}

View File

@@ -512,6 +512,23 @@ func (u *User) UserGroups(limit int, query string) ([]UserGroup, error) {
var lst []UserGroup
if u.IsAdmin() {
err := session.Where("name like ?", "%"+query+"%").Find(&lst).Error
if err != nil {
return lst, err
}
if len(lst) == 0 && len(query) > 0 {
// 隐藏功能一般人不告诉哈哈。query可能是给的用户名所以上面的sql没有查到当做user来查一下试试
user, err := UserGetByUsername(query)
if user == nil {
return lst, err
}
var ids []int64
ids, err = MyGroupIds(user.Id)
if err != nil || len(ids) == 0 {
return lst, err
}
lst, err = UserGroupGetByIds(ids)
}
return lst, err
}

150
src/pkg/cas/cas.go Normal file
View File

@@ -0,0 +1,150 @@
package cas
import (
"bytes"
"context"
"net/url"
"strings"
"time"
"github.com/didi/nightingale/v5/src/storage"
"github.com/google/uuid"
"github.com/toolkits/pkg/cas"
"github.com/toolkits/pkg/logger"
)
// Config is the CAS SSO section of the configuration file.
type Config struct {
	Enable          bool   // turn the CAS integration on/off
	SsoAddr         string // CAS server base address (genRedirectURL appends "login" directly — expects trailing slash; confirm)
	RedirectURL     string // service URL the CAS server redirects back to
	DisplayName     string // label shown on the login page
	CoverAttributes bool   // presumably: SSO attributes overwrite existing user fields — TODO confirm against caller
	Attributes      struct {
		Nickname string // CAS attribute name mapped to nickname
		Phone    string // CAS attribute name mapped to phone
		Email    string // CAS attribute name mapped to email
	}
	DefaultRoles []string // roles for users created via CAS — not used in this file; verify against caller
}
// ssoClient holds the runtime CAS settings copied from Config by Init.
type ssoClient struct {
	config       Config // full original config; ValidateServiceTicket re-reads SsoAddr from it
	ssoAddr      string // CAS server base address
	callbackAddr string // our callback (service) URL
	displayName  string // login-page label
	attributes   struct {
		nickname string // CAS attribute key for nickname
		phone    string // CAS attribute key for phone
		email    string // CAS attribute key for email
	}
}
var (
	// cli is the package-level CAS client populated by Init.
	cli ssoClient
)
// Init wires the package-level CAS client from config; it is a no-op when
// the integration is disabled.
func Init(cf Config) {
	if !cf.Enable {
		return
	}

	c := ssoClient{
		config:       cf,
		ssoAddr:      cf.SsoAddr,
		callbackAddr: cf.RedirectURL,
		displayName:  cf.DisplayName,
	}
	c.attributes.nickname = cf.Attributes.Nickname
	c.attributes.phone = cf.Attributes.Phone
	c.attributes.email = cf.Attributes.Email
	cli = c
}
// GetDisplayName returns the configured display name for the CAS login entry.
func GetDisplayName() string {
	return cli.displayName
}
// Authorize stores the post-login redirect target under a random state key
// (300-second TTL in Redis) and returns the CAS login location plus the state.
func Authorize(redirect string) (string, string, error) {
	state := uuid.New().String()
	ctx := context.Background()

	// 300*time.Second is already a time.Duration; the previous explicit
	// time.Duration(...) conversion was a no-op.
	err := storage.Redis.Set(ctx, wrapStateKey(state), redirect, 300*time.Second).Err()
	if err != nil {
		return "", "", err
	}
	return cli.genRedirectURL(state), state, nil
}
// fetchRedirect returns the redirect target previously stored for state.
func fetchRedirect(ctx context.Context, state string) (string, error) {
	return storage.Redis.Get(ctx, wrapStateKey(state)).Result()
}
// deleteRedirect removes the stored redirect for state (one-shot use).
func deleteRedirect(ctx context.Context, state string) error {
	return storage.Redis.Del(ctx, wrapStateKey(state)).Err()
}
// wrapStateKey namespaces a state value for use as a Redis key.
func wrapStateKey(key string) string {
	const prefix = "n9e_cas_"
	return prefix + key
}
// genRedirectURL builds the CAS login URL for the configured service.
// NOTE(review): the path is formed as ssoAddr + "login", so SsoAddr appears
// to require a trailing slash — confirm against real deployments.
// `state` is accepted but not encoded into the URL (CAS carries state via
// the service callback instead).
func (cli *ssoClient) genRedirectURL(state string) string {
	v := url.Values{
		"service": {cli.callbackAddr},
	}
	sep := "?"
	if strings.Contains(cli.ssoAddr, "?") {
		sep = "&"
	}
	return cli.ssoAddr + "login" + sep + v.Encode()
}
// CallbackOutput is the result handed back by the CAS callback flow.
type CallbackOutput struct {
	Redirect    string `json:"redirect"`
	Msg         string `json:"msg"`
	AccessToken string `json:"accessToken"`
	Username    string `json:"username"`
	Nickname    string `json:"nickname"`
	// NOTE(review): the two fields below carry `yaml` tags while their
	// siblings use `json`; JSON encoding will emit "Phone"/"Email" for them.
	// Looks like a copy-paste slip — confirm before changing the tags.
	Phone string `yaml:"phone"`
	Email string `yaml:"email"`
}
// ValidateServiceTicket validates a CAS service ticket and assembles the
// callback output (username plus the mapped nickname/email/phone attributes).
// The redirect parked under `state` is fetched and then removed from Redis;
// failures there are only logged because authentication already succeeded.
func ValidateServiceTicket(ctx context.Context, ticket, state string) (ret *CallbackOutput, err error) {
	casUrl, err := url.Parse(cli.config.SsoAddr)
	if err != nil {
		logger.Error(err)
		return
	}

	serviceUrl, err := url.Parse(cli.callbackAddr)
	if err != nil {
		logger.Error(err)
		return
	}

	resOptions := &cas.RestOptions{
		CasURL:     casUrl,
		ServiceURL: serviceUrl,
	}

	resCli := cas.NewRestClient(resOptions)
	authRet, err := resCli.ValidateServiceTicket(cas.ServiceTicket(ticket))
	if err != nil {
		logger.Errorf("Ticket Validating Failed: %s", err)
		return
	}

	ret = &CallbackOutput{}
	ret.Username = authRet.User
	ret.Nickname = authRet.Attributes.Get(cli.attributes.nickname)
	logger.Debugf("CAS Authentication Response's Attributes--[Nickname]: %s", ret.Nickname)
	ret.Email = authRet.Attributes.Get(cli.attributes.email)
	logger.Debugf("CAS Authentication Response's Attributes--[Email]: %s", ret.Email)
	ret.Phone = authRet.Attributes.Get(cli.attributes.phone)
	logger.Debugf("CAS Authentication Response's Attributes--[Phone]: %s", ret.Phone)

	// BUG FIX: the log arguments were passed as (state, err) while the
	// format string labels err first, so the two values printed swapped.
	ret.Redirect, err = fetchRedirect(ctx, state)
	if err != nil {
		logger.Debugf("get redirect err:%s state:%s", err, state)
	}

	// A fetch error above is intentionally not propagated: err is
	// overwritten here, matching the original best-effort behavior.
	err = deleteRedirect(ctx, state)
	if err != nil {
		logger.Debugf("delete redirect err:%s state:%s", err, state)
	}
	return
}

225
src/pkg/oauth2x/oauth2x.go Normal file
View File

@@ -0,0 +1,225 @@
package oauth2x
import (
"bytes"
"context"
"fmt"
"io/ioutil"
"net/http"
"time"
"github.com/didi/nightingale/v5/src/storage"
"github.com/toolkits/pkg/logger"
"github.com/google/uuid"
jsoniter "github.com/json-iterator/go"
"golang.org/x/oauth2"
)
// ssoClient holds the runtime OAuth2 settings copied from Config by Init.
type ssoClient struct {
	config          oauth2.Config // client id/secret, endpoints, scopes
	ssoAddr         string        // authorization endpoint
	userInfoAddr    string        // userinfo endpoint
	TranTokenMethod string        // how the token reaches userinfo: "formdata", "querystring", or header (default); exported name in an unexported struct — stylistic oddity kept as-is
	callbackAddr    string        // our registered redirect URL
	displayName     string        // login-page label
	coverAttributes bool          // presumably: SSO attributes overwrite existing user fields — TODO confirm against caller
	attributes      struct {
		username string // userinfo field mapped to username
		nickname string // userinfo field mapped to nickname
		phone    string // userinfo field mapped to phone
		email    string // userinfo field mapped to email
	}
	userinfoIsArray bool   // userinfo payload is an array; element 0 holds the user object
	userinfoPrefix  string // optional wrapper object key around the user fields
}
// Config is the OAuth2 SSO section of the configuration file.
type Config struct {
	Enable          bool   // turn the OAuth2 integration on/off
	DisplayName     string // label shown on the login page
	RedirectURL     string // our callback URL registered with the provider
	SsoAddr         string // authorization endpoint
	TokenAddr       string // token endpoint
	UserInfoAddr    string // userinfo endpoint
	TranTokenMethod string // token transport for userinfo: "formdata", "querystring", or Bearer header (default)
	ClientId        string // OAuth2 client id
	ClientSecret    string // OAuth2 client secret
	CoverAttributes bool   // presumably: SSO attributes overwrite existing user fields — TODO confirm against caller
	Attributes      struct {
		Username string // userinfo field mapped to username
		Nickname string // userinfo field mapped to nickname
		Phone    string // userinfo field mapped to phone
		Email    string // userinfo field mapped to email
	}
	DefaultRoles    []string // roles for users created via SSO — not used in this file; verify against caller
	UserinfoIsArray bool     // userinfo payload is an array; element 0 holds the user object
	UserinfoPrefix  string   // optional wrapper object key around the user fields
	Scopes          []string // OAuth2 scopes to request
}
var (
	// cli is the package-level OAuth2 client populated by Init.
	cli ssoClient
)
// Init wires the package-level OAuth2 client from config; it is a no-op when
// the integration is disabled.
func Init(cf Config) {
	if !cf.Enable {
		return
	}

	c := ssoClient{
		ssoAddr:         cf.SsoAddr,
		userInfoAddr:    cf.UserInfoAddr,
		TranTokenMethod: cf.TranTokenMethod,
		callbackAddr:    cf.RedirectURL,
		displayName:     cf.DisplayName,
		coverAttributes: cf.CoverAttributes,
		userinfoIsArray: cf.UserinfoIsArray,
		userinfoPrefix:  cf.UserinfoPrefix,
		config: oauth2.Config{
			ClientID:     cf.ClientId,
			ClientSecret: cf.ClientSecret,
			Endpoint: oauth2.Endpoint{
				AuthURL:  cf.SsoAddr,
				TokenURL: cf.TokenAddr,
			},
			RedirectURL: cf.RedirectURL,
			Scopes:      cf.Scopes,
		},
	}
	// attributes is an anonymous struct, so its fields are set individually.
	c.attributes.username = cf.Attributes.Username
	c.attributes.nickname = cf.Attributes.Nickname
	c.attributes.phone = cf.Attributes.Phone
	c.attributes.email = cf.Attributes.Email
	cli = c
}
// GetDisplayName returns the configured display name for the OAuth2 login entry.
func GetDisplayName() string {
	return cli.displayName
}
// wrapStateKey namespaces a state value for use as a Redis key.
func wrapStateKey(key string) string {
	const prefix = "n9e_oauth_"
	return prefix + key
}
// Authorize returns the provider's authorize URL carrying a random state; the
// caller's redirect target is parked in Redis under that state for 300s.
func Authorize(redirect string) (string, error) {
	state := uuid.New().String()
	ctx := context.Background()

	// 300*time.Second is already a time.Duration; the previous explicit
	// time.Duration(...) conversion was a no-op.
	if err := storage.Redis.Set(ctx, wrapStateKey(state), redirect, 300*time.Second).Err(); err != nil {
		return "", err
	}
	return cli.config.AuthCodeURL(state), nil
}
// fetchRedirect returns the redirect target previously stored for state.
func fetchRedirect(ctx context.Context, state string) (string, error) {
	return storage.Redis.Get(ctx, wrapStateKey(state)).Result()
}
// deleteRedirect removes the stored redirect for state (one-shot use).
func deleteRedirect(ctx context.Context, state string) error {
	return storage.Redis.Del(ctx, wrapStateKey(state)).Err()
}
// Callback exchanges the authorization code for an access token plus user
// info, then restores (and clears) the redirect parked under `state`.
// Redis failures are logged but do not fail the login.
func Callback(ctx context.Context, code, state string) (*CallbackOutput, error) {
	ret, err := exchangeUser(code)
	if err != nil {
		// typo fix: "ilegal" -> "illegal"
		return nil, fmt.Errorf("illegal user:%v", err)
	}

	// BUG FIX: the log arguments were (code, state, err) while the format
	// verbs label err first, so every printed field was shifted by one.
	ret.Redirect, err = fetchRedirect(ctx, state)
	if err != nil {
		logger.Errorf("get redirect err:%v code:%s state:%s", err, code, state)
	}

	if err := deleteRedirect(ctx, state); err != nil {
		logger.Errorf("delete redirect err:%v code:%s state:%s", err, code, state)
	}
	return ret, nil
}
// CallbackOutput is the result handed back by the OAuth2 callback flow.
type CallbackOutput struct {
	Redirect    string `json:"redirect"`
	Msg         string `json:"msg"`
	AccessToken string `json:"accessToken"`
	Username    string `json:"username"`
	Nickname    string `json:"nickname"`
	// NOTE(review): the two fields below carry `yaml` tags while their
	// siblings use `json`; JSON encoding will emit "Phone"/"Email" for them.
	// Looks like a copy-paste slip — confirm before changing the tags.
	Phone string `yaml:"phone"`
	Email string `yaml:"email"`
}
// exchangeUser swaps the authorization code for an access token and pulls the
// user's profile fields from the userinfo endpoint.
func exchangeUser(code string) (*CallbackOutput, error) {
	ctx := context.Background()

	token, err := cli.config.Exchange(ctx, code)
	if err != nil {
		return nil, fmt.Errorf("failed to exchange token: %s", err)
	}

	info, err := getUserInfo(cli.userInfoAddr, token.AccessToken, cli.TranTokenMethod)
	if err != nil {
		logger.Errorf("failed to get user info: %s", err)
		return nil, fmt.Errorf("failed to get user info: %s", err)
	}

	out := &CallbackOutput{AccessToken: token.AccessToken}
	out.Username = getUserinfoField(info, cli.userinfoIsArray, cli.userinfoPrefix, cli.attributes.username)
	out.Nickname = getUserinfoField(info, cli.userinfoIsArray, cli.userinfoPrefix, cli.attributes.nickname)
	out.Phone = getUserinfoField(info, cli.userinfoIsArray, cli.userinfoPrefix, cli.attributes.phone)
	out.Email = getUserinfoField(info, cli.userinfoIsArray, cli.userinfoPrefix, cli.attributes.email)
	return out, nil
}
// getUserInfo fetches the userinfo document, passing the access token either
// as form data ("formdata"), as a query-string parameter ("querystring"), or
// — by default — as a Bearer Authorization header.
// NOTE(review): the response status code is not checked and the default
// http.Client has no timeout; both predate this change and are kept as-is.
func getUserInfo(userInfoAddr, accessToken string, TranTokenMethod string) ([]byte, error) {
	var req *http.Request
	switch TranTokenMethod {
	case "formdata":
		body := bytes.NewBuffer([]byte("access_token=" + accessToken))
		r, err := http.NewRequest("POST", userInfoAddr, body)
		if err != nil {
			return nil, err
		}
		r.Header.Add("Content-Type", "application/x-www-form-urlencoded")
		req = r
	case "querystring":
		r, err := http.NewRequest("GET", userInfoAddr+"?access_token="+accessToken, nil)
		if err != nil {
			return nil, err
		}
		r.Header.Add("Authorization", "Bearer "+accessToken)
		req = r
	default:
		r, err := http.NewRequest("GET", userInfoAddr, nil)
		if err != nil {
			return nil, err
		}
		r.Header.Add("Authorization", "Bearer "+accessToken)
		req = r
	}

	resp, err := http.DefaultClient.Do(req)
	if err != nil {
		return nil, err
	}
	defer resp.Body.Close()

	body, err := ioutil.ReadAll(resp.Body)
	if err != nil {
		// BUG FIX: this used to `return nil, nil`, silently discarding the
		// read error and handing callers a nil payload with no indication.
		return nil, err
	}
	return body, nil
}
// getUserinfoField extracts a single attribute from the raw userinfo JSON.
// `prefix` optionally descends into a wrapper object first; `isArray` means
// the payload (or the wrapper's value) is an array whose first element holds
// the user object.
func getUserinfoField(input []byte, isArray bool, prefix, field string) string {
	switch {
	case prefix == "" && isArray:
		return jsoniter.Get(input, 0).Get(field).ToString()
	case prefix == "":
		return jsoniter.Get(input, field).ToString()
	case isArray:
		return jsoniter.Get(input, prefix, 0).Get(field).ToString()
	default:
		return jsoniter.Get(input, prefix).Get(field).ToString()
	}
}

View File

@@ -20,6 +20,7 @@ type ssoClient struct {
ssoAddr string
callbackAddr string
coverAttributes bool
displayName string
attributes struct {
username string
nickname string
@@ -30,6 +31,7 @@ type ssoClient struct {
type Config struct {
Enable bool
DisplayName string
RedirectURL string
SsoAddr string
ClientId string
@@ -59,6 +61,7 @@ func Init(cf Config) {
cli.attributes.nickname = cf.Attributes.Nickname
cli.attributes.phone = cf.Attributes.Phone
cli.attributes.email = cf.Attributes.Email
cli.displayName = cf.DisplayName
provider, err := oidc.NewProvider(context.Background(), cf.SsoAddr)
if err != nil {
log.Fatal(err)
@@ -77,6 +80,10 @@ func Init(cf Config) {
}
}
func GetDisplayName() string {
return cli.displayName
}
func wrapStateKey(key string) string {
return "n9e_oidc_" + key
}

100
src/pkg/secu/aes.go Normal file
View File

@@ -0,0 +1,100 @@
package secu
import (
	"bytes"
	"crypto/aes"
	"crypto/cipher"
	"encoding/base64"
	"errors"
	"strings"
)
// BASE64StdEncode encodes src with the standard base64 alphabet.
func BASE64StdEncode(src []byte) string {
	dst := make([]byte, base64.StdEncoding.EncodedLen(len(src)))
	base64.StdEncoding.Encode(dst, src)
	return string(dst)
}
// BASE64StdDecode decodes a standard-alphabet base64 string, returning an
// error for malformed input.
func BASE64StdDecode(src string) ([]byte, error) {
	// DecodeString already returns (nil, err) on failure; the previous
	// explicit err-check-then-return branch was redundant.
	return base64.StdEncoding.DecodeString(src)
}
// PKCS7Padding pads data to a whole multiple of blockSize by appending n
// bytes each holding the value n (PKCS#7). At least one byte is always
// appended, so the padding is reversible even for block-aligned input.
func PKCS7Padding(ciphertext []byte, blockSize int) []byte {
	padding := blockSize - len(ciphertext)%blockSize
	padtext := bytes.Repeat([]byte{byte(padding)}, padding)
	return append(ciphertext, padtext...)
}

// PKCS7UnPadding strips PKCS#7 padding. Invalid input (empty slice, pad
// count of zero, or a pad count larger than the data) is returned unchanged
// instead of panicking with an out-of-range slice index as before.
func PKCS7UnPadding(originData []byte) []byte {
	length := len(originData)
	if length == 0 {
		return originData
	}
	unpadding := int(originData[length-1])
	if unpadding == 0 || unpadding > length {
		return originData
	}
	return originData[:length-unpadding]
}

// AesEncrypt encrypts origData with AES-CBC and PKCS#7 padding.
// NOTE(review): the IV is the first block of the key itself, which makes
// encryption deterministic — a known weakness, kept as-is for compatibility
// with ciphertexts already produced by this scheme.
func AesEncrypt(origData, key []byte) ([]byte, error) {
	block, err := aes.NewCipher(key)
	if err != nil {
		return nil, err
	}
	blockSize := block.BlockSize()
	padded := PKCS7Padding(origData, blockSize)
	mode := cipher.NewCBCEncrypter(block, key[:blockSize])
	crypted := make([]byte, len(padded))
	mode.CryptBlocks(crypted, padded)
	return crypted, nil
}

// AesDecrypt reverses AesEncrypt. The ciphertext length is validated first:
// cipher.BlockMode.CryptBlocks panics on partial blocks, so the previous
// version crashed on truncated/corrupted input instead of returning an error.
func AesDecrypt(crypted, key []byte) ([]byte, error) {
	block, err := aes.NewCipher(key)
	if err != nil {
		return nil, err
	}
	blockSize := block.BlockSize()
	if len(crypted) == 0 || len(crypted)%blockSize != 0 {
		return nil, errors.New("ciphertext length is not a multiple of the AES block size")
	}
	mode := cipher.NewCBCDecrypter(block, key[:blockSize])
	origData := make([]byte, len(crypted))
	mode.CryptBlocks(origData, crypted)
	return PKCS7UnPadding(origData), nil
}
// DealWithDecrypt decrypts a configuration value if it carries the
// "{{cipher}}" prefix; plain values are returned unchanged. On any decode or
// decrypt failure the original string is returned together with the error.
func DealWithDecrypt(src string, key string) (string, error) {
	const cipherPrefix = "{{cipher}}"

	if !strings.HasPrefix(src, cipherPrefix) {
		// Not encrypted; pass through untouched.
		return src, nil
	}

	// TrimPrefix replaces the brittle src[10:] slice that silently depended
	// on the prefix length staying exactly 10 characters.
	data := strings.TrimPrefix(src, cipherPrefix)

	decodeData, err := BASE64StdDecode(data)
	if err != nil {
		return src, err
	}

	origin, err := AesDecrypt(decodeData, []byte(key))
	if err != nil {
		return src, err
	}
	return string(origin), nil
}
// DealWithEncrypt encrypts a configuration value and tags the result with the
// "{{cipher}}" prefix so DealWithDecrypt can recognize it later. The original
// string is returned on encryption failure.
func DealWithEncrypt(src string, key string) (string, error) {
	encrypted, err := AesEncrypt([]byte(src), []byte(key))
	if err != nil {
		return src, err
	}
	return "{{cipher}}" + BASE64StdEncode(encrypted), nil
}

View File

@@ -4,6 +4,7 @@ import (
"fmt"
"html/template"
"math"
"reflect"
"regexp"
"strconv"
"time"
@@ -33,6 +34,10 @@ func Timestamp(pattern ...string) string {
return time.Now().Format(defp)
}
func Now() time.Time {
return time.Now()
}
func Args(args ...interface{}) map[string]interface{} {
result := make(map[string]interface{})
for i, a := range args {
@@ -95,11 +100,27 @@ func Humanize1024(s string) string {
return fmt.Sprintf("%.4g%s", v, prefix)
}
// ToString renders any value using fmt's default %v formatting.
func ToString(v interface{}) string {
	return fmt.Sprintf("%v", v)
}
// HumanizeDuration renders a duration given as a numeric string of seconds;
// input that does not parse as a float is returned unchanged.
func HumanizeDuration(s string) string {
	seconds, err := strconv.ParseFloat(s, 64)
	if err != nil {
		// not numeric: hand the raw string back untouched
		return s
	}
	return HumanizeDurationFloat64(seconds)
}
// HumanizeDurationInterface renders an arbitrary value as a human-readable
// duration; values that cannot be coerced to float64 fall back to their
// default string form.
func HumanizeDurationInterface(i interface{}) string {
	if f, err := ToFloat64(i); err == nil {
		return HumanizeDurationFloat64(f)
	}
	return ToString(i)
}
func HumanizeDurationFloat64(v float64) string {
if math.IsNaN(v) || math.IsInf(v, 0) {
return fmt.Sprintf("%.4g", v)
}
@@ -155,3 +176,179 @@ func HumanizePercentageH(s string) string {
}
return fmt.Sprintf("%.2f%%", v)
}
// Add returns the sum of a and b.
// Operand types combine as: int+int -> int64, uint+uint -> uint64,
// mixed int/uint -> int64, anything involving a float -> float64.
// Non-numeric operands yield an error.
func Add(a, b interface{}) (interface{}, error) {
	av := reflect.ValueOf(a)
	bv := reflect.ValueOf(b)

	// kindClass buckets a reflect.Kind: 0 = signed int, 1 = unsigned int,
	// 2 = float, -1 = unsupported.
	kindClass := func(k reflect.Kind) int {
		switch k {
		case reflect.Int, reflect.Int8, reflect.Int16, reflect.Int32, reflect.Int64:
			return 0
		case reflect.Uint, reflect.Uint8, reflect.Uint16, reflect.Uint32, reflect.Uint64:
			return 1
		case reflect.Float32, reflect.Float64:
			return 2
		}
		return -1
	}

	ac := kindClass(av.Kind())
	if ac < 0 {
		return nil, fmt.Errorf("add: unknown type for %q (%T)", av, a)
	}
	bc := kindClass(bv.Kind())
	if bc < 0 {
		return nil, fmt.Errorf("add: unknown type for %q (%T)", bv, b)
	}

	switch {
	case ac == 0 && bc == 0:
		return av.Int() + bv.Int(), nil
	case ac == 0 && bc == 1:
		return av.Int() + int64(bv.Uint()), nil
	case ac == 0:
		return float64(av.Int()) + bv.Float(), nil
	case ac == 1 && bc == 0:
		return int64(av.Uint()) + bv.Int(), nil
	case ac == 1 && bc == 1:
		return av.Uint() + bv.Uint(), nil
	case ac == 1:
		return float64(av.Uint()) + bv.Float(), nil
	case bc == 0:
		return av.Float() + float64(bv.Int()), nil
	case bc == 1:
		return av.Float() + float64(bv.Uint()), nil
	default:
		return av.Float() + bv.Float(), nil
	}
}
// Subtract returns the difference a - b.
// Result types follow the same rules as Add: int-int -> int64,
// uint-uint -> uint64 (with wraparound), mixed int/uint -> int64,
// anything involving a float -> float64.
func Subtract(a, b interface{}) (interface{}, error) {
	av := reflect.ValueOf(a)
	bv := reflect.ValueOf(b)

	// kindClass buckets a reflect.Kind: 0 = signed int, 1 = unsigned int,
	// 2 = float, -1 = unsupported.
	kindClass := func(k reflect.Kind) int {
		switch k {
		case reflect.Int, reflect.Int8, reflect.Int16, reflect.Int32, reflect.Int64:
			return 0
		case reflect.Uint, reflect.Uint8, reflect.Uint16, reflect.Uint32, reflect.Uint64:
			return 1
		case reflect.Float32, reflect.Float64:
			return 2
		}
		return -1
	}

	ac := kindClass(av.Kind())
	if ac < 0 {
		return nil, fmt.Errorf("subtract: unknown type for %q (%T)", av, a)
	}
	bc := kindClass(bv.Kind())
	if bc < 0 {
		return nil, fmt.Errorf("subtract: unknown type for %q (%T)", bv, b)
	}

	switch {
	case ac == 0 && bc == 0:
		return av.Int() - bv.Int(), nil
	case ac == 0 && bc == 1:
		return av.Int() - int64(bv.Uint()), nil
	case ac == 0:
		return float64(av.Int()) - bv.Float(), nil
	case ac == 1 && bc == 0:
		return int64(av.Uint()) - bv.Int(), nil
	case ac == 1 && bc == 1:
		return av.Uint() - bv.Uint(), nil
	case ac == 1:
		return float64(av.Uint()) - bv.Float(), nil
	case bc == 0:
		return av.Float() - float64(bv.Int()), nil
	case bc == 1:
		return av.Float() - float64(bv.Uint()), nil
	default:
		return av.Float() - bv.Float(), nil
	}
}
// Multiply returns the product of a and b.
// Result types follow the same rules as Add: int*int -> int64,
// uint*uint -> uint64, mixed int/uint -> int64, anything involving
// a float -> float64.
func Multiply(a, b interface{}) (interface{}, error) {
	av := reflect.ValueOf(a)
	bv := reflect.ValueOf(b)

	// kindClass buckets a reflect.Kind: 0 = signed int, 1 = unsigned int,
	// 2 = float, -1 = unsupported.
	kindClass := func(k reflect.Kind) int {
		switch k {
		case reflect.Int, reflect.Int8, reflect.Int16, reflect.Int32, reflect.Int64:
			return 0
		case reflect.Uint, reflect.Uint8, reflect.Uint16, reflect.Uint32, reflect.Uint64:
			return 1
		case reflect.Float32, reflect.Float64:
			return 2
		}
		return -1
	}

	ac := kindClass(av.Kind())
	if ac < 0 {
		return nil, fmt.Errorf("multiply: unknown type for %q (%T)", av, a)
	}
	bc := kindClass(bv.Kind())
	if bc < 0 {
		return nil, fmt.Errorf("multiply: unknown type for %q (%T)", bv, b)
	}

	switch {
	case ac == 0 && bc == 0:
		return av.Int() * bv.Int(), nil
	case ac == 0 && bc == 1:
		return av.Int() * int64(bv.Uint()), nil
	case ac == 0:
		return float64(av.Int()) * bv.Float(), nil
	case ac == 1 && bc == 0:
		return int64(av.Uint()) * bv.Int(), nil
	case ac == 1 && bc == 1:
		return av.Uint() * bv.Uint(), nil
	case ac == 1:
		return float64(av.Uint()) * bv.Float(), nil
	case bc == 0:
		return av.Float() * float64(bv.Int()), nil
	case bc == 1:
		return av.Float() * float64(bv.Uint()), nil
	default:
		return av.Float() * bv.Float(), nil
	}
}
// Divide returns the quotient a / b.
// Result types follow the same rules as Add: int/int -> int64 (truncated),
// uint/uint -> uint64, mixed int/uint -> int64, anything involving a
// float -> float64 (so float division by zero yields ±Inf as before).
// Integer division by zero returns an error instead of panicking.
func Divide(a, b interface{}) (interface{}, error) {
	av := reflect.ValueOf(a)
	bv := reflect.ValueOf(b)

	// kindClass buckets a reflect.Kind: 0 = signed int, 1 = unsigned int,
	// 2 = float, -1 = unsupported.
	kindClass := func(k reflect.Kind) int {
		switch k {
		case reflect.Int, reflect.Int8, reflect.Int16, reflect.Int32, reflect.Int64:
			return 0
		case reflect.Uint, reflect.Uint8, reflect.Uint16, reflect.Uint32, reflect.Uint64:
			return 1
		case reflect.Float32, reflect.Float64:
			return 2
		}
		return -1
	}

	ac := kindClass(av.Kind())
	if ac < 0 {
		return nil, fmt.Errorf("divide: unknown type for %q (%T)", av, a)
	}
	bc := kindClass(bv.Kind())
	if bc < 0 {
		return nil, fmt.Errorf("divide: unknown type for %q (%T)", bv, b)
	}

	// Guard the four integer-division paths: dividing by an integer zero
	// would otherwise panic at runtime.
	if ac != 2 && bc != 2 {
		if (bc == 0 && bv.Int() == 0) || (bc == 1 && bv.Uint() == 0) {
			return nil, fmt.Errorf("divide: division by zero")
		}
	}

	switch {
	case ac == 0 && bc == 0:
		return av.Int() / bv.Int(), nil
	case ac == 0 && bc == 1:
		return av.Int() / int64(bv.Uint()), nil
	case ac == 0:
		return float64(av.Int()) / bv.Float(), nil
	case ac == 1 && bc == 0:
		return int64(av.Uint()) / bv.Int(), nil
	case ac == 1 && bc == 1:
		return av.Uint() / bv.Uint(), nil
	case ac == 1:
		return float64(av.Uint()) / bv.Float(), nil
	case bc == 0:
		return av.Float() / float64(bv.Int()), nil
	case bc == 1:
		return av.Float() / float64(bv.Uint()), nil
	default:
		return av.Float() / bv.Float(), nil
	}
}

73
src/pkg/tplx/conv.go Normal file
View File

@@ -0,0 +1,73 @@
package tplx
import (
"fmt"
"strconv"
)
// ToFloat64 coerces an arbitrary value to float64.
// Numeric types and bool convert directly (true=1, false=0). Strings are
// tried as float, then integer (base 0, so 0x/0b/octal work), then bool,
// then a fixed set of yes/no-style words; anything else is an error.
// Unknown types fall back to parsing their fmt.Sprint form.
func ToFloat64(val interface{}) (float64, error) {
	switch v := val.(type) {
	case float64:
		return v, nil
	case float32:
		return float64(v), nil
	case int:
		return float64(v), nil
	case int8:
		return float64(v), nil
	case int16:
		return float64(v), nil
	case int32:
		return float64(v), nil
	case int64:
		return float64(v), nil
	case uint:
		return float64(v), nil
	case uint8:
		return float64(v), nil
	case uint16:
		return float64(v), nil
	case uint32:
		return float64(v), nil
	case uint64:
		return float64(v), nil
	case bool:
		if v {
			return 1, nil
		}
		return 0, nil
	case string:
		if f, err := strconv.ParseFloat(v, 64); err == nil {
			return f, nil
		}
		// integer forms ParseFloat rejects (hex, binary, octal)
		if i, err := strconv.ParseInt(v, 0, 64); err == nil {
			return float64(i), nil
		}
		// boolean words recognized by strconv
		if b, err := strconv.ParseBool(v); err == nil {
			if b {
				return 1, nil
			}
			return 0, nil
		}
		// common status words mapped to 1/0
		switch v {
		case "Yes", "yes", "YES", "Y", "ON", "on", "On", "ok", "up":
			return 1, nil
		case "No", "no", "NO", "N", "OFF", "off", "Off", "fail", "err", "down":
			return 0, nil
		}
		return 0, fmt.Errorf("unparseable value %v", v)
	default:
		return strconv.ParseFloat(fmt.Sprint(v), 64)
	}
}

View File

@@ -8,20 +8,27 @@ import (
)
var TemplateFuncMap = template.FuncMap{
"escape": url.PathEscape,
"unescaped": Unescaped,
"urlconvert": Urlconvert,
"timeformat": Timeformat,
"timestamp": Timestamp,
"args": Args,
"reReplaceAll": ReReplaceAll,
"match": regexp.MatchString,
"toUpper": strings.ToUpper,
"toLower": strings.ToLower,
"contains": strings.Contains,
"humanize": Humanize,
"humanize1024": Humanize1024,
"humanizeDuration": HumanizeDuration,
"humanizePercentage": HumanizePercentage,
"humanizePercentageH": HumanizePercentageH,
"escape": url.PathEscape,
"unescaped": Unescaped,
"urlconvert": Urlconvert,
"timeformat": Timeformat,
"timestamp": Timestamp,
"args": Args,
"reReplaceAll": ReReplaceAll,
"match": regexp.MatchString,
"toUpper": strings.ToUpper,
"toLower": strings.ToLower,
"contains": strings.Contains,
"humanize": Humanize,
"humanize1024": Humanize1024,
"humanizeDuration": HumanizeDuration,
"humanizeDurationInterface": HumanizeDurationInterface,
"humanizePercentage": HumanizePercentage,
"humanizePercentageH": HumanizePercentageH,
"add": Add,
"sub": Subtract,
"mul": Multiply,
"div": Divide,
"now": Now,
"toString": ToString,
}

View File

@@ -1,7 +1,9 @@
package conv
import (
"fmt"
"math"
"strings"
"github.com/prometheus/common/model"
)
@@ -13,6 +15,12 @@ type Vector struct {
Value float64 `json:"value"`
}
// ReadableValue renders the vector's value with up to five decimal places,
// dropping trailing zeros and any dangling decimal point (e.g. 10.00000 -> "10").
func (v *Vector) ReadableValue() string {
	return strings.TrimRight(strings.TrimRight(fmt.Sprintf("%.5f", v.Value), "0"), ".")
}
func ConvertVectors(value model.Value) (lst []Vector) {
if value == nil {
return

View File

@@ -12,13 +12,17 @@ func AppendLabels(pt *prompb.TimeSeries, target *models.Target) {
return
}
labelKeys := make(map[string]struct{})
labelKeys := make(map[string]int)
for j := 0; j < len(pt.Labels); j++ {
labelKeys[pt.Labels[j].Name] = struct{}{}
labelKeys[pt.Labels[j].Name] = j
}
for key, value := range target.TagsMap {
if _, has := labelKeys[key]; has {
if index, has := labelKeys[key]; has {
// overwrite labels
if config.C.LabelRewrite {
pt.Labels[index].Value = value
}
continue
}
@@ -32,7 +36,7 @@ func AppendLabels(pt *prompb.TimeSeries, target *models.Target) {
if _, has := labelKeys[config.C.BusiGroupLabelKey]; has {
return
}
// 将业务组名称作为tag附加到数据上
if target.GroupId > 0 && len(config.C.BusiGroupLabelKey) > 0 {
bg := memsto.BusiGroupCache.GetByBusiGroupId(target.GroupId)
if bg == nil {

View File

@@ -0,0 +1,52 @@
package sender
import (
"strings"
"time"
"github.com/didi/nightingale/v5/src/pkg/poster"
"github.com/toolkits/pkg/logger"
)
// TelegramMessage is a notification to be fanned out to one or more
// Telegram targets.
type TelegramMessage struct {
	Text   string   // message body (sent with parse_mode "markdown")
	Tokens []string // each entry is "<botToken>/<chatID>" or a full https:// URL
}

// telegram is the JSON payload posted to the Telegram sendMessage endpoint.
type telegram struct {
	ParseMode string `json:"parse_mode"`
	Text      string `json:"text"`
}
// SendTelegram delivers message.Text to every token in message.Tokens.
// A token is either "<botToken>/<chatID>" (turned into a Bot API
// sendMessage URL) or a complete https:// URL used as-is. Failures are
// logged per token; the function never returns an error.
func SendTelegram(message TelegramMessage) {
	for i := 0; i < len(message.Tokens); i++ {
		// a valid token contains a "/" separator or is already a full URL
		if !strings.Contains(message.Tokens[i], "/") && !strings.HasPrefix(message.Tokens[i], "https://") {
			logger.Errorf("telegram_sender: result=fail invalid token=%s", message.Tokens[i])
			continue
		}
		var url string
		if strings.HasPrefix(message.Tokens[i], "https://") {
			// caller supplied the full endpoint URL
			url = message.Tokens[i]
		} else {
			// "<botToken>/<chatID>" form: build the Bot API URL
			array := strings.Split(message.Tokens[i], "/")
			if len(array) != 2 {
				logger.Errorf("telegram_sender: result=fail invalid token=%s", message.Tokens[i])
				continue
			}
			botToken := array[0]
			chatId := array[1]
			url = "https://api.telegram.org/bot" + botToken + "/sendMessage?chat_id=" + chatId
		}
		body := telegram{
			ParseMode: "markdown",
			Text:      message.Text,
		}
		// 5-second timeout; the trailing 3 is presumably a retry count —
		// TODO confirm against poster.PostJSON's signature.
		res, code, err := poster.PostJSON(url, time.Second*5, body, 3)
		if err != nil {
			logger.Errorf("telegram_sender: result=fail url=%s code=%d error=%v response=%s", url, code, err, string(res))
		} else {
			logger.Infof("telegram_sender: result=succ url=%s code=%d response=%s", url, code, string(res))
		}
	}
}

View File

@@ -19,6 +19,7 @@ import (
"github.com/didi/nightingale/v5/src/pkg/httpx"
"github.com/didi/nightingale/v5/src/pkg/logx"
"github.com/didi/nightingale/v5/src/pkg/ormx"
"github.com/didi/nightingale/v5/src/pkg/secu"
"github.com/didi/nightingale/v5/src/storage"
)
@@ -27,7 +28,68 @@ var (
once sync.Once
)
func MustLoad(fpaths ...string) {
// DealConfigCrypto decrypts every "{{cipher}}"-prefixed secret held in the
// global config C — the DB DSN, the redis/SMTP/webhook/ibex passwords and
// the basic-auth passwords of all readers and writers — using the given
// AES key. Plain-text values pass through unchanged (see secu.DealWithDecrypt).
// Any decryption failure is fatal: the error is printed and the process
// exits with status 1.
func DealConfigCrypto(key string) {
	decryptDsn, err := secu.DealWithDecrypt(C.DB.DSN, key)
	if err != nil {
		fmt.Println("failed to decrypt the db dsn", err)
		os.Exit(1)
	}
	C.DB.DSN = decryptDsn

	decryptRedisPwd, err := secu.DealWithDecrypt(C.Redis.Password, key)
	if err != nil {
		fmt.Println("failed to decrypt the redis password", err)
		os.Exit(1)
	}
	C.Redis.Password = decryptRedisPwd

	decryptSmtpPwd, err := secu.DealWithDecrypt(C.SMTP.Pass, key)
	if err != nil {
		fmt.Println("failed to decrypt the smtp password", err)
		os.Exit(1)
	}
	C.SMTP.Pass = decryptSmtpPwd

	decryptHookPwd, err := secu.DealWithDecrypt(C.Alerting.Webhook.BasicAuthPass, key)
	if err != nil {
		fmt.Println("failed to decrypt the alert webhook password", err)
		os.Exit(1)
	}
	C.Alerting.Webhook.BasicAuthPass = decryptHookPwd

	decryptIbexPwd, err := secu.DealWithDecrypt(C.Ibex.BasicAuthPass, key)
	if err != nil {
		fmt.Println("failed to decrypt the ibex password", err)
		os.Exit(1)
	}
	C.Ibex.BasicAuthPass = decryptIbexPwd

	// Fall back to the single Reader when no Readers list is configured.
	// NOTE(review): this mutates C.Readers as a side effect — confirm it does
	// not conflict with the identical fallback performed during reader init.
	if len(C.Readers) == 0 {
		C.Reader.ClusterName = C.ClusterName
		C.Readers = append(C.Readers, C.Reader)
	}

	for index, v := range C.Readers {
		decryptReaderPwd, err := secu.DealWithDecrypt(v.BasicAuthPass, key)
		if err != nil {
			fmt.Printf("failed to decrypt the reader password: %s , error: %s", v.BasicAuthPass, err.Error())
			os.Exit(1)
		}
		C.Readers[index].BasicAuthPass = decryptReaderPwd
	}

	for index, v := range C.Writers {
		decryptWriterPwd, err := secu.DealWithDecrypt(v.BasicAuthPass, key)
		if err != nil {
			fmt.Printf("failed to decrypt the writer password: %s , error: %s", v.BasicAuthPass, err.Error())
			os.Exit(1)
		}
		C.Writers[index].BasicAuthPass = decryptWriterPwd
	}
}
func MustLoad(key string, fpaths ...string) {
once.Do(func() {
loaders := []multiconfig.Loader{
&multiconfig.TagLoader{},
@@ -66,6 +128,8 @@ func MustLoad(fpaths ...string) {
}
m.MustLoad(C)
DealConfigCrypto(key)
if C.EngineDelay == 0 {
C.EngineDelay = 120
}
@@ -145,7 +209,7 @@ func MustLoad(fpaths ...string) {
}
if C.WriterOpt.QueueMaxSize <= 0 {
C.WriterOpt.QueueMaxSize = 100000
C.WriterOpt.QueueMaxSize = 10000000
}
if C.WriterOpt.QueuePopSize <= 0 {
@@ -153,10 +217,18 @@ func MustLoad(fpaths ...string) {
}
if C.WriterOpt.QueueCount <= 0 {
C.WriterOpt.QueueCount = 100
C.WriterOpt.QueueCount = 1000
}
for _, write := range C.Writers {
if C.WriterOpt.ShardingKey == "" {
C.WriterOpt.ShardingKey = "ident"
}
for i, write := range C.Writers {
if C.Writers[i].ClusterName == "" {
C.Writers[i].ClusterName = C.ClusterName
}
for _, relabel := range write.WriteRelabels {
regex, ok := relabel.Regex.(string)
if !ok {
@@ -190,11 +262,12 @@ func MustLoad(fpaths ...string) {
type Config struct {
RunMode string
ClusterName string
ClusterName string // 监控对象上报时,指定的集群名称
BusiGroupLabelKey string
EngineDelay int64
DisableUsageReport bool
ReaderFrom string
LabelRewrite bool
ForceUseServerTS bool
Log logx.Config
HTTP httpx.Config
@@ -208,10 +281,12 @@ type Config struct {
WriterOpt WriterGlobalOpt
Writers []WriterOptions
Reader PromOption
Readers []PromOption
Ibex Ibex
}
type WriterOptions struct {
ClusterName string
Url string
BasicAuthUser string
BasicAuthPass string
@@ -236,6 +311,7 @@ type WriterGlobalOpt struct {
QueueCount int
QueueMaxSize int
QueuePopSize int
ShardingKey string
}
type HeartbeatConfig struct {
@@ -255,6 +331,7 @@ type SMTPConfig struct {
}
type Alerting struct {
Timeout int64
TemplatesDir string
NotifyConcurrency int
NotifyBuiltinChannels []string

View File

@@ -1,59 +1,92 @@
package config
import (
"strings"
"sync"
"github.com/didi/nightingale/v5/src/models"
"github.com/didi/nightingale/v5/src/pkg/prom"
)
type PromClient struct {
prom.API
ClusterName string
type PromClientMap struct {
sync.RWMutex
Clients map[string]prom.API
}
var ReaderClient *PromClient = &PromClient{}
var ReaderClients = &PromClientMap{Clients: make(map[string]prom.API)}
func (pc *PromClient) Set(clusterName string, c prom.API) {
func (pc *PromClientMap) Set(clusterName string, c prom.API) {
if c == nil {
return
}
pc.Lock()
defer pc.Unlock()
pc.ClusterName = clusterName
pc.API = c
pc.Clients[clusterName] = c
}
func (pc *PromClient) Get() (string, prom.API) {
func (pc *PromClientMap) GetClusterNames() []string {
pc.RLock()
defer pc.RUnlock()
return pc.ClusterName, pc.API
var clusterNames []string
for k := range pc.Clients {
clusterNames = append(clusterNames, k)
}
return clusterNames
}
func (pc *PromClient) GetClusterName() string {
func (pc *PromClientMap) GetCli(cluster string) prom.API {
pc.RLock()
defer pc.RUnlock()
return pc.ClusterName
c := pc.Clients[cluster]
return c
}
func (pc *PromClient) GetCli() prom.API {
func (pc *PromClientMap) IsNil(cluster string) bool {
pc.RLock()
defer pc.RUnlock()
return pc.API
}
func (pc *PromClient) IsNil() bool {
if pc == nil {
c, exists := pc.Clients[cluster]
if !exists {
return true
}
pc.RLock()
defer pc.RUnlock()
return pc.API == nil
return c == nil
}
func (pc *PromClient) Reset() {
// Hit computes which of the currently registered clusters a rule applies to.
// A rule cluster of models.ClusterAll matches every registered cluster;
// otherwise the rule's whitespace-separated cluster list is intersected
// with the registered clusters.
func (pc *PromClientMap) Hit(cluster string) []string {
	pc.RLock()
	defer pc.RUnlock()
	clusters := make([]string, 0, len(pc.Clients))
	if cluster == models.ClusterAll {
		for c := range pc.Clients {
			clusters = append(clusters, c)
		}
		return clusters
	}
	ruleClusters := strings.Fields(cluster)
	for c := range pc.Clients {
		for _, rc := range ruleClusters {
			if rc == c {
				clusters = append(clusters, c)
				// break (not continue): a cluster name duplicated in the
				// rule's list must not be appended twice
				break
			}
		}
	}
	return clusters
}
func (pc *PromClientMap) Reset() {
pc.Lock()
defer pc.Unlock()
pc.ClusterName = ""
pc.API = nil
pc.Clients = make(map[string]prom.API)
}
// Del removes the client registered for the given cluster, if any.
func (pc *PromClientMap) Del(cluster string) {
	pc.Lock()
	defer pc.Unlock()
	delete(pc.Clients, cluster)
}

View File

@@ -1,8 +1,13 @@
package config
import "sync"
import (
"sync"
"github.com/didi/nightingale/v5/src/pkg/tls"
)
type PromOption struct {
ClusterName string
Url string
BasicAuthUser string
BasicAuthPass string
@@ -10,6 +15,9 @@ type PromOption struct {
Timeout int64
DialTimeout int64
UseTLS bool
tls.ClientConfig
MaxIdleConnsPerHost int
Headers []string
@@ -64,9 +72,9 @@ func (pos *PromOptionsStruct) Set(clusterName string, po PromOption) {
pos.Unlock()
}
func (pos *PromOptionsStruct) Sets(clusterName string, po PromOption) {
func (pos *PromOptionsStruct) Del(clusterName string) {
pos.Lock()
pos.Data = map[string]PromOption{clusterName: po}
delete(pos.Data, clusterName)
pos.Unlock()
}

View File

@@ -17,7 +17,19 @@ import (
func InitReader() error {
rf := strings.ToLower(strings.TrimSpace(C.ReaderFrom))
if rf == "" || rf == "config" {
return setClientFromPromOption(C.ClusterName, C.Reader)
if len(C.Readers) == 0 {
C.Reader.ClusterName = C.ClusterName
C.Readers = append(C.Readers, C.Reader)
}
for _, reader := range C.Readers {
err := setClientFromPromOption(reader.ClusterName, reader)
if err != nil {
logger.Errorf("failed to setClientFromPromOption: %v", err)
continue
}
}
return nil
}
if rf == "database" {
@@ -38,72 +50,97 @@ func initFromDatabase() error {
}
func loadFromDatabase() {
cluster, err := models.AlertingEngineGetCluster(C.Heartbeat.Endpoint)
clusters, err := models.AlertingEngineGetClusters(C.Heartbeat.Endpoint)
if err != nil {
logger.Errorf("failed to get current cluster, error: %v", err)
return
}
if cluster == "" {
ReaderClient.Reset()
if len(clusters) == 0 {
ReaderClients.Reset()
logger.Warning("no datasource binded to me")
return
}
ckey := "prom." + cluster + ".option"
cval, err := models.ConfigsGet(ckey)
if err != nil {
logger.Errorf("failed to get ckey: %s, error: %v", ckey, err)
return
}
if cval == "" {
ReaderClient.Reset()
return
}
var po PromOption
err = json.Unmarshal([]byte(cval), &po)
if err != nil {
logger.Errorf("failed to unmarshal PromOption: %s", err)
return
}
if ReaderClient.IsNil() {
// first time
if err = setClientFromPromOption(cluster, po); err != nil {
logger.Errorf("failed to setClientFromPromOption: %v", err)
return
newCluster := make(map[string]struct{})
for _, cluster := range clusters {
newCluster[cluster] = struct{}{}
ckey := "prom." + cluster + ".option"
cval, err := models.ConfigsGet(ckey)
if err != nil {
logger.Errorf("failed to get ckey: %s, error: %v", ckey, err)
continue
}
PromOptions.Sets(cluster, po)
return
}
localPo, has := PromOptions.Get(cluster)
if !has || !localPo.Equal(po) {
if err = setClientFromPromOption(cluster, po); err != nil {
logger.Errorf("failed to setClientFromPromOption: %v", err)
return
if cval == "" {
logger.Debugf("ckey: %s is empty", ckey)
continue
}
PromOptions.Sets(cluster, po)
return
var po PromOption
err = json.Unmarshal([]byte(cval), &po)
if err != nil {
logger.Errorf("failed to unmarshal PromOption: %s", err)
continue
}
if ReaderClients.IsNil(cluster) {
// first time
if err = setClientFromPromOption(cluster, po); err != nil {
logger.Errorf("failed to setClientFromPromOption: %v", err)
continue
}
logger.Info("setClientFromPromOption success: ", cluster)
PromOptions.Set(cluster, po)
continue
}
localPo, has := PromOptions.Get(cluster)
if !has || !localPo.Equal(po) {
if err = setClientFromPromOption(cluster, po); err != nil {
logger.Errorf("failed to setClientFromPromOption: %v", err)
continue
}
PromOptions.Set(cluster, po)
}
}
// delete useless cluster
oldClusters := ReaderClients.GetClusterNames()
for _, oldCluster := range oldClusters {
if _, has := newCluster[oldCluster]; !has {
ReaderClients.Del(oldCluster)
PromOptions.Del(oldCluster)
logger.Info("delete cluster: ", oldCluster)
}
}
}
func newClientFromPromOption(po PromOption) (api.Client, error) {
transport := &http.Transport{
// TLSClientConfig: tlsConfig,
Proxy: http.ProxyFromEnvironment,
DialContext: (&net.Dialer{
Timeout: time.Duration(po.DialTimeout) * time.Millisecond,
}).DialContext,
ResponseHeaderTimeout: time.Duration(po.Timeout) * time.Millisecond,
MaxIdleConnsPerHost: po.MaxIdleConnsPerHost,
}
if po.UseTLS {
tlsConfig, err := po.TLSConfig()
if err != nil {
logger.Errorf("new cluster %s fail: %v", po.Url, err)
return nil, err
}
transport.TLSClientConfig = tlsConfig
}
return api.NewClient(api.Config{
Address: po.Url,
RoundTripper: &http.Transport{
// TLSClientConfig: tlsConfig,
Proxy: http.ProxyFromEnvironment,
DialContext: (&net.Dialer{
Timeout: time.Duration(po.DialTimeout) * time.Millisecond,
}).DialContext,
ResponseHeaderTimeout: time.Duration(po.Timeout) * time.Millisecond,
MaxIdleConnsPerHost: po.MaxIdleConnsPerHost,
},
Address: po.Url,
RoundTripper: transport,
})
}
@@ -116,12 +153,18 @@ func setClientFromPromOption(clusterName string, po PromOption) error {
return fmt.Errorf("prometheus url is blank")
}
if strings.HasPrefix(po.Url, "https") {
po.UseTLS = true
po.InsecureSkipVerify = true
}
cli, err := newClientFromPromOption(po)
if err != nil {
return fmt.Errorf("failed to newClientFromPromOption: %v", err)
}
ReaderClient.Set(clusterName, prom.NewAPI(cli, prom.ClientOptions{
logger.Debugf("setClientFromPromOption: %s, %+v", clusterName, po)
ReaderClients.Set(clusterName, prom.NewAPI(cli, prom.ClientOptions{
BasicAuthUser: po.BasicAuthUser,
BasicAuthPass: po.BasicAuthPass,
Headers: po.Headers,

View File

@@ -45,7 +45,11 @@ func consume(events []interface{}, sema *semaphore.Semaphore) {
func consumeOne(event *models.AlertCurEvent) {
LogEvent(event, "consume")
if err := event.ParseRuleNote(); err != nil {
if err := event.ParseRule("rule_name"); err != nil {
event.RuleName = fmt.Sprintf("failed to parse rule name: %v", err)
}
if err := event.ParseRule("rule_note"); err != nil {
event.RuleNote = fmt.Sprintf("failed to parse rule note: %v", err)
}
@@ -72,9 +76,10 @@ func persist(event *models.AlertCurEvent) {
// 不管是告警还是恢复,全量告警里都要记录
if err := his.Add(); err != nil {
logger.Errorf(
"event_persist_his_fail: %v rule_id=%d hash=%s tags=%v timestamp=%d value=%s",
"event_persist_his_fail: %v rule_id=%d cluster:%s hash=%s tags=%v timestamp=%d value=%s",
err,
event.RuleId,
event.Cluster,
event.Hash,
event.TagsJSON,
event.TriggerTime,
@@ -97,9 +102,10 @@ func persist(event *models.AlertCurEvent) {
if event.Id > 0 {
if err := event.Add(); err != nil {
logger.Errorf(
"event_persist_cur_fail: %v rule_id=%d hash=%s tags=%v timestamp=%d value=%s",
"event_persist_cur_fail: %v rule_id=%d cluster:%s hash=%s tags=%v timestamp=%d value=%s",
err,
event.RuleId,
event.Cluster,
event.Hash,
event.TagsJSON,
event.TriggerTime,
@@ -122,9 +128,10 @@ func persist(event *models.AlertCurEvent) {
if event.Id > 0 {
if err := event.Add(); err != nil {
logger.Errorf(
"event_persist_cur_fail: %v rule_id=%d hash=%s tags=%v timestamp=%d value=%s",
"event_persist_cur_fail: %v rule_id=%d cluster:%s hash=%s tags=%v timestamp=%d value=%s",
err,
event.RuleId,
event.Cluster,
event.Hash,
event.TagsJSON,
event.TriggerTime,

View File

@@ -1,33 +0,0 @@
package engine
import (
"strconv"
"strings"
"time"
"github.com/didi/nightingale/v5/src/models"
)
// isNoneffective reports whether the rule should NOT fire at the given
// timestamp: the rule is disabled, the time of day falls outside the
// EnableStime..EnableEtime window (windows crossing midnight are handled),
// or the weekday is not in EnableDaysOfWeek.
func isNoneffective(timestamp int64, alertRule *models.AlertRule) bool {
	if alertRule.Disabled == 1 {
		return true
	}

	tm := time.Unix(timestamp, 0)
	triggerTime := tm.Format("15:04")
	triggerWeek := strconv.Itoa(int(tm.Weekday()))

	if alertRule.EnableStime <= alertRule.EnableEtime {
		if triggerTime < alertRule.EnableStime || triggerTime > alertRule.EnableEtime {
			return true
		}
	} else {
		// window crosses midnight, e.g. 22:00..06:00
		if triggerTime < alertRule.EnableStime && triggerTime > alertRule.EnableEtime {
			return true
		}
	}

	// time.Weekday uses 0 for Sunday while the config may use 7.
	// Normalize on a copy instead of mutating the (possibly shared/cached)
	// rule object, which the original code did.
	enableDays := strings.Replace(alertRule.EnableDaysOfWeek, "7", "0", 1)
	return !strings.Contains(enableDays, triggerWeek)
}

View File

@@ -5,13 +5,15 @@ import (
"fmt"
"time"
"github.com/toolkits/pkg/logger"
"github.com/didi/nightingale/v5/src/server/common/sender"
"github.com/didi/nightingale/v5/src/server/config"
promstat "github.com/didi/nightingale/v5/src/server/stat"
"github.com/toolkits/pkg/container/list"
"github.com/toolkits/pkg/logger"
)
var EventQueue = list.NewSafeListLimited(10000000)
func Start(ctx context.Context) error {
err := reloadTpls()
if err != nil {
@@ -22,7 +24,9 @@ func Start(ctx context.Context) error {
go loopConsume(ctx)
// filter my rules and start worker
go loopFilterRules(ctx)
//go loopFilterRules(ctx)
go ruleHolder.LoopSyncRules(ctx)
go reportQueueSize()
@@ -53,10 +57,7 @@ func Reload() {
func reportQueueSize() {
for {
time.Sleep(time.Second)
clusterName := config.ReaderClient.GetClusterName()
if clusterName == "" {
continue
}
promstat.GaugeAlertQueueSize.WithLabelValues(clusterName).Set(float64(EventQueue.Len()))
promstat.GaugeAlertQueueSize.Set(float64(EventQueue.Len()))
}
}

View File

@@ -17,11 +17,12 @@ func LogEvent(event *models.AlertCurEvent, location string, err ...error) {
}
logger.Infof(
"event(%s %s) %s: rule_id=%d %v%s@%d %s",
"event(%s %s) %s: rule_id=%d cluster:%s %v%s@%d %s",
event.Hash,
status,
location,
event.RuleId,
event.Cluster,
event.TagsJSON,
event.TriggerValue,
event.TriggerTime,

View File

@@ -1,73 +0,0 @@
package engine
import (
"github.com/didi/nightingale/v5/src/models"
"github.com/didi/nightingale/v5/src/server/memsto"
)
// IsMuted reports whether any mute rule of the event's busi-group matches
// the event. If the optional clock is passed it is used as the evaluation
// time; otherwise the event's TriggerTime is used.
func IsMuted(event *models.AlertCurEvent, clock ...int64) bool {
	mutes, has := memsto.AlertMuteCache.Gets(event.GroupId)
	if !has {
		return false
	}
	for _, mute := range mutes {
		if matchMute(event, mute, clock...) {
			return true
		}
	}
	return false
}
// matchMute reports whether a single mute rule matches the event: the rule
// must be enabled, the evaluation time (optional clock, else TriggerTime)
// must fall inside [Btime, Etime], and every tag filter must match.
func matchMute(event *models.AlertCurEvent, mute *models.AlertMute, clock ...int64) bool {
	if mute.Disabled == 1 {
		return false
	}
	ts := event.TriggerTime
	if len(clock) > 0 {
		ts = clock[0]
	}
	if ts < mute.Btime || ts > mute.Etime {
		return false
	}
	return matchTags(event.TagsMap, mute.ITags)
}
// matchTag evaluates one tag filter against a tag value. Supported
// operators: ==, !=, in / not in (set membership via Vset), and
// =~ / !~ (regular expression match).
func matchTag(value string, filter models.TagFilter) bool {
	switch filter.Func {
	case "==":
		return filter.Value == value
	case "!=":
		return filter.Value != value
	case "in":
		_, has := filter.Vset[value]
		return has
	case "not in":
		_, has := filter.Vset[value]
		return !has
	case "=~":
		return filter.Regexp.MatchString(value)
	case "!~":
		return !filter.Regexp.MatchString(value)
	}
	// unexpected operator: treat as no match
	return false
}
// matchTags reports whether the event's tags satisfy every filter:
// each filter's key must be present and its predicate must hold.
func matchTags(eventTagsMap map[string]string, itags []models.TagFilter) bool {
	for _, filter := range itags {
		value, has := eventTagsMap[filter.Key]
		if !has {
			return false
		}
		if !matchTag(value, filter) {
			return false
		}
	}
	return true
}

View File

@@ -0,0 +1,202 @@
package engine
import (
"strconv"
"strings"
"time"
"github.com/toolkits/pkg/logger"
"github.com/didi/nightingale/v5/src/models"
"github.com/didi/nightingale/v5/src/server/memsto"
)
// AlertMuteStrategies is the ordered set of mute checks applied to every
// event; the first strategy that reports muted suppresses the event.
var AlertMuteStrategies = AlertMuteStrategiesType{
	&TimeNonEffectiveMuteStrategy{},
	&IdentNotExistsMuteStrategy{},
	&BgNotMatchMuteStrategy{},
	&EventMuteStrategy{},
}
// AlertMuteStrategiesType is a list of mute strategies evaluated in order.
type AlertMuteStrategiesType []AlertMuteStrategy

// IsMuted reports whether any strategy in the list mutes the event
// (first match wins; the matching strategy is logged at debug level).
func (ss AlertMuteStrategiesType) IsMuted(rule *models.AlertRule, event *models.AlertCurEvent) bool {
	for _, s := range ss {
		if s.IsMuted(rule, event) {
			logger.Debugf("[%T] mute: rule:%+v event:%+v", s, rule, event)
			return true
		}
	}
	return false
}
// AlertMuteStrategy abstracts one event filter: returning true means the
// event should be suppressed (not alerted) for some reason.
type AlertMuteStrategy interface {
	IsMuted(rule *models.AlertRule, event *models.AlertCurEvent) bool
}
// TimeNonEffectiveMuteStrategy mutes events that fire while the rule is
// disabled or outside every one of the rule's configured effective windows
// (per-window day-of-week list plus start/end time of day).
type TimeNonEffectiveMuteStrategy struct{}

func (s *TimeNonEffectiveMuteStrategy) IsMuted(rule *models.AlertRule, event *models.AlertCurEvent) bool {
	if rule.Disabled == 1 {
		logger.Debugf("[%T] mute: rule_disabled:%d cluster:%s", s, rule.Id, event.Cluster)
		return true
	}

	tm := time.Unix(event.TriggerTime, 0)
	triggerTime := tm.Format("15:04")
	triggerWeek := strconv.Itoa(int(tm.Weekday()))

	enableStime := strings.Fields(rule.EnableStime)
	enableEtime := strings.Fields(rule.EnableEtime)
	enableDaysOfWeek := strings.Split(rule.EnableDaysOfWeek, ";")

	// The three lists are expected to have equal length; clamp to the
	// shortest so malformed configuration cannot cause an index-out-of-range
	// panic on enableStime[i]/enableEtime[i].
	length := len(enableDaysOfWeek)
	if len(enableStime) < length {
		length = len(enableStime)
	}
	if len(enableEtime) < length {
		length = len(enableEtime)
	}

	for i := 0; i < length; i++ {
		// time.Weekday uses 0 for Sunday while the config may use 7
		enableDaysOfWeek[i] = strings.Replace(enableDaysOfWeek[i], "7", "0", 1)
		if !strings.Contains(enableDaysOfWeek[i], triggerWeek) {
			continue
		}
		if enableStime[i] <= enableEtime[i] {
			if triggerTime < enableStime[i] || triggerTime > enableEtime[i] {
				continue
			}
		} else {
			// window crosses midnight, e.g. 22:00..06:00
			if triggerTime < enableStime[i] && triggerTime > enableEtime[i] {
				continue
			}
		}
		// current moment is inside one of the effective windows: don't mute
		return false
	}
	return true
}
// IdentNotExistsMuteStrategy filters by ident existence: target_up alerts
// whose ident no longer exists in the target cache are dropped.
type IdentNotExistsMuteStrategy struct{}

func (s *IdentNotExistsMuteStrategy) IsMuted(rule *models.AlertRule, event *models.AlertCurEvent) bool {
	ident, has := event.TagsMap["ident"]
	if !has {
		return false
	}
	_, exists := memsto.TargetCache.Get(ident)
	// If this is a target_up alert and the ident is gone, mute it.
	// Matching on the "target_up" substring in PromQl is admittedly crude,
	// but (per the original author) there is no better signal available.
	if !exists && strings.Contains(rule.PromQl, "target_up") {
		logger.Debugf("[%T] mute: rule_eval:%d cluster:%s ident:%s", s, rule.Id, event.Cluster, ident)
		return true
	}
	return false
}
// BgNotMatchMuteStrategy: when a rule is configured to alert only inside
// its own busi-group, mute events from machines belonging to other groups.
type BgNotMatchMuteStrategy struct{}

func (s *BgNotMatchMuteStrategy) IsMuted(rule *models.AlertRule, event *models.AlertCurEvent) bool {
	// BG-scoped alerting not enabled: never mute here
	if rule.EnableInBG == 0 {
		return false
	}

	ident, has := event.TagsMap["ident"]
	if !has {
		return false
	}

	target, exists := memsto.TargetCache.Get(ident)
	// For events carrying an ident, compare the ident's busi-group with the
	// rule's: a rule scoped to its own BG must not fire for other BGs' machines.
	if exists && target.GroupId != rule.GroupId {
		logger.Debugf("[%T] mute: rule_eval:%d cluster:%s", s, rule.Id, event.Cluster)
		return true
	}
	return false
}
// EventMuteStrategy mutes events that match an active alert-mute record of
// the event's busi group.
type EventMuteStrategy struct{}

// EventMuteStra is the shared singleton instance used by the engine.
var EventMuteStra = new(EventMuteStrategy)

// IsMuted reports whether any mute record of the event's group matches it.
func (s *EventMuteStrategy) IsMuted(rule *models.AlertRule, event *models.AlertCurEvent) bool {
	mutes, has := memsto.AlertMuteCache.Gets(event.GroupId)
	if !has {
		return false
	}

	for _, mute := range mutes {
		if matchMute(event, mute) {
			return true
		}
	}
	return false
}
// matchMute reports whether the mute record matches the event. The optional
// clock argument overrides the timestamp used for the time-window check;
// otherwise the event's TriggerTime field is used.
func matchMute(event *models.AlertCurEvent, mute *models.AlertMute, clock ...int64) bool {
	if mute.Disabled == 1 {
		return false
	}

	ts := event.TriggerTime
	if len(clock) > 0 {
		ts = clock[0]
	}

	// Unless the mute is global, the event's cluster must appear among the
	// clusters configured on the mute record; mute.Cluster is a string that
	// may combine several clusters, e.g. "cluster1 cluster2".
	if mute.Cluster != models.ClusterAll {
		matched := false
		for _, c := range strings.Fields(mute.Cluster) {
			if c == event.Cluster {
				matched = true
				break
			}
		}
		if !matched {
			return false
		}
	}

	// The mute only applies inside its [Btime, Etime] window.
	if ts < mute.Btime || ts > mute.Etime {
		return false
	}

	return matchTags(event.TagsMap, mute.ITags)
}
// matchTag evaluates a single tag filter against one tag value.
func matchTag(value string, filter models.TagFilter) bool {
	switch filter.Func {
	case "==":
		return value == filter.Value
	case "!=":
		return value != filter.Value
	case "in":
		_, ok := filter.Vset[value]
		return ok
	case "not in":
		_, ok := filter.Vset[value]
		return !ok
	case "=~":
		return filter.Regexp.MatchString(value)
	case "!~":
		return !filter.Regexp.MatchString(value)
	default:
		// unexpect func
		return false
	}
}
// matchTags reports whether the event's tags satisfy every filter. Filters
// are AND-ed; a filter whose key is absent from the event fails the match.
func matchTags(eventTagsMap map[string]string, itags []models.TagFilter) bool {
	for i := range itags {
		value, ok := eventTagsMap[itags[i].Key]
		if !ok || !matchTag(value, itags[i]) {
			return false
		}
	}
	return true
}

View File

@@ -126,6 +126,7 @@ func handleNotice(notice Notice, bs []byte) {
dingtalkset := make(map[string]struct{})
feishuset := make(map[string]struct{})
mmset := make(map[string]struct{})
telegramset := make(map[string]struct{})
for _, user := range notice.Event.NotifyUsersObj {
if user.Email != "" {
@@ -161,6 +162,11 @@ func handleNotice(notice Notice, bs []byte) {
if ret.Exists() {
mmset[ret.String()] = struct{}{}
}
ret = gjson.GetBytes(bs, "telegram_robot_token")
if ret.Exists() {
telegramset[ret.String()] = struct{}{}
}
}
phones := StringSetKeys(phoneset)
@@ -259,6 +265,23 @@ func handleNotice(notice Notice, bs []byte) {
Text: content,
Tokens: StringSetKeys(mmset),
})
case "telegram":
if len(telegramset) == 0 {
continue
}
if !slice.ContainsString(config.C.Alerting.NotifyBuiltinChannels, "telegram") {
continue
}
content, has := notice.Tpls["telegram.tpl"]
if !has {
content = "telegram.tpl not found"
}
sender.SendTelegram(sender.TelegramMessage{
Text: content,
Tokens: StringSetKeys(telegramset),
})
}
}
}
@@ -355,6 +378,20 @@ func handleSubscribe(event models.AlertCurEvent, sub *models.AlertSubscribe) {
return
}
// 如果不是全局的,判断 cluster
if sub.Cluster != models.ClusterAll {
// sub.Cluster 是一个字符串可能是多个cluster的组合比如"cluster1 cluster2"
clusters := strings.Fields(sub.Cluster)
cm := make(map[string]struct{}, len(clusters))
for i := 0; i < len(clusters); i++ {
cm[clusters[i]] = struct{}{}
}
if _, has := cm[event.Cluster]; !has {
return
}
}
if !matchTags(event.TagsMap, sub.ITags) {
return
}
@@ -398,6 +435,10 @@ func alertingCallScript(stdinBytes []byte) {
return
}
if config.C.Alerting.Timeout == 0 {
config.C.Alerting.Timeout = 30000
}
fpath := config.C.Alerting.CallScript.ScriptPath
cmd := exec.Command(fpath)
cmd.Stdin = bytes.NewReader(stdinBytes)
@@ -413,7 +454,7 @@ func alertingCallScript(stdinBytes []byte) {
return
}
err, isTimeout := sys.WrapTimeout(cmd, time.Duration(30)*time.Second)
err, isTimeout := sys.WrapTimeout(cmd, time.Duration(config.C.Alerting.Timeout)*time.Millisecond)
if isTimeout {
if err == nil {

View File

@@ -65,6 +65,7 @@ func notifyMaintainerWithBuiltin(title, msg, triggerTime string, users []*models
dingtalkset := make(map[string]struct{})
feishuset := make(map[string]struct{})
mmset := make(map[string]struct{})
telegramset := make(map[string]struct{})
for _, user := range users {
if user.Email != "" {
@@ -100,6 +101,11 @@ func notifyMaintainerWithBuiltin(title, msg, triggerTime string, users []*models
if ret.Exists() {
mmset[ret.String()] = struct{}{}
}
ret = gjson.GetBytes(bs, "telegram_robot_token")
if ret.Exists() {
telegramset[ret.String()] = struct{}{}
}
}
phones := StringSetKeys(phoneset)
@@ -152,6 +158,15 @@ func notifyMaintainerWithBuiltin(title, msg, triggerTime string, users []*models
Text: content,
Tokens: StringSetKeys(mmset),
})
case "telegram":
if len(telegramset) == 0 {
continue
}
content := "**Title: **" + title + "\n**Content: **" + msg + "\n**Time: **" + triggerTime
sender.SendTelegram(sender.TelegramMessage{
Text: content,
Tokens: StringSetKeys(telegramset),
})
}
}
}

View File

@@ -1,5 +0,0 @@
package engine
import "github.com/toolkits/pkg/container/list"
var EventQueue = list.NewSafeListLimited(10000000)

166
src/server/engine/rule.go Normal file
View File

@@ -0,0 +1,166 @@
package engine
import (
"context"
"fmt"
"strings"
"sync"
"time"
"github.com/didi/nightingale/v5/src/server/config"
"github.com/didi/nightingale/v5/src/server/memsto"
"github.com/didi/nightingale/v5/src/server/naming"
)
// RuleContext is the lifecycle contract shared by alerting and recording rule
// workers: Prepare loads initial state, Start launches the evaluation loop,
// Eval runs a single evaluation, Stop terminates the loop. Key identifies the
// rule+cluster pair; Hash covers the fields whose change requires a restart.
type RuleContext interface {
	Key() string
	Hash() string
	Prepare()
	Start()
	Eval()
	Stop()
}

// ruleHolder is the process-wide registry of running rule workers.
var ruleHolder = &RuleHolder{
	alertRules:         make(map[string]RuleContext),
	recordRules:        make(map[string]RuleContext),
	externalAlertRules: make(map[string]*AlertRuleContext),
}

// RuleHolder tracks the currently running rule contexts.
type RuleHolder struct {
	// externalLock guards externalAlertRules, which is also read from other
	// goroutines via GetExternalAlertRule.
	externalLock sync.RWMutex

	// key: hash
	alertRules map[string]RuleContext
	// key: hash
	recordRules map[string]RuleContext
	// key: key
	externalAlertRules map[string]*AlertRuleContext
}
// LoopSyncRules periodically reconciles the running alert/record rule workers
// with the rule caches until ctx is cancelled. It first sleeps EngineDelay
// seconds so the sample caches have time to warm up.
func (rh *RuleHolder) LoopSyncRules(ctx context.Context) {
	time.Sleep(time.Duration(config.C.EngineDelay) * time.Second)

	// Use a single ticker instead of time.After inside the loop: time.After
	// allocates a fresh timer on every iteration, while the ticker is
	// allocated once and stopped on exit.
	ticker := time.NewTicker(9000 * time.Millisecond)
	defer ticker.Stop()

	for {
		select {
		case <-ctx.Done():
			return
		case <-ticker.C:
			rh.SyncAlertRules()
			rh.SyncRecordRules()
		}
	}
}
// SyncAlertRules rebuilds the alert rule workers from the rule cache: new
// rule+cluster pairs are prepared and started; workers whose rule vanished or
// whose hash changed are stopped and removed. Rules not evaluated through the
// prometheus engine are registered separately as external rules, which are
// prepared but never started — their events are pushed in from outside.
func (rh *RuleHolder) SyncAlertRules() {
	ids := memsto.AlertRuleCache.GetRuleIds()
	alertRules := make(map[string]RuleContext)
	externalAllRules := make(map[string]*AlertRuleContext)
	for _, id := range ids {
		rule := memsto.AlertRuleCache.Get(id)
		if rule == nil {
			continue
		}

		// If the rule does not alert through the prometheus engine, create it
		// as an externalRule instead.
		if !rule.IsPrometheusRule() {
			ruleClusters := strings.Fields(rule.Cluster)
			for _, cluster := range ruleClusters {
				// hash ring not hit
				if !naming.ClusterHashRing.IsHit(cluster, fmt.Sprintf("%d", rule.Id), config.C.Heartbeat.Endpoint) {
					continue
				}

				externalRule := NewAlertRuleContext(rule, cluster)
				externalAllRules[externalRule.Key()] = externalRule
			}
			continue
		}

		ruleClusters := config.ReaderClients.Hit(rule.Cluster)
		for _, cluster := range ruleClusters {
			// hash ring not hit
			if !naming.ClusterHashRing.IsHit(cluster, fmt.Sprintf("%d", rule.Id), config.C.Heartbeat.Endpoint) {
				continue
			}

			alertRule := NewAlertRuleContext(rule, cluster)
			alertRules[alertRule.Hash()] = alertRule
		}
	}

	// Start workers that are new in this round.
	for hash, rule := range alertRules {
		if _, has := rh.alertRules[hash]; !has {
			rule.Prepare()
			rule.Start()
			rh.alertRules[hash] = rule
		}
	}

	// Stop workers whose rule disappeared or changed.
	for hash, rule := range rh.alertRules {
		if _, has := alertRules[hash]; !has {
			rule.Stop()
			delete(rh.alertRules, hash)
		}
	}

	// External rules are only registered (Prepare, no Start): their events
	// arrive on demand rather than from a polling loop.
	for hash, rule := range externalAllRules {
		rh.externalLock.Lock()
		if _, has := rh.externalAlertRules[hash]; !has {
			rule.Prepare()
			rh.externalAlertRules[hash] = rule
		}
		rh.externalLock.Unlock()
	}

	rh.externalLock.Lock()
	for hash := range rh.externalAlertRules {
		if _, has := externalAllRules[hash]; !has {
			delete(rh.externalAlertRules, hash)
		}
	}
	rh.externalLock.Unlock()
}
// SyncRecordRules rebuilds the recording rule workers from the cache: new
// rule+cluster pairs are prepared and started; workers whose rule vanished or
// whose hash changed are stopped and removed.
func (rh *RuleHolder) SyncRecordRules() {
	ids := memsto.RecordingRuleCache.GetRuleIds()
	recordRules := make(map[string]RuleContext)
	for _, id := range ids {
		rule := memsto.RecordingRuleCache.Get(id)
		if rule == nil {
			continue
		}
		ruleClusters := config.ReaderClients.Hit(rule.Cluster)
		for _, cluster := range ruleClusters {
			// only evaluate rules this endpoint owns on the hash ring
			if !naming.ClusterHashRing.IsHit(cluster, fmt.Sprintf("%d", rule.Id), config.C.Heartbeat.Endpoint) {
				continue
			}
			recordRule := NewRecordRuleContext(rule, cluster)
			recordRules[recordRule.Hash()] = recordRule
		}
	}

	// Start workers that are new in this round.
	for hash, rule := range recordRules {
		if _, has := rh.recordRules[hash]; !has {
			rule.Prepare()
			rule.Start()
			rh.recordRules[hash] = rule
		}
	}

	// Stop workers whose rule disappeared or changed.
	for hash, rule := range rh.recordRules {
		if _, has := recordRules[hash]; !has {
			rule.Stop()
			delete(rh.recordRules, hash)
		}
	}
}
// GetExternalAlertRule looks up the registered external alert rule worker for
// the given cluster and rule id.
func GetExternalAlertRule(cluster string, id int64) (*AlertRuleContext, bool) {
	ruleHolder.externalLock.RLock()
	defer ruleHolder.externalLock.RUnlock()

	rule, has := ruleHolder.externalAlertRules[fmt.Sprintf("alert-%s-%d", cluster, id)]
	return rule, has
}

View File

@@ -0,0 +1,300 @@
package engine
import (
"context"
"fmt"
"strings"
"time"
"github.com/prometheus/common/model"
"github.com/toolkits/pkg/logger"
"github.com/toolkits/pkg/str"
"github.com/didi/nightingale/v5/src/models"
"github.com/didi/nightingale/v5/src/pkg/prom"
"github.com/didi/nightingale/v5/src/server/common/conv"
"github.com/didi/nightingale/v5/src/server/config"
"github.com/didi/nightingale/v5/src/server/memsto"
promstat "github.com/didi/nightingale/v5/src/server/stat"
)
// AlertRuleContext is the per-(rule, cluster) evaluation worker for one alert
// rule: it owns the worker goroutine plus the pending and firing event state.
type AlertRuleContext struct {
	cluster string
	quit    chan struct{}

	rule *models.AlertRule
	// fires holds events that have fired and not yet recovered.
	fires *AlertCurEventMap
	// pendings holds events waiting out their "for" duration before firing.
	pendings *AlertCurEventMap
}

// NewAlertRuleContext builds a worker for rule on the given cluster; callers
// invoke Prepare and (for prometheus rules) Start before use.
func NewAlertRuleContext(rule *models.AlertRule, cluster string) *AlertRuleContext {
	return &AlertRuleContext{
		cluster: cluster,
		quit:    make(chan struct{}),
		rule:    rule,
	}
}

// RuleFromCache returns the freshest version of this rule from the cache;
// nil means the rule has been deleted.
func (arc *AlertRuleContext) RuleFromCache() *models.AlertRule {
	return memsto.AlertRuleCache.Get(arc.rule.Id)
}

// Key identifies this rule+cluster pair (also used for external rule lookup).
func (arc *AlertRuleContext) Key() string {
	return fmt.Sprintf("alert-%s-%d", arc.cluster, arc.rule.Id)
}

// Hash covers the fields whose change requires restarting the worker.
func (arc *AlertRuleContext) Hash() string {
	return str.MD5(fmt.Sprintf("%d_%d_%s_%s",
		arc.rule.Id,
		arc.rule.PromEvalInterval,
		arc.rule.PromQl,
		arc.cluster,
	))
}

// Prepare restores this rule's still-open events from the database so firing
// state survives a process restart.
func (arc *AlertRuleContext) Prepare() {
	arc.recoverAlertCurEventFromDb()
}
// Start launches the evaluation loop in its own goroutine: evaluate
// immediately, then every PromEvalInterval seconds (minimum 10), until Stop
// closes the quit channel.
func (arc *AlertRuleContext) Start() {
	logger.Infof("eval:%s started", arc.Key())
	interval := arc.rule.PromEvalInterval
	if interval <= 0 {
		// guard against zero/negative configuration: default to 10s
		interval = 10
	}
	go func() {
		for {
			select {
			case <-arc.quit:
				return
			default:
				arc.Eval()
				time.Sleep(time.Duration(interval) * time.Second)
			}
		}
	}()
}
// Eval runs one evaluation round: for prometheus rules, query the cluster's
// reader with the rule's promql and feed the resulting vectors into the
// alerting pipeline via HandleVectors.
func (arc *AlertRuleContext) Eval() {
	promql := strings.TrimSpace(arc.rule.PromQl)
	if promql == "" {
		logger.Errorf("rule_eval:%s promql is blank", arc.Key())
		return
	}

	if config.ReaderClients.IsNil(arc.cluster) {
		logger.Errorf("rule_eval:%s error reader client is nil", arc.Key())
		return
	}
	readerClient := config.ReaderClients.GetCli(arc.cluster)

	var value model.Value
	var err error

	cachedRule := arc.RuleFromCache()
	if cachedRule == nil {
		logger.Errorf("rule_eval:%s rule not found", arc.Key())
		return
	}

	// If this ran in a single goroutine, cachedRule could simply be assigned
	// to arc.rule; but external rules call HandleVectors/RecoverSingle from
	// other goroutines, so the rule is re-fetched from cache where needed.
	// arc.rule = cachedRule
	// If the cached rule changed from prometheus to another type, there is no
	// need to query prometheus any more.
	if cachedRule.IsPrometheusRule() {
		var warnings prom.Warnings
		value, warnings, err = readerClient.Query(context.Background(), promql, time.Now())
		if err != nil {
			logger.Errorf("rule_eval:%s promql:%s, error:%v", arc.Key(), promql, err)
			//notifyToMaintainer(err, "failed to query prometheus")
			Report(QueryPrometheusError)
			return
		}

		if len(warnings) > 0 {
			logger.Errorf("rule_eval:%s promql:%s, warnings:%v", arc.Key(), promql, warnings)
			return
		}
		logger.Debugf("rule_eval:%s promql:%s, value:%v", arc.Key(), promql, value)
	}

	arc.HandleVectors(conv.ConvertVectors(value), "inner")
}
// HandleVectors turns query result vectors into alert events, applies the
// mute strategies, dispatches surviving events and finally recovers any
// previously firing series that no longer alert. from is "inner" for the
// engine's own evaluations; other values mark externally pushed vectors.
func (arc *AlertRuleContext) HandleVectors(vectors []conv.Vector, from string) {
	// Some rule settings (receivers, callbacks, ...) may have changed; such
	// changes do not restart the worker but do affect alert handling, so the
	// latest rule is fetched from memsto.AlertRuleCache and used instead.
	cachedRule := arc.RuleFromCache()
	if cachedRule == nil {
		logger.Errorf("rule_eval:%s rule not found", arc.Key())
		return
	}

	now := time.Now().Unix()
	alertingKeys := map[string]struct{}{}
	for _, vector := range vectors {
		alertVector := NewAlertVector(arc, cachedRule, vector, from)
		event := alertVector.BuildEvent(now)

		// A muted event is essentially still in a firing state, so its hash is
		// always recorded in alertingKeys to keep fired events from
		// auto-recovering while muted.
		alertingKeys[alertVector.Hash()] = struct{}{}
		if AlertMuteStrategies.IsMuted(cachedRule, event) {
			logger.Debugf("rule_eval:%s event:%+v is muted", arc.Key(), event)
			continue
		}

		arc.handleEvent(event)
	}

	arc.HandleRecover(alertingKeys, now)
}
// HandleRecover clears pending/firing state for every series hash that did
// not alert in the current round; firing events go through RecoverSingle.
func (arc *AlertRuleContext) HandleRecover(alertingKeys map[string]struct{}, now int64) {
	// Pending events that stopped alerting are simply forgotten.
	for _, hash := range arc.pendings.Keys() {
		if _, stillAlerting := alertingKeys[hash]; !stillAlerting {
			arc.pendings.Delete(hash)
		}
	}

	// Firing events that stopped alerting may produce a recovery event.
	for hash := range arc.fires.GetAll() {
		if _, stillAlerting := alertingKeys[hash]; !stillAlerting {
			arc.RecoverSingle(hash, now, nil)
		}
	}
}
// RecoverSingle recovers the fired event identified by hash, if present.
// value, when non-nil, becomes the recovery event's trigger value (used by
// external callers that know the recovering value).
func (arc *AlertRuleContext) RecoverSingle(hash string, now int64, value *string) {
	cachedRule := arc.RuleFromCache()
	if cachedRule == nil {
		logger.Errorf("rule_eval:%s rule not found", arc.Key())
		return
	}
	event, has := arc.fires.Get(hash)
	if !has {
		return
	}
	// If an observation period is configured, the event must not recover
	// until RecoverDuration seconds have passed since its last evaluation.
	if cachedRule.RecoverDuration > 0 && now-event.LastEvalTime < cachedRule.RecoverDuration {
		return
	}
	if value != nil {
		event.TriggerValue = *value
	}

	// No vector crossed the threshold for this series, so it is assumed
	// recovered — it is impossible to distinguish "value back below the
	// threshold" from "datapoints missing in prometheus", unfortunately.
	arc.fires.Delete(hash)
	arc.pendings.Delete(hash)

	// The recovery might be due to an edited promql, so the event must carry
	// the latest promql or users would be confused; in fact any rule field may
	// have changed, so refresh them all.
	cachedRule.UpdateEvent(event)
	event.IsRecovered = true
	event.LastEvalTime = now
	arc.pushEventToQueue(event)
}
// handleEvent fires the event immediately when the rule has no "for"
// duration; otherwise it tracks the event in pendings and only fires once the
// condition has held for PromForDuration seconds.
func (arc *AlertRuleContext) handleEvent(event *models.AlertCurEvent) {
	if event == nil {
		logger.Debugf("rule_eval:%s event:%+v is nil", arc.Key(), event)
		return
	}
	if event.PromForDuration == 0 {
		arc.fireEvent(event)
		return
	}

	var preTriggerTime int64
	preEvent, has := arc.pendings.Get(event.Hash)
	if has {
		arc.pendings.UpdateLastEvalTime(event.Hash, event.LastEvalTime)
		preTriggerTime = preEvent.TriggerTime
	} else {
		arc.pendings.Set(event.Hash, event)
		preTriggerTime = event.TriggerTime
	}

	// Fire once the condition has been continuously true for PromForDuration,
	// allowing one eval interval of slack.
	if event.LastEvalTime-preTriggerTime+int64(event.PromEvalInterval) >= int64(event.PromForDuration) {
		arc.fireEvent(event)
	}
}
// fireEvent pushes the event to the dispatch queue, honoring the rule's
// repeat-notification interval and maximum-notification cap for events that
// are already firing.
func (arc *AlertRuleContext) fireEvent(event *models.AlertCurEvent) {
	// As arc.rule maybe outdated, use rule from cache
	cachedRule := arc.RuleFromCache()
	if cachedRule == nil {
		logger.Errorf("rule_eval:%s event:%+v is nil", arc.Key(), event)
		return
	}
	if fired, has := arc.fires.Get(event.Hash); has {
		arc.fires.UpdateLastEvalTime(event.Hash, event.LastEvalTime)

		if cachedRule.NotifyRepeatStep == 0 {
			// Repeat notifications are disabled: nothing to do.
			logger.Debugf("rule_eval:%s event:%+v nothing to do", arc.Key(), event)
			return
		}

		// An alert was already sent; send again only after the repeat
		// interval (NotifyRepeatStep minutes) has elapsed.
		if event.LastEvalTime > fired.LastSentTime+int64(cachedRule.NotifyRepeatStep)*60 {
			if cachedRule.NotifyMaxNumber == 0 {
				// NotifyMaxNumber == 0 means no cap: keep notifying forever.
				event.NotifyCurNumber = fired.NotifyCurNumber + 1
				event.FirstTriggerTime = fired.FirstTriggerTime
				arc.pushEventToQueue(event)
			} else {
				// Capped: check how many notifications were already sent.
				if fired.NotifyCurNumber >= cachedRule.NotifyMaxNumber {
					logger.Debugf("rule_eval:%s event:%+v notify to max number", arc.Key(), event)
					return
				} else {
					event.NotifyCurNumber = fired.NotifyCurNumber + 1
					event.FirstTriggerTime = fired.FirstTriggerTime
					arc.pushEventToQueue(event)
				}
			}
		}
	} else {
		// First time this series fires.
		event.NotifyCurNumber = 1
		event.FirstTriggerTime = event.TriggerTime
		arc.pushEventToQueue(event)
	}
}
// pushEventToQueue records the event as firing (unless it is a recovery),
// bumps the alert counter and hands the event to the dispatch queue.
func (arc *AlertRuleContext) pushEventToQueue(event *models.AlertCurEvent) {
	if !event.IsRecovered {
		// A freshly (re)sent alert: remember when it was sent, keep in fires.
		event.LastSentTime = event.LastEvalTime
		arc.fires.Set(event.Hash, event)
	}

	promstat.CounterAlertsTotal.WithLabelValues(event.Cluster).Inc()
	LogEvent(event, "push_queue")

	ok := EventQueue.PushFront(event)
	if !ok {
		logger.Warningf("event_push_queue: queue is full, event:%+v", event)
	}
}

// Stop terminates the worker goroutine started by Start.
func (arc *AlertRuleContext) Stop() {
	logger.Infof("%s stopped", arc.Key())
	close(arc.quit)
}
// recoverAlertCurEventFromDb reloads this rule's still-open events from the
// database into the fires map so firing state survives a restart.
func (arc *AlertRuleContext) recoverAlertCurEventFromDb() {
	arc.pendings = NewAlertCurEventMap(nil)

	curEvents, err := models.AlertCurEventGetByRuleIdAndCluster(arc.rule.Id, arc.cluster)
	if err != nil {
		logger.Errorf("recover event from db for rule:%s failed, err:%s", arc.Key(), err)
		arc.fires = NewAlertCurEventMap(nil)
		return
	}

	fireMap := make(map[string]*models.AlertCurEvent, len(curEvents))
	for _, event := range curEvents {
		event.DB2Mem()
		fireMap[event.Hash] = event
	}
	arc.fires = NewAlertCurEventMap(fireMap)
}

View File

@@ -0,0 +1,189 @@
package engine
import (
"fmt"
"sort"
"strings"
"sync"
"github.com/toolkits/pkg/str"
"github.com/didi/nightingale/v5/src/models"
"github.com/didi/nightingale/v5/src/server/common/conv"
"github.com/didi/nightingale/v5/src/server/memsto"
)
// AlertCurEventMap is a mutex-guarded map of event hash -> current alert
// event, used for the pending and firing state of a rule worker.
type AlertCurEventMap struct {
	sync.RWMutex
	Data map[string]*models.AlertCurEvent
}

// SetAll replaces the whole underlying map.
func (a *AlertCurEventMap) SetAll(data map[string]*models.AlertCurEvent) {
	a.Lock()
	defer a.Unlock()
	a.Data = data
}

// Set stores value under key.
func (a *AlertCurEventMap) Set(key string, value *models.AlertCurEvent) {
	a.Lock()
	defer a.Unlock()
	a.Data[key] = value
}

// Get returns the event stored under key, if any.
func (a *AlertCurEventMap) Get(key string) (*models.AlertCurEvent, bool) {
	a.RLock()
	defer a.RUnlock()
	event, exists := a.Data[key]
	return event, exists
}

// UpdateLastEvalTime sets LastEvalTime on the stored event; no-op when the
// key is absent.
func (a *AlertCurEventMap) UpdateLastEvalTime(key string, lastEvalTime int64) {
	a.Lock()
	defer a.Unlock()
	event, exists := a.Data[key]
	if !exists {
		return
	}
	event.LastEvalTime = lastEvalTime
}

// Delete removes key from the map.
func (a *AlertCurEventMap) Delete(key string) {
	a.Lock()
	defer a.Unlock()
	delete(a.Data, key)
}

// Keys returns a snapshot of the map's keys.
func (a *AlertCurEventMap) Keys() []string {
	a.RLock()
	defer a.RUnlock()
	keys := make([]string, 0, len(a.Data))
	for k := range a.Data {
		keys = append(keys, k)
	}
	return keys
}

// GetAll returns a snapshot copy of the map. The previous implementation
// returned the live internal map, which callers then iterated after the read
// lock was released — a data race against concurrent Set/Delete from other
// goroutines. Copying under the lock makes iteration safe; callers only
// range over the result, so a snapshot is behaviorally compatible.
func (a *AlertCurEventMap) GetAll() map[string]*models.AlertCurEvent {
	a.RLock()
	defer a.RUnlock()
	snapshot := make(map[string]*models.AlertCurEvent, len(a.Data))
	for k, v := range a.Data {
		snapshot[k] = v
	}
	return snapshot
}

// NewAlertCurEventMap wraps data in an AlertCurEventMap; nil yields an empty
// ready-to-use map.
func NewAlertCurEventMap(data map[string]*models.AlertCurEvent) *AlertCurEventMap {
	if data == nil {
		data = make(map[string]*models.AlertCurEvent)
	}
	return &AlertCurEventMap{Data: data}
}
// AlertVector carries the alerting context of one alert event: the worker,
// the (fresh-from-cache) rule, the query result vector and the derived
// tag/target/group fields filled in by the constructor.
type AlertVector struct {
	Ctx    *AlertRuleContext
	Rule   *models.AlertRule
	Vector conv.Vector
	From   string

	// derived fields, populated by fillTags/mayHandleIdent/mayHandleGroup
	tagsMap    map[string]string
	tagsArr    []string
	target     string
	targetNote string
	groupName  string
}

// NewAlertVector builds the context for one result vector. rule is normally
// the fresh copy from cache; nil falls back to the worker's own rule.
func NewAlertVector(ctx *AlertRuleContext, rule *models.AlertRule, vector conv.Vector, from string) *AlertVector {
	if rule == nil {
		rule = ctx.rule
	}
	av := &AlertVector{
		Ctx:    ctx,
		Rule:   rule,
		Vector: vector,
		From:   from,
	}
	av.fillTags()
	av.mayHandleIdent()
	av.mayHandleGroup()
	return av
}

// Hash identifies the (rule, series, cluster) combination of this vector.
func (av *AlertVector) Hash() string {
	return str.MD5(fmt.Sprintf("%d_%s_%s", av.Rule.Id, av.Vector.Key, av.Ctx.cluster))
}
// fillTags merges the series labels, the rule's append-tags and the
// "rulename" pseudo tag into av.tagsMap (rule tags win on key collision), and
// materializes the sorted "k=v" list into av.tagsArr.
func (av *AlertVector) fillTags() {
	// handle series tags
	tagsMap := make(map[string]string)
	for label, value := range av.Vector.Labels {
		tagsMap[string(label)] = string(value)
	}

	// handle rule tags
	for _, tag := range av.Rule.AppendTagsJSON {
		arr := strings.SplitN(tag, "=", 2)
		if len(arr) != 2 {
			// A malformed append tag without '=' previously caused an
			// index-out-of-range panic on arr[1]; skip such entries instead.
			continue
		}
		tagsMap[arr[0]] = arr[1]
	}

	tagsMap["rulename"] = av.Rule.Name
	av.tagsMap = tagsMap

	// handle tagsArr
	av.tagsArr = labelMapToArr(tagsMap)
}
// mayHandleIdent resolves the target behind the "ident" tag, when present,
// capturing its ident and note for the event.
func (av *AlertVector) mayHandleIdent() {
	// handle ident
	if ident, has := av.tagsMap["ident"]; has {
		if target, exists := memsto.TargetCache.Get(ident); exists {
			av.target = target.Ident
			av.targetNote = target.Note
		}
	}
}

// mayHandleGroup resolves the rule's busi group name, when the group exists.
func (av *AlertVector) mayHandleGroup() {
	// handle bg
	bg := memsto.BusiGroupCache.GetByBusiGroupId(av.Rule.GroupId)
	if bg != nil {
		av.groupName = bg.Name
	}
}
// BuildEvent materializes the alert event for this vector. now becomes
// LastEvalTime for engine-internal ("inner") evaluations; events from other
// sources use the vector's own timestamp instead.
func (av *AlertVector) BuildEvent(now int64) *models.AlertCurEvent {
	event := av.Rule.GenerateNewEvent()
	event.TriggerTime = av.Vector.Timestamp
	event.TagsMap = av.tagsMap
	event.Cluster = av.Ctx.cluster
	event.Hash = av.Hash()
	event.TargetIdent = av.target
	event.TargetNote = av.targetNote
	event.TriggerValue = av.Vector.ReadableValue()
	event.TagsJSON = av.tagsArr
	event.GroupName = av.groupName
	// ",," separator — presumably so tag values containing a plain comma stay
	// parseable; TODO confirm against the consumer of event.Tags.
	event.Tags = strings.Join(av.tagsArr, ",,")
	event.IsRecovered = false
	if av.From == "inner" {
		event.LastEvalTime = now
	} else {
		event.LastEvalTime = event.TriggerTime
	}
	return event
}
// labelMapToArr renders the tag map as a sorted slice of "key=value" strings,
// so the output is deterministic regardless of map iteration order.
func labelMapToArr(m map[string]string) []string {
	pairs := make([]string, 0, len(m))
	for k, v := range m {
		pairs = append(pairs, fmt.Sprintf("%s=%s", k, v))
	}
	// Sorting a 0/1-element slice is a no-op, so sort unconditionally.
	sort.Strings(pairs)
	return pairs
}

View File

@@ -0,0 +1,100 @@
package engine
import (
"context"
"fmt"
"strings"
"time"
"github.com/toolkits/pkg/logger"
"github.com/toolkits/pkg/str"
"github.com/didi/nightingale/v5/src/models"
"github.com/didi/nightingale/v5/src/server/common/conv"
"github.com/didi/nightingale/v5/src/server/config"
"github.com/didi/nightingale/v5/src/server/writer"
)
// RecordRuleContext is the per-(rule, cluster) evaluation worker for one
// recording rule.
type RecordRuleContext struct {
	cluster string
	quit    chan struct{}

	rule *models.RecordingRule
}

// NewRecordRuleContext builds a worker for rule on the given cluster.
func NewRecordRuleContext(rule *models.RecordingRule, cluster string) *RecordRuleContext {
	return &RecordRuleContext{
		cluster: cluster,
		quit:    make(chan struct{}),
		rule:    rule,
	}
}

// Key identifies this rule+cluster pair in logs.
func (rrc *RecordRuleContext) Key() string {
	return fmt.Sprintf("record-%s-%d", rrc.cluster, rrc.rule.Id)
}

// Hash covers the fields whose change requires restarting the worker.
func (rrc *RecordRuleContext) Hash() string {
	return str.MD5(fmt.Sprintf("%d_%d_%s_%s",
		rrc.rule.Id,
		rrc.rule.PromEvalInterval,
		rrc.rule.PromQl,
		rrc.cluster,
	))
}

// Prepare is a no-op: recording rules keep no state across restarts.
func (rrc *RecordRuleContext) Prepare() {}
// Start launches the evaluation loop in its own goroutine: evaluate
// immediately, then every PromEvalInterval seconds (minimum 10), until Stop
// closes the quit channel.
func (rrc *RecordRuleContext) Start() {
	logger.Infof("eval:%s started", rrc.Key())
	interval := rrc.rule.PromEvalInterval
	if interval <= 0 {
		// guard against zero/negative configuration: default to 10s
		interval = 10
	}
	go func() {
		for {
			select {
			case <-rrc.quit:
				return
			default:
				rrc.Eval()
				time.Sleep(time.Duration(interval) * time.Second)
			}
		}
	}()
}
// Eval runs one evaluation round of the recording rule: query the cluster's
// reader with the rule's promql and push the resulting samples to the writers
// under the rule's metric name.
func (rrc *RecordRuleContext) Eval() {
	promql := strings.TrimSpace(rrc.rule.PromQl)
	if promql == "" {
		logger.Errorf("eval:%s promql is blank", rrc.Key())
		return
	}

	if config.ReaderClients.IsNil(rrc.cluster) {
		logger.Errorf("eval:%s reader client is nil", rrc.Key())
		return
	}

	value, warnings, err := config.ReaderClients.GetCli(rrc.cluster).Query(context.Background(), promql, time.Now())
	if err != nil {
		// Key() is a string; the original used %d here and below, which
		// rendered as "%!d(string=record-...)" in logs — fixed to %s.
		logger.Errorf("eval:%s promql:%s, error:%v", rrc.Key(), promql, err)
		return
	}

	if len(warnings) > 0 {
		logger.Errorf("eval:%s promql:%s, warnings:%v", rrc.Key(), promql, warnings)
		return
	}

	// Ranging over an empty slice is a no-op, so no length guard is needed.
	ts := conv.ConvertToTimeSeries(value, rrc.rule)
	for _, v := range ts {
		writer.Writers.PushSample(rrc.rule.Name, v, rrc.cluster)
	}
}
// Stop terminates the worker goroutine started by Start.
func (rrc *RecordRuleContext) Stop() {
	logger.Infof("%s stopped", rrc.Key())
	close(rrc.quit)
}

View File

@@ -1,757 +0,0 @@
package engine
import (
"context"
"fmt"
"log"
"sort"
"strings"
"sync"
"time"
"github.com/didi/nightingale/v5/src/server/writer"
"github.com/prometheus/common/model"
"github.com/toolkits/pkg/logger"
"github.com/toolkits/pkg/str"
"github.com/didi/nightingale/v5/src/models"
"github.com/didi/nightingale/v5/src/pkg/prom"
"github.com/didi/nightingale/v5/src/server/common/conv"
"github.com/didi/nightingale/v5/src/server/config"
"github.com/didi/nightingale/v5/src/server/memsto"
"github.com/didi/nightingale/v5/src/server/naming"
promstat "github.com/didi/nightingale/v5/src/server/stat"
)
// loopFilterRules (legacy, removed by this commit) re-partitions rules across
// the cluster every 9 seconds until ctx is cancelled, after an initial
// EngineDelay warm-up sleep.
func loopFilterRules(ctx context.Context) {
	// wait for samples
	time.Sleep(time.Duration(config.C.EngineDelay) * time.Second)

	duration := time.Duration(9000) * time.Millisecond
	for {
		select {
		case <-ctx.Done():
			return
		case <-time.After(duration):
			filterRules()
			filterRecordingRules()
		}
	}
}
// filterRules (legacy, removed by this commit) selects, via the consistent
// hash ring, the alert rule ids owned by this endpoint and rebuilds the local
// workers for them.
func filterRules() {
	ids := memsto.AlertRuleCache.GetRuleIds()

	logger.Debugf("AlertRuleCache.GetRuleIds successids.len: %d", len(ids))
	count := len(ids)
	mines := make([]int64, 0, count)

	// keep only the rule ids the hash ring assigns to this endpoint
	for i := 0; i < count; i++ {
		node, err := naming.HashRing.GetNode(fmt.Sprint(ids[i]))
		if err != nil {
			logger.Warning("failed to get node from hashring:", err)
			continue
		}

		if node == config.C.Heartbeat.Endpoint {
			mines = append(mines, ids[i])
		}
	}

	Workers.Build(mines)
	RuleEvalForExternal.Build()
}
type RuleEval struct {
rule *models.AlertRule
fires *AlertCurEventMap
pendings *AlertCurEventMap
quit chan struct{}
}
type AlertCurEventMap struct {
sync.RWMutex
Data map[string]*models.AlertCurEvent
}
func (a *AlertCurEventMap) SetAll(data map[string]*models.AlertCurEvent) {
a.Lock()
defer a.Unlock()
a.Data = data
}
func (a *AlertCurEventMap) Set(key string, value *models.AlertCurEvent) {
a.Lock()
defer a.Unlock()
a.Data[key] = value
}
func (a *AlertCurEventMap) Get(key string) (*models.AlertCurEvent, bool) {
a.RLock()
defer a.RUnlock()
event, exists := a.Data[key]
return event, exists
}
func (a *AlertCurEventMap) UpdateLastEvalTime(key string, lastEvalTime int64) {
a.Lock()
defer a.Unlock()
event, exists := a.Data[key]
if !exists {
return
}
event.LastEvalTime = lastEvalTime
}
func (a *AlertCurEventMap) Delete(key string) {
a.Lock()
defer a.Unlock()
delete(a.Data, key)
}
func (a *AlertCurEventMap) Keys() []string {
a.RLock()
defer a.RUnlock()
keys := make([]string, 0, len(a.Data))
for k := range a.Data {
keys = append(keys, k)
}
return keys
}
func (a *AlertCurEventMap) GetAll() map[string]*models.AlertCurEvent {
a.RLock()
defer a.RUnlock()
return a.Data
}
func NewAlertCurEventMap() *AlertCurEventMap {
return &AlertCurEventMap{
Data: make(map[string]*models.AlertCurEvent),
}
}
func (r *RuleEval) Stop() {
logger.Infof("rule_eval:%d stopping", r.RuleID())
close(r.quit)
}
func (r *RuleEval) RuleID() int64 {
return r.rule.Id
}
func (r *RuleEval) Start() {
logger.Infof("rule_eval:%d started", r.RuleID())
for {
select {
case <-r.quit:
// logger.Infof("rule_eval:%d stopped", r.RuleID())
return
default:
r.Work()
logger.Debugf("rule executed, rule_eval:%d", r.RuleID())
interval := r.rule.PromEvalInterval
if interval <= 0 {
interval = 10
}
time.Sleep(time.Duration(interval) * time.Second)
}
}
}
func (r *RuleEval) Work() {
promql := strings.TrimSpace(r.rule.PromQl)
if promql == "" {
logger.Errorf("rule_eval:%d promql is blank", r.RuleID())
return
}
if config.ReaderClient.IsNil() {
logger.Error("reader client is nil")
return
}
clusterName, readerClient := config.ReaderClient.Get()
var value model.Value
var err error
if r.rule.Algorithm == "" && (r.rule.Cate == "" || strings.ToLower(r.rule.Cate) == "prometheus") {
var warnings prom.Warnings
value, warnings, err = readerClient.Query(context.Background(), promql, time.Now())
if err != nil {
logger.Errorf("rule_eval:%d promql:%s, error:%v", r.RuleID(), promql, err)
//notifyToMaintainer(err, "failed to query prometheus")
Report(QueryPrometheusError)
return
}
if len(warnings) > 0 {
logger.Errorf("rule_eval:%d promql:%s, warnings:%v", r.RuleID(), promql, warnings)
return
}
logger.Debugf("rule_eval:%d promql:%s, value:%v", r.RuleID(), promql, value)
}
r.Judge(clusterName, conv.ConvertVectors(value))
}
type WorkersType struct {
rules map[string]*RuleEval
recordRules map[string]RecordingRuleEval
}
var Workers = &WorkersType{rules: make(map[string]*RuleEval), recordRules: make(map[string]RecordingRuleEval)}
func (ws *WorkersType) Build(rids []int64) {
rules := make(map[string]*models.AlertRule)
for i := 0; i < len(rids); i++ {
rule := memsto.AlertRuleCache.Get(rids[i])
if rule == nil {
continue
}
hash := str.MD5(fmt.Sprintf("%d_%d_%s",
rule.Id,
rule.PromEvalInterval,
rule.PromQl,
))
rules[hash] = rule
}
// stop old
for hash := range Workers.rules {
if _, has := rules[hash]; !has {
Workers.rules[hash].Stop()
delete(Workers.rules, hash)
}
}
// start new
for hash := range rules {
if _, has := Workers.rules[hash]; has {
// already exists
continue
}
elst, err := models.AlertCurEventGetByRule(rules[hash].Id)
if err != nil {
logger.Errorf("worker_build: AlertCurEventGetByRule failed: %v", err)
continue
}
firemap := make(map[string]*models.AlertCurEvent)
for i := 0; i < len(elst); i++ {
elst[i].DB2Mem()
firemap[elst[i].Hash] = elst[i]
}
fires := NewAlertCurEventMap()
fires.SetAll(firemap)
re := &RuleEval{
rule: rules[hash],
quit: make(chan struct{}),
fires: fires,
pendings: NewAlertCurEventMap(),
}
go re.Start()
Workers.rules[hash] = re
}
}
func (ws *WorkersType) BuildRe(rids []int64) {
rules := make(map[string]*models.RecordingRule)
for i := 0; i < len(rids); i++ {
rule := memsto.RecordingRuleCache.Get(rids[i])
if rule == nil {
continue
}
if rule.Disabled == 1 {
continue
}
hash := str.MD5(fmt.Sprintf("%d_%d_%s_%s",
rule.Id,
rule.PromEvalInterval,
rule.PromQl,
rule.AppendTags,
))
rules[hash] = rule
}
// stop old
for hash := range Workers.recordRules {
if _, has := rules[hash]; !has {
Workers.recordRules[hash].Stop()
delete(Workers.recordRules, hash)
}
}
// start new
for hash := range rules {
if _, has := Workers.recordRules[hash]; has {
// already exists
continue
}
re := RecordingRuleEval{
rule: rules[hash],
quit: make(chan struct{}),
}
go re.Start()
Workers.recordRules[hash] = re
}
}
func (r *RuleEval) Judge(clusterName string, vectors []conv.Vector) {
now := time.Now().Unix()
alertingKeys, ruleExists := r.MakeNewEvent("inner", now, clusterName, vectors)
if !ruleExists {
return
}
// handle recovered events
r.recoverRule(alertingKeys, now)
}
func (r *RuleEval) MakeNewEvent(from string, now int64, clusterName string, vectors []conv.Vector) (map[string]struct{}, bool) {
// 有可能rule的一些配置已经发生变化比如告警接收人、callbacks等
// 这些信息的修改是不会引起worker restart的但是确实会影响告警处理逻辑
// 所以这里直接从memsto.AlertRuleCache中获取并覆盖
curRule := memsto.AlertRuleCache.Get(r.rule.Id)
if curRule == nil {
return map[string]struct{}{}, false
}
r.rule = curRule
count := len(vectors)
alertingKeys := make(map[string]struct{})
for i := 0; i < count; i++ {
// compute hash
hash := str.MD5(fmt.Sprintf("%d_%s", r.rule.Id, vectors[i].Key))
alertingKeys[hash] = struct{}{}
// rule disabled in this time span?
if isNoneffective(vectors[i].Timestamp, r.rule) {
logger.Debugf("event_disabled: rule_eval:%d rule:%v timestamp:%d", r.rule.Id, r.rule, vectors[i].Timestamp)
continue
}
// handle series tags
tagsMap := make(map[string]string)
for label, value := range vectors[i].Labels {
tagsMap[string(label)] = string(value)
}
// handle rule tags
for _, tag := range r.rule.AppendTagsJSON {
arr := strings.SplitN(tag, "=", 2)
tagsMap[arr[0]] = arr[1]
}
tagsMap["rulename"] = r.rule.Name
// handle target note
targetIdent, has := tagsMap["ident"]
targetNote := ""
if has {
target, exists := memsto.TargetCache.Get(string(targetIdent))
if exists {
targetNote = target.Note
// 对于包含ident的告警事件check一下ident所属bg和rule所属bg是否相同
// 如果告警规则选择了只在本BG生效那其他BG的机器就不能因此规则产生告警
if r.rule.EnableInBG == 1 && target.GroupId != r.rule.GroupId {
logger.Debugf("event_enable_in_bg: rule_eval:%d", r.rule.Id)
continue
}
} else if strings.Contains(r.rule.PromQl, "target_up") {
// target 已经不存在了,可能是被删除了
continue
}
}
event := &models.AlertCurEvent{
TriggerTime: vectors[i].Timestamp,
TagsMap: tagsMap,
GroupId: r.rule.GroupId,
RuleName: r.rule.Name,
}
bg := memsto.BusiGroupCache.GetByBusiGroupId(r.rule.GroupId)
if bg != nil {
event.GroupName = bg.Name
}
// isMuted only need TriggerTime RuleName and TagsMap
if IsMuted(event) {
logger.Infof("event_muted: rule_id=%d %s", r.rule.Id, vectors[i].Key)
continue
}
tagsArr := labelMapToArr(tagsMap)
sort.Strings(tagsArr)
event.Cluster = clusterName
event.Cate = r.rule.Cate
event.Hash = hash
event.RuleId = r.rule.Id
event.RuleName = r.rule.Name
event.RuleNote = r.rule.Note
event.RuleProd = r.rule.Prod
event.RuleAlgo = r.rule.Algorithm
event.Severity = r.rule.Severity
event.PromForDuration = r.rule.PromForDuration
event.PromQl = r.rule.PromQl
event.PromEvalInterval = r.rule.PromEvalInterval
event.Callbacks = r.rule.Callbacks
event.CallbacksJSON = r.rule.CallbacksJSON
event.RunbookUrl = r.rule.RunbookUrl
event.NotifyRecovered = r.rule.NotifyRecovered
event.NotifyChannels = r.rule.NotifyChannels
event.NotifyChannelsJSON = r.rule.NotifyChannelsJSON
event.NotifyGroups = r.rule.NotifyGroups
event.NotifyGroupsJSON = r.rule.NotifyGroupsJSON
event.TargetIdent = string(targetIdent)
event.TargetNote = targetNote
event.TriggerValue = readableValue(vectors[i].Value)
event.TagsJSON = tagsArr
event.Tags = strings.Join(tagsArr, ",,")
event.IsRecovered = false
event.LastEvalTime = now
if from != "inner" {
event.LastEvalTime = event.TriggerTime
}
r.handleNewEvent(event)
}
return alertingKeys, true
}
// readableValue renders a float as a compact human-friendly string:
// the value is printed with 5 decimal places, then trailing zeros and a
// dangling decimal point are stripped (e.g. 1.50000 -> "1.5", 100 -> "100").
func readableValue(value float64) string {
	s := strings.TrimRight(fmt.Sprintf("%.5f", value), "0")
	return strings.TrimRight(s, ".")
}
// labelMapToArr flattens a label map into "key=value" strings. When there
// is more than one label the result is sorted so the output is
// deterministic regardless of map iteration order.
func labelMapToArr(m map[string]string) []string {
	arr := make([]string, 0, len(m))
	for k, v := range m {
		arr = append(arr, fmt.Sprintf("%s=%s", k, v))
	}
	if len(arr) > 1 {
		sort.Strings(arr)
	}
	return arr
}
// handleNewEvent decides whether a freshly triggered event fires right away
// or has to stay pending until the rule's "for" duration has elapsed.
func (r *RuleEval) handleNewEvent(event *models.AlertCurEvent) {
	// No "for" duration configured: fire immediately.
	if event.PromForDuration == 0 {
		r.fireEvent(event)
		return
	}

	// Track the event in the pendings map and determine when this series
	// first started triggering.
	preTriggerTime := event.TriggerTime
	if pre, has := r.pendings.Get(event.Hash); has {
		r.pendings.UpdateLastEvalTime(event.Hash, event.LastEvalTime)
		preTriggerTime = pre.TriggerTime
	} else {
		r.pendings.Set(event.Hash, event)
	}

	// Fire once the condition has held for the configured duration; one eval
	// interval of slack is granted so a single missed tick does not delay it.
	if event.LastEvalTime-preTriggerTime+int64(event.PromEvalInterval) >= int64(event.PromForDuration) {
		r.fireEvent(event)
	}
}
// fireEvent pushes an alerting event to the notify queue, applying the
// rule's repeat-notification policy (repeat interval and max count).
func (r *RuleEval) fireEvent(event *models.AlertCurEvent) {
	fired, has := r.fires.Get(event.Hash)
	if !has {
		// First time this series fires: always notify.
		event.NotifyCurNumber = 1
		event.FirstTriggerTime = event.TriggerTime
		r.pushEventToQueue(event)
		return
	}

	r.fires.UpdateLastEvalTime(event.Hash, event.LastEvalTime)

	// NotifyRepeatStep == 0 means repeat notification is disabled.
	if r.rule.NotifyRepeatStep == 0 {
		return
	}

	// Already notified before; only notify again once the repeat-silence
	// window (NotifyRepeatStep minutes) has passed.
	if event.LastEvalTime <= fired.LastSentTime+int64(r.rule.NotifyRepeatStep)*60 {
		return
	}

	// NotifyMaxNumber == 0 means unlimited; otherwise stop once the number
	// of notifications already sent reaches the cap.
	if r.rule.NotifyMaxNumber != 0 && fired.NotifyCurNumber >= r.rule.NotifyMaxNumber {
		return
	}

	event.NotifyCurNumber = fired.NotifyCurNumber + 1
	event.FirstTriggerTime = fired.FirstTriggerTime
	r.pushEventToQueue(event)
}
// recoverRule drops pending entries and recovers firing events whose hash
// is no longer present in the currently-alerting key set.
func (r *RuleEval) recoverRule(alertingKeys map[string]struct{}, now int64) {
	for _, hash := range r.pendings.Keys() {
		if _, alerting := alertingKeys[hash]; !alerting {
			r.pendings.Delete(hash)
		}
	}
	for hash, event := range r.fires.GetAll() {
		if _, alerting := alertingKeys[hash]; !alerting {
			r.recoverEvent(hash, event, now)
		}
	}
}
// RecoverEvent recovers a single firing event by hash, refreshing the rule
// from the cache first and recording the recovery trigger value.
func (r *RuleEval) RecoverEvent(hash string, now int64, value float64) {
	curRule := memsto.AlertRuleCache.Get(r.rule.Id)
	if curRule == nil {
		// Rule no longer exists; nothing to recover against.
		return
	}
	r.rule = curRule
	r.pendings.Delete(hash)

	if event, has := r.fires.Get(hash); has {
		event.TriggerValue = fmt.Sprintf("%.5f", value)
		r.recoverEvent(hash, event, now)
	}
}
// recoverEvent marks a firing event as recovered and pushes the recovery
// notification, refreshing rule-derived fields so the event reflects the
// latest rule definition.
func (r *RuleEval) recoverEvent(hash string, event *models.AlertCurEvent, now int64) {
	// If a recover-observation duration is configured, do not recover
	// immediately: wait until the event has been quiet long enough.
	if r.rule.RecoverDuration > 0 && now-event.LastEvalTime < r.rule.RecoverDuration {
		return
	}
	// No vector crossed the threshold, so assume this series recovered.
	// We cannot distinguish "value exists but is below the threshold" from
	// "data points were simply lost in prom", unfortunately.
	r.fires.Delete(hash)
	r.pendings.Delete(hash)
	event.IsRecovered = true
	event.LastEvalTime = now
	// The recovery may be due to an edited promql, so embed the latest
	// promql in the event to avoid confusing the user. In fact any rule
	// field may have changed, so refresh them all.
	event.RuleName = r.rule.Name
	event.RuleNote = r.rule.Note
	event.RuleProd = r.rule.Prod
	event.RuleAlgo = r.rule.Algorithm
	event.Severity = r.rule.Severity
	event.PromForDuration = r.rule.PromForDuration
	event.PromQl = r.rule.PromQl
	event.PromEvalInterval = r.rule.PromEvalInterval
	event.Callbacks = r.rule.Callbacks
	event.CallbacksJSON = r.rule.CallbacksJSON
	event.RunbookUrl = r.rule.RunbookUrl
	event.NotifyRecovered = r.rule.NotifyRecovered
	event.NotifyChannels = r.rule.NotifyChannels
	event.NotifyChannelsJSON = r.rule.NotifyChannelsJSON
	event.NotifyGroups = r.rule.NotifyGroups
	event.NotifyGroupsJSON = r.rule.NotifyGroupsJSON
	r.pushEventToQueue(event)
}
// pushEventToQueue records a still-firing event in the fires map, bumps the
// alert counter, and enqueues the event for notification handling.
func (r *RuleEval) pushEventToQueue(event *models.AlertCurEvent) {
	// Only non-recovered events are tracked as firing.
	if !event.IsRecovered {
		event.LastSentTime = event.LastEvalTime
		r.fires.Set(event.Hash, event)
	}

	promstat.CounterAlertsTotal.WithLabelValues(event.Cluster).Inc()
	LogEvent(event, "push_queue")

	if ok := EventQueue.PushFront(event); !ok {
		logger.Warningf("event_push_queue: queue is full")
	}
}
// filterRecordingRules selects, via the consistent hash ring, the recording
// rules owned by this instance and hands them to the worker builder.
func filterRecordingRules() {
	ids := memsto.RecordingRuleCache.GetRuleIds()
	mines := make([]int64, 0, len(ids))
	for _, id := range ids {
		node, err := naming.HashRing.GetNode(fmt.Sprint(id))
		if err != nil {
			logger.Warning("failed to get node from hashring:", err)
			continue
		}
		// Keep only the rules the hash ring assigns to this endpoint.
		if node == config.C.Heartbeat.Endpoint {
			mines = append(mines, id)
		}
	}
	Workers.BuildRe(mines)
}
// RecordingRuleEval periodically evaluates one recording rule and pushes the
// resulting samples to the remote writers.
type RecordingRuleEval struct {
	rule *models.RecordingRule // the recording rule being evaluated
	quit chan struct{}         // closed by Stop to terminate the Start loop
}
// Stop terminates the evaluation loop started by Start by closing the quit
// channel.
func (r RecordingRuleEval) Stop() {
	logger.Infof("recording_rule_eval:%d stopping", r.RuleID())
	close(r.quit)
}
// RuleID returns the id of the recording rule this evaluator runs.
func (r RecordingRuleEval) RuleID() int64 {
	return r.rule.Id
}
// Start runs the evaluation loop: evaluate the rule, then sleep for the
// rule's eval interval, until Stop closes the quit channel.
func (r RecordingRuleEval) Start() {
	logger.Infof("recording_rule_eval:%d started", r.RuleID())
	for {
		// Non-blocking check: exit as soon as the quit channel is closed.
		select {
		case <-r.quit:
			return
		default:
		}

		r.Work()

		// Fall back to 10s when the configured interval is unset/invalid.
		interval := r.rule.PromEvalInterval
		if interval <= 0 {
			interval = 10
		}
		time.Sleep(time.Duration(interval) * time.Second)
	}
}
// Work evaluates the recording rule once: it queries the reader with the
// rule's promql and pushes the converted time series to the writers.
// Blank promql, a missing reader client, query errors, and query warnings
// all abort the evaluation with a log line.
func (r RecordingRuleEval) Work() {
	promql := strings.TrimSpace(r.rule.PromQl)
	if promql == "" {
		logger.Errorf("recording_rule_eval:%d promql is blank", r.RuleID())
		return
	}

	if config.ReaderClient.IsNil() {
		// include the rule id so the message can be traced back to a rule,
		// consistent with the other log lines in this method
		log.Printf("recording_rule_eval:%d reader client is nil", r.RuleID())
		return
	}

	value, warnings, err := config.ReaderClient.GetCli().Query(context.Background(), promql, time.Now())
	if err != nil {
		logger.Errorf("recording_rule_eval:%d promql:%s, error:%v", r.RuleID(), promql, err)
		return
	}

	if len(warnings) > 0 {
		logger.Errorf("recording_rule_eval:%d promql:%s, warnings:%v", r.RuleID(), promql, warnings)
		return
	}

	// ranging over an empty slice is a no-op, so no length guard is needed
	for _, v := range conv.ConvertToTimeSeries(value, r.rule) {
		writer.Writers.PushSample(r.rule.Name, v)
	}
}
// RuleEvalForExternalType holds RuleEval instances keyed by rule id for
// events that arrive from external sources rather than from the internal
// evaluation loop. Access to rules is guarded by the embedded RWMutex.
type RuleEvalForExternalType struct {
	sync.RWMutex
	rules map[int64]RuleEval // rule id -> evaluator state
}
// RuleEvalForExternal is the package-level registry of evaluators used when
// handling externally pushed events.
var RuleEvalForExternal = RuleEvalForExternalType{rules: make(map[int64]RuleEval)}
// Build reconciles re.rules with the current contents of AlertRuleCache:
// evaluators for rules that no longer exist are dropped, and new rules get
// a RuleEval seeded with the events currently firing in the database.
//
// Fix: the original iterated re.rules in the "stop old" phase WITHOUT
// holding the lock (a data race with concurrent Get/Build callers), and
// took the lock around writes to the function-local rules map where no
// locking is needed. The lock now covers exactly the shared-state phases.
func (re *RuleEvalForExternalType) Build() {
	rids := memsto.AlertRuleCache.GetRuleIds()

	// Desired rule set; function-local, so no locking required here.
	rules := make(map[int64]*models.AlertRule, len(rids))
	for i := 0; i < len(rids); i++ {
		if rule := memsto.AlertRuleCache.Get(rids[i]); rule != nil {
			rules[rule.Id] = rule
		}
	}

	// Hold the write lock for the whole reconcile of re.rules. The original
	// code already held it across the DB calls in the "start new" phase, so
	// extending it over "stop old" does not lengthen the critical section
	// materially and removes the unlocked map iteration.
	re.Lock()
	defer re.Unlock()

	// stop old
	for rid := range re.rules {
		if _, has := rules[rid]; !has {
			delete(re.rules, rid)
		}
	}

	// start new
	for rid := range rules {
		if _, has := re.rules[rid]; has {
			// already exists
			continue
		}

		// Seed the fires map with the events currently stored as firing.
		elst, err := models.AlertCurEventGetByRule(rules[rid].Id)
		if err != nil {
			logger.Errorf("worker_build: AlertCurEventGetByRule failed: %v", err)
			continue
		}

		firemap := make(map[string]*models.AlertCurEvent, len(elst))
		for i := 0; i < len(elst); i++ {
			elst[i].DB2Mem()
			firemap[elst[i].Hash] = elst[i]
		}

		fires := NewAlertCurEventMap()
		fires.SetAll(firemap)

		re.rules[rid] = RuleEval{
			rule:     rules[rid],
			quit:     make(chan struct{}),
			fires:    fires,
			pendings: NewAlertCurEventMap(),
		}
	}
}
// Get returns the evaluator registered for rule id rid. It reports false
// when the rule is no longer in the cache or no evaluator is registered.
func (re *RuleEvalForExternalType) Get(rid int64) (RuleEval, bool) {
	// The rule must still exist in the cache; otherwise report absence.
	if memsto.AlertRuleCache.Get(rid) == nil {
		return RuleEval{}, false
	}

	re.RLock()
	defer re.RUnlock()
	// A missing key yields the zero RuleEval and false, matching the
	// not-found contract directly.
	ev, ok := re.rules[rid]
	return ev, ok
}

View File

@@ -41,7 +41,7 @@ func toRedis() {
return
}
if config.ReaderClient.IsNil() {
if config.ReaderClients.IsNil(config.C.ClusterName) {
return
}
@@ -53,7 +53,7 @@ func toRedis() {
Idents.Remove(key)
} else {
// use now as timestamp to redis
err := storage.Redis.HSet(context.Background(), redisKey(config.ReaderClient.GetClusterName()), key, now).Err()
err := storage.Redis.HSet(context.Background(), redisKey(config.C.ClusterName), key, now).Err()
if err != nil {
logger.Errorf("redis hset idents failed: %v", err)
}
@@ -96,7 +96,8 @@ func loopPushMetrics(ctx context.Context) {
}
func pushMetrics() {
isLeader, err := naming.IamLeader()
clusterName := config.C.ClusterName
isLeader, err := naming.IamLeader(clusterName)
if err != nil {
logger.Errorf("handle_idents: %v", err)
return
@@ -107,12 +108,6 @@ func pushMetrics() {
return
}
clusterName := config.ReaderClient.GetClusterName()
if clusterName == "" {
logger.Warning("cluster name is blank")
return
}
// get all the target heartbeat timestamp
ret, err := storage.Redis.HGetAll(context.Background(), redisKey(clusterName)).Result()
if err != nil {

View File

@@ -99,26 +99,26 @@ func loopSyncAlertMutes() {
func syncAlertMutes() error {
start := time.Now()
clusterName := config.ReaderClient.GetClusterName()
if clusterName == "" {
AlertMuteCache.Reset()
logger.Warning("cluster name is blank")
clusterNames := config.ReaderClients.GetClusterNames()
if len(clusterNames) == 0 {
AlertRuleCache.Reset()
logger.Warning("cluster is blank")
return nil
}
stat, err := models.AlertMuteStatistics(clusterName)
stat, err := models.AlertMuteStatistics("")
if err != nil {
return errors.WithMessage(err, "failed to exec AlertMuteStatistics")
}
if !AlertMuteCache.StatChanged(stat.Total, stat.LastUpdated) {
promstat.GaugeCronDuration.WithLabelValues(clusterName, "sync_alert_mutes").Set(0)
promstat.GaugeSyncNumber.WithLabelValues(clusterName, "sync_alert_mutes").Set(0)
promstat.GaugeCronDuration.WithLabelValues("sync_alert_mutes").Set(0)
promstat.GaugeSyncNumber.WithLabelValues("sync_alert_mutes").Set(0)
logger.Debug("alert mutes not changed")
return nil
}
lst, err := models.AlertMuteGetsByCluster(clusterName)
lst, err := models.AlertMuteGetsByCluster("")
if err != nil {
return errors.WithMessage(err, "failed to exec AlertMuteGetsByCluster")
}
@@ -138,8 +138,8 @@ func syncAlertMutes() error {
AlertMuteCache.Set(oks, stat.Total, stat.LastUpdated)
ms := time.Since(start).Milliseconds()
promstat.GaugeCronDuration.WithLabelValues(clusterName, "sync_alert_mutes").Set(float64(ms))
promstat.GaugeSyncNumber.WithLabelValues(clusterName, "sync_alert_mutes").Set(float64(len(lst)))
promstat.GaugeCronDuration.WithLabelValues("sync_alert_mutes").Set(float64(ms))
promstat.GaugeSyncNumber.WithLabelValues("sync_alert_mutes").Set(float64(len(lst)))
logger.Infof("timer: sync mutes done, cost: %dms, number: %d", ms, len(lst))
return nil

View File

@@ -96,26 +96,26 @@ func loopSyncAlertRules() {
func syncAlertRules() error {
start := time.Now()
clusterName := config.ReaderClient.GetClusterName()
if clusterName == "" {
clusterNames := config.ReaderClients.GetClusterNames()
if len(clusterNames) == 0 {
AlertRuleCache.Reset()
logger.Warning("cluster name is blank")
logger.Warning("cluster is blank")
return nil
}
stat, err := models.AlertRuleStatistics(clusterName)
stat, err := models.AlertRuleStatistics("")
if err != nil {
return errors.WithMessage(err, "failed to exec AlertRuleStatistics")
}
if !AlertRuleCache.StatChanged(stat.Total, stat.LastUpdated) {
promstat.GaugeCronDuration.WithLabelValues(clusterName, "sync_alert_rules").Set(0)
promstat.GaugeSyncNumber.WithLabelValues(clusterName, "sync_alert_rules").Set(0)
promstat.GaugeCronDuration.WithLabelValues("sync_alert_rules").Set(0)
promstat.GaugeSyncNumber.WithLabelValues("sync_alert_rules").Set(0)
logger.Debug("alert rules not changed")
return nil
}
lst, err := models.AlertRuleGetsByCluster(clusterName)
lst, err := models.AlertRuleGetsByCluster("")
if err != nil {
return errors.WithMessage(err, "failed to exec AlertRuleGetsByCluster")
}
@@ -128,8 +128,8 @@ func syncAlertRules() error {
AlertRuleCache.Set(m, stat.Total, stat.LastUpdated)
ms := time.Since(start).Milliseconds()
promstat.GaugeCronDuration.WithLabelValues(clusterName, "sync_alert_rules").Set(float64(ms))
promstat.GaugeSyncNumber.WithLabelValues(clusterName, "sync_alert_rules").Set(float64(len(m)))
promstat.GaugeCronDuration.WithLabelValues("sync_alert_rules").Set(float64(ms))
promstat.GaugeSyncNumber.WithLabelValues("sync_alert_rules").Set(float64(len(m)))
logger.Infof("timer: sync rules done, cost: %dms, number: %d", ms, len(m))
return nil

View File

@@ -102,26 +102,26 @@ func loopSyncAlertSubscribes() {
func syncAlertSubscribes() error {
start := time.Now()
clusterName := config.ReaderClient.GetClusterName()
if clusterName == "" {
clusterNames := config.ReaderClients.GetClusterNames()
if len(clusterNames) == 0 {
AlertSubscribeCache.Reset()
logger.Warning("cluster name is blank")
logger.Warning("cluster is blank")
return nil
}
stat, err := models.AlertSubscribeStatistics(clusterName)
stat, err := models.AlertSubscribeStatistics("")
if err != nil {
return errors.WithMessage(err, "failed to exec AlertSubscribeStatistics")
}
if !AlertSubscribeCache.StatChanged(stat.Total, stat.LastUpdated) {
promstat.GaugeCronDuration.WithLabelValues(clusterName, "sync_alert_subscribes").Set(0)
promstat.GaugeSyncNumber.WithLabelValues(clusterName, "sync_alert_subscribes").Set(0)
promstat.GaugeCronDuration.WithLabelValues("sync_alert_subscribes").Set(0)
promstat.GaugeSyncNumber.WithLabelValues("sync_alert_subscribes").Set(0)
logger.Debug("alert subscribes not changed")
return nil
}
lst, err := models.AlertSubscribeGetsByCluster(clusterName)
lst, err := models.AlertSubscribeGetsByCluster("")
if err != nil {
return errors.WithMessage(err, "failed to exec AlertSubscribeGetsByCluster")
}
@@ -141,8 +141,8 @@ func syncAlertSubscribes() error {
AlertSubscribeCache.Set(subs, stat.Total, stat.LastUpdated)
ms := time.Since(start).Milliseconds()
promstat.GaugeCronDuration.WithLabelValues(clusterName, "sync_alert_subscribes").Set(float64(ms))
promstat.GaugeSyncNumber.WithLabelValues(clusterName, "sync_alert_subscribes").Set(float64(len(lst)))
promstat.GaugeCronDuration.WithLabelValues("sync_alert_subscribes").Set(float64(ms))
promstat.GaugeSyncNumber.WithLabelValues("sync_alert_subscribes").Set(float64(len(lst)))
logger.Infof("timer: sync subscribes done, cost: %dms, number: %d", ms, len(lst))
return nil

View File

@@ -9,7 +9,6 @@ import (
"github.com/toolkits/pkg/logger"
"github.com/didi/nightingale/v5/src/models"
"github.com/didi/nightingale/v5/src/server/config"
promstat "github.com/didi/nightingale/v5/src/server/stat"
)
@@ -79,13 +78,9 @@ func syncBusiGroups() error {
return errors.WithMessage(err, "failed to exec BusiGroupStatistics")
}
clusterName := config.ReaderClient.GetClusterName()
if !BusiGroupCache.StatChanged(stat.Total, stat.LastUpdated) {
if clusterName != "" {
promstat.GaugeCronDuration.WithLabelValues(clusterName, "sync_busi_groups").Set(0)
promstat.GaugeSyncNumber.WithLabelValues(clusterName, "sync_busi_groups").Set(0)
}
promstat.GaugeCronDuration.WithLabelValues("sync_busi_groups").Set(0)
promstat.GaugeSyncNumber.WithLabelValues("sync_busi_groups").Set(0)
logger.Debug("busi_group not changed")
return nil
@@ -99,10 +94,8 @@ func syncBusiGroups() error {
BusiGroupCache.Set(m, stat.Total, stat.LastUpdated)
ms := time.Since(start).Milliseconds()
if clusterName != "" {
promstat.GaugeCronDuration.WithLabelValues(clusterName, "sync_busi_groups").Set(float64(ms))
promstat.GaugeSyncNumber.WithLabelValues(clusterName, "sync_busi_groups").Set(float64(len(m)))
}
promstat.GaugeCronDuration.WithLabelValues("sync_busi_groups").Set(float64(ms))
promstat.GaugeSyncNumber.WithLabelValues("sync_busi_groups").Set(float64(len(m)))
logger.Infof("timer: sync busi groups done, cost: %dms, number: %d", ms, len(m))

View File

@@ -0,0 +1,44 @@
package memsto
import (
"sync"
)
type LogSampleCacheType struct {
sync.RWMutex
m map[string]map[string]struct{} // map[labelName]map[labelValue]struct{}
}
var LogSampleCache = LogSampleCacheType{
m: make(map[string]map[string]struct{}),
}
func (l *LogSampleCacheType) Set(m map[string][]string) {
l.Lock()
for k, v := range m {
l.m[k] = make(map[string]struct{})
for _, vv := range v {
l.m[k][vv] = struct{}{}
}
}
l.Unlock()
}
func (l *LogSampleCacheType) Get() map[string]map[string]struct{} {
l.RLock()
defer l.RUnlock()
return l.m
}
func (l *LogSampleCacheType) Clean() {
l.Lock()
l.m = make(map[string]map[string]struct{})
l.Unlock()
}
func (l *LogSampleCacheType) Len() int {
l.RLock()
defer l.RUnlock()
return len(l.m)
}

View File

@@ -95,21 +95,27 @@ func loopSyncRecordingRules() {
func syncRecordingRules() error {
start := time.Now()
clusterName := config.ReaderClient.GetClusterName()
if clusterName == "" {
clusterNames := config.ReaderClients.GetClusterNames()
if len(clusterNames) == 0 {
RecordingRuleCache.Reset()
logger.Warning("cluster name is blank")
logger.Warning("cluster is blank")
return nil
}
var clusterName string
// 只有一个集群使用单集群模式如果大于1个集群则获取全部的规则
if len(clusterNames) == 1 {
clusterName = clusterNames[0]
}
stat, err := models.RecordingRuleStatistics(clusterName)
if err != nil {
return errors.WithMessage(err, "failed to exec RecordingRuleStatistics")
}
if !RecordingRuleCache.StatChanged(stat.Total, stat.LastUpdated) {
promstat.GaugeCronDuration.WithLabelValues(clusterName, "sync_recording_rules").Set(0)
promstat.GaugeSyncNumber.WithLabelValues(clusterName, "sync_recording_rules").Set(0)
promstat.GaugeCronDuration.WithLabelValues("sync_recording_rules").Set(0)
promstat.GaugeSyncNumber.WithLabelValues("sync_recording_rules").Set(0)
logger.Debug("recoding rules not changed")
return nil
}
@@ -127,8 +133,8 @@ func syncRecordingRules() error {
RecordingRuleCache.Set(m, stat.Total, stat.LastUpdated)
ms := time.Since(start).Milliseconds()
promstat.GaugeCronDuration.WithLabelValues(clusterName, "sync_recording_rules").Set(float64(ms))
promstat.GaugeSyncNumber.WithLabelValues(clusterName, "sync_recording_rules").Set(float64(len(m)))
promstat.GaugeCronDuration.WithLabelValues("sync_recording_rules").Set(float64(ms))
promstat.GaugeSyncNumber.WithLabelValues("sync_recording_rules").Set(float64(len(m)))
logger.Infof("timer: sync recording rules done, cost: %dms, number: %d", ms, len(m))
return nil

View File

@@ -103,7 +103,7 @@ func loopSyncTargets() {
func syncTargets() error {
start := time.Now()
clusterName := config.ReaderClient.GetClusterName()
clusterName := config.C.ClusterName
if clusterName == "" {
TargetCache.Reset()
logger.Warning("cluster name is blank")
@@ -116,8 +116,8 @@ func syncTargets() error {
}
if !TargetCache.StatChanged(stat.Total, stat.LastUpdated) {
promstat.GaugeCronDuration.WithLabelValues(clusterName, "sync_targets").Set(0)
promstat.GaugeSyncNumber.WithLabelValues(clusterName, "sync_targets").Set(0)
promstat.GaugeCronDuration.WithLabelValues("sync_targets").Set(0)
promstat.GaugeSyncNumber.WithLabelValues("sync_targets").Set(0)
logger.Debug("targets not changed")
return nil
}
@@ -145,8 +145,8 @@ func syncTargets() error {
TargetCache.Set(m, stat.Total, stat.LastUpdated)
ms := time.Since(start).Milliseconds()
promstat.GaugeCronDuration.WithLabelValues(clusterName, "sync_targets").Set(float64(ms))
promstat.GaugeSyncNumber.WithLabelValues(clusterName, "sync_targets").Set(float64(len(lst)))
promstat.GaugeCronDuration.WithLabelValues("sync_targets").Set(float64(ms))
promstat.GaugeSyncNumber.WithLabelValues("sync_targets").Set(float64(len(lst)))
logger.Infof("timer: sync targets done, cost: %dms, number: %d", ms, len(lst))
return nil

View File

@@ -9,7 +9,6 @@ import (
"github.com/toolkits/pkg/logger"
"github.com/didi/nightingale/v5/src/models"
"github.com/didi/nightingale/v5/src/server/config"
promstat "github.com/didi/nightingale/v5/src/server/stat"
)
@@ -124,13 +123,9 @@ func syncUsers() error {
return errors.WithMessage(err, "failed to exec UserStatistics")
}
clusterName := config.ReaderClient.GetClusterName()
if !UserCache.StatChanged(stat.Total, stat.LastUpdated) {
if clusterName != "" {
promstat.GaugeCronDuration.WithLabelValues(clusterName, "sync_users").Set(0)
promstat.GaugeSyncNumber.WithLabelValues(clusterName, "sync_users").Set(0)
}
promstat.GaugeCronDuration.WithLabelValues("sync_users").Set(0)
promstat.GaugeSyncNumber.WithLabelValues("sync_users").Set(0)
logger.Debug("users not changed")
return nil
@@ -149,10 +144,8 @@ func syncUsers() error {
UserCache.Set(m, stat.Total, stat.LastUpdated)
ms := time.Since(start).Milliseconds()
if clusterName != "" {
promstat.GaugeCronDuration.WithLabelValues(clusterName, "sync_users").Set(float64(ms))
promstat.GaugeSyncNumber.WithLabelValues(clusterName, "sync_users").Set(float64(len(m)))
}
promstat.GaugeCronDuration.WithLabelValues("sync_users").Set(float64(ms))
promstat.GaugeSyncNumber.WithLabelValues("sync_users").Set(float64(len(m)))
logger.Infof("timer: sync users done, cost: %dms, number: %d", ms, len(m))

View File

@@ -9,7 +9,6 @@ import (
"github.com/toolkits/pkg/logger"
"github.com/didi/nightingale/v5/src/models"
"github.com/didi/nightingale/v5/src/server/config"
promstat "github.com/didi/nightingale/v5/src/server/stat"
)
@@ -106,13 +105,9 @@ func syncUserGroups() error {
return errors.WithMessage(err, "failed to exec UserGroupStatistics")
}
clusterName := config.ReaderClient.GetClusterName()
if !UserGroupCache.StatChanged(stat.Total, stat.LastUpdated) {
if clusterName != "" {
promstat.GaugeCronDuration.WithLabelValues(clusterName, "sync_user_groups").Set(0)
promstat.GaugeSyncNumber.WithLabelValues(clusterName, "sync_user_groups").Set(0)
}
promstat.GaugeCronDuration.WithLabelValues("sync_user_groups").Set(0)
promstat.GaugeSyncNumber.WithLabelValues("sync_user_groups").Set(0)
logger.Debug("user_group not changed")
return nil
@@ -150,10 +145,8 @@ func syncUserGroups() error {
UserGroupCache.Set(m, stat.Total, stat.LastUpdated)
ms := time.Since(start).Milliseconds()
if clusterName != "" {
promstat.GaugeCronDuration.WithLabelValues(clusterName, "sync_user_groups").Set(float64(ms))
promstat.GaugeSyncNumber.WithLabelValues(clusterName, "sync_user_groups").Set(float64(len(m)))
}
promstat.GaugeCronDuration.WithLabelValues("sync_user_groups").Set(float64(ms))
promstat.GaugeSyncNumber.WithLabelValues("sync_user_groups").Set(float64(len(m)))
logger.Infof("timer: sync user groups done, cost: %dms, number: %d", ms, len(m))

View File

@@ -9,51 +9,56 @@ import (
const NodeReplicas = 500
type ConsistentHashRing struct {
type ClusterHashRingType struct {
sync.RWMutex
ring *consistent.Consistent
Rings map[string]*consistent.Consistent
}
// for alert_rule sharding
var HashRing = NewConsistentHashRing(int32(NodeReplicas), []string{})
var ClusterHashRing = ClusterHashRingType{Rings: make(map[string]*consistent.Consistent)}
func (chr *ConsistentHashRing) GetNode(pk string) (string, error) {
chr.RLock()
defer chr.RUnlock()
return chr.ring.Get(pk)
}
func (chr *ConsistentHashRing) Set(r *consistent.Consistent) {
chr.Lock()
defer chr.Unlock()
chr.ring = r
}
func (chr *ConsistentHashRing) GetRing() *consistent.Consistent {
chr.RLock()
defer chr.RUnlock()
return chr.ring
}
func NewConsistentHashRing(replicas int32, nodes []string) *ConsistentHashRing {
ret := &ConsistentHashRing{ring: consistent.New()}
ret.ring.NumberOfReplicas = int(replicas)
func NewConsistentHashRing(replicas int32, nodes []string) *consistent.Consistent {
ret := consistent.New()
ret.NumberOfReplicas = int(replicas)
for i := 0; i < len(nodes); i++ {
ret.ring.Add(nodes[i])
ret.Add(nodes[i])
}
return ret
}
func RebuildConsistentHashRing(nodes []string) {
func RebuildConsistentHashRing(cluster string, nodes []string) {
r := consistent.New()
r.NumberOfReplicas = NodeReplicas
for i := 0; i < len(nodes); i++ {
r.Add(nodes[i])
}
HashRing.Set(r)
logger.Infof("hash ring rebuild %+v", r.Members())
ClusterHashRing.Set(cluster, r)
logger.Infof("hash ring %s rebuild %+v", cluster, r.Members())
}
func (chr *ClusterHashRingType) GetNode(cluster, pk string) (string, error) {
chr.RLock()
defer chr.RUnlock()
_, exists := chr.Rings[cluster]
if !exists {
chr.Rings[cluster] = NewConsistentHashRing(int32(NodeReplicas), []string{})
}
return chr.Rings[cluster].Get(pk)
}
func (chr *ClusterHashRingType) IsHit(cluster string, pk string, currentNode string) bool {
node, err := chr.GetNode(cluster, pk)
if err != nil {
logger.Debugf("cluster:%s pk:%s failed to get node from hashring:%v", cluster, pk, err)
return false
}
return node == currentNode
}
func (chr *ClusterHashRingType) Set(cluster string, r *consistent.Consistent) {
chr.RLock()
defer chr.RUnlock()
chr.Rings[cluster] = r
}

View File

@@ -14,9 +14,10 @@ import (
)
// local servers
var localss string
var localss map[string]string
func Heartbeat(ctx context.Context) error {
localss = make(map[string]string)
if err := heartbeat(); err != nil {
fmt.Println("failed to heartbeat:", err)
return err
@@ -37,35 +38,66 @@ func loopHeartbeat() {
}
func heartbeat() error {
cluster := ""
var clusters []string
var err error
if config.C.ReaderFrom == "config" {
cluster = config.C.ClusterName
// 在配置文件维护实例和集群的对应关系
for i := 0; i < len(config.C.Readers); i++ {
clusters = append(clusters, config.C.Readers[i].ClusterName)
err := models.AlertingEngineHeartbeatWithCluster(config.C.Heartbeat.Endpoint, config.C.Readers[i].ClusterName)
if err != nil {
logger.Warningf("heartbeat with cluster %s err:%v", config.C.Readers[i].ClusterName, err)
continue
}
}
} else {
// 在页面上维护实例和集群的对应关系
clusters, err = models.AlertingEngineGetClusters(config.C.Heartbeat.Endpoint)
if err != nil {
return err
}
if len(clusters) == 0 {
// 实例刚刚部署,还没有在页面配置 cluster 的情况,先使用配置文件中的 cluster 上报心跳
for i := 0; i < len(config.C.Readers); i++ {
err := models.AlertingEngineHeartbeatWithCluster(config.C.Heartbeat.Endpoint, config.C.Readers[i].ClusterName)
if err != nil {
logger.Warningf("heartbeat with cluster %s err:%v", config.C.Readers[i].ClusterName, err)
continue
}
}
}
err := models.AlertingEngineHeartbeat(config.C.Heartbeat.Endpoint)
if err != nil {
return err
}
}
err := models.AlertingEngineHeartbeat(config.C.Heartbeat.Endpoint, cluster)
if err != nil {
return err
}
for i := 0; i < len(clusters); i++ {
servers, err := ActiveServers(clusters[i])
if err != nil {
logger.Warningf("hearbeat %s get active server err:", clusters[i], err)
continue
}
servers, err := ActiveServers()
if err != nil {
return err
}
sort.Strings(servers)
newss := strings.Join(servers, " ")
sort.Strings(servers)
newss := strings.Join(servers, " ")
if newss != localss {
RebuildConsistentHashRing(servers)
localss = newss
oldss, exists := localss[clusters[i]]
if exists && oldss == newss {
continue
}
RebuildConsistentHashRing(clusters[i], servers)
localss[clusters[i]] = newss
}
return nil
}
func ActiveServers() ([]string, error) {
cluster, err := models.AlertingEngineGetCluster(config.C.Heartbeat.Endpoint)
if err != nil {
return nil, err
func ActiveServers(cluster string) ([]string, error) {
if cluster == "" {
return nil, fmt.Errorf("cluster is empty")
}
// 30秒内有心跳就认为是活的

View File

@@ -7,8 +7,8 @@ import (
"github.com/toolkits/pkg/logger"
)
func IamLeader() (bool, error) {
servers, err := ActiveServers()
func IamLeader(cluster string) (bool, error) {
servers, err := ActiveServers(cluster)
if err != nil {
logger.Errorf("failed to get active servers: %v", err)
return false, err

View File

@@ -14,7 +14,6 @@ import (
"github.com/didi/nightingale/v5/src/pkg/aop"
"github.com/didi/nightingale/v5/src/server/config"
"github.com/didi/nightingale/v5/src/server/naming"
promstat "github.com/didi/nightingale/v5/src/server/stat"
)
@@ -69,7 +68,7 @@ func configRoute(r *gin.Engine, version string, reloadFunc func()) {
})
r.GET("/servers/active", func(c *gin.Context) {
lst, err := naming.ActiveServers()
lst, err := naming.ActiveServers(ginx.QueryStr(c, "cluster"))
ginx.NewRender(c).Data(lst, err)
})
@@ -101,6 +100,10 @@ func configRoute(r *gin.Engine, version string, reloadFunc func()) {
r.GET("/metrics", gin.WrapH(promhttp.Handler()))
r.GET("/log-sample-filter", logSampleFilterGet)
r.POST("/log-sample-filter", logSampleFilterAdd)
r.DELETE("/log-sample-filter", logSampleFilterDel)
service := r.Group("/v1/n9e")
service.POST("/event", pushEventToQueue)
service.POST("/make-event", makeEvent)

View File

@@ -3,7 +3,6 @@ package router
import (
"compress/gzip"
"compress/zlib"
"encoding/json"
"fmt"
"io/ioutil"
"net/http"
@@ -17,14 +16,17 @@ import (
promstat "github.com/didi/nightingale/v5/src/server/stat"
"github.com/didi/nightingale/v5/src/server/writer"
"github.com/gin-gonic/gin"
"github.com/mailru/easyjson"
"github.com/prometheus/common/model"
"github.com/prometheus/prometheus/prompb"
)
//easyjson:json
type TimeSeries struct {
Series []*DatadogMetric `json:"series"`
}
//easyjson:json
type DatadogMetric struct {
Metric string `json:"metric"`
Points []DatadogPoint `json:"points"`
@@ -32,6 +34,7 @@ type DatadogMetric struct {
Tags []string `json:"tags,omitempty"`
}
//easyjson:json
type DatadogPoint [2]float64
func (m *DatadogMetric) Clean() error {
@@ -214,7 +217,7 @@ func datadogSeries(c *gin.Context) {
}
var series TimeSeries
err = json.Unmarshal(bs, &series)
err = easyjson.Unmarshal(bs, &series)
if err != nil {
c.String(400, err.Error())
return
@@ -263,13 +266,22 @@ func datadogSeries(c *gin.Context) {
}
}
writer.Writers.PushSample(item.Metric, pt)
LogSample(c.Request.RemoteAddr, pt)
if config.C.WriterOpt.ShardingKey == "ident" {
if ident == "" {
writer.Writers.PushSample("-", pt)
} else {
writer.Writers.PushSample(ident, pt)
}
} else {
writer.Writers.PushSample(item.Metric, pt)
}
succ++
}
if succ > 0 {
cn := config.ReaderClient.GetClusterName()
cn := config.C.ClusterName
if cn != "" {
promstat.CounterSampleTotal.WithLabelValues(cn, "datadog").Add(float64(succ))
}

View File

@@ -0,0 +1,334 @@
// Code generated by easyjson for marshaling/unmarshaling. DO NOT EDIT.
package router
import (
json "encoding/json"
easyjson "github.com/mailru/easyjson"
jlexer "github.com/mailru/easyjson/jlexer"
jwriter "github.com/mailru/easyjson/jwriter"
)
// suppress unused package warning
var (
_ *json.RawMessage
_ *jlexer.Lexer
_ *jwriter.Writer
_ easyjson.Marshaler
)
func easyjsonF301f710DecodeGithubComDidiNightingaleV5SrcServerRouter(in *jlexer.Lexer, out *TimeSeries) {
isTopLevel := in.IsStart()
if in.IsNull() {
if isTopLevel {
in.Consumed()
}
in.Skip()
return
}
in.Delim('{')
for !in.IsDelim('}') {
key := in.UnsafeFieldName(false)
in.WantColon()
if in.IsNull() {
in.Skip()
in.WantComma()
continue
}
switch key {
case "series":
if in.IsNull() {
in.Skip()
out.Series = nil
} else {
in.Delim('[')
if out.Series == nil {
if !in.IsDelim(']') {
out.Series = make([]*DatadogMetric, 0, 8)
} else {
out.Series = []*DatadogMetric{}
}
} else {
out.Series = (out.Series)[:0]
}
for !in.IsDelim(']') {
var v1 *DatadogMetric
if in.IsNull() {
in.Skip()
v1 = nil
} else {
if v1 == nil {
v1 = new(DatadogMetric)
}
(*v1).UnmarshalEasyJSON(in)
}
out.Series = append(out.Series, v1)
in.WantComma()
}
in.Delim(']')
}
default:
in.SkipRecursive()
}
in.WantComma()
}
in.Delim('}')
if isTopLevel {
in.Consumed()
}
}
func easyjsonF301f710EncodeGithubComDidiNightingaleV5SrcServerRouter(out *jwriter.Writer, in TimeSeries) {
out.RawByte('{')
first := true
_ = first
{
const prefix string = ",\"series\":"
out.RawString(prefix[1:])
if in.Series == nil && (out.Flags&jwriter.NilSliceAsEmpty) == 0 {
out.RawString("null")
} else {
out.RawByte('[')
for v2, v3 := range in.Series {
if v2 > 0 {
out.RawByte(',')
}
if v3 == nil {
out.RawString("null")
} else {
(*v3).MarshalEasyJSON(out)
}
}
out.RawByte(']')
}
}
out.RawByte('}')
}
// MarshalJSON supports json.Marshaler interface
func (v TimeSeries) MarshalJSON() ([]byte, error) {
w := jwriter.Writer{}
easyjsonF301f710EncodeGithubComDidiNightingaleV5SrcServerRouter(&w, v)
return w.Buffer.BuildBytes(), w.Error
}
// MarshalEasyJSON supports easyjson.Marshaler interface
func (v TimeSeries) MarshalEasyJSON(w *jwriter.Writer) {
easyjsonF301f710EncodeGithubComDidiNightingaleV5SrcServerRouter(w, v)
}
// UnmarshalJSON supports json.Unmarshaler interface
func (v *TimeSeries) UnmarshalJSON(data []byte) error {
r := jlexer.Lexer{Data: data}
easyjsonF301f710DecodeGithubComDidiNightingaleV5SrcServerRouter(&r, v)
return r.Error()
}
// UnmarshalEasyJSON supports easyjson.Unmarshaler interface
func (v *TimeSeries) UnmarshalEasyJSON(l *jlexer.Lexer) {
easyjsonF301f710DecodeGithubComDidiNightingaleV5SrcServerRouter(l, v)
}
func easyjsonF301f710DecodeGithubComDidiNightingaleV5SrcServerRouter1(in *jlexer.Lexer, out *DatadogPoint) {
isTopLevel := in.IsStart()
if in.IsNull() {
in.Skip()
} else {
in.Delim('[')
v4 := 0
for !in.IsDelim(']') {
if v4 < 2 {
(*out)[v4] = float64(in.Float64())
v4++
} else {
in.SkipRecursive()
}
in.WantComma()
}
in.Delim(']')
}
if isTopLevel {
in.Consumed()
}
}
// easyjsonF301f710EncodeGithubComDidiNightingaleV5SrcServerRouter1 encodes
// a DatadogPoint as a JSON array of its float64 elements.
// NOTE: generated code ("DO NOT EDIT").
func easyjsonF301f710EncodeGithubComDidiNightingaleV5SrcServerRouter1(out *jwriter.Writer, in DatadogPoint) {
	out.RawByte('[')
	for v5 := range in {
		if v5 > 0 {
			out.RawByte(',')
		}
		out.Float64(float64((in)[v5]))
	}
	out.RawByte(']')
}
// Generated interface glue for DatadogPoint: delegates to the easyjson
// encoder/decoder defined in this file.

// MarshalJSON supports json.Marshaler interface
func (v DatadogPoint) MarshalJSON() ([]byte, error) {
	w := jwriter.Writer{}
	easyjsonF301f710EncodeGithubComDidiNightingaleV5SrcServerRouter1(&w, v)
	return w.Buffer.BuildBytes(), w.Error
}

// MarshalEasyJSON supports easyjson.Marshaler interface
func (v DatadogPoint) MarshalEasyJSON(w *jwriter.Writer) {
	easyjsonF301f710EncodeGithubComDidiNightingaleV5SrcServerRouter1(w, v)
}

// UnmarshalJSON supports json.Unmarshaler interface
func (v *DatadogPoint) UnmarshalJSON(data []byte) error {
	r := jlexer.Lexer{Data: data}
	easyjsonF301f710DecodeGithubComDidiNightingaleV5SrcServerRouter1(&r, v)
	return r.Error()
}

// UnmarshalEasyJSON supports easyjson.Unmarshaler interface
func (v *DatadogPoint) UnmarshalEasyJSON(l *jlexer.Lexer) {
	easyjsonF301f710DecodeGithubComDidiNightingaleV5SrcServerRouter1(l, v)
}
// easyjsonF301f710DecodeGithubComDidiNightingaleV5SrcServerRouter2 is the
// easyjson-generated decoder for DatadogMetric. Recognized keys: "metric",
// "points", "host", "tags". Null field values and unknown keys are skipped.
// NOTE: generated code ("DO NOT EDIT") — regenerate with easyjson rather
// than editing by hand.
func easyjsonF301f710DecodeGithubComDidiNightingaleV5SrcServerRouter2(in *jlexer.Lexer, out *DatadogMetric) {
	isTopLevel := in.IsStart()
	if in.IsNull() {
		if isTopLevel {
			in.Consumed()
		}
		in.Skip()
		return
	}
	in.Delim('{')
	for !in.IsDelim('}') {
		key := in.UnsafeFieldName(false)
		in.WantColon()
		// A null value leaves the field at its zero value.
		if in.IsNull() {
			in.Skip()
			in.WantComma()
			continue
		}
		switch key {
		case "metric":
			out.Metric = string(in.String())
		case "points":
			if in.IsNull() {
				in.Skip()
				out.Points = nil
			} else {
				in.Delim('[')
				if out.Points == nil {
					if !in.IsDelim(']') {
						out.Points = make([]DatadogPoint, 0, 4)
					} else {
						out.Points = []DatadogPoint{}
					}
				} else {
					// Reuse the existing backing array.
					out.Points = (out.Points)[:0]
				}
				for !in.IsDelim(']') {
					var v6 DatadogPoint
					(v6).UnmarshalEasyJSON(in)
					out.Points = append(out.Points, v6)
					in.WantComma()
				}
				in.Delim(']')
			}
		case "host":
			out.Host = string(in.String())
		case "tags":
			if in.IsNull() {
				in.Skip()
				out.Tags = nil
			} else {
				in.Delim('[')
				if out.Tags == nil {
					if !in.IsDelim(']') {
						out.Tags = make([]string, 0, 4)
					} else {
						out.Tags = []string{}
					}
				} else {
					// Reuse the existing backing array.
					out.Tags = (out.Tags)[:0]
				}
				for !in.IsDelim(']') {
					var v7 string
					v7 = string(in.String())
					out.Tags = append(out.Tags, v7)
					in.WantComma()
				}
				in.Delim(']')
			}
		default:
			in.SkipRecursive()
		}
		in.WantComma()
	}
	in.Delim('}')
	if isTopLevel {
		in.Consumed()
	}
}
// easyjsonF301f710EncodeGithubComDidiNightingaleV5SrcServerRouter2 encodes
// a DatadogMetric: "metric", "points" and "host" are always emitted;
// "tags" only when non-empty (omitempty semantics).
// NOTE: generated code ("DO NOT EDIT").
func easyjsonF301f710EncodeGithubComDidiNightingaleV5SrcServerRouter2(out *jwriter.Writer, in DatadogMetric) {
	out.RawByte('{')
	first := true
	_ = first
	{
		const prefix string = ",\"metric\":"
		// prefix[1:] drops the leading comma for the first field.
		out.RawString(prefix[1:])
		out.String(string(in.Metric))
	}
	{
		const prefix string = ",\"points\":"
		out.RawString(prefix)
		// A nil slice encodes as JSON null unless NilSliceAsEmpty is set.
		if in.Points == nil && (out.Flags&jwriter.NilSliceAsEmpty) == 0 {
			out.RawString("null")
		} else {
			out.RawByte('[')
			for v8, v9 := range in.Points {
				if v8 > 0 {
					out.RawByte(',')
				}
				(v9).MarshalEasyJSON(out)
			}
			out.RawByte(']')
		}
	}
	{
		const prefix string = ",\"host\":"
		out.RawString(prefix)
		out.String(string(in.Host))
	}
	// "tags" is omitted entirely when the slice is empty.
	if len(in.Tags) != 0 {
		const prefix string = ",\"tags\":"
		out.RawString(prefix)
		{
			out.RawByte('[')
			for v10, v11 := range in.Tags {
				if v10 > 0 {
					out.RawByte(',')
				}
				out.String(string(v11))
			}
			out.RawByte(']')
		}
	}
	out.RawByte('}')
}
// Generated interface glue for DatadogMetric: delegates to the easyjson
// encoder/decoder defined in this file.

// MarshalJSON supports json.Marshaler interface
func (v DatadogMetric) MarshalJSON() ([]byte, error) {
	w := jwriter.Writer{}
	easyjsonF301f710EncodeGithubComDidiNightingaleV5SrcServerRouter2(&w, v)
	return w.Buffer.BuildBytes(), w.Error
}

// MarshalEasyJSON supports easyjson.Marshaler interface
func (v DatadogMetric) MarshalEasyJSON(w *jwriter.Writer) {
	easyjsonF301f710EncodeGithubComDidiNightingaleV5SrcServerRouter2(w, v)
}

// UnmarshalJSON supports json.Unmarshaler interface
func (v *DatadogMetric) UnmarshalJSON(data []byte) error {
	r := jlexer.Lexer{Data: data}
	easyjsonF301f710DecodeGithubComDidiNightingaleV5SrcServerRouter2(&r, v)
	return r.Error()
}

// UnmarshalEasyJSON supports easyjson.Unmarshaler interface
func (v *DatadogMetric) UnmarshalEasyJSON(l *jlexer.Lexer) {
	easyjsonF301f710DecodeGithubComDidiNightingaleV5SrcServerRouter2(l, v)
}

View File

@@ -0,0 +1,18 @@
// Code generated by easyjson for marshaling/unmarshaling. DO NOT EDIT.
package router
import (
json "encoding/json"
easyjson "github.com/mailru/easyjson"
jlexer "github.com/mailru/easyjson/jlexer"
jwriter "github.com/mailru/easyjson/jwriter"
)
// suppress unused package warning
var (
_ *json.RawMessage
_ *jlexer.Lexer
_ *jwriter.Writer
_ easyjson.Marshaler
)

View File

@@ -5,16 +5,17 @@ import (
"strings"
"time"
"github.com/didi/nightingale/v5/src/models"
"github.com/didi/nightingale/v5/src/server/common/conv"
"github.com/didi/nightingale/v5/src/server/config"
"github.com/didi/nightingale/v5/src/server/engine"
promstat "github.com/didi/nightingale/v5/src/server/stat"
"github.com/gin-gonic/gin"
"github.com/toolkits/pkg/ginx"
"github.com/toolkits/pkg/logger"
"github.com/toolkits/pkg/str"
"github.com/didi/nightingale/v5/src/models"
"github.com/didi/nightingale/v5/src/pkg/poster"
"github.com/didi/nightingale/v5/src/server/common/conv"
"github.com/didi/nightingale/v5/src/server/config"
"github.com/didi/nightingale/v5/src/server/engine"
"github.com/didi/nightingale/v5/src/server/naming"
promstat "github.com/didi/nightingale/v5/src/server/stat"
)
func pushEventToQueue(c *gin.Context) {
@@ -39,14 +40,17 @@ func pushEventToQueue(c *gin.Context) {
event.TagsMap[arr[0]] = arr[1]
}
// isMuted only need TriggerTime RuleName and TagsMap
if engine.IsMuted(event) {
if engine.EventMuteStra.IsMuted(nil, event) {
logger.Infof("event_muted: rule_id=%d %s", event.RuleId, event.Hash)
ginx.NewRender(c).Message(nil)
return
}
if err := event.ParseRuleNote(); err != nil {
if err := event.ParseRule("rule_name"); err != nil {
event.RuleName = fmt.Sprintf("failed to parse rule name: %v", err)
}
if err := event.ParseRule("rule_note"); err != nil {
event.RuleNote = fmt.Sprintf("failed to parse rule note: %v", err)
}
@@ -63,10 +67,7 @@ func pushEventToQueue(c *gin.Context) {
event.NotifyChannels = strings.Join(event.NotifyChannelsJSON, " ")
event.NotifyGroups = strings.Join(event.NotifyGroupsJSON, " ")
cn := config.ReaderClient.GetClusterName()
if cn != "" {
promstat.CounterAlertsTotal.WithLabelValues(cn).Inc()
}
promstat.CounterAlertsTotal.WithLabelValues(event.Cluster).Inc()
engine.LogEvent(event, "http_push_queue")
if !engine.EventQueue.PushFront(event) {
@@ -87,34 +88,60 @@ type eventForm struct {
func judgeEvent(c *gin.Context) {
var form eventForm
ginx.BindJSON(c, &form)
re, exists := engine.RuleEvalForExternal.Get(form.RuleId)
ruleContext, exists := engine.GetExternalAlertRule(form.Cluster, form.RuleId)
if !exists {
ginx.Bomb(200, "rule not exists")
}
re.Judge(form.Cluster, form.Vectors)
ruleContext.HandleVectors(form.Vectors, "http")
ginx.NewRender(c).Message(nil)
}
func makeEvent(c *gin.Context) {
var events []*eventForm
ginx.BindJSON(c, &events)
now := time.Now().Unix()
//now := time.Now().Unix()
for i := 0; i < len(events); i++ {
re, exists := engine.RuleEvalForExternal.Get(events[i].RuleId)
node, err := naming.ClusterHashRing.GetNode(events[i].Cluster, fmt.Sprintf("%d", events[i].RuleId))
if err != nil {
logger.Warningf("event:%+v get node err:%v", events[i], err)
ginx.Bomb(200, "event node not exists")
}
if node != config.C.Heartbeat.Endpoint {
err := forwardEvent(events[i], node)
if err != nil {
logger.Warningf("event:%+v forward err:%v", events[i], err)
ginx.Bomb(200, "event forward error")
}
continue
}
ruleContext, exists := engine.GetExternalAlertRule(events[i].Cluster, events[i].RuleId)
logger.Debugf("handle event:%+v exists:%v", events[i], exists)
if !exists {
ginx.Bomb(200, "rule not exists")
}
if events[i].Alert {
go re.MakeNewEvent("http", now, events[i].Cluster, events[i].Vectors)
go ruleContext.HandleVectors(events[i].Vectors, "http")
} else {
for _, vector := range events[i].Vectors {
hash := str.MD5(fmt.Sprintf("%d_%s", events[i].RuleId, vector.Key))
now := vector.Timestamp
go re.RecoverEvent(hash, now, vector.Value)
alertVector := engine.NewAlertVector(ruleContext, nil, vector, "http")
readableString := vector.ReadableValue()
go ruleContext.RecoverSingle(alertVector.Hash(), vector.Timestamp, &readableString)
}
}
}
ginx.NewRender(c).Message(nil)
}
// event 不归本实例处理,转发给对应的实例
func forwardEvent(event *eventForm, instance string) error {
ur := fmt.Sprintf("http://%s/v1/n9e/make-event", instance)
res, code, err := poster.PostJSON(ur, time.Second*5, []*eventForm{event}, 3)
if err != nil {
return err
}
logger.Infof("forward event: result=succ url=%s code=%d event:%v response=%s", ur, code, event, string(res))
return nil
}

View File

@@ -0,0 +1,55 @@
package router
import (
"github.com/didi/nightingale/v5/src/server/memsto"
"github.com/gin-gonic/gin"
"github.com/prometheus/prometheus/prompb"
"github.com/toolkits/pkg/ginx"
"github.com/toolkits/pkg/logger"
)
// logSampleFilterAdd installs a label filter set (label key -> accepted
// values) into the in-memory log-sample cache.
func logSampleFilterAdd(c *gin.Context) {
	var filters map[string][]string
	ginx.BindJSON(c, &filters)
	memsto.LogSampleCache.Set(filters)
	c.JSON(200, "ok")
}
// logSampleFilterGet returns the currently installed log-sample filters.
func logSampleFilterGet(c *gin.Context) {
	c.JSON(200, memsto.LogSampleCache.Get())
}
// logSampleFilterDel clears all installed log-sample filters.
func logSampleFilterDel(c *gin.Context) {
	memsto.LogSampleCache.Clean()
	c.JSON(200, "ok")
}
// LogSample debug-logs a received sample when it matches every installed
// filter. A sample matches iff, for each filter key, the sample carries
// that label AND its value is in the filter's accepted set. No-op when no
// filters are installed.
func LogSample(remoteAddr string, ts *prompb.TimeSeries) {
	if memsto.LogSampleCache.Len() == 0 {
		return
	}

	// Index the sample's labels for O(1) lookups below.
	labels := make(map[string]string, len(ts.Labels))
	for _, l := range ts.Labels {
		labels[l.Name] = l.Value
	}

	for key, accepted := range memsto.LogSampleCache.Get() {
		// The filtered label key must be present on the sample.
		value, ok := labels[key]
		if !ok {
			return
		}
		// And its value must be one of the accepted values.
		if _, ok = accepted[value]; !ok {
			return
		}
	}

	// Every filter condition matched the sample's labels.
	logger.Debugf("recv sample from:%s sample:%s", remoteAddr, ts.String())
}

View File

@@ -2,10 +2,14 @@ package router
import (
"net/http"
"strconv"
"strings"
"github.com/gin-gonic/gin"
"github.com/toolkits/pkg/ginx"
"github.com/didi/nightingale/v5/src/models"
"github.com/didi/nightingale/v5/src/server/config"
"github.com/didi/nightingale/v5/src/server/idents"
"github.com/didi/nightingale/v5/src/server/memsto"
"github.com/didi/nightingale/v5/src/server/naming"
@@ -48,12 +52,28 @@ func userGroupGet(c *gin.Context) {
}
func alertRuleLocationGet(c *gin.Context) {
id := ginx.QueryStr(c, "id")
node, err := naming.HashRing.GetNode(id)
if err != nil {
http.Error(c.Writer, err.Error(), http.StatusInternalServerError)
id := ginx.QueryInt64(c, "id")
rule := memsto.AlertRuleCache.Get(id)
if rule == nil {
http.Error(c.Writer, "rule not found", http.StatusNotFound)
return
}
var clusters []string
if rule.Cluster == models.ClusterAll {
clusters = config.ReaderClients.GetClusterNames()
} else {
clusters = strings.Fields(rule.Cluster)
}
c.JSON(200, gin.H{"id": id, "node": node})
var arr []gin.H
for _, cluster := range clusters {
node, err := naming.ClusterHashRing.GetNode(cluster, strconv.FormatInt(id, 10))
if err != nil {
http.Error(c.Writer, err.Error(), http.StatusInternalServerError)
return
}
arr = append(arr, gin.H{"id": id, "cluster": cluster, "node": node})
}
c.JSON(200, gin.H{"list": arr})
}

View File

@@ -2,7 +2,6 @@ package router
import (
"compress/gzip"
"encoding/json"
"fmt"
"io/ioutil"
"strconv"
@@ -16,10 +15,12 @@ import (
promstat "github.com/didi/nightingale/v5/src/server/stat"
"github.com/didi/nightingale/v5/src/server/writer"
"github.com/gin-gonic/gin"
"github.com/mailru/easyjson"
"github.com/prometheus/common/model"
"github.com/prometheus/prometheus/prompb"
)
//easyjson:json
type FalconMetric struct {
Metric string `json:"metric"`
Endpoint string `json:"endpoint"`
@@ -29,6 +30,9 @@ type FalconMetric struct {
Tags string `json:"tags"`
}
//easyjson:json
type FalconMetricArr []FalconMetric
func (m *FalconMetric) Clean(ts int64) error {
if m.Metric == "" {
return fmt.Errorf("metric is blank")
@@ -140,6 +144,7 @@ func (m *FalconMetric) ToProm() (*prompb.TimeSeries, string, error) {
}
func falconPush(c *gin.Context) {
var bs []byte
var err error
var r *gzip.Reader
@@ -162,13 +167,13 @@ func falconPush(c *gin.Context) {
return
}
var arr []FalconMetric
var arr FalconMetricArr
if bs[0] == '[' {
err = json.Unmarshal(bs, &arr)
err = easyjson.Unmarshal(bs, &arr)
} else {
var one FalconMetric
err = json.Unmarshal(bs, &one)
err = easyjson.Unmarshal(bs, &one)
arr = []FalconMetric{one}
}
@@ -208,13 +213,22 @@ func falconPush(c *gin.Context) {
}
}
writer.Writers.PushSample(arr[i].Metric, pt)
LogSample(c.Request.RemoteAddr, pt)
if config.C.WriterOpt.ShardingKey == "ident" {
if ident == "" {
writer.Writers.PushSample("-", pt)
} else {
writer.Writers.PushSample(ident, pt)
}
} else {
writer.Writers.PushSample(arr[i].Metric, pt)
}
succ++
}
if succ > 0 {
cn := config.ReaderClient.GetClusterName()
cn := config.C.ClusterName
if cn != "" {
promstat.CounterSampleTotal.WithLabelValues(cn, "openfalcon").Add(float64(succ))
}

View File

@@ -0,0 +1,191 @@
// Code generated by easyjson for marshaling/unmarshaling. DO NOT EDIT.
package router
import (
json "encoding/json"
easyjson "github.com/mailru/easyjson"
jlexer "github.com/mailru/easyjson/jlexer"
jwriter "github.com/mailru/easyjson/jwriter"
)
// suppress unused package warning
var (
_ *json.RawMessage
_ *jlexer.Lexer
_ *jwriter.Writer
_ easyjson.Marshaler
)
func easyjson61ba9b47DecodeGithubComDidiNightingaleV5SrcServerRouter(in *jlexer.Lexer, out *FalconMetricArr) {
isTopLevel := in.IsStart()
if in.IsNull() {
in.Skip()
*out = nil
} else {
in.Delim('[')
if *out == nil {
if !in.IsDelim(']') {
*out = make(FalconMetricArr, 0, 0)
} else {
*out = FalconMetricArr{}
}
} else {
*out = (*out)[:0]
}
for !in.IsDelim(']') {
var v1 FalconMetric
(v1).UnmarshalEasyJSON(in)
*out = append(*out, v1)
in.WantComma()
}
in.Delim(']')
}
if isTopLevel {
in.Consumed()
}
}
func easyjson61ba9b47EncodeGithubComDidiNightingaleV5SrcServerRouter(out *jwriter.Writer, in FalconMetricArr) {
if in == nil && (out.Flags&jwriter.NilSliceAsEmpty) == 0 {
out.RawString("null")
} else {
out.RawByte('[')
for v2, v3 := range in {
if v2 > 0 {
out.RawByte(',')
}
(v3).MarshalEasyJSON(out)
}
out.RawByte(']')
}
}
// MarshalJSON supports json.Marshaler interface
func (v FalconMetricArr) MarshalJSON() ([]byte, error) {
w := jwriter.Writer{}
easyjson61ba9b47EncodeGithubComDidiNightingaleV5SrcServerRouter(&w, v)
return w.Buffer.BuildBytes(), w.Error
}
// MarshalEasyJSON supports easyjson.Marshaler interface
func (v FalconMetricArr) MarshalEasyJSON(w *jwriter.Writer) {
easyjson61ba9b47EncodeGithubComDidiNightingaleV5SrcServerRouter(w, v)
}
// UnmarshalJSON supports json.Unmarshaler interface
func (v *FalconMetricArr) UnmarshalJSON(data []byte) error {
r := jlexer.Lexer{Data: data}
easyjson61ba9b47DecodeGithubComDidiNightingaleV5SrcServerRouter(&r, v)
return r.Error()
}
// UnmarshalEasyJSON supports easyjson.Unmarshaler interface
func (v *FalconMetricArr) UnmarshalEasyJSON(l *jlexer.Lexer) {
easyjson61ba9b47DecodeGithubComDidiNightingaleV5SrcServerRouter(l, v)
}
func easyjson61ba9b47DecodeGithubComDidiNightingaleV5SrcServerRouter1(in *jlexer.Lexer, out *FalconMetric) {
isTopLevel := in.IsStart()
if in.IsNull() {
if isTopLevel {
in.Consumed()
}
in.Skip()
return
}
in.Delim('{')
for !in.IsDelim('}') {
key := in.UnsafeFieldName(false)
in.WantColon()
if in.IsNull() {
in.Skip()
in.WantComma()
continue
}
switch key {
case "metric":
out.Metric = string(in.String())
case "endpoint":
out.Endpoint = string(in.String())
case "timestamp":
out.Timestamp = int64(in.Int64())
case "value":
if m, ok := out.ValueUnTyped.(easyjson.Unmarshaler); ok {
m.UnmarshalEasyJSON(in)
} else if m, ok := out.ValueUnTyped.(json.Unmarshaler); ok {
_ = m.UnmarshalJSON(in.Raw())
} else {
out.ValueUnTyped = in.Interface()
}
case "tags":
out.Tags = string(in.String())
default:
in.SkipRecursive()
}
in.WantComma()
}
in.Delim('}')
if isTopLevel {
in.Consumed()
}
}
func easyjson61ba9b47EncodeGithubComDidiNightingaleV5SrcServerRouter1(out *jwriter.Writer, in FalconMetric) {
out.RawByte('{')
first := true
_ = first
{
const prefix string = ",\"metric\":"
out.RawString(prefix[1:])
out.String(string(in.Metric))
}
{
const prefix string = ",\"endpoint\":"
out.RawString(prefix)
out.String(string(in.Endpoint))
}
{
const prefix string = ",\"timestamp\":"
out.RawString(prefix)
out.Int64(int64(in.Timestamp))
}
{
const prefix string = ",\"value\":"
out.RawString(prefix)
if m, ok := in.ValueUnTyped.(easyjson.Marshaler); ok {
m.MarshalEasyJSON(out)
} else if m, ok := in.ValueUnTyped.(json.Marshaler); ok {
out.Raw(m.MarshalJSON())
} else {
out.Raw(json.Marshal(in.ValueUnTyped))
}
}
{
const prefix string = ",\"tags\":"
out.RawString(prefix)
out.String(string(in.Tags))
}
out.RawByte('}')
}
// MarshalJSON supports json.Marshaler interface
func (v FalconMetric) MarshalJSON() ([]byte, error) {
w := jwriter.Writer{}
easyjson61ba9b47EncodeGithubComDidiNightingaleV5SrcServerRouter1(&w, v)
return w.Buffer.BuildBytes(), w.Error
}
// MarshalEasyJSON supports easyjson.Marshaler interface
func (v FalconMetric) MarshalEasyJSON(w *jwriter.Writer) {
easyjson61ba9b47EncodeGithubComDidiNightingaleV5SrcServerRouter1(w, v)
}
// UnmarshalJSON supports json.Unmarshaler interface
func (v *FalconMetric) UnmarshalJSON(data []byte) error {
r := jlexer.Lexer{Data: data}
easyjson61ba9b47DecodeGithubComDidiNightingaleV5SrcServerRouter1(&r, v)
return r.Error()
}
// UnmarshalEasyJSON supports easyjson.Unmarshaler interface
func (v *FalconMetric) UnmarshalEasyJSON(l *jlexer.Lexer) {
easyjson61ba9b47DecodeGithubComDidiNightingaleV5SrcServerRouter1(l, v)
}

View File

@@ -2,7 +2,6 @@ package router
import (
"compress/gzip"
"encoding/json"
"fmt"
"io/ioutil"
"strconv"
@@ -20,8 +19,11 @@ import (
"github.com/didi/nightingale/v5/src/server/memsto"
promstat "github.com/didi/nightingale/v5/src/server/stat"
"github.com/didi/nightingale/v5/src/server/writer"
"github.com/mailru/easyjson"
_ "github.com/mailru/easyjson/gen"
)
// easyjson:json
type HTTPMetric struct {
Metric string `json:"metric"`
Timestamp int64 `json:"timestamp"`
@@ -30,6 +32,9 @@ type HTTPMetric struct {
Tags map[string]string `json:"tags"`
}
//easyjson:json
type HTTPMetricArr []HTTPMetric
func (m *HTTPMetric) Clean(ts int64) error {
if m.Metric == "" {
return fmt.Errorf("metric is blank")
@@ -146,13 +151,13 @@ func handleOpenTSDB(c *gin.Context) {
return
}
var arr []HTTPMetric
var arr HTTPMetricArr
if bs[0] == '[' {
err = json.Unmarshal(bs, &arr)
err = easyjson.Unmarshal(bs, &arr)
} else {
var one HTTPMetric
err = json.Unmarshal(bs, &one)
err = easyjson.Unmarshal(bs, &one)
arr = []HTTPMetric{one}
}
@@ -202,13 +207,22 @@ func handleOpenTSDB(c *gin.Context) {
}
}
writer.Writers.PushSample(arr[i].Metric, pt)
LogSample(c.Request.RemoteAddr, pt)
if config.C.WriterOpt.ShardingKey == "ident" {
if host == "" {
writer.Writers.PushSample("-", pt)
} else {
writer.Writers.PushSample(host, pt)
}
} else {
writer.Writers.PushSample(arr[i].Metric, pt)
}
succ++
}
if succ > 0 {
cn := config.ReaderClient.GetClusterName()
cn := config.C.ClusterName
if cn != "" {
promstat.CounterSampleTotal.WithLabelValues(cn, "opentsdb").Add(float64(succ))
}

View File

@@ -0,0 +1,214 @@
// Code generated by easyjson for marshaling/unmarshaling. DO NOT EDIT.
package router
import (
json "encoding/json"
easyjson "github.com/mailru/easyjson"
jlexer "github.com/mailru/easyjson/jlexer"
jwriter "github.com/mailru/easyjson/jwriter"
)
// suppress unused package warning
var (
_ *json.RawMessage
_ *jlexer.Lexer
_ *jwriter.Writer
_ easyjson.Marshaler
)
func easyjson30864de9DecodeGithubComDidiNightingaleV5SrcServerRouter(in *jlexer.Lexer, out *HTTPMetricArr) {
isTopLevel := in.IsStart()
if in.IsNull() {
in.Skip()
*out = nil
} else {
in.Delim('[')
if *out == nil {
if !in.IsDelim(']') {
*out = make(HTTPMetricArr, 0, 1)
} else {
*out = HTTPMetricArr{}
}
} else {
*out = (*out)[:0]
}
for !in.IsDelim(']') {
var v1 HTTPMetric
(v1).UnmarshalEasyJSON(in)
*out = append(*out, v1)
in.WantComma()
}
in.Delim(']')
}
if isTopLevel {
in.Consumed()
}
}
func easyjson30864de9EncodeGithubComDidiNightingaleV5SrcServerRouter(out *jwriter.Writer, in HTTPMetricArr) {
if in == nil && (out.Flags&jwriter.NilSliceAsEmpty) == 0 {
out.RawString("null")
} else {
out.RawByte('[')
for v2, v3 := range in {
if v2 > 0 {
out.RawByte(',')
}
(v3).MarshalEasyJSON(out)
}
out.RawByte(']')
}
}
// MarshalJSON supports json.Marshaler interface
func (v HTTPMetricArr) MarshalJSON() ([]byte, error) {
w := jwriter.Writer{}
easyjson30864de9EncodeGithubComDidiNightingaleV5SrcServerRouter(&w, v)
return w.Buffer.BuildBytes(), w.Error
}
// MarshalEasyJSON supports easyjson.Marshaler interface
func (v HTTPMetricArr) MarshalEasyJSON(w *jwriter.Writer) {
easyjson30864de9EncodeGithubComDidiNightingaleV5SrcServerRouter(w, v)
}
// UnmarshalJSON supports json.Unmarshaler interface
func (v *HTTPMetricArr) UnmarshalJSON(data []byte) error {
r := jlexer.Lexer{Data: data}
easyjson30864de9DecodeGithubComDidiNightingaleV5SrcServerRouter(&r, v)
return r.Error()
}
// UnmarshalEasyJSON supports easyjson.Unmarshaler interface
func (v *HTTPMetricArr) UnmarshalEasyJSON(l *jlexer.Lexer) {
easyjson30864de9DecodeGithubComDidiNightingaleV5SrcServerRouter(l, v)
}
func easyjson30864de9DecodeGithubComDidiNightingaleV5SrcServerRouter1(in *jlexer.Lexer, out *HTTPMetric) {
isTopLevel := in.IsStart()
if in.IsNull() {
if isTopLevel {
in.Consumed()
}
in.Skip()
return
}
in.Delim('{')
for !in.IsDelim('}') {
key := in.UnsafeFieldName(false)
in.WantColon()
if in.IsNull() {
in.Skip()
in.WantComma()
continue
}
switch key {
case "metric":
out.Metric = string(in.String())
case "timestamp":
out.Timestamp = int64(in.Int64())
case "value":
if m, ok := out.ValueUnTyped.(easyjson.Unmarshaler); ok {
m.UnmarshalEasyJSON(in)
} else if m, ok := out.ValueUnTyped.(json.Unmarshaler); ok {
_ = m.UnmarshalJSON(in.Raw())
} else {
out.ValueUnTyped = in.Interface()
}
case "tags":
if in.IsNull() {
in.Skip()
} else {
in.Delim('{')
out.Tags = make(map[string]string)
for !in.IsDelim('}') {
key := string(in.String())
in.WantColon()
var v4 string
v4 = string(in.String())
(out.Tags)[key] = v4
in.WantComma()
}
in.Delim('}')
}
default:
in.SkipRecursive()
}
in.WantComma()
}
in.Delim('}')
if isTopLevel {
in.Consumed()
}
}
func easyjson30864de9EncodeGithubComDidiNightingaleV5SrcServerRouter1(out *jwriter.Writer, in HTTPMetric) {
out.RawByte('{')
first := true
_ = first
{
const prefix string = ",\"metric\":"
out.RawString(prefix[1:])
out.String(string(in.Metric))
}
{
const prefix string = ",\"timestamp\":"
out.RawString(prefix)
out.Int64(int64(in.Timestamp))
}
{
const prefix string = ",\"value\":"
out.RawString(prefix)
if m, ok := in.ValueUnTyped.(easyjson.Marshaler); ok {
m.MarshalEasyJSON(out)
} else if m, ok := in.ValueUnTyped.(json.Marshaler); ok {
out.Raw(m.MarshalJSON())
} else {
out.Raw(json.Marshal(in.ValueUnTyped))
}
}
{
const prefix string = ",\"tags\":"
out.RawString(prefix)
if in.Tags == nil && (out.Flags&jwriter.NilMapAsEmpty) == 0 {
out.RawString(`null`)
} else {
out.RawByte('{')
v5First := true
for v5Name, v5Value := range in.Tags {
if v5First {
v5First = false
} else {
out.RawByte(',')
}
out.String(string(v5Name))
out.RawByte(':')
out.String(string(v5Value))
}
out.RawByte('}')
}
}
out.RawByte('}')
}
// MarshalJSON supports json.Marshaler interface
func (v HTTPMetric) MarshalJSON() ([]byte, error) {
w := jwriter.Writer{}
easyjson30864de9EncodeGithubComDidiNightingaleV5SrcServerRouter1(&w, v)
return w.Buffer.BuildBytes(), w.Error
}
// MarshalEasyJSON supports easyjson.Marshaler interface
func (v HTTPMetric) MarshalEasyJSON(w *jwriter.Writer) {
easyjson30864de9EncodeGithubComDidiNightingaleV5SrcServerRouter1(w, v)
}
// UnmarshalJSON supports json.Unmarshaler interface
func (v *HTTPMetric) UnmarshalJSON(data []byte) error {
r := jlexer.Lexer{Data: data}
easyjson30864de9DecodeGithubComDidiNightingaleV5SrcServerRouter1(&r, v)
return r.Error()
}
// UnmarshalEasyJSON supports easyjson.Unmarshaler interface
func (v *HTTPMetric) UnmarshalEasyJSON(l *jlexer.Lexer) {
easyjson30864de9DecodeGithubComDidiNightingaleV5SrcServerRouter1(l, v)
}

View File

@@ -37,12 +37,12 @@ func queryPromql(c *gin.Context) {
var f promqlForm
ginx.BindJSON(c, &f)
if config.ReaderClient.IsNil() {
if config.ReaderClients.IsNil(config.C.ClusterName) {
c.String(500, "reader client is nil")
return
}
value, warnings, err := config.ReaderClient.GetCli().Query(c.Request.Context(), f.PromQL, time.Now())
value, warnings, err := config.ReaderClients.GetCli(config.C.ClusterName).Query(c.Request.Context(), f.PromQL, time.Now())
if err != nil {
c.String(500, "promql:%s error:%v", f.PromQL, err)
return
@@ -104,6 +104,10 @@ func remoteWrite(c *gin.Context) {
// find ident label
for j := 0; j < len(req.Timeseries[i].Labels); j++ {
if req.Timeseries[i].Labels[j].Name == "host" {
req.Timeseries[i].Labels[j].Name = "ident"
}
if req.Timeseries[i].Labels[j].Name == "ident" {
ident = req.Timeseries[i].Labels[j].Value
}
@@ -143,10 +147,20 @@ func remoteWrite(c *gin.Context) {
}
}
writer.Writers.PushSample(metric, req.Timeseries[i])
LogSample(c.Request.RemoteAddr, req.Timeseries[i])
if config.C.WriterOpt.ShardingKey == "ident" {
if ident == "" {
writer.Writers.PushSample("-", req.Timeseries[i])
} else {
writer.Writers.PushSample(ident, req.Timeseries[i])
}
} else {
writer.Writers.PushSample(metric, req.Timeseries[i])
}
}
cn := config.ReaderClient.GetClusterName()
cn := config.C.ClusterName
if cn != "" {
promstat.CounterSampleTotal.WithLabelValues(cn, "prometheus").Add(float64(count))
}

View File

@@ -28,6 +28,7 @@ import (
type Server struct {
ConfigFile string
Version string
Key string
}
type ServerOption func(*Server)
@@ -44,6 +45,12 @@ func SetVersion(v string) ServerOption {
}
}
func SetKey(k string) ServerOption {
return func(s *Server) {
s.Key = k
}
}
// Run run server
func Run(opts ...ServerOption) {
code := 1
@@ -92,7 +99,7 @@ func (s Server) initialize() (func(), error) {
fns.Add(cancel)
// parse config file
config.MustLoad(s.ConfigFile)
config.MustLoad(s.Key, s.ConfigFile)
// init i18n
i18n.Init()

View File

@@ -16,7 +16,7 @@ var (
Subsystem: subsystem,
Name: "cron_duration",
Help: "Cron method use duration, unit: ms.",
}, []string{"cluster", "name"})
}, []string{"name"})
// 从数据库同步数据的时候,同步的条数
GaugeSyncNumber = prometheus.NewGaugeVec(prometheus.GaugeOpts{
@@ -24,7 +24,7 @@ var (
Subsystem: subsystem,
Name: "cron_sync_number",
Help: "Cron sync number.",
}, []string{"cluster", "name"})
}, []string{"name"})
// 从各个接收接口接收到的监控数据总量
CounterSampleTotal = prometheus.NewCounterVec(prometheus.CounterOpts{
@@ -43,12 +43,12 @@ var (
}, []string{"cluster"})
// 内存中的告警事件队列的长度
GaugeAlertQueueSize = prometheus.NewGaugeVec(prometheus.GaugeOpts{
GaugeAlertQueueSize = prometheus.NewGauge(prometheus.GaugeOpts{
Namespace: namespace,
Subsystem: subsystem,
Name: "alert_queue_size",
Help: "The size of alert queue.",
}, []string{"cluster"})
})
// 数据转发队列,各个队列的长度
GaugeSampleQueueSize = prometheus.NewGaugeVec(prometheus.GaugeOpts{

111
src/server/writer/queue.go Normal file
View File

@@ -0,0 +1,111 @@
package writer
import (
"container/list"
"sync"
"github.com/prometheus/prometheus/prompb"
)
// SafeList is a doubly-linked list guarded by an RWMutex so it can be
// shared between producer and consumer goroutines.
type SafeList struct {
	sync.RWMutex
	L *list.List
}

// NewSafeList returns an empty, ready-to-use SafeList.
func NewSafeList() *SafeList {
	return &SafeList{L: list.New()}
}
// PushFront inserts v at the head of the list and returns its element.
func (sl *SafeList) PushFront(v interface{}) *list.Element {
	sl.Lock()
	defer sl.Unlock()
	return sl.L.PushFront(v)
}
// PushFrontBatch inserts every element of vs at the head of the list under
// a single lock acquisition; the last element of vs ends up frontmost.
func (sl *SafeList) PushFrontBatch(vs []interface{}) {
	sl.Lock()
	defer sl.Unlock()
	for _, item := range vs {
		sl.L.PushFront(item)
	}
}
// PopBack removes up to max elements from the tail of the list and returns
// those that are *prompb.TimeSeries; elements of any other type are removed
// but dropped silently. Returns an empty (non-nil) slice when the list is
// empty.
func (sl *SafeList) PopBack(max int) []*prompb.TimeSeries {
	sl.Lock()
	defer sl.Unlock()

	n := sl.L.Len()
	if n == 0 {
		return []*prompb.TimeSeries{}
	}
	if n > max {
		n = max
	}

	out := make([]*prompb.TimeSeries, 0, n)
	for i := 0; i < n; i++ {
		if sample, ok := sl.L.Remove(sl.L.Back()).(*prompb.TimeSeries); ok {
			out = append(out, sample)
		}
	}
	return out
}
// RemoveAll drops every element, resetting the list to empty.
func (sl *SafeList) RemoveAll() {
	sl.Lock()
	defer sl.Unlock()
	sl.L.Init()
}
// Len reports the number of elements currently in the list.
func (sl *SafeList) Len() int {
	sl.RLock()
	defer sl.RUnlock()
	return sl.L.Len()
}
// SafeList with Limited Size
// SafeListLimited wraps SafeList with a soft capacity bound: pushes are
// rejected once the current length has reached maxSize.
type SafeListLimited struct {
	maxSize int
	SL *SafeList
}

// NewSafeListLimited returns a SafeListLimited that refuses pushes once
// its length reaches maxSize.
func NewSafeListLimited(maxSize int) *SafeListLimited {
	return &SafeListLimited{SL: NewSafeList(), maxSize: maxSize}
}
// PopBack removes up to max trailing samples; see SafeList.PopBack.
func (sll *SafeListLimited) PopBack(max int) []*prompb.TimeSeries {
	return sll.SL.PopBack(max)
}
// PushFront inserts v at the head unless the list is already at capacity;
// it reports whether the element was accepted.
// NOTE(review): the length check and the push take the lock separately, so
// concurrent pushers can briefly exceed maxSize — presumably acceptable as
// a soft bound; confirm before relying on a strict cap.
func (sll *SafeListLimited) PushFront(v interface{}) bool {
	if sll.SL.Len() >= sll.maxSize {
		return false
	}
	sll.SL.PushFront(v)
	return true
}
// PushFrontBatch inserts the whole batch at the head unless the list is
// already at capacity; it reports whether the batch was accepted.
// NOTE(review): the capacity check is done once for the whole batch, so an
// accepted batch can push the length past maxSize by up to len(vs) — the
// bound is soft, same as PushFront.
func (sll *SafeListLimited) PushFrontBatch(vs []interface{}) bool {
	if sll.SL.Len() >= sll.maxSize {
		return false
	}
	sll.SL.PushFrontBatch(vs)
	return true
}
// RemoveAll clears the underlying list.
func (sll *SafeListLimited) RemoveAll() {
	sll.SL.RemoveAll()
}

// Len reports the current number of queued elements.
func (sll *SafeListLimited) Len() int {
	return sll.SL.Len()
}

View File

@@ -37,7 +37,7 @@ func (w WriterType) writeRelabel(items []*prompb.TimeSeries) []*prompb.TimeSerie
return ritems
}
func (w WriterType) Write(index int, items []*prompb.TimeSeries, headers ...map[string]string) {
func (w WriterType) Write(cluster string, index int, items []*prompb.TimeSeries, headers ...map[string]string) {
if len(items) == 0 {
return
}
@@ -49,9 +49,8 @@ func (w WriterType) Write(index int, items []*prompb.TimeSeries, headers ...map[
start := time.Now()
defer func() {
cn := config.ReaderClient.GetClusterName()
if cn != "" {
promstat.ForwardDuration.WithLabelValues(cn, fmt.Sprint(index)).Observe(time.Since(start).Seconds())
if cluster != "" {
promstat.ForwardDuration.WithLabelValues(cluster, fmt.Sprint(index)).Observe(time.Since(start).Seconds())
}
}()
@@ -130,70 +129,50 @@ func (w WriterType) Post(req []byte, headers ...map[string]string) error {
type WritersType struct {
globalOpt config.WriterGlobalOpt
backends map[string]WriterType
chans map[int]chan *prompb.TimeSeries
queues map[string]map[int]*SafeListLimited
}
func (ws *WritersType) Put(name string, writer WriterType) {
ws.backends[name] = writer
}
// PushSample Push one sample to chan, hash by ident
// @Author: quzhihao
func (ws *WritersType) PushSample(ident string, v interface{}) {
func (ws *WritersType) PushSample(ident string, v interface{}, clusters ...string) {
hashkey := crc32.ChecksumIEEE([]byte(ident)) % uint32(ws.globalOpt.QueueCount)
c, ok := ws.chans[int(hashkey)]
cluster := config.C.ClusterName
if len(clusters) > 0 {
cluster = clusters[0]
}
if _, ok := ws.queues[cluster]; !ok {
// 待写入的集群不存在
logger.Warningf("Write cluster:%s not found, v:%+v", cluster, v)
return
}
c, ok := ws.queues[cluster][int(hashkey)]
if ok {
select {
case c <- v.(*prompb.TimeSeries):
default:
logger.Warningf("Write channel(%s) full, current channel size: %d", ident, len(c))
succ := c.PushFront(v)
if !succ {
logger.Warningf("Write cluster:%s channel(%s) full, current channel size: %d", cluster, ident, c.Len())
}
}
}
// StartConsumer every ident channel has a consumer, start it
// @Author: quzhihao
func (ws *WritersType) StartConsumer(index int, ch chan *prompb.TimeSeries) {
var (
batch = ws.globalOpt.QueuePopSize
series = make([]*prompb.TimeSeries, 0, batch)
batchCounter int
)
func (ws *WritersType) StartConsumer(index int, ch *SafeListLimited, clusterName string) {
for {
select {
case item := <-ch:
// has data, no need to close
series = append(series, item)
batchCounter++
if batchCounter >= ws.globalOpt.QueuePopSize {
ws.post(index, series)
// reset
batchCounter = 0
series = make([]*prompb.TimeSeries, 0, batch)
}
case <-time.After(time.Second):
if len(series) > 0 {
ws.post(index, series)
// reset
batchCounter = 0
series = make([]*prompb.TimeSeries, 0, batch)
}
series := ch.PopBack(ws.globalOpt.QueuePopSize)
if len(series) == 0 {
time.Sleep(time.Millisecond * 400)
continue
}
}
}
// post post series to TSDB
// @Author: quzhihao
func (ws *WritersType) post(index int, series []*prompb.TimeSeries) {
header := map[string]string{"hash": fmt.Sprintf("%s-%d", config.C.Heartbeat.Endpoint, index)}
for key := range ws.backends {
go ws.backends[key].Write(index, series, header)
for key := range ws.backends {
if ws.backends[key].Opts.ClusterName != clusterName {
continue
}
go ws.backends[key].Write(clusterName, index, series)
}
}
}
@@ -207,12 +186,15 @@ var Writers = NewWriters()
func Init(opts []config.WriterOptions, globalOpt config.WriterGlobalOpt) error {
Writers.globalOpt = globalOpt
Writers.chans = make(map[int]chan *prompb.TimeSeries)
// init channels
for i := 0; i < globalOpt.QueueCount; i++ {
Writers.chans[i] = make(chan *prompb.TimeSeries, Writers.globalOpt.QueueMaxSize)
go Writers.StartConsumer(i, Writers.chans[i])
Writers.queues = make(map[string]map[int]*SafeListLimited)
for _, opt := range opts {
if _, ok := Writers.queues[opt.ClusterName]; !ok {
Writers.queues[opt.ClusterName] = make(map[int]*SafeListLimited)
for i := 0; i < globalOpt.QueueCount; i++ {
Writers.queues[opt.ClusterName][i] = NewSafeListLimited(Writers.globalOpt.QueueMaxSize)
go Writers.StartConsumer(i, Writers.queues[opt.ClusterName][i], opt.ClusterName)
}
}
}
go reportChanSize()
@@ -253,16 +235,18 @@ func Init(opts []config.WriterOptions, globalOpt config.WriterGlobalOpt) error {
}
func reportChanSize() {
clusterName := config.ReaderClient.GetClusterName()
clusterName := config.C.ClusterName
if clusterName == "" {
return
}
for {
time.Sleep(time.Second * 3)
for i, c := range Writers.chans {
size := len(c)
promstat.GaugeSampleQueueSize.WithLabelValues(clusterName, fmt.Sprint(i)).Set(float64(size))
for cluster, m := range Writers.queues {
for i, c := range m {
size := c.Len()
promstat.GaugeSampleQueueSize.WithLabelValues(cluster, fmt.Sprint(i)).Set(float64(size))
}
}
}
}

View File

@@ -20,8 +20,10 @@ type RedisConfig struct {
DB int
UseTLS bool
tls.ClientConfig
RedisType string
MasterName string
RedisType string
MasterName string
SentinelUsername string
SentinelPassword string
}
var DB *gorm.DB
@@ -87,11 +89,13 @@ func InitRedis(cfg RedisConfig) (func(), error) {
case "sentinel":
redisOptions := &redis.FailoverOptions{
MasterName: cfg.MasterName,
SentinelAddrs: strings.Split(cfg.Address, ","),
Username: cfg.Username,
Password: cfg.Password,
DB: cfg.DB,
MasterName: cfg.MasterName,
SentinelAddrs: strings.Split(cfg.Address, ","),
Username: cfg.Username,
Password: cfg.Password,
DB: cfg.DB,
SentinelUsername: cfg.SentinelUsername,
SentinelPassword: cfg.SentinelPassword,
}
if cfg.UseTLS {

View File

@@ -9,11 +9,14 @@ import (
"github.com/gin-gonic/gin"
"github.com/koding/multiconfig"
"github.com/didi/nightingale/v5/src/pkg/cas"
"github.com/didi/nightingale/v5/src/pkg/httpx"
"github.com/didi/nightingale/v5/src/pkg/ldapx"
"github.com/didi/nightingale/v5/src/pkg/logx"
"github.com/didi/nightingale/v5/src/pkg/oauth2x"
"github.com/didi/nightingale/v5/src/pkg/oidcc"
"github.com/didi/nightingale/v5/src/pkg/ormx"
"github.com/didi/nightingale/v5/src/pkg/secu"
"github.com/didi/nightingale/v5/src/pkg/tls"
"github.com/didi/nightingale/v5/src/storage"
)
@@ -23,7 +26,40 @@ var (
once sync.Once
)
func MustLoad(fpaths ...string) {
// DealConfigCrypto decrypts, in place, every encrypted credential held in the
// global config C: the db DSN, the redis password, the ibex basic-auth
// password, and each cluster's basic-auth password. Any decryption failure is
// fatal: the process prints a message and exits, since the service cannot run
// with unreadable credentials.
func DealConfigCrypto(key string) {
	// decrypt returns the plaintext of cipher, or prints failMsg and exits.
	decrypt := func(cipher, failMsg string) string {
		plain, err := secu.DealWithDecrypt(cipher, key)
		if err != nil {
			fmt.Println(failMsg, err)
			os.Exit(1)
		}
		return plain
	}

	C.DB.DSN = decrypt(C.DB.DSN, "failed to decrypt the db dsn")
	C.Redis.Password = decrypt(C.Redis.Password, "failed to decrypt the redis password")
	C.Ibex.BasicAuthPass = decrypt(C.Ibex.BasicAuthPass, "failed to decrypt the ibex password")

	// clusters report failures with a different message format, so handle inline
	for i := range C.Clusters {
		plain, err := secu.DealWithDecrypt(C.Clusters[i].BasicAuthPass, key)
		if err != nil {
			fmt.Printf("failed to decrypt the clusters password: %s , error: %s", C.Clusters[i].BasicAuthPass, err.Error())
			os.Exit(1)
		}
		C.Clusters[i].BasicAuthPass = plain
	}
}
func MustLoad(key string, fpaths ...string) {
once.Do(func() {
loaders := []multiconfig.Loader{
&multiconfig.TagLoader{},
@@ -63,6 +99,8 @@ func MustLoad(fpaths ...string) {
m.MustLoad(C)
DealConfigCrypto(key)
if !strings.HasPrefix(C.Ibex.Address, "http") {
C.Ibex.Address = "http://" + C.Ibex.Address
}
@@ -99,6 +137,8 @@ type Config struct {
Clusters []ClusterOptions
Ibex Ibex
OIDC oidcc.Config
CAS cas.Config
OAuth oauth2x.Config
TargetMetrics map[string]string
}

View File

@@ -134,8 +134,14 @@ func configRoute(r *gin.Engine, version string) {
pages.POST("/auth/logout", jwtMock(), logoutPost)
pages.POST("/auth/refresh", jwtMock(), refreshPost)
pages.GET("/auth/sso-config", ssoConfigGet)
pages.GET("/auth/redirect", loginRedirect)
pages.GET("/auth/redirect/cas", loginRedirectCas)
pages.GET("/auth/redirect/oauth", loginRedirectOAuth)
pages.GET("/auth/callback", loginCallback)
pages.GET("/auth/callback/cas", loginCallbackCas)
pages.GET("/auth/callback/oauth", loginCallbackOAuth)
pages.GET("/auth/perms", allPerms)
pages.GET("/metrics/desc", metricsDescGetFile)
pages.POST("/metrics/desc", metricsDescGetMap)
@@ -296,6 +302,8 @@ func configRoute(r *gin.Engine, version string) {
pages.GET("/servers", auth(), admin(), serversGet)
pages.PUT("/server/:id", auth(), admin(), serverBindCluster)
pages.POST("/servers", auth(), admin(), serverAddCluster)
pages.DELETE("/servers", auth(), admin(), serverDelCluster)
}
service := r.Group("/v1/n9e")
@@ -305,6 +313,7 @@ func configRoute(r *gin.Engine, version string) {
{
service.Any("/prometheus/*url", prometheusProxy)
service.POST("/users", userAddPost)
service.GET("/users", userFindAll)
service.GET("/targets", targetGets)
service.GET("/targets/tags", targetGetTags)
@@ -331,5 +340,8 @@ func configRoute(r *gin.Engine, version string) {
service.PUT("/configs", configsPut)
service.POST("/configs", configsPost)
service.DELETE("/configs", configsDel)
service.POST("/conf-prop/encrypt", confPropEncrypt)
service.POST("/conf-prop/decrypt", confPropDecrypt)
}
}

View File

@@ -144,7 +144,14 @@ func boardPutConfigs(c *gin.Context) {
ginx.BindJSON(c, &f)
me := c.MustGet("user").(*models.User)
bo := Board(ginx.UrlParamInt64(c, "bid"))
bid := ginx.UrlParamStr(c, "bid")
bo, err := models.BoardGet("id = ? or ident = ?", bid, bid)
ginx.Dangerous(err)
if bo == nil {
ginx.Bomb(http.StatusNotFound, "No such dashboard")
}
// check permission
bgrwCheck(c, bo.GroupId)

View File

@@ -0,0 +1,62 @@
package router
import (
"github.com/didi/nightingale/v5/src/pkg/secu"
"github.com/gin-gonic/gin"
"github.com/toolkits/pkg/ginx"
)
// confPropCrypto is the request body shared by the config-property
// encrypt/decrypt endpoints.
type confPropCrypto struct {
	Data string `json:"data" binding:"required"` // plaintext or ciphertext to transform
	Key  string `json:"key" binding:"required"`  // AES key; handlers require length 16, 24 or 32
}
// confPropEncrypt encrypts f.Data with f.Key and returns the ciphertext
// alongside the inputs. The key must be 16, 24 or 32 bytes long (the valid
// AES-128/192/256 key sizes).
func confPropEncrypt(c *gin.Context) {
	var f confPropCrypto
	ginx.BindJSON(c, &f)

	switch len(f.Key) {
	case 16, 24, 32:
		// valid AES key lengths
	default:
		c.String(400, "The key length should be 16, 24 or 32")
		return
	}

	s, err := secu.DealWithEncrypt(f.Data, f.Key)
	if err != nil {
		c.String(500, err.Error())
		// bug fix: stop here — previously the JSON success body below was
		// appended to the 500 error response
		return
	}

	c.JSON(200, gin.H{
		"src":     f.Data,
		"key":     f.Key,
		"encrypt": s,
	})
}
// confPropDecrypt decrypts f.Data with f.Key and returns the plaintext
// alongside the inputs. The key must be 16, 24 or 32 bytes long (the valid
// AES-128/192/256 key sizes).
func confPropDecrypt(c *gin.Context) {
	var f confPropCrypto
	ginx.BindJSON(c, &f)

	switch len(f.Key) {
	case 16, 24, 32:
		// valid AES key lengths
	default:
		c.String(400, "The key length should be 16, 24 or 32")
		return
	}

	s, err := secu.DealWithDecrypt(f.Data, f.Key)
	if err != nil {
		c.String(500, err.Error())
		// bug fix: stop here — previously the JSON success body below was
		// appended to the 500 error response
		return
	}

	c.JSON(200, gin.H{
		"src":     f.Data,
		"key":     f.Key,
		"decrypt": s,
	})
}

View File

@@ -13,6 +13,8 @@ import (
"github.com/toolkits/pkg/logger"
"github.com/didi/nightingale/v5/src/models"
"github.com/didi/nightingale/v5/src/pkg/cas"
"github.com/didi/nightingale/v5/src/pkg/oauth2x"
"github.com/didi/nightingale/v5/src/pkg/oidcc"
"github.com/didi/nightingale/v5/src/webapi/config"
)
@@ -212,11 +214,19 @@ func loginCallback(c *gin.Context) {
if user != nil {
if config.C.OIDC.CoverAttributes {
user.Nickname = ret.Nickname
user.Email = ret.Email
user.Phone = ret.Phone
user.UpdateAt = time.Now().Unix()
if ret.Nickname != "" {
user.Nickname = ret.Nickname
}
if ret.Email != "" {
user.Email = ret.Email
}
if ret.Phone != "" {
user.Phone = ret.Phone
}
user.UpdateAt = time.Now().Unix()
user.Update("email", "nickname", "phone", "update_at")
}
} else {
@@ -259,3 +269,229 @@ func loginCallback(c *gin.Context) {
RefreshToken: ts.RefreshToken,
}, nil)
}
// RedirectOutput carries the SSO authorize URL the browser should be sent to,
// plus the opaque state token used to correlate the subsequent callback.
type RedirectOutput struct {
	Redirect string `json:"redirect"`
	State    string `json:"state"`
}
// loginRedirectCas returns the CAS authorize URL (and state token) the
// frontend should redirect to. If the caller already holds a valid session,
// it short-circuits and returns the originally requested redirect target.
func loginRedirectCas(c *gin.Context) {
	redirect := ginx.QueryStr(c, "redirect", "/")

	// If auth middleware injected a userid, the session may already be
	// established; in that case no CAS round-trip is needed.
	v, exists := c.Get("userid")
	if exists {
		userid := v.(int64)
		user, err := models.UserGetById(userid)
		ginx.Dangerous(err)
		if user == nil {
			ginx.Bomb(200, "user not found")
		}

		if user.Username != "" { // already login
			ginx.NewRender(c).Data(redirect, nil)
			return
		}
	}

	if !config.C.CAS.Enable {
		logger.Error("cas is not enable")
		ginx.NewRender(c).Data("", nil)
		return
	}

	// Exchange the desired post-login target for the CAS authorize URL and a
	// state token; redirect is reassigned to the CAS URL here.
	redirect, state, err := cas.Authorize(redirect)

	ginx.Dangerous(err)
	ginx.NewRender(c).Data(RedirectOutput{
		Redirect: redirect,
		State:    state,
	}, err)
}
// loginCallbackCas completes a CAS login. It validates the service ticket,
// creates or updates the matching local user record, mints JWT tokens,
// persists the session, and tells the frontend where to navigate next.
func loginCallbackCas(c *gin.Context) {
	ticket := ginx.QueryStr(c, "ticket", "")
	state := ginx.QueryStr(c, "state", "")

	// Validate the ticket against the CAS server; state ties this callback
	// back to the earlier redirect request.
	ret, err := cas.ValidateServiceTicket(c.Request.Context(), ticket, state)
	if err != nil {
		logger.Errorf("ValidateServiceTicket: %s", err)
		ginx.NewRender(c).Data("", err)
		return
	}

	user, err := models.UserGet("username=?", ret.Username)
	if err != nil {
		logger.Errorf("UserGet: %s", err)
	}
	ginx.Dangerous(err)

	if user != nil {
		// Existing user: optionally refresh profile attributes from CAS, but
		// never overwrite a local value with an empty one.
		if config.C.CAS.CoverAttributes {
			if ret.Nickname != "" {
				user.Nickname = ret.Nickname
			}

			if ret.Email != "" {
				user.Email = ret.Email
			}

			if ret.Phone != "" {
				user.Phone = ret.Phone
			}

			user.UpdateAt = time.Now().Unix()
			ginx.Dangerous(user.Update("email", "nickname", "phone", "update_at"))
		}
	} else {
		// First login: provision a local account with the configured default
		// roles. Password is a placeholder; auth is delegated to CAS.
		now := time.Now().Unix()
		user = &models.User{
			Username: ret.Username,
			Password: "******",
			Nickname: ret.Nickname,
			Portrait: "",
			Roles:    strings.Join(config.C.CAS.DefaultRoles, " "),
			RolesLst: config.C.CAS.DefaultRoles,
			Contacts: []byte("{}"),
			Phone:    ret.Phone,
			Email:    ret.Email,
			CreateAt: now,
			UpdateAt: now,
			CreateBy: "CAS",
			UpdateBy: "CAS",
		}

		// create user from cas
		ginx.Dangerous(user.Add())
	}

	// set user login state: mint access/refresh tokens and persist the session
	userIdentity := fmt.Sprintf("%d-%s", user.Id, user.Username)

	ts, err := createTokens(config.C.JWTAuth.SigningKey, userIdentity)
	if err != nil {
		logger.Errorf("createTokens: %s", err)
	}
	ginx.Dangerous(err)
	ginx.Dangerous(createAuth(c.Request.Context(), userIdentity, ts))

	// never bounce the user back to the login page itself
	redirect := "/"
	if ret.Redirect != "/login" {
		redirect = ret.Redirect
	}

	ginx.NewRender(c).Data(CallbackOutput{
		Redirect:     redirect,
		User:         user,
		AccessToken:  ts.AccessToken,
		RefreshToken: ts.RefreshToken,
	}, nil)
}
// loginRedirectOAuth returns the OAuth2 authorize URL the frontend should
// redirect the browser to. If the caller already holds a valid session, it
// short-circuits and returns the originally requested redirect target.
func loginRedirectOAuth(c *gin.Context) {
	redirect := ginx.QueryStr(c, "redirect", "/")

	// If auth middleware injected a userid, the session may already be
	// established; in that case no OAuth round-trip is needed.
	v, exists := c.Get("userid")
	if exists {
		userid := v.(int64)
		user, err := models.UserGetById(userid)
		ginx.Dangerous(err)
		if user == nil {
			ginx.Bomb(200, "user not found")
		}

		if user.Username != "" { // already login
			ginx.NewRender(c).Data(redirect, nil)
			return
		}
	}

	if !config.C.OAuth.Enable {
		// consistency fix: log like the CAS variant does when the requested
		// SSO backend is disabled, instead of returning silently
		logger.Error("oauth is not enable")
		ginx.NewRender(c).Data("", nil)
		return
	}

	// Exchange the desired post-login target for the OAuth2 authorize URL.
	redirect, err := oauth2x.Authorize(redirect)
	ginx.Dangerous(err)
	ginx.NewRender(c).Data(redirect, err)
}
// loginCallbackOAuth completes an OAuth2 login. It exchanges the code for
// user info, creates or updates the matching local user record, mints JWT
// tokens, persists the session, and tells the frontend where to navigate next.
func loginCallbackOAuth(c *gin.Context) {
	code := ginx.QueryStr(c, "code", "")
	state := ginx.QueryStr(c, "state", "")

	ret, err := oauth2x.Callback(c.Request.Context(), code, state)
	if err != nil {
		logger.Debugf("sso.callback() get ret %+v error %v", ret, err)
		ginx.NewRender(c).Data(CallbackOutput{}, err)
		return
	}

	user, err := models.UserGet("username=?", ret.Username)
	ginx.Dangerous(err)

	if user != nil {
		// Existing user: optionally refresh profile attributes from the IdP,
		// but never overwrite a local value with an empty one.
		if config.C.OAuth.CoverAttributes {
			if ret.Nickname != "" {
				user.Nickname = ret.Nickname
			}

			if ret.Email != "" {
				user.Email = ret.Email
			}

			if ret.Phone != "" {
				user.Phone = ret.Phone
			}

			user.UpdateAt = time.Now().Unix()
			// consistency fix: surface update failures like the CAS callback
			// does, instead of silently dropping the error
			ginx.Dangerous(user.Update("email", "nickname", "phone", "update_at"))
		}
	} else {
		// First login: provision a local account with the configured default
		// roles. Password is a placeholder; auth is delegated to the IdP.
		now := time.Now().Unix()
		user = &models.User{
			Username: ret.Username,
			Password: "******",
			Nickname: ret.Nickname,
			Phone:    ret.Phone,
			Email:    ret.Email,
			Portrait: "",
			Roles:    strings.Join(config.C.OAuth.DefaultRoles, " "),
			RolesLst: config.C.OAuth.DefaultRoles,
			Contacts: []byte("{}"),
			CreateAt: now,
			UpdateAt: now,
			CreateBy: "oauth2",
			UpdateBy: "oauth2",
		}

		// create user from oauth2
		ginx.Dangerous(user.Add())
	}

	// set user login state: mint access/refresh tokens and persist the session
	userIdentity := fmt.Sprintf("%d-%s", user.Id, user.Username)

	ts, err := createTokens(config.C.JWTAuth.SigningKey, userIdentity)
	ginx.Dangerous(err)
	ginx.Dangerous(createAuth(c.Request.Context(), userIdentity, ts))

	// never bounce the user back to the login page itself
	redirect := "/"
	if ret.Redirect != "/login" {
		redirect = ret.Redirect
	}

	ginx.NewRender(c).Data(CallbackOutput{
		Redirect:     redirect,
		User:         user,
		AccessToken:  ts.AccessToken,
		RefreshToken: ts.RefreshToken,
	}, nil)
}
// SsoConfigOutput lists the display names of each configured SSO backend;
// an empty name indicates that backend is not configured.
type SsoConfigOutput struct {
	OidcDisplayName  string `json:"oidcDisplayName"`
	CasDisplayName   string `json:"casDisplayName"`
	OauthDisplayName string `json:"oauthDisplayName"`
}

// ssoConfigGet reports which SSO providers are available, for the login page.
func ssoConfigGet(c *gin.Context) {
	out := SsoConfigOutput{
		OidcDisplayName:  oidcc.GetDisplayName(),
		CasDisplayName:   cas.GetDisplayName(),
		OauthDisplayName: oauth2x.GetDisplayName(),
	}

	ginx.NewRender(c).Data(out, nil)
}

View File

@@ -19,3 +19,18 @@ func permsGets(c *gin.Context) {
lst, err := models.OperationsOfRole(strings.Fields(user.Roles))
ginx.NewRender(c).Data(lst, err)
}
// allPerms returns a map of role name -> operations granted to that role,
// covering every role in the system.
func allPerms(c *gin.Context) {
	roles, err := models.RoleGetsAll()
	ginx.Dangerous(err)

	m := make(map[string][]string, len(roles))
	for _, r := range roles {
		lst, err := models.OperationsOfRole(strings.Fields(r.Name))
		if err != nil {
			// best-effort: skip roles whose operations cannot be loaded
			continue
		}
		m[r.Name] = lst
	}

	// fix: the original passed the outer err here, which Dangerous already
	// guaranteed to be nil — pass nil explicitly to avoid implying otherwise
	ginx.NewRender(c).Data(m, nil)
}

View File

@@ -13,7 +13,8 @@ func serversGet(c *gin.Context) {
}
type serverBindClusterForm struct {
Cluster string `json:"cluster"`
Cluster string `json:"cluster"`
Instance string `json:"instance"`
}
// 用户为某个 n9e-server 分配一个集群也可以清空设置cluster为空字符串即可
@@ -33,3 +34,18 @@ func serverBindCluster(c *gin.Context) {
ginx.NewRender(c).Message(ae.UpdateCluster(f.Cluster))
}
// serverAddCluster registers an alerting-engine instance and binds it to the
// given cluster.
func serverAddCluster(c *gin.Context) {
	var form serverBindClusterForm
	ginx.BindJSON(c, &form)
	ginx.NewRender(c).Message(models.AlertingEngineAdd(form.Instance, form.Cluster))
}
// serverDelCluster removes the alerting-engine records whose ids are posted
// in the request body.
func serverDelCluster(c *gin.Context) {
	var form idsForm
	ginx.BindJSON(c, &form)
	form.Verify()
	ginx.NewRender(c).Message(models.AlertingEngineDel(form.Ids))
}

View File

@@ -11,6 +11,22 @@ import (
"github.com/didi/nightingale/v5/src/pkg/ormx"
)
// userFindAll returns a paginated list of users filtered by an optional
// query string, together with the total match count.
func userFindAll(c *gin.Context) {
	pageSize := ginx.QueryInt(c, "limit", 20)
	q := ginx.QueryStr(c, "query", "")

	total, err := models.UserTotal(q)
	ginx.Dangerous(err)

	users, err := models.UserGets(q, pageSize, ginx.Offset(c, pageSize))
	ginx.Dangerous(err)

	ginx.NewRender(c).Data(gin.H{
		"list":  users,
		"total": total,
	}, nil)
}
func userGets(c *gin.Context) {
limit := ginx.QueryInt(c, "limit", 20)
query := ginx.QueryStr(c, "query", "")

View File

@@ -10,9 +10,11 @@ import (
"github.com/toolkits/pkg/i18n"
"github.com/didi/nightingale/v5/src/models"
"github.com/didi/nightingale/v5/src/pkg/cas"
"github.com/didi/nightingale/v5/src/pkg/httpx"
"github.com/didi/nightingale/v5/src/pkg/ldapx"
"github.com/didi/nightingale/v5/src/pkg/logx"
"github.com/didi/nightingale/v5/src/pkg/oauth2x"
"github.com/didi/nightingale/v5/src/pkg/oidcc"
"github.com/didi/nightingale/v5/src/storage"
"github.com/didi/nightingale/v5/src/webapi/config"
@@ -24,6 +26,7 @@ import (
type Webapi struct {
ConfigFile string
Version string
Key string
}
type WebapiOption func(*Webapi)
@@ -40,6 +43,12 @@ func SetVersion(v string) WebapiOption {
}
}
func SetKey(k string) WebapiOption {
return func(s *Webapi) {
s.Key = k
}
}
// Run run webapi
func Run(opts ...WebapiOption) {
code := 1
@@ -83,7 +92,7 @@ EXIT:
func (a Webapi) initialize() (func(), error) {
// parse config file
config.MustLoad(a.ConfigFile)
config.MustLoad(a.Key, a.ConfigFile)
// init i18n
i18n.Init(config.C.I18N)
@@ -94,6 +103,12 @@ func (a Webapi) initialize() (func(), error) {
// init oidc
oidcc.Init(config.C.OIDC)
// init cas
cas.Init(config.C.CAS)
// init oauth
oauth2x.Init(config.C.OAuth)
// init logger
loggerClean, err := logx.Init(config.C.Log)
if err != nil {