Compare commits

..

75 Commits

Author SHA1 Message Date
ning
df9ba52e71 network bg label migrate 2024-11-08 10:32:19 +08:00
ning
ba6d2b664d fix build 2024-11-05 20:14:13 +08:00
Yening Qin
2ddcf507f9 update td query (#2267) 2024-11-05 20:01:11 +08:00
Yening Qin
17cc588a9d refactor: update pushgw append label (#2263) 2024-11-04 18:06:15 +08:00
Yening Qin
e9c7eef546 optimize tdentine (#2261) 2024-11-04 17:32:46 +08:00
Yening Qin
7451ad2e23 update default engine name (#2244) 2024-10-28 15:07:17 +08:00
ulricqin
31b3434e87 Update README.md 2024-10-22 14:19:33 +08:00
ning
2576a0f815 fix: edge get all configs 2024-10-21 19:30:13 +08:00
ning
0ac4bc7421 docs: update linux dashboard tpl 2024-10-21 18:07:52 +08:00
ning
95e6ea98f4 refactor: prom client query api add retry 2024-10-21 17:57:31 +08:00
ning
dc60c74c0d docs: update automq dashboard tpl 2024-10-21 16:50:36 +08:00
ning
a15adc196d docs: update linux dashboard tpl 2024-10-21 16:35:53 +08:00
ning
f89ef04e85 refactor: optimize code robustness 2024-10-21 14:54:48 +08:00
Yening Qin
f55cd9b32e feat: config access log in web (#2227) 2024-10-21 12:11:19 +08:00
Xu Bin
305a898f8b feat: alert recover ckeck (#2226) 2024-10-21 12:07:54 +08:00
Yening Qin
60c31d8eb2 feat: support query set opration (#2225) 2024-10-20 21:18:12 +08:00
ning
7da49a8c68 refactor: update go.mod 2024-10-20 14:04:31 +08:00
flashbo
65b1410b09 refactor: support output logs to one file (#2209) 2024-10-20 14:02:44 +08:00
ning
3901671c0e docs: update n9e.sql 2024-10-18 15:24:33 +08:00
Xu Bin
9c02937e81 refactor: alert mute retain (#2223) 2024-10-18 12:08:31 +08:00
flashbo
0a255ee33a fix: unbind bgids when delete target (#2219) 2024-10-16 10:00:08 +08:00
Xu Bin
8dc198b4b1 fix: smtp update (#2213) 2024-10-12 11:37:14 +08:00
Yening Qin
9696f63a71 rename tpl name 2024-10-11 16:23:57 +08:00
Xu Bin
03f56f73b4 feat: ldap support multi basecn (#2198) 2024-10-08 16:06:21 +08:00
Ulric Qin
7b415c91af update qrcode 2024-10-08 15:40:34 +08:00
flashbo
2abf089444 feat: rule list add user nickname (#2201) 2024-10-08 15:25:25 +08:00
mt
e504dab359 fix: update router_alert_cur_event.go (#2210) 2024-10-03 00:27:31 +08:00
710leo
989ed62e8d refactor: update GetAnomalyPoint 2024-09-29 19:34:25 +08:00
nl594
b7197d10eb docs: add new ipmi dashboard (#2204)
* add new ipmi dashboard

* Update IPMI_by_prometheus.json

---------

Co-authored-by: niulong <niulong@xylink.com>
Co-authored-by: Yening Qin <710leo@gmail.com>
2024-09-29 13:24:56 +08:00
Xu Bin
f4de256388 refactor: target delete hook (#2202) 2024-09-27 15:43:57 +08:00
Xu Bin
3f5126923f feat: get build payload by UUID (#2203) 2024-09-27 15:43:18 +08:00
flashbo
5d3e70bc4c refactor: datasouce support force save (#2200) 2024-09-27 14:40:48 +08:00
710leo
bb2c5202ad Merge branch 'main' of github.com:ccfos/nightingale 2024-09-27 14:26:48 +08:00
710leo
3acf3d7bf9 refactor: migrate target bg 2024-09-27 14:26:35 +08:00
shardingHe
a79810b15d add deployment & statefulset dashboard (#2196)
Co-authored-by: shardingHe <wangzihe@flashcat.cloud>
2024-09-26 17:38:47 +08:00
710leo
f61cb532f8 Merge branch 'main' of github.com:ccfos/nightingale 2024-09-26 15:49:28 +08:00
710leo
34a5a752f4 refactor: update aconf check 2024-09-26 15:49:14 +08:00
Ulric Qin
9be3deeebd update wechat qrcode 2024-09-26 10:14:16 +08:00
710leo
2ceed84120 fix: host alert host filter by busigroup 2024-09-25 15:07:54 +08:00
710leo
8fbe257090 docs: update i18n 2024-09-24 16:27:51 +08:00
710leo
ae35d780c6 refactor: update busigroup del api 2024-09-24 15:49:14 +08:00
710leo
4d2cdfce53 optimize target fill group 2024-09-24 15:29:51 +08:00
710leo
a0e4d0d46e refactor: target bind api 2024-09-24 15:20:21 +08:00
710leo
dd07d04e2f refactor: update target api 2024-09-24 14:37:27 +08:00
710leo
61203e8b75 feat: add boards api 2024-09-24 10:27:43 +08:00
710leo
f24bc53c94 refactor: update target bind group api 2024-09-23 13:13:09 +08:00
710leo
ef6abe3fdc refactor: update target bind api 2024-09-22 23:00:32 +08:00
710leo
461361d3d0 fix: heartbeat api auth check for n9e-edge 2024-09-22 21:05:31 +08:00
710leo
52b3afbd97 fix: recovery event tags map split 2024-09-22 19:14:24 +08:00
710leo
652439bb85 Merge branch 'main' of github.com:ccfos/nightingale 2024-09-22 00:33:40 +08:00
710leo
6f0c13d4e7 fix: edge target cache 2024-09-22 00:33:28 +08:00
ulricqin
c9f46bad02 Remove duplicate fields UseTLS 2024-09-21 20:44:02 +08:00
710leo
75146f3626 docs: add target_busi_group sql 2024-09-20 18:14:15 +08:00
710leo
50aafbd73d refactor: update target query 2024-09-20 18:09:07 +08:00
710leo
b975cb3c9d refactor: update append_labels 2024-09-20 16:24:47 +08:00
flashbo
11deb4ba26 feat: host bind muti group (#2185) 2024-09-19 20:32:08 +08:00
flashbo
ec927297d6 feat:support query alert event by rule id (#2179) 2024-09-19 11:04:14 +08:00
Yening Qin
f476d7cd63 fix: incorrect content in feishucard when sending a large number of messages (#2180) 2024-09-18 18:00:13 +08:00
ulricqin
410f3bbceb Update README.md wechat qrcode 2024-09-18 08:13:42 +08:00
cui fliter
2ad53d6862 refactor: make uids in NotifyTarget (#2169) 2024-09-13 19:26:18 +08:00
710leo
fc392e4af1 docs: update linux metrics tpl 2024-09-13 19:10:33 +08:00
fangpsh
9c83c7881a docs: update oom_kill alert rule tpl (#2170)
Co-authored-by: fangpsh <fangpsh@zego.im>
2024-09-13 19:07:08 +08:00
flashbo
f1259d1dff refactor: alert rule callback url dedup (#2165) 2024-09-13 16:24:04 +08:00
Yening Qin
d9d59b3205 fix: recording rule update (#2168) 2024-09-13 16:20:48 +08:00
Ulric Qin
d11cfb0278 Merge branch 'main' of github.com:ccfos/nightingale 2024-09-09 11:49:37 +08:00
Ulric Qin
5adcfc6eaa update README 2024-09-09 11:49:26 +08:00
710leo
037152ad72 refactor: update alert-cur-event api 2024-09-03 18:17:28 +08:00
Ulric Qin
2de304d4f2 move sqlite.sql to docker dir 2024-09-03 17:51:35 +08:00
Ulric Qin
03c56d048f modify column trigger_value to text 2024-09-03 17:50:13 +08:00
Ulric Qin
1cddb4eca0 Merge branch 'main' of github.com:ccfos/nightingale 2024-09-03 17:46:57 +08:00
Ulric Qin
2dc033944d bugfix InitBuiltinPayloads 2024-09-03 17:46:43 +08:00
flashbo
63e6c78e71 feat: targets support multi idents query (#2119) 2024-09-03 15:32:05 +08:00
Ulric Qin
e1f04eebe7 update README 2024-08-30 17:58:49 +08:00
Yening Qin
ce17e09f66 feat: notify event add target info (#2137)
* fix: tpl center update (#2125)
* put target into alert cur event (#2128)

---------

Co-authored-by: Xu Bin <140785332+Reditiny@users.noreply.github.com>
Co-authored-by: flashbo <36443248+lwb0214@users.noreply.github.com>
2024-08-30 16:28:31 +08:00
710leo
c98c1d3b90 docs: update sql 2024-08-30 16:19:50 +08:00
124 changed files with 5772 additions and 4606 deletions

View File

@@ -88,12 +88,13 @@
- 报告Bug优先推荐提交[夜莺GitHub Issue](https://github.com/ccfos/nightingale/issues/new?assignees=&labels=kind%2Fbug&projects=&template=bug_report.yml)
- 推荐完整浏览[夜莺文档站点](https://flashcat.cloud/docs/content/flashcat-monitor/nightingale-v7/introduction/),了解更多信息
- 推荐搜索关注夜莺公众号,第一时间获取社区动态:`夜莺监控Nightingale`
- 日常问题交流推荐加入[知识星球](https://download.flashcat.cloud/ulric/20240319095409.png),也可以加我微信 `picobyte`,备注:`夜莺加群-<公司>-<姓名>` 拉入微信群,不过研发人员主要是关注 github issue 和星球,微信群关注较少
- 日常问题交流
- QQ群730841964
- [加入微信群](https://download.flashcat.cloud/ulric/20241022141621.png),如果二维码过期了,可以联系我(我的微信:`picobyte`)拉群,备注: `夜莺互助群`
## 广受关注
[![Stargazers over time](https://api.star-history.com/svg?repos=ccfos/nightingale&type=Date)](https://star-history.com/#ccfos/nightingale&Date)
## 社区共建
- ❇️ 请阅读浏览[夜莺开源项目和社区治理架构草案](./doc/community-governance.md),真诚欢迎每一位用户、开发者、公司以及组织,使用夜莺监控、积极反馈 Bug、提交功能需求、分享最佳实践共建专业、活跃的夜莺开源社区。
- ❤️ 夜莺贡献者

View File

@@ -60,10 +60,6 @@ func (a *Alert) PreCheck(configDir string) {
a.Heartbeat.Interval = 1000
}
if a.Heartbeat.EngineName == "" {
a.Heartbeat.EngineName = "default"
}
if a.EngineDelay == 0 {
a.EngineDelay = 30
}

View File

@@ -16,7 +16,6 @@ import (
"github.com/ccfos/nightingale/v6/alert/sender"
"github.com/ccfos/nightingale/v6/conf"
"github.com/ccfos/nightingale/v6/dumper"
"github.com/ccfos/nightingale/v6/ibex"
"github.com/ccfos/nightingale/v6/memsto"
"github.com/ccfos/nightingale/v6/models"
"github.com/ccfos/nightingale/v6/pkg/ctx"
@@ -27,6 +26,8 @@ import (
"github.com/ccfos/nightingale/v6/pushgw/writer"
"github.com/ccfos/nightingale/v6/storage"
"github.com/ccfos/nightingale/v6/tdengine"
"github.com/flashcatcloud/ibex/src/cmd/ibex"
)
func Initialize(configDir string, cryptoKey string) (func(), error) {
@@ -40,14 +41,14 @@ func Initialize(configDir string, cryptoKey string) (func(), error) {
return nil, err
}
ctx := ctx.NewContext(context.Background(), nil, false, config.CenterApi)
var redis storage.Redis
redis, err = storage.NewRedis(config.Redis)
if err != nil {
return nil, err
}
ctx := ctx.NewContext(context.Background(), nil, redis, false, config.CenterApi)
syncStats := memsto.NewSyncStats()
alertStats := astats.NewSyncStats()
@@ -61,6 +62,7 @@ func Initialize(configDir string, cryptoKey string) (func(), error) {
userCache := memsto.NewUserCache(ctx, syncStats)
userGroupCache := memsto.NewUserGroupCache(ctx, syncStats)
taskTplsCache := memsto.NewTaskTplCache(ctx)
configCvalCache := memsto.NewCvalCache(ctx, syncStats)
promClients := prom.NewPromClient(ctx)
tdengineClients := tdengine.NewTdengineClient(ctx, config.Alert.Heartbeat)
@@ -69,17 +71,18 @@ func Initialize(configDir string, cryptoKey string) (func(), error) {
Start(config.Alert, config.Pushgw, syncStats, alertStats, externalProcessors, targetCache, busiGroupCache, alertMuteCache, alertRuleCache, notifyConfigCache, taskTplsCache, dsCache, ctx, promClients, tdengineClients, userCache, userGroupCache)
r := httpx.GinEngine(config.Global.RunMode, config.HTTP)
r := httpx.GinEngine(config.Global.RunMode, config.HTTP,
configCvalCache.PrintBodyPaths, configCvalCache.PrintAccessLog)
rt := router.New(config.HTTP, config.Alert, alertMuteCache, targetCache, busiGroupCache, alertStats, ctx, externalProcessors)
if config.Ibex.Enable {
ibex.ServerStart(ctx, false, nil, redis, config.HTTP.APIForService.BasicAuth, config.Alert.Heartbeat, &config.CenterApi, r, nil, config.Ibex, config.HTTP.Port)
ibex.ServerStart(false, nil, redis, config.HTTP.APIForService.BasicAuth, config.Alert.Heartbeat, &config.CenterApi, r, nil, config.Ibex, config.HTTP.Port)
}
rt.Config(r)
dumper.ConfigRouter(r)
httpClean := httpx.Init(config.HTTP, context.Background(), r)
httpClean := httpx.Init(config.HTTP, r)
return func() {
logxClean()

View File

@@ -5,18 +5,20 @@ import (
"math"
"strings"
"github.com/ccfos/nightingale/v6/models"
"github.com/prometheus/common/model"
)
type AnomalyPoint struct {
Key string `json:"key"`
Labels model.Metric `json:"labels"`
Timestamp int64 `json:"timestamp"`
Value float64 `json:"value"`
Severity int `json:"severity"`
Triggered bool `json:"triggered"`
Query string `json:"query"`
Values string `json:"values"`
Key string `json:"key"`
Labels model.Metric `json:"labels"`
Timestamp int64 `json:"timestamp"`
Value float64 `json:"value"`
Severity int `json:"severity"`
Triggered bool `json:"triggered"`
Query string `json:"query"`
Values string `json:"values"`
RecoverConfig models.RecoverConfig `json:"recover_config"`
}
func NewAnomalyPoint(key string, labels map[string]string, ts int64, value float64, severity int) AnomalyPoint {

View File

@@ -286,6 +286,7 @@ func (e *Dispatch) SendCallbacks(rule *models.AlertRule, notifyTarget *NotifyTar
uids := notifyTarget.ToUidList()
urls := notifyTarget.ToCallbackList()
whMap := notifyTarget.ToWebhookMap()
for _, urlStr := range urls {
if len(urlStr) == 0 {
continue
@@ -293,6 +294,11 @@ func (e *Dispatch) SendCallbacks(rule *models.AlertRule, notifyTarget *NotifyTar
cbCtx := sender.BuildCallBackContext(e.ctx, urlStr, rule, []*models.AlertCurEvent{event}, uids, e.userCache, e.alerting.WebhookBatchSend, e.Astats)
if wh, ok := whMap[cbCtx.CallBackURL]; ok && wh.Enable {
logger.Debugf("SendCallbacks: webhook[%s] is in global conf.", cbCtx.CallBackURL)
continue
}
if strings.HasPrefix(urlStr, "${ibex}") {
e.CallBacks[models.IbexDomain].CallBack(cbCtx)
continue

View File

@@ -100,8 +100,32 @@ func (s *NotifyTarget) ToWebhookList() []*models.Webhook {
return webhooks
}
func (s *NotifyTarget) ToWebhookMap() map[string]*models.Webhook {
webhookMap := make(map[string]*models.Webhook, len(s.webhooks))
for _, wh := range s.webhooks {
if wh.Batch == 0 {
wh.Batch = 1000
}
if wh.Timeout == 0 {
wh.Timeout = 10
}
if wh.RetryCount == 0 {
wh.RetryCount = 10
}
if wh.RetryInterval == 0 {
wh.RetryInterval = 10
}
webhookMap[wh.Url] = wh
}
return webhookMap
}
func (s *NotifyTarget) ToUidList() []int64 {
uids := make([]int64, len(s.userMap))
uids := make([]int64, 0, len(s.userMap))
for uid, _ := range s.userMap {
uids = append(uids, uid)
}

View File

@@ -5,6 +5,7 @@ import (
"encoding/json"
"fmt"
"math"
"reflect"
"sort"
"strings"
"time"
@@ -160,13 +161,13 @@ func (arw *AlertRuleWorker) Eval() {
now := time.Now().Unix()
for _, point := range pointsMap {
str := fmt.Sprintf("%v", point.Value)
arw.processor.RecoverSingle(process.Hash(cachedRule.Id, arw.processor.DatasourceId(), point), now, &str)
arw.processor.RecoverSingle(true, process.Hash(cachedRule.Id, arw.processor.DatasourceId(), point), now, &str)
}
} else {
now := time.Now().Unix()
for _, point := range recoverPoints {
str := fmt.Sprintf("%v", point.Value)
arw.processor.RecoverSingle(process.Hash(cachedRule.Id, arw.processor.DatasourceId(), point), now, &str)
arw.processor.RecoverSingle(true, process.Hash(cachedRule.Id, arw.processor.DatasourceId(), point), now, &str)
}
}
@@ -267,7 +268,7 @@ func (arw *AlertRuleWorker) GetTdengineAnomalyPoint(rule *models.AlertRule, dsId
if len(ruleQuery.Queries) > 0 {
seriesStore := make(map[uint64]models.DataResp)
// 将不同查询的 hash 索引分组存放
seriesTagIndexes := make([]map[uint64][]uint64, 0)
seriesTagIndexes := make(map[string]map[uint64][]uint64)
for _, query := range ruleQuery.Queries {
seriesTagIndex := make(map[uint64][]uint64)
@@ -281,7 +282,7 @@ func (arw *AlertRuleWorker) GetTdengineAnomalyPoint(rule *models.AlertRule, dsId
continue
}
series, err := cli.Query(query)
series, err := cli.Query(query,0)
arw.processor.Stats.CounterQueryDataTotal.WithLabelValues(fmt.Sprintf("%d", arw.datasourceId)).Inc()
if err != nil {
logger.Warningf("rule_eval rid:%d query data error: %v", rule.Id, err)
@@ -292,7 +293,13 @@ func (arw *AlertRuleWorker) GetTdengineAnomalyPoint(rule *models.AlertRule, dsId
// 此条日志很重要,是告警判断的现场值
logger.Debugf("rule_eval rid:%d req:%+v resp:%+v", rule.Id, query, series)
MakeSeriesMap(series, seriesTagIndex, seriesStore)
seriesTagIndexes = append(seriesTagIndexes, seriesTagIndex)
ref, err := GetQueryRef(query)
if err != nil {
logger.Warningf("rule_eval rid:%d query ref error: %v query:%+v", rule.Id, err, query)
arw.processor.Stats.CounterRuleEvalErrorTotal.WithLabelValues(fmt.Sprintf("%v", arw.processor.DatasourceId()), GET_RULE_CONFIG).Inc()
continue
}
seriesTagIndexes[ref] = seriesTagIndex
}
points, recoverPoints = GetAnomalyPoint(rule.Id, ruleQuery, seriesTagIndexes, seriesStore)
@@ -369,11 +376,6 @@ func (arw *AlertRuleWorker) GetHostAnomalyPoint(ruleConfig string) []common.Anom
}
m["ident"] = target.Ident
bg := arw.processor.BusiGroupCache.GetByBusiGroupId(target.GroupId)
if bg != nil && bg.LabelEnable == 1 {
m["busigroup"] = bg.LabelValue
}
lst = append(lst, common.NewAnomalyPoint(trigger.Type, m, now, float64(now-target.UpdateAt), trigger.Severity))
}
case "offset":
@@ -422,11 +424,6 @@ func (arw *AlertRuleWorker) GetHostAnomalyPoint(ruleConfig string) []common.Anom
}
m["ident"] = host
bg := arw.processor.BusiGroupCache.GetByBusiGroupId(target.GroupId)
if bg != nil && bg.LabelEnable == 1 {
m["busigroup"] = bg.LabelValue
}
lst = append(lst, common.NewAnomalyPoint(trigger.Type, m, now, float64(offset), trigger.Severity))
}
case "pct_target_miss":
@@ -455,7 +452,7 @@ func (arw *AlertRuleWorker) GetHostAnomalyPoint(ruleConfig string) []common.Anom
return lst
}
func GetAnomalyPoint(ruleId int64, ruleQuery models.RuleQuery, seriesTagIndexes []map[uint64][]uint64, seriesStore map[uint64]models.DataResp) ([]common.AnomalyPoint, []common.AnomalyPoint) {
func GetAnomalyPoint(ruleId int64, ruleQuery models.RuleQuery, seriesTagIndexes map[string]map[uint64][]uint64, seriesStore map[uint64]models.DataResp) ([]common.AnomalyPoint, []common.AnomalyPoint) {
points := []common.AnomalyPoint{}
recoverPoints := []common.AnomalyPoint{}
@@ -463,61 +460,13 @@ func GetAnomalyPoint(ruleId int64, ruleQuery models.RuleQuery, seriesTagIndexes
return points, recoverPoints
}
if len(seriesTagIndexes) == 0 {
return points, recoverPoints
}
for _, trigger := range ruleQuery.Triggers {
// seriesTagIndex 的 key 仅做分组使用value 为每组 series 的 hash
seriesTagIndex := make(map[uint64][]uint64)
if len(trigger.Joins) == 0 {
// 没有 join 条件,走原逻辑
last := seriesTagIndexes[0]
for i := 1; i < len(seriesTagIndexes); i++ {
last = originalJoin(last, seriesTagIndexes[i])
}
seriesTagIndex = last
} else {
// 有 join 条件,按条件依次合并
if len(seriesTagIndexes) != len(trigger.Joins)+1 {
logger.Errorf("rule_eval rid:%d queries' count: %d not match join condition's count: %d", ruleId, len(seriesTagIndexes), len(trigger.Joins))
continue
}
last := seriesTagIndexes[0]
lastRehashed := rehashSet(last, seriesStore, trigger.Joins[0].On)
for i := range trigger.Joins {
cur := seriesTagIndexes[i+1]
switch trigger.Joins[i].JoinType {
case "original":
last = originalJoin(last, cur)
case "none":
last = noneJoin(last, cur)
case "cartesian":
last = cartesianJoin(last, cur)
case "inner_join":
curRehashed := rehashSet(cur, seriesStore, trigger.Joins[i].On)
lastRehashed = onJoin(lastRehashed, curRehashed, Inner)
last = flatten(lastRehashed)
case "left_join":
curRehashed := rehashSet(cur, seriesStore, trigger.Joins[i].On)
lastRehashed = onJoin(lastRehashed, curRehashed, Left)
last = flatten(lastRehashed)
case "right_join":
curRehashed := rehashSet(cur, seriesStore, trigger.Joins[i].On)
lastRehashed = onJoin(curRehashed, lastRehashed, Right)
last = flatten(lastRehashed)
case "left_exclude":
curRehashed := rehashSet(cur, seriesStore, trigger.Joins[i].On)
lastRehashed = exclude(lastRehashed, curRehashed)
last = flatten(lastRehashed)
case "right_exclude":
curRehashed := rehashSet(cur, seriesStore, trigger.Joins[i].On)
lastRehashed = exclude(curRehashed, lastRehashed)
last = flatten(lastRehashed)
default:
logger.Warningf("rule_eval rid:%d join type:%s not support", ruleId, trigger.Joins[i].JoinType)
}
}
seriesTagIndex = last
}
seriesTagIndex := ProcessJoins(ruleId, trigger, seriesTagIndexes, seriesStore)
for _, seriesHash := range seriesTagIndex {
sort.Slice(seriesHash, func(i, j int) bool {
@@ -564,23 +513,37 @@ func GetAnomalyPoint(ruleId int64, ruleQuery models.RuleQuery, seriesTagIndexes
}
point := common.AnomalyPoint{
Key: sample.MetricName(),
Labels: sample.Metric,
Timestamp: int64(ts),
Value: value,
Values: values,
Severity: trigger.Severity,
Triggered: isTriggered,
Query: fmt.Sprintf("query:%+v trigger:%+v", ruleQuery.Queries, trigger),
Key: sample.MetricName(),
Labels: sample.Metric,
Timestamp: int64(ts),
Value: value,
Values: values,
Severity: trigger.Severity,
Triggered: isTriggered,
Query: fmt.Sprintf("query:%+v trigger:%+v", ruleQuery.Queries, trigger),
RecoverConfig: trigger.RecoverConfig,
}
if sample.Query != "" {
point.Query = sample.Query
}
// 恢复条件判断经过讨论是只在表达式模式下支持,表达式模式会通过 isTriggered 判断是告警点还是恢复点
// 1. 不设置恢复判断,满足恢复条件产生 recoverPoint 恢复,无数据不产生 anomalyPoint 恢复
// 2. 设置满足条件才恢复,仅可通过产生 recoverPoint 恢复,不能通过不产生 anomalyPoint 恢复
// 3. 设置无数据不恢复,仅可通过产生 recoverPoint 恢复,不产生 anomalyPoint 恢复
if isTriggered {
points = append(points, point)
} else {
switch trigger.RecoverConfig.JudgeType {
case models.Origin:
// 对齐原实现 do nothing
case models.RecoverOnCondition:
// 额外判断恢复条件,满足才恢复
fulfill := parser.Calc(trigger.RecoverConfig.RecoverExp, m)
if !fulfill {
continue
}
}
recoverPoints = append(recoverPoints, point)
}
}
@@ -613,7 +576,7 @@ func flatten(rehashed map[uint64][][]uint64) map[uint64][]uint64 {
// [[A3{data_base=2, table=board}B2{data_base=2, table=alert}][A4{data_base=2, table=alert}B2{data_base=2, table=alert}]]
func onJoin(reHashTagIndex1 map[uint64][][]uint64, reHashTagIndex2 map[uint64][][]uint64, joinType JoinType) map[uint64][][]uint64 {
reHashTagIndex := make(map[uint64][][]uint64)
for rehash, _ := range reHashTagIndex1 {
for rehash := range reHashTagIndex1 {
if _, ok := reHashTagIndex2[rehash]; ok {
// 若有 rehash 相同的记录,两两合并
for i1 := range reHashTagIndex1[rehash] {
@@ -656,6 +619,7 @@ func rehashSet(seriesTagIndex1 map[uint64][]uint64, seriesStore map[uint64]model
if !exists {
continue
}
rehash := hash.GetTargetTagHash(series.Metric, on)
if _, ok := reHashTagIndex[rehash]; !ok {
reHashTagIndex[rehash] = make([][]uint64, 0)
@@ -747,3 +711,100 @@ func mergeNewArray(arg ...[]uint64) []uint64 {
}
return res
}
func ProcessJoins(ruleId int64, trigger models.Trigger, seriesTagIndexes map[string]map[uint64][]uint64, seriesStore map[uint64]models.DataResp) map[uint64][]uint64 {
last := make(map[uint64][]uint64)
if len(seriesTagIndexes) == 0 {
return last
}
if len(trigger.Joins) == 0 {
idx := 0
for _, seriesTagIndex := range seriesTagIndexes {
if idx == 0 {
last = seriesTagIndex
} else {
last = originalJoin(last, seriesTagIndex)
}
idx++
}
return last
}
// 有 join 条件,按条件依次合并
if len(seriesTagIndexes) < len(trigger.Joins)+1 {
logger.Errorf("rule_eval rid:%d queries' count: %d not match join condition's count: %d", ruleId, len(seriesTagIndexes), len(trigger.Joins))
return nil
}
last = seriesTagIndexes[trigger.JoinRef]
lastRehashed := rehashSet(last, seriesStore, trigger.Joins[0].On)
for i := range trigger.Joins {
cur := seriesTagIndexes[trigger.Joins[i].Ref]
switch trigger.Joins[i].JoinType {
case "original":
last = originalJoin(last, cur)
case "none":
last = noneJoin(last, cur)
case "cartesian":
last = cartesianJoin(last, cur)
case "inner_join":
curRehashed := rehashSet(cur, seriesStore, trigger.Joins[i].On)
lastRehashed = onJoin(lastRehashed, curRehashed, Inner)
last = flatten(lastRehashed)
case "left_join":
curRehashed := rehashSet(cur, seriesStore, trigger.Joins[i].On)
lastRehashed = onJoin(lastRehashed, curRehashed, Left)
last = flatten(lastRehashed)
case "right_join":
curRehashed := rehashSet(cur, seriesStore, trigger.Joins[i].On)
lastRehashed = onJoin(curRehashed, lastRehashed, Right)
last = flatten(lastRehashed)
case "left_exclude":
curRehashed := rehashSet(cur, seriesStore, trigger.Joins[i].On)
lastRehashed = exclude(lastRehashed, curRehashed)
last = flatten(lastRehashed)
case "right_exclude":
curRehashed := rehashSet(cur, seriesStore, trigger.Joins[i].On)
lastRehashed = exclude(curRehashed, lastRehashed)
last = flatten(lastRehashed)
default:
logger.Warningf("rule_eval rid:%d join type:%s not support", ruleId, trigger.Joins[i].JoinType)
}
}
return last
}
func GetQueryRef(query interface{}) (string, error) {
// 首先检查是否为 map
if m, ok := query.(map[string]interface{}); ok {
if ref, exists := m["ref"]; exists {
if refStr, ok := ref.(string); ok {
return refStr, nil
}
return "", fmt.Errorf("ref 字段不是字符串类型")
}
return "", fmt.Errorf("query 中没有找到 ref 字段")
}
// 如果不是 map则按原来的方式处理结构体
v := reflect.ValueOf(query)
if v.Kind() == reflect.Ptr {
v = v.Elem()
}
if v.Kind() != reflect.Struct {
return "", fmt.Errorf("query not a struct or map")
}
refField := v.FieldByName("Ref")
if !refField.IsValid() {
return "", fmt.Errorf("not find ref field")
}
if refField.Kind() != reflect.String {
return "", fmt.Errorf("ref not a string")
}
return refField.String(), nil
}

View File

@@ -114,7 +114,7 @@ func BgNotMatchMuteStrategy(rule *models.AlertRule, event *models.AlertCurEvent,
target, exists := targetCache.Get(ident)
// 对于包含ident的告警事件check一下ident所属bg和rule所属bg是否相同
// 如果告警规则选择了只在本BG生效那其他BG的机器就不能因此规则产生告警
if exists && target.GroupId != rule.GroupId {
if exists && !target.MatchGroupId(rule.GroupId) {
logger.Debugf("[%s] mute: rule_eval:%d cluster:%s", "BgNotMatchMuteStrategy", rule.Id, event.Cluster)
return true
}

View File

@@ -170,7 +170,9 @@ func (p *Processor) Handle(anomalyPoints []common.AnomalyPoint, from string, inh
p.handleEvent(events)
}
p.HandleRecover(alertingKeys, now, inhibit)
if from == "inner" {
p.HandleRecover(alertingKeys, now, inhibit)
}
}
func (p *Processor) BuildEvent(anomalyPoint common.AnomalyPoint, from string, now int64) *models.AlertCurEvent {
@@ -211,6 +213,15 @@ func (p *Processor) BuildEvent(anomalyPoint common.AnomalyPoint, from string, no
event.Severity = anomalyPoint.Severity
event.ExtraConfig = p.rule.ExtraConfigJSON
event.PromQl = anomalyPoint.Query
event.RecoverConfig = anomalyPoint.RecoverConfig
if p.target != "" {
if pt, exist := p.TargetCache.Get(p.target); exist {
event.Target = pt
} else {
logger.Infof("Target[ident: %s] doesn't exist in cache.", p.target)
}
}
if event.TriggerValues != "" && strings.Count(event.TriggerValues, "$") > 1 {
// TriggerValues 有多个变量,将多个变量都放到 TriggerValue 中
@@ -282,7 +293,7 @@ func (p *Processor) HandleRecover(alertingKeys map[string]struct{}, now int64, i
}
hashArr := make([]string, 0, len(alertingKeys))
for hash := range p.fires.GetAll() {
for hash, _ := range p.fires.GetAll() {
if _, has := alertingKeys[hash]; has {
continue
}
@@ -301,7 +312,7 @@ func (p *Processor) HandleRecoverEvent(hashArr []string, now int64, inhibit bool
if !inhibit {
for _, hash := range hashArr {
p.RecoverSingle(hash, now, nil)
p.RecoverSingle(false, hash, now, nil)
}
return
}
@@ -329,11 +340,11 @@ func (p *Processor) HandleRecoverEvent(hashArr []string, now int64, inhibit bool
}
for _, event := range eventMap {
p.RecoverSingle(event.Hash, now, nil)
p.RecoverSingle(false, event.Hash, now, nil)
}
}
func (p *Processor) RecoverSingle(hash string, now int64, value *string, values ...string) {
func (p *Processor) RecoverSingle(byRecover bool, hash string, now int64, value *string, values ...string) {
cachedRule := p.rule
if cachedRule == nil {
return
@@ -359,6 +370,12 @@ func (p *Processor) RecoverSingle(hash string, now int64, value *string, values
}
}
// 如果设置了恢复条件,则不能在此处恢复,必须依靠 recoverPoint 来恢复
if event.RecoverConfig.JudgeType != models.Origin && !byRecover {
logger.Debugf("rule_eval:%s event:%v not recover", p.Key(), event)
return
}
if value != nil {
event.TriggerValue = *value
if len(values) > 0 {
@@ -497,6 +514,7 @@ func (p *Processor) pushEventToQueue(e *models.AlertCurEvent) {
func (p *Processor) RecoverAlertCurEventFromDb() {
p.pendings = NewAlertCurEventMap(nil)
p.pendingsUseByRecover = NewAlertCurEventMap(nil)
curEvents, err := models.AlertCurEventGetByRuleIdAndDsId(p.ctx, p.rule.Id, p.datasourceId)
if err != nil {
@@ -518,6 +536,11 @@ func (p *Processor) RecoverAlertCurEventFromDb() {
}
event.DB2Mem()
target, exists := p.TargetCache.Get(event.TargetIdent)
if exists {
event.Target = target
}
fireMap[event.Hash] = event
e := *event
pendingsUseByRecoverMap[event.Hash] = &e

View File

@@ -56,11 +56,12 @@ func (rrc *RecordRuleContext) Key() string {
}
func (rrc *RecordRuleContext) Hash() string {
return str.MD5(fmt.Sprintf("%d_%s_%s_%d",
return str.MD5(fmt.Sprintf("%d_%s_%s_%d_%s",
rrc.rule.Id,
rrc.rule.CronPattern,
rrc.rule.PromQl,
rrc.datasourceId,
rrc.rule.AppendTags,
))
}

View File

@@ -34,7 +34,7 @@ func (rt *Router) pushEventToQueue(c *gin.Context) {
continue
}
arr := strings.Split(pair, "=")
arr := strings.SplitN(pair, "=", 2)
if len(arr) != 2 {
continue
}
@@ -129,7 +129,7 @@ func (rt *Router) makeEvent(c *gin.Context) {
} else {
for _, vector := range events[i].AnomalyPoints {
readableString := vector.ReadableValue()
go ruleWorker.RecoverSingle(process.Hash(events[i].RuleId, events[i].DatasourceId, vector), vector.Timestamp, &readableString)
go ruleWorker.RecoverSingle(false, process.Hash(events[i].RuleId, events[i].DatasourceId, vector), vector.Timestamp, &readableString)
}
}
}

View File

@@ -123,7 +123,7 @@ func InitEmailSender(ctx *ctx.Context, ncc *memsto.NotifyConfigCacheType) {
mailch = make(chan *EmailContext, 100000)
go updateSmtp(ctx, ncc)
smtpConfig = ncc.GetSMTP()
startEmailSender(ctx, smtpConfig)
go startEmailSender(ctx, smtpConfig)
}
func updateSmtp(ctx *ctx.Context, ncc *memsto.NotifyConfigCacheType) {
@@ -143,6 +143,7 @@ func startEmailSender(ctx *ctx.Context, smtp aconf.SMTPConfig) {
conf := smtp
if conf.Host == "" || conf.Port == 0 {
logger.Warning("SMTP configurations invalid")
<-mailQuit
return
}
logger.Infof("start email sender... conf.Host:%+v,conf.Port:%+v", conf.Host, conf.Port)

View File

@@ -56,8 +56,8 @@ const (
Triggered = "triggered"
)
var (
body = feishuCard{
func createFeishuCardBody() feishuCard {
return feishuCard{
feishu: feishu{Msgtype: "interactive"},
Card: Cards{
Config: Conf{
@@ -90,7 +90,7 @@ var (
},
},
}
)
}
func (fs *FeishuCardSender) CallBack(ctx CallBackContext) {
if len(ctx.Events) == 0 || len(ctx.CallBackURL) == 0 {
@@ -121,6 +121,7 @@ func (fs *FeishuCardSender) CallBack(ctx CallBackContext) {
}
SendTitle := fmt.Sprintf("🔔 %s", ctx.Events[0].RuleName)
body := createFeishuCardBody()
body.Card.Header.Title.Content = SendTitle
body.Card.Header.Template = color
body.Card.Elements[0].Text.Content = message
@@ -153,6 +154,7 @@ func (fs *FeishuCardSender) Send(ctx MessageContext) {
}
SendTitle := fmt.Sprintf("🔔 %s", ctx.Events[0].RuleName)
body := createFeishuCardBody()
body.Card.Header.Title.Content = SendTitle
body.Card.Header.Template = color
body.Card.Elements[0].Text.Content = message

View File

@@ -12,7 +12,8 @@ import (
"github.com/ccfos/nightingale/v6/memsto"
"github.com/ccfos/nightingale/v6/models"
"github.com/ccfos/nightingale/v6/pkg/ctx"
"github.com/ccfos/nightingale/v6/storage"
imodels "github.com/flashcatcloud/ibex/src/models"
"github.com/flashcatcloud/ibex/src/storage"
"github.com/toolkits/pkg/logger"
)
@@ -42,7 +43,7 @@ func (c *IbexCallBacker) CallBack(ctx CallBackContext) {
}
func (c *IbexCallBacker) handleIbex(ctx *ctx.Context, url string, event *models.AlertCurEvent) {
if models.DB(ctx) == nil && ctx.IsCenter {
if imodels.DB() == nil && ctx.IsCenter {
logger.Warning("event_callback_ibex: db is nil")
return
}
@@ -107,7 +108,7 @@ func CallIbex(ctx *ctx.Context, id int64, host string,
continue
}
arr := strings.Split(pair, "=")
arr := strings.SplitN(pair, "=", 2)
if len(arr) != 2 {
continue
}
@@ -141,7 +142,7 @@ func CallIbex(ctx *ctx.Context, id int64, host string,
AlertTriggered: true,
}
id, err = TaskAdd(ctx, in, tpl.UpdateBy, ctx.IsCenter)
id, err = TaskAdd(in, tpl.UpdateBy, ctx.IsCenter)
if err != nil {
logger.Errorf("event_callback_ibex: call ibex fail: %v", err)
return
@@ -180,16 +181,21 @@ func canDoIbex(username string, tpl *models.TaskTpl, host string, targetCache *m
return false, nil
}
return target.GroupId == tpl.GroupId, nil
return target.MatchGroupId(tpl.GroupId), nil
}
func TaskAdd(ctx *ctx.Context, f models.TaskForm, authUser string, isCenter bool) (int64, error) {
func TaskAdd(f models.TaskForm, authUser string, isCenter bool) (int64, error) {
if storage.Cache == nil {
logger.Warning("event_callback_ibex: redis cache is nil")
return 0, fmt.Errorf("redis cache is nil")
}
hosts := cleanHosts(f.Hosts)
if len(hosts) == 0 {
return 0, fmt.Errorf("arg(hosts) empty")
}
taskMeta := &models.TaskMeta{
taskMeta := &imodels.TaskMeta{
Title: f.Title,
Account: f.Account,
Batch: f.Batch,
@@ -212,34 +218,34 @@ func TaskAdd(ctx *ctx.Context, f models.TaskForm, authUser string, isCenter bool
// 任务类型分为"告警规则触发"和"n9e center用户下发"两种;
// 边缘机房"告警规则触发"的任务不需要规划并且它可能是失联的无法使用db资源所以放入redis缓存中直接下发给agentd执行
if !isCenter && f.AlertTriggered {
if err := taskMeta.Create(ctx); err != nil {
if err := taskMeta.Create(); err != nil {
// 当网络不连通时生成唯一的id防止边缘机房中不同任务的id相同
// 方法是redis自增id去防止同一个机房的不同n9e edge生成的id相同
// 但没法防止不同边缘机房生成同样的id所以生成id的数据不会上报存入数据库只用于闭环执行。
taskMeta.Id, err = storage.IdGet(ctx.Redis)
taskMeta.Id, err = storage.IdGet()
if err != nil {
return 0, err
}
}
taskHost := models.TaskHost{
taskHost := imodels.TaskHost{
Id: taskMeta.Id,
Host: hosts[0],
Status: "running",
}
if err = taskHost.Create(ctx); err != nil {
if err = taskHost.Create(); err != nil {
logger.Warningf("task_add_fail: authUser=%s title=%s err=%s", authUser, taskMeta.Title, err.Error())
}
// 缓存任务元信息和待下发的任务
err = taskMeta.Cache(ctx, hosts[0])
err = taskMeta.Cache(hosts[0])
if err != nil {
return 0, err
}
} else {
// 如果是中心机房,还是保持之前的逻辑
err = taskMeta.Save(ctx, hosts, f.Action)
err = taskMeta.Save(hosts, f.Action)
if err != nil {
return 0, err
}

View File

@@ -42,6 +42,7 @@ func (fs *LarkCardSender) CallBack(ctx CallBackContext) {
}
SendTitle := fmt.Sprintf("🔔 %s", ctx.Events[0].RuleName)
body := createFeishuCardBody()
body.Card.Header.Title.Content = SendTitle
body.Card.Header.Template = color
body.Card.Elements[0].Text.Content = message
@@ -74,6 +75,7 @@ func (fs *LarkCardSender) Send(ctx MessageContext) {
}
SendTitle := fmt.Sprintf("🔔 %s", ctx.Events[0].RuleName)
body := createFeishuCardBody()
body.Card.Header.Title.Content = SendTitle
body.Card.Header.Template = color
body.Card.Elements[0].Text.Content = message

View File

@@ -166,10 +166,10 @@ func StartConsumer(ctx *ctx.Context, queue *WebhookQueue, popSize int, webhook *
retryCount := 0
for retryCount < webhook.RetryCount {
needRetry, res, err := sendWebhook(webhook, events, stats)
go RecordEvents(ctx, webhook, events, stats, res, err)
if !needRetry {
break
}
go RecordEvents(ctx, webhook, events, stats, res, err)
retryCount++
time.Sleep(time.Second * time.Duration(webhook.RetryInterval) * time.Duration(retryCount))
}

View File

@@ -14,6 +14,7 @@ type Center struct {
FlashDuty FlashDuty
EventHistoryGroupView bool
CleanNotifyRecordDay int
MigrateBusiGroupLabel bool
}
type Plugin struct {

View File

@@ -18,7 +18,6 @@ import (
"github.com/ccfos/nightingale/v6/conf"
"github.com/ccfos/nightingale/v6/cron"
"github.com/ccfos/nightingale/v6/dumper"
"github.com/ccfos/nightingale/v6/ibex"
"github.com/ccfos/nightingale/v6/memsto"
"github.com/ccfos/nightingale/v6/models"
"github.com/ccfos/nightingale/v6/models/migrate"
@@ -34,6 +33,8 @@ import (
"github.com/ccfos/nightingale/v6/pushgw/writer"
"github.com/ccfos/nightingale/v6/storage"
"github.com/ccfos/nightingale/v6/tdengine"
"github.com/flashcatcloud/ibex/src/cmd/ibex"
)
func Initialize(configDir string, cryptoKey string) (func(), error) {
@@ -47,6 +48,10 @@ func Initialize(configDir string, cryptoKey string) (func(), error) {
cconf.MergeOperationConf()
if config.Alert.Heartbeat.EngineName == "" {
config.Alert.Heartbeat.EngineName = "default"
}
logxClean, err := logx.Init(config.Log)
if err != nil {
return nil, err
@@ -60,14 +65,7 @@ func Initialize(configDir string, cryptoKey string) (func(), error) {
if err != nil {
return nil, err
}
var redis storage.Redis
redis, err = storage.NewRedis(config.Redis)
if err != nil {
return nil, err
}
ctx := ctx.NewContext(context.Background(), db, redis, true)
ctx := ctx.NewContext(context.Background(), db, true)
migrate.Migrate(db)
models.InitRoot(ctx)
@@ -78,7 +76,12 @@ func Initialize(configDir string, cryptoKey string) (func(), error) {
return nil, err
}
integration.Init(ctx, config.Center.BuiltinIntegrationsDir)
go integration.Init(ctx, config.Center.BuiltinIntegrationsDir)
var redis storage.Redis
redis, err = storage.NewRedis(config.Redis)
if err != nil {
return nil, err
}
metas := metas.New(redis)
idents := idents.New(ctx, redis)
@@ -96,6 +99,7 @@ func Initialize(configDir string, cryptoKey string) (func(), error) {
userCache := memsto.NewUserCache(ctx, syncStats)
userGroupCache := memsto.NewUserGroupCache(ctx, syncStats)
taskTplCache := memsto.NewTaskTplCache(ctx)
configCvalCache := memsto.NewCvalCache(ctx, syncStats)
sso := sso.Init(config.Center, ctx, configCache)
promClients := prom.NewPromClient(ctx)
@@ -111,11 +115,18 @@ func Initialize(configDir string, cryptoKey string) (func(), error) {
go cron.CleanNotifyRecord(ctx, config.Center.CleanNotifyRecordDay)
alertrtRouter := alertrt.New(config.HTTP, config.Alert, alertMuteCache, targetCache, busiGroupCache, alertStats, ctx, externalProcessors)
centerRouter := centerrt.New(config.HTTP, config.Center, config.Alert, config.Ibex, cconf.Operations, dsCache, notifyConfigCache, promClients, tdengineClients,
centerRouter := centerrt.New(config.HTTP, config.Center, config.Alert, config.Ibex,
cconf.Operations, dsCache, notifyConfigCache, promClients, tdengineClients,
redis, sso, ctx, metas, idents, targetCache, userCache, userGroupCache)
pushgwRouter := pushgwrt.New(config.HTTP, config.Pushgw, config.Alert, targetCache, busiGroupCache, idents, metas, writers, ctx)
r := httpx.GinEngine(config.Global.RunMode, config.HTTP)
go func() {
if models.CanMigrateBg(ctx) {
models.MigrateBg(ctx, pushgwRouter.Pushgw.BusiGroupLabelKey)
}
}()
r := httpx.GinEngine(config.Global.RunMode, config.HTTP, configCvalCache.PrintBodyPaths, configCvalCache.PrintAccessLog)
centerRouter.Config(r)
alertrtRouter.Config(r)
@@ -124,10 +135,10 @@ func Initialize(configDir string, cryptoKey string) (func(), error) {
if config.Ibex.Enable {
migrate.MigrateIbexTables(db)
ibex.ServerStart(ctx, true, db, redis, config.HTTP.APIForService.BasicAuth, config.Alert.Heartbeat, &config.CenterApi, r, centerRouter, config.Ibex, config.HTTP.Port)
ibex.ServerStart(true, db, redis, config.HTTP.APIForService.BasicAuth, config.Alert.Heartbeat, &config.CenterApi, r, centerRouter, config.Ibex, config.HTTP.Port)
}
httpClean := httpx.Init(config.HTTP, context.Background(), r)
httpClean := httpx.Init(config.HTTP, r)
return func() {
logxClean()

View File

@@ -16,6 +16,12 @@ import (
const SYSTEM = "system"
func Init(ctx *ctx.Context, builtinIntegrationsDir string) {
err := models.InitBuiltinPayloads(ctx)
if err != nil {
logger.Warning("init old builtinPayloads fail ", err)
return
}
fp := builtinIntegrationsDir
if fp == "" {
fp = path.Join(runner.Cwd, "integrations")
@@ -92,6 +98,7 @@ func Init(ctx *ctx.Context, builtinIntegrationsDir string) {
logger.Warning("update builtin component fail ", old, err)
}
}
component.ID = old.ID
}
// delete uuid is empty
@@ -141,13 +148,13 @@ func Init(ctx *ctx.Context, builtinIntegrationsDir string) {
cate := strings.Replace(f, ".json", "", -1)
builtinAlert := models.BuiltinPayload{
Component: component.Ident,
Type: "alert",
Cate: cate,
Name: alert.Name,
Tags: alert.AppendTags,
Content: string(content),
UUID: alert.UUID,
ComponentID: component.ID,
Type: "alert",
Cate: cate,
Name: alert.Name,
Tags: alert.AppendTags,
Content: string(content),
UUID: alert.UUID,
}
old, err := models.BuiltinPayloadGet(ctx, "uuid = ?", alert.UUID)
@@ -165,6 +172,7 @@ func Init(ctx *ctx.Context, builtinIntegrationsDir string) {
}
if old.UpdatedBy == SYSTEM {
old.ComponentID = component.ID
old.Content = string(content)
old.Name = alert.Name
old.Tags = alert.AppendTags
@@ -231,13 +239,13 @@ func Init(ctx *ctx.Context, builtinIntegrationsDir string) {
}
builtinDashboard := models.BuiltinPayload{
Component: component.Ident,
Type: "dashboard",
Cate: "",
Name: dashboard.Name,
Tags: dashboard.Tags,
Content: string(content),
UUID: dashboard.UUID,
ComponentID: component.ID,
Type: "dashboard",
Cate: "",
Name: dashboard.Name,
Tags: dashboard.Tags,
Content: string(content),
UUID: dashboard.UUID,
}
old, err := models.BuiltinPayloadGet(ctx, "uuid = ?", dashboard.UUID)
@@ -255,6 +263,7 @@ func Init(ctx *ctx.Context, builtinIntegrationsDir string) {
}
if old.UpdatedBy == SYSTEM {
old.ComponentID = component.ID
old.Content = string(content)
old.Name = dashboard.Name
old.Tags = dashboard.Tags

View File

@@ -16,6 +16,7 @@ import (
"github.com/ccfos/nightingale/v6/conf"
_ "github.com/ccfos/nightingale/v6/front/statik"
"github.com/ccfos/nightingale/v6/memsto"
"github.com/ccfos/nightingale/v6/models"
"github.com/ccfos/nightingale/v6/pkg/aop"
"github.com/ccfos/nightingale/v6/pkg/ctx"
"github.com/ccfos/nightingale/v6/pkg/httpx"
@@ -51,9 +52,14 @@ type Router struct {
UserGroupCache *memsto.UserGroupCacheType
Ctx *ctx.Context
HeartbeatHook HeartbeatHookFunc
TargetDeleteHook models.TargetDeleteHookFunc
}
func New(httpConfig httpx.Config, center cconf.Center, alert aconf.Alert, ibex conf.Ibex, operations cconf.Operation, ds *memsto.DatasourceCacheType, ncc *memsto.NotifyConfigCacheType, pc *prom.PromClientMap, tdendgineClients *tdengine.TdengineClientMap, redis storage.Redis, sso *sso.SsoClient, ctx *ctx.Context, metaSet *metas.Set, idents *idents.Set, tc *memsto.TargetCacheType, uc *memsto.UserCacheType, ugc *memsto.UserGroupCacheType) *Router {
func New(httpConfig httpx.Config, center cconf.Center, alert aconf.Alert, ibex conf.Ibex,
operations cconf.Operation, ds *memsto.DatasourceCacheType, ncc *memsto.NotifyConfigCacheType,
pc *prom.PromClientMap, tdendgineClients *tdengine.TdengineClientMap, redis storage.Redis,
sso *sso.SsoClient, ctx *ctx.Context, metaSet *metas.Set, idents *idents.Set,
tc *memsto.TargetCacheType, uc *memsto.UserCacheType, ugc *memsto.UserGroupCacheType) *Router {
return &Router{
HTTP: httpConfig,
Center: center,
@@ -73,9 +79,14 @@ func New(httpConfig httpx.Config, center cconf.Center, alert aconf.Alert, ibex c
UserGroupCache: ugc,
Ctx: ctx,
HeartbeatHook: func(ident string) map[string]interface{} { return nil },
TargetDeleteHook: emptyDeleteHook,
}
}
// emptyDeleteHook is the default TargetDeleteHook installed by New: it
// performs no cleanup for the deleted idents and always reports success.
// Callers that need real teardown behavior replace Router.TargetDeleteHook.
func emptyDeleteHook(ctx *ctx.Context, idents []string) error {
	return nil
}
func stat() gin.HandlerFunc {
return func(c *gin.Context) {
start := time.Now()
@@ -276,7 +287,7 @@ func (rt *Router) Config(r *gin.Engine) {
pages.POST("/targets/tags", rt.auth(), rt.user(), rt.perm("/targets/put"), rt.targetBindTagsByFE)
pages.DELETE("/targets/tags", rt.auth(), rt.user(), rt.perm("/targets/put"), rt.targetUnbindTagsByFE)
pages.PUT("/targets/note", rt.auth(), rt.user(), rt.perm("/targets/put"), rt.targetUpdateNote)
pages.PUT("/targets/bgid", rt.auth(), rt.user(), rt.perm("/targets/put"), rt.targetUpdateBgid)
pages.PUT("/targets/bgids", rt.auth(), rt.user(), rt.perm("/targets/put"), rt.targetBindBgids)
pages.POST("/builtin-cate-favorite", rt.auth(), rt.user(), rt.builtinCateFavoriteAdd)
pages.DELETE("/builtin-cate-favorite/:name", rt.auth(), rt.user(), rt.builtinCateFavoriteDel)
@@ -297,6 +308,7 @@ func (rt *Router) Config(r *gin.Engine) {
pages.POST("/busi-group/:id/board/:bid/clone", rt.auth(), rt.user(), rt.perm("/dashboards/add"), rt.bgrw(), rt.boardClone)
pages.POST("/busi-groups/boards/clones", rt.auth(), rt.user(), rt.perm("/dashboards/add"), rt.boardBatchClone)
pages.GET("/boards", rt.auth(), rt.user(), rt.boardGetsByBids)
pages.GET("/board/:bid", rt.boardGet)
pages.GET("/board/:bid/pure", rt.boardPureGet)
pages.PUT("/board/:bid", rt.auth(), rt.user(), rt.perm("/dashboards/put"), rt.boardPut)
@@ -324,7 +336,7 @@ func (rt *Router) Config(r *gin.Engine) {
pages.GET("/alert-rule/:arid/pure", rt.auth(), rt.user(), rt.perm("/alert-rules"), rt.alertRulePureGet)
pages.PUT("/busi-group/alert-rule/validate", rt.auth(), rt.user(), rt.perm("/alert-rules/put"), rt.alertRuleValidation)
pages.POST("/relabel-test", rt.auth(), rt.user(), rt.relabelTest)
pages.POST("/busi-group/:id/alert-rules/clone", rt.auth(), rt.user(), rt.perm("/alert-rules/post"), rt.bgrw(), rt.cloneToMachine)
pages.POST("/busi-group/:id/alert-rules/clone", rt.auth(), rt.user(), rt.perm("/alert-rules/add"), rt.bgrw(), rt.cloneToMachine)
pages.GET("/busi-groups/recording-rules", rt.auth(), rt.user(), rt.perm("/recording-rules"), rt.recordingRuleGetsByGids)
pages.GET("/busi-group/:id/recording-rules", rt.auth(), rt.user(), rt.perm("/recording-rules"), rt.recordingRuleGets)
@@ -463,6 +475,7 @@ func (rt *Router) Config(r *gin.Engine) {
pages.GET("/builtin-payload/:id", rt.auth(), rt.user(), rt.perm("/built-in-components"), rt.builtinPayloadGet)
pages.PUT("/builtin-payloads", rt.auth(), rt.user(), rt.perm("/built-in-components/put"), rt.builtinPayloadsPut)
pages.DELETE("/builtin-payloads", rt.auth(), rt.user(), rt.perm("/built-in-components/del"), rt.builtinPayloadsDel)
pages.GET("/builtin-payload", rt.auth(), rt.user(), rt.builtinPayloadsGetByUUIDOrID)
}
r.GET("/api/n9e/versions", func(c *gin.Context) {
@@ -539,6 +552,7 @@ func (rt *Router) Config(r *gin.Engine) {
service.GET("/config/:id", rt.configGet)
service.GET("/configs", rt.configsGet)
service.GET("/config", rt.configGetByKey)
service.GET("/all-configs", rt.configGetAll)
service.PUT("/configs", rt.configsPut)
service.POST("/configs", rt.configsPost)
service.DELETE("/configs", rt.configsDel)
@@ -559,6 +573,8 @@ func (rt *Router) Config(r *gin.Engine) {
service.POST("/notify-record", rt.notificationRecordAdd)
service.GET("/alert-cur-events-del-by-hash", rt.alertCurEventDelByHash)
service.POST("/center/heartbeat", rt.heartbeat)
}
}

View File

@@ -65,7 +65,8 @@ func (rt *Router) alertCurEventsCard(c *gin.Context) {
ginx.Dangerous(err)
// 最多获取50000个获取太多也没啥意义
list, err := models.AlertCurEventGets(rt.Ctx, prods, bgids, stime, etime, severity, dsIds, cates, query, 50000, 0)
list, err := models.AlertCurEventsGet(rt.Ctx, prods, bgids, stime, etime, severity, dsIds,
cates, 0, query, 50000, 0)
ginx.Dangerous(err)
cardmap := make(map[string]*AlertCard)
@@ -162,13 +163,17 @@ func (rt *Router) alertCurEventsList(c *gin.Context) {
cates = strings.Split(cate, ",")
}
ruleId := ginx.QueryInt64(c, "rid", 0)
bgids, err := GetBusinessGroupIds(c, rt.Ctx, rt.Center.EventHistoryGroupView)
ginx.Dangerous(err)
total, err := models.AlertCurEventTotal(rt.Ctx, prods, bgids, stime, etime, severity, dsIds, cates, query)
total, err := models.AlertCurEventTotal(rt.Ctx, prods, bgids, stime, etime, severity, dsIds,
cates, ruleId, query)
ginx.Dangerous(err)
list, err := models.AlertCurEventGets(rt.Ctx, prods, bgids, stime, etime, severity, dsIds, cates, query, limit, ginx.Offset(c, limit))
list, err := models.AlertCurEventsGet(rt.Ctx, prods, bgids, stime, etime, severity, dsIds,
cates, ruleId, query, limit, ginx.Offset(c, limit))
ginx.Dangerous(err)
cache := make(map[int64]*models.UserGroup)
@@ -201,7 +206,9 @@ func (rt *Router) checkCurEventBusiGroupRWPermission(c *gin.Context, ids []int64
for i := 0; i < len(ids); i++ {
event, err := models.AlertCurEventGetById(rt.Ctx, ids[i])
ginx.Dangerous(err)
if event == nil {
continue
}
if _, has := set[event.GroupId]; !has {
rt.bgrwCheck(c, event.GroupId)
set[event.GroupId] = struct{}{}
@@ -227,6 +234,7 @@ func (rt *Router) alertCurEventGet(c *gin.Context) {
event.RuleConfigJson = ruleConfig
}
event.LastEvalTime = event.TriggerTime
ginx.NewRender(c).Data(event, nil)
}

View File

@@ -54,13 +54,17 @@ func (rt *Router) alertHisEventsList(c *gin.Context) {
cates = strings.Split(cate, ",")
}
ruleId := ginx.QueryInt64(c, "rid", 0)
bgids, err := GetBusinessGroupIds(c, rt.Ctx, rt.Center.EventHistoryGroupView)
ginx.Dangerous(err)
total, err := models.AlertHisEventTotal(rt.Ctx, prods, bgids, stime, etime, severity, recovered, dsIds, cates, query)
total, err := models.AlertHisEventTotal(rt.Ctx, prods, bgids, stime, etime, severity,
recovered, dsIds, cates, ruleId, query)
ginx.Dangerous(err)
list, err := models.AlertHisEventGets(rt.Ctx, prods, bgids, stime, etime, severity, recovered, dsIds, cates, query, limit, ginx.Offset(c, limit))
list, err := models.AlertHisEventGets(rt.Ctx, prods, bgids, stime, etime, severity, recovered,
dsIds, cates, ruleId, query, limit, ginx.Offset(c, limit))
ginx.Dangerous(err)
cache := make(map[int64]*models.UserGroup)

View File

@@ -37,6 +37,18 @@ func (rt *Router) alertRuleGets(c *gin.Context) {
ginx.NewRender(c).Data(ars, err)
}
// getAlertCueEventTimeRange reads the "stime"/"etime" query parameters and
// normalizes them: a missing etime defaults to the current unix timestamp,
// and a missing or non-positive-width stime (stime >= etime) defaults to
// 30 days before etime.
func getAlertCueEventTimeRange(c *gin.Context) (stime, etime int64) {
	const thirtyDays = 30 * 24 * 3600 // window in seconds

	etime = ginx.QueryInt64(c, "etime", 0)
	stime = ginx.QueryInt64(c, "stime", 0)

	if etime == 0 {
		etime = time.Now().Unix()
	}
	if stime == 0 || stime >= etime {
		stime = etime - thirtyDays
	}
	return stime, etime
}
func (rt *Router) alertRuleGetsByGids(c *gin.Context) {
gids := str.IdsInt64(ginx.QueryStr(c, "gids", ""), ",")
if len(gids) > 0 {
@@ -60,9 +72,30 @@ func (rt *Router) alertRuleGetsByGids(c *gin.Context) {
ars, err := models.AlertRuleGetsByBGIds(rt.Ctx, gids)
if err == nil {
cache := make(map[int64]*models.UserGroup)
rids := make([]int64, 0, len(ars))
names := make([]string, 0, len(ars))
for i := 0; i < len(ars); i++ {
ars[i].FillNotifyGroups(rt.Ctx, cache)
ars[i].FillSeverities()
rids = append(rids, ars[i].Id)
names = append(names, ars[i].UpdateBy)
}
stime, etime := getAlertCueEventTimeRange(c)
cnt := models.AlertCurEventCountByRuleId(rt.Ctx, rids, stime, etime)
if cnt != nil {
for i := 0; i < len(ars); i++ {
ars[i].CurEventCount = cnt[ars[i].Id]
}
}
users := models.UserMapGet(rt.Ctx, "username in (?)", names)
if users != nil {
for i := 0; i < len(ars); i++ {
if user, exist := users[ars[i].UpdateBy]; exist {
ars[i].UpdateByNickname = user.Nickname
}
}
}
}
ginx.NewRender(c).Data(ars, err)
@@ -492,7 +525,7 @@ func (rt *Router) relabelTest(c *gin.Context) {
labels := make([]prompb.Label, len(f.Tags))
for i, tag := range f.Tags {
label := strings.Split(tag, "=")
label := strings.SplitN(tag, "=", 2)
if len(label) != 2 {
ginx.Bomb(http.StatusBadRequest, "tag:%s format error", tag)
}
@@ -529,6 +562,15 @@ type identListForm struct {
IdentList []string `json:"ident_list"`
}
// identOpRegexp matches the "ident" label followed by a non-equality
// matcher (!=, !~ or =~), with optional whitespace in between. Compiled
// once at package init instead of on every call.
var identOpRegexp = regexp.MustCompile(`ident\s*(!=|!~|=~)`)

// containsIdentOperator reports whether s uses the ident label with one of
// the operators !=, !~ or =~. The clone-to-machine flow rejects rules whose
// promql matches, since such expressions do not pin a single ident.
//
// Note: the original implementation recompiled the pattern per call and
// returned false on a compile error; the pattern is a constant and valid,
// so hoisting it to MustCompile preserves behavior while removing the
// dead error path.
func containsIdentOperator(s string) bool {
	return identOpRegexp.MatchString(s)
}
func (rt *Router) cloneToMachine(c *gin.Context) {
var f identListForm
ginx.BindJSON(c, &f)
@@ -550,10 +592,17 @@ func (rt *Router) cloneToMachine(c *gin.Context) {
reterr := make(map[string]map[string]string)
for i := range alertRules {
reterr[alertRules[i].Name] = make(map[string]string)
errMsg := make(map[string]string)
if alertRules[i].Cate != "prometheus" {
reterr[alertRules[i].Name]["all"] = "Only Prometheus rules can be cloned to machines"
errMsg["all"] = "Only Prometheus rule can be cloned to machines"
reterr[alertRules[i].Name] = errMsg
continue
}
if containsIdentOperator(alertRules[i].RuleConfig) {
errMsg["all"] = "promql is missing ident"
reterr[alertRules[i].Name] = errMsg
continue
}
@@ -562,7 +611,7 @@ func (rt *Router) cloneToMachine(c *gin.Context) {
newRule := &models.AlertRule{}
if err := copier.Copy(newRule, alertRules[i]); err != nil {
reterr[alertRules[i].Name][f.IdentList[j]] = fmt.Sprintf("fail to clone rule, err: %s", err)
errMsg[f.IdentList[j]] = fmt.Sprintf("fail to clone rule, err: %s", err)
continue
}
@@ -576,17 +625,21 @@ func (rt *Router) cloneToMachine(c *gin.Context) {
exist, err := models.AlertRuleExists(rt.Ctx, 0, newRule.GroupId, newRule.DatasourceIdsJson, newRule.Name)
if err != nil {
reterr[alertRules[i].Name][f.IdentList[j]] = err.Error()
errMsg[f.IdentList[j]] = err.Error()
continue
}
if exist {
reterr[alertRules[i].Name][f.IdentList[j]] = fmt.Sprintf("rule already exists, ruleName: %s", newRule.Name)
errMsg[f.IdentList[j]] = fmt.Sprintf("rule already exists, ruleName: %s", newRule.Name)
continue
}
newRules = append(newRules, newRule)
}
if len(errMsg) > 0 {
reterr[alertRules[i].Name] = errMsg
}
}
ginx.NewRender(c).Data(reterr, models.InsertAlertRule(rt.Ctx, newRules))

View File

@@ -94,6 +94,14 @@ func (rt *Router) boardGet(c *gin.Context) {
ginx.NewRender(c).Data(board, nil)
}
// boardGetsByBids fetches multiple boards at once, identified by the
// comma-separated "bids" query parameter.
func (rt *Router) boardGetsByBids(c *gin.Context) {
	// Parse the comma-separated id list into int64 board ids.
	bids := str.IdsInt64(ginx.QueryStr(c, "bids", ""), ",")
	boards, err := models.BoardGetsByBids(rt.Ctx, bids)
	// NOTE(review): Dangerous presumably aborts the request on non-nil err
	// (matches its usage throughout this file), so err is nil below.
	ginx.Dangerous(err)
	ginx.NewRender(c).Data(boards, err)
}
func (rt *Router) boardPureGet(c *gin.Context) {
board, err := models.BoardGetByID(rt.Ctx, ginx.UrlParamInt64(c, "bid"))
ginx.Dangerous(err)

View File

@@ -4,10 +4,15 @@ import (
"net/http"
"github.com/ccfos/nightingale/v6/models"
"github.com/ccfos/nightingale/v6/pkg/ctx"
"github.com/gin-gonic/gin"
"github.com/toolkits/pkg/ginx"
"gorm.io/gorm"
)
const SYSTEM = "system"
func (rt *Router) builtinComponentsAdd(c *gin.Context) {
var lst []models.BuiltinComponent
ginx.BindJSON(c, &lst)
@@ -50,10 +55,31 @@ func (rt *Router) builtinComponentsPut(c *gin.Context) {
return
}
if bc.CreatedBy == SYSTEM {
req.Ident = bc.Ident
}
username := Username(c)
req.UpdatedBy = username
ginx.NewRender(c).Message(bc.Update(rt.Ctx, req))
err = models.DB(rt.Ctx).Transaction(func(tx *gorm.DB) error {
tCtx := &ctx.Context{
DB: tx,
}
txErr := models.BuiltinMetricBatchUpdateColumn(tCtx, "typ", bc.Ident, req.Ident, req.UpdatedBy)
if txErr != nil {
return txErr
}
txErr = bc.Update(tCtx, req)
if txErr != nil {
return txErr
}
return nil
})
ginx.NewRender(c).Message(err)
}
func (rt *Router) builtinComponentsDel(c *gin.Context) {

View File

@@ -6,6 +6,7 @@ import (
"strings"
"time"
"github.com/BurntSushi/toml"
"github.com/ccfos/nightingale/v6/models"
"github.com/gin-gonic/gin"
"github.com/toolkits/pkg/ginx"
@@ -52,15 +53,15 @@ func (rt *Router) builtinPayloadsAdd(c *gin.Context) {
}
bp := models.BuiltinPayload{
Type: lst[i].Type,
Component: lst[i].Component,
Cate: lst[i].Cate,
Name: rule.Name,
Tags: rule.AppendTags,
UUID: rule.UUID,
Content: string(contentBytes),
CreatedBy: username,
UpdatedBy: username,
Type: lst[i].Type,
ComponentID: lst[i].ComponentID,
Cate: lst[i].Cate,
Name: rule.Name,
Tags: rule.AppendTags,
UUID: rule.UUID,
Content: string(contentBytes),
CreatedBy: username,
UpdatedBy: username,
}
if err := bp.Add(rt.Ctx, username); err != nil {
@@ -81,15 +82,15 @@ func (rt *Router) builtinPayloadsAdd(c *gin.Context) {
}
bp := models.BuiltinPayload{
Type: lst[i].Type,
Component: lst[i].Component,
Cate: lst[i].Cate,
Name: alertRule.Name,
Tags: alertRule.AppendTags,
UUID: alertRule.UUID,
Content: lst[i].Content,
CreatedBy: username,
UpdatedBy: username,
Type: lst[i].Type,
ComponentID: lst[i].ComponentID,
Cate: lst[i].Cate,
Name: alertRule.Name,
Tags: alertRule.AppendTags,
UUID: alertRule.UUID,
Content: lst[i].Content,
CreatedBy: username,
UpdatedBy: username,
}
if err := bp.Add(rt.Ctx, username); err != nil {
@@ -115,15 +116,15 @@ func (rt *Router) builtinPayloadsAdd(c *gin.Context) {
}
bp := models.BuiltinPayload{
Type: lst[i].Type,
Component: lst[i].Component,
Cate: lst[i].Cate,
Name: dashboard.Name,
Tags: dashboard.Tags,
UUID: dashboard.UUID,
Content: string(contentBytes),
CreatedBy: username,
UpdatedBy: username,
Type: lst[i].Type,
ComponentID: lst[i].ComponentID,
Cate: lst[i].Cate,
Name: dashboard.Name,
Tags: dashboard.Tags,
UUID: dashboard.UUID,
Content: string(contentBytes),
CreatedBy: username,
UpdatedBy: username,
}
if err := bp.Add(rt.Ctx, username); err != nil {
@@ -144,21 +145,29 @@ func (rt *Router) builtinPayloadsAdd(c *gin.Context) {
}
bp := models.BuiltinPayload{
Type: lst[i].Type,
Component: lst[i].Component,
Cate: lst[i].Cate,
Name: dashboard.Name,
Tags: dashboard.Tags,
UUID: dashboard.UUID,
Content: lst[i].Content,
CreatedBy: username,
UpdatedBy: username,
Type: lst[i].Type,
ComponentID: lst[i].ComponentID,
Cate: lst[i].Cate,
Name: dashboard.Name,
Tags: dashboard.Tags,
UUID: dashboard.UUID,
Content: lst[i].Content,
CreatedBy: username,
UpdatedBy: username,
}
if err := bp.Add(rt.Ctx, username); err != nil {
reterr[bp.Name] = i18n.Sprintf(c.GetHeader("X-Language"), err.Error())
}
} else {
if lst[i].Type == "collect" {
c := make(map[string]interface{})
if _, err := toml.Decode(lst[i].Content, &c); err != nil {
reterr[lst[i].Name] = err.Error()
continue
}
}
if err := lst[i].Add(rt.Ctx, username); err != nil {
reterr[lst[i].Name] = i18n.Sprintf(c.GetHeader("X-Language"), err.Error())
}
@@ -171,19 +180,20 @@ func (rt *Router) builtinPayloadsAdd(c *gin.Context) {
func (rt *Router) builtinPayloadsGets(c *gin.Context) {
typ := ginx.QueryStr(c, "type", "")
component := ginx.QueryStr(c, "component", "")
ComponentID := ginx.QueryInt64(c, "component_id", 0)
cate := ginx.QueryStr(c, "cate", "")
query := ginx.QueryStr(c, "query", "")
lst, err := models.BuiltinPayloadGets(rt.Ctx, typ, component, cate, query)
lst, err := models.BuiltinPayloadGets(rt.Ctx, uint64(ComponentID), typ, cate, query)
ginx.NewRender(c).Data(lst, err)
}
func (rt *Router) builtinPayloadcatesGet(c *gin.Context) {
typ := ginx.QueryStr(c, "type", "")
component := ginx.QueryStr(c, "component", "")
ComponentID := ginx.QueryInt64(c, "component_id", 0)
cates, err := models.BuiltinPayloadCates(rt.Ctx, typ, component)
cates, err := models.BuiltinPayloadCates(rt.Ctx, typ, uint64(ComponentID))
ginx.NewRender(c).Data(cates, err)
}
@@ -229,6 +239,11 @@ func (rt *Router) builtinPayloadsPut(c *gin.Context) {
req.Name = dashboard.Name
req.Tags = dashboard.Tags
} else if req.Type == "collect" {
c := make(map[string]interface{})
if _, err := toml.Decode(req.Content, &c); err != nil {
ginx.Bomb(http.StatusBadRequest, err.Error())
}
}
username := Username(c)
@@ -245,3 +260,15 @@ func (rt *Router) builtinPayloadsDel(c *gin.Context) {
ginx.NewRender(c).Message(models.BuiltinPayloadDels(rt.Ctx, req.Ids))
}
// builtinPayloadsGetByUUIDOrID returns a single builtin payload looked up
// by the "uuid" query parameter when it is non-zero, otherwise by the "id"
// query parameter.
func (rt *Router) builtinPayloadsGetByUUIDOrID(c *gin.Context) {
	uuid := ginx.QueryInt64(c, "uuid", 0)
	// uuid takes precedence over id.
	if uuid != 0 {
		ginx.NewRender(c).Data(models.BuiltinPayloadGet(rt.Ctx, "uuid = ?", uuid))
		return
	}
	id := ginx.QueryInt64(c, "id", 0)
	ginx.NewRender(c).Data(models.BuiltinPayloadGet(rt.Ctx, "id = ?", id))
}

View File

@@ -24,6 +24,11 @@ func (rt *Router) configGet(c *gin.Context) {
ginx.NewRender(c).Data(configs, err)
}
// configGetAll renders every config entry returned by models.ConfigsGetAll.
func (rt *Router) configGetAll(c *gin.Context) {
	allConfigs, err := models.ConfigsGetAll(rt.Ctx)
	ginx.NewRender(c).Data(allConfigs, err)
}
func (rt *Router) configGetByKey(c *gin.Context) {
config, err := models.ConfigsGet(rt.Ctx, ginx.QueryStr(c, "key"))
ginx.NewRender(c).Data(config, err)

View File

@@ -92,10 +92,12 @@ func (rt *Router) datasourceUpsert(c *gin.Context) {
var err error
var count int64
err = DatasourceCheck(req)
if err != nil {
Dangerous(c, err)
return
if !req.ForceSave {
err = DatasourceCheck(req)
if err != nil {
Dangerous(c, err)
return
}
}
if req.Id == 0 {

View File

@@ -45,6 +45,10 @@ func (rt *Router) statistic(c *gin.Context) {
statistics, err = models.ConfigsUserVariableStatistics(rt.Ctx)
ginx.NewRender(c).Data(statistics, err)
return
case "cval":
statistics, err = models.ConfigCvalStatistics(rt.Ctx)
ginx.NewRender(c).Data(statistics, err)
return
default:
ginx.Bomb(http.StatusBadRequest, "invalid name")
}
@@ -65,6 +69,23 @@ func queryDatasourceIds(c *gin.Context) []int64 {
return ids
}
// queryStrListField reads the query parameter fieldName and splits its
// value by each separator in sep, applied in order (e.g. first ",", then
// " ", then "\n" — see the "hosts" caller). It returns nil when the
// parameter is absent or empty. Empty tokens produced by consecutive
// separators are preserved, matching strings.Split semantics.
//
// Fix: the original named its local variable "str", shadowing the imported
// str package used elsewhere in this file (str.IdsInt64); locals are
// renamed to avoid the shadow. Behavior is unchanged.
func queryStrListField(c *gin.Context, fieldName string, sep ...string) []string {
	raw := ginx.QueryStr(c, fieldName, "")
	if raw == "" {
		return nil
	}

	// Split progressively: each pass re-splits every current token by the
	// next separator.
	tokens := []string{raw}
	for _, s := range sep {
		var next []string
		for _, token := range tokens {
			next = append(next, strings.Split(token, s)...)
		}
		tokens = next
	}
	return tokens
}
type idsForm struct {
Ids []int64 `json:"ids"`
IsSyncToFlashDuty bool `json:"is_sync_to_flashduty"`

View File

@@ -6,6 +6,7 @@ import (
"errors"
"io/ioutil"
"sort"
"strconv"
"strings"
"time"
@@ -80,16 +81,48 @@ func HandleHeartbeat(c *gin.Context, ctx *ctx.Context, engineName string, metaSe
identSet.MSet(items)
if target, has := targetCache.Get(req.Hostname); has && target != nil {
gid := ginx.QueryInt64(c, "gid", 0)
gidsStr := ginx.QueryStr(c, "gid", "")
overwriteGids := ginx.QueryBool(c, "overwrite_gids", false)
hostIp := strings.TrimSpace(req.HostIp)
gids := strings.Split(gidsStr, ",")
if overwriteGids {
groupIds := make([]int64, 0)
for i := range gids {
if gids[i] == "" {
continue
}
groupId, err := strconv.ParseInt(gids[i], 10, 64)
if err != nil {
logger.Warningf("update target:%s group ids failed, err: %v", req.Hostname, err)
continue
}
groupIds = append(groupIds, groupId)
}
err := models.TargetOverrideBgids(ctx, []string{target.Ident}, groupIds)
if err != nil {
logger.Warningf("update target:%s group ids failed, err: %v", target.Ident, err)
}
} else if gidsStr != "" {
for i := range gids {
groupId, err := strconv.ParseInt(gids[i], 10, 64)
if err != nil {
logger.Warningf("update target:%s group ids failed, err: %v", req.Hostname, err)
continue
}
if !target.MatchGroupId(groupId) {
err := models.TargetBindBgids(ctx, []string{target.Ident}, []int64{groupId})
if err != nil {
logger.Warningf("update target:%s group ids failed, err: %v", target.Ident, err)
}
}
}
}
newTarget := models.Target{}
targetNeedUpdate := false
if gid != 0 && gid != target.GroupId {
newTarget.GroupId = gid
targetNeedUpdate = true
}
if hostIp != "" && hostIp != target.HostIp {
newTarget.HostIp = hostIp
targetNeedUpdate = true

View File

@@ -50,7 +50,8 @@ func (rt *Router) alertMuteGets(c *gin.Context) {
prods := strings.Fields(ginx.QueryStr(c, "prods", ""))
bgid := ginx.QueryInt64(c, "bgid", -1)
query := ginx.QueryStr(c, "query", "")
lst, err := models.AlertMuteGets(rt.Ctx, prods, bgid, query)
disabled := ginx.QueryInt(c, "disabled", -1)
lst, err := models.AlertMuteGets(rt.Ctx, prods, bgid, disabled, query)
ginx.NewRender(c).Data(lst, err)
}

View File

@@ -138,7 +138,7 @@ func (rt *Router) notifyTplPreview(c *gin.Context) {
continue
}
arr := strings.Split(pair, "=")
arr := strings.SplitN(pair, "=", 2)
if len(arr) != 2 {
continue
}

View File

@@ -52,6 +52,8 @@ func (rt *Router) targetGets(c *gin.Context) {
order := ginx.QueryStr(c, "order", "ident")
desc := ginx.QueryBool(c, "desc", false)
hosts := queryStrListField(c, "hosts", ",", " ", "\n")
var err error
if len(bgids) == 0 {
user := c.MustGet("user").(*models.User)
@@ -65,11 +67,13 @@ func (rt *Router) targetGets(c *gin.Context) {
bgids = append(bgids, 0)
}
}
options := []models.BuildTargetWhereOption{
models.BuildTargetWhereWithBgids(bgids),
models.BuildTargetWhereWithDsIds(dsIds),
models.BuildTargetWhereWithQuery(query),
models.BuildTargetWhereWithDowntime(downtime),
models.BuildTargetWhereWithHosts(hosts),
}
total, err := models.TargetTotal(rt.Ctx, options...)
ginx.Dangerous(err)
@@ -78,6 +82,13 @@ func (rt *Router) targetGets(c *gin.Context) {
ginx.Offset(c, limit), order, desc, options...)
ginx.Dangerous(err)
tgs, err := models.TargetBusiGroupsGetAll(rt.Ctx)
ginx.Dangerous(err)
for _, t := range list {
t.GroupIds = tgs[t.Ident]
}
if err == nil {
now := time.Now()
cache := make(map[int64]*models.BusiGroup)
@@ -382,8 +393,15 @@ type targetBgidForm struct {
Bgid int64 `json:"bgid"`
}
func (rt *Router) targetUpdateBgid(c *gin.Context) {
var f targetBgidForm
type targetBgidsForm struct {
Idents []string `json:"idents" binding:"required_without=HostIps"`
HostIps []string `json:"host_ips" binding:"required_without=Idents"`
Bgids []int64 `json:"bgids"`
Action string `json:"action"` // add del reset
}
func (rt *Router) targetBindBgids(c *gin.Context) {
var f targetBgidsForm
var err error
var failedResults = make(map[string]string)
ginx.BindJSON(c, &f)
@@ -399,35 +417,24 @@ func (rt *Router) targetUpdateBgid(c *gin.Context) {
}
user := c.MustGet("user").(*models.User)
if user.IsAdmin() {
ginx.NewRender(c).Data(failedResults, models.TargetUpdateBgid(rt.Ctx, f.Idents, f.Bgid, false))
return
}
if f.Bgid > 0 {
// 把要操作的机器分成两部分一部分是bgid为0需要管理员分配另一部分bgid>0说明是业务组内部想调整
// 比如原来分配给didiyun的机器didiyun的管理员想把部分机器调整到didiyun-ceph下
// 对于调整的这种情况当前登录用户要对这批机器有操作权限同时还要对目标BG有操作权限
orphans, err := models.IdentsFilter(rt.Ctx, f.Idents, "group_id = ?", 0)
if !user.IsAdmin() {
// 普通用户,检查用户是否有权限操作所有请求的业务组
existing, _, err := models.SeparateTargetIdents(rt.Ctx, f.Idents)
ginx.Dangerous(err)
rt.checkTargetPerm(c, existing)
// 机器里边存在未归组的登录用户就需要是admin
if len(orphans) > 0 && !user.IsAdmin() {
can, err := user.CheckPerm(rt.Ctx, "/targets/bind")
var groupIds []int64
if f.Action == "reset" {
// 如果是复写,则需要检查用户是否有权限操作机器之前的业务组
bgids, err := models.TargetGroupIdsGetByIdents(rt.Ctx, f.Idents)
ginx.Dangerous(err)
if !can {
ginx.Bomb(http.StatusForbidden, "No permission. Only admin can assign BG")
}
groupIds = append(groupIds, bgids...)
}
groupIds = append(groupIds, f.Bgids...)
reBelongs, err := models.IdentsFilter(rt.Ctx, f.Idents, "group_id > ?", 0)
ginx.Dangerous(err)
if len(reBelongs) > 0 {
// 对于这些要重新分配的机器操作者要对这些机器本身有权限同时要对目标bgid有权限
rt.checkTargetPerm(c, f.Idents)
bg := BusiGroup(rt.Ctx, f.Bgid)
for _, bgid := range groupIds {
bg := BusiGroup(rt.Ctx, bgid)
can, err := user.CanDoBusiGroup(rt.Ctx, bg, "rw")
ginx.Dangerous(err)
@@ -435,14 +442,24 @@ func (rt *Router) targetUpdateBgid(c *gin.Context) {
ginx.Bomb(http.StatusForbidden, "No permission. You are not admin of BG(%s)", bg.Name)
}
}
} else if f.Bgid == 0 {
// 退还机器
rt.checkTargetPerm(c, f.Idents)
} else {
ginx.Bomb(http.StatusBadRequest, "invalid bgid")
can, err := user.CheckPerm(rt.Ctx, "/targets/bind")
ginx.Dangerous(err)
if !can {
ginx.Bomb(http.StatusForbidden, "No permission. Only admin can assign BG")
}
}
ginx.NewRender(c).Data(failedResults, models.TargetUpdateBgid(rt.Ctx, f.Idents, f.Bgid, false))
switch f.Action {
case "add":
ginx.NewRender(c).Data(failedResults, models.TargetBindBgids(rt.Ctx, f.Idents, f.Bgids))
case "del":
ginx.NewRender(c).Data(failedResults, models.TargetUnbindBgids(rt.Ctx, f.Idents, f.Bgids))
case "reset":
ginx.NewRender(c).Data(failedResults, models.TargetOverrideBgids(rt.Ctx, f.Idents, f.Bgids))
default:
ginx.Bomb(http.StatusBadRequest, "invalid action")
}
}
func (rt *Router) targetUpdateBgidByService(c *gin.Context) {
@@ -461,7 +478,7 @@ func (rt *Router) targetUpdateBgidByService(c *gin.Context) {
ginx.Bomb(http.StatusBadRequest, err.Error())
}
ginx.NewRender(c).Data(failedResults, models.TargetUpdateBgid(rt.Ctx, f.Idents, f.Bgid, false))
ginx.NewRender(c).Data(failedResults, models.TargetOverrideBgids(rt.Ctx, f.Idents, []int64{f.Bgid}))
}
type identsForm struct {
@@ -485,7 +502,7 @@ func (rt *Router) targetDel(c *gin.Context) {
ginx.Bomb(http.StatusBadRequest, err.Error())
}
ginx.NewRender(c).Data(failedResults, models.TargetDel(rt.Ctx, f.Idents))
ginx.NewRender(c).Data(failedResults, models.TargetDel(rt.Ctx, f.Idents, rt.TargetDeleteHook))
}
func (rt *Router) targetDelByService(c *gin.Context) {
@@ -504,7 +521,7 @@ func (rt *Router) targetDelByService(c *gin.Context) {
ginx.Bomb(http.StatusBadRequest, err.Error())
}
ginx.NewRender(c).Data(failedResults, models.TargetDel(rt.Ctx, f.Idents))
ginx.NewRender(c).Data(failedResults, models.TargetDel(rt.Ctx, f.Idents, rt.TargetDeleteHook))
}
func (rt *Router) checkTargetPerm(c *gin.Context, idents []string) {

View File

@@ -126,7 +126,7 @@ func (rt *Router) taskAdd(c *gin.Context) {
rt.checkTargetPerm(c, f.Hosts)
// call ibex
taskId, err := sender.TaskAdd(rt.Ctx, f, user.Username, rt.Ctx.IsCenter)
taskId, err := sender.TaskAdd(f, user.Username, rt.Ctx.IsCenter)
ginx.Dangerous(err)
if taskId <= 0 {

View File

@@ -82,7 +82,7 @@ func (rt *Router) QueryData(c *gin.Context) {
var err error
tdClient := rt.TdendgineClients.GetCli(f.DatasourceId)
for _, q := range f.Querys {
datas, err := tdClient.Query(q)
datas, err := tdClient.Query(q, 0)
ginx.Dangerous(err)
resp = append(resp, datas...)
}

View File

@@ -18,7 +18,7 @@ func Upgrade(configFile string) error {
return err
}
ctx := ctx.NewContext(context.Background(), db, nil, true)
ctx := ctx.NewContext(context.Background(), db, true)
for _, cluster := range config.Clusters {
count, err := models.GetDatasourcesCountByName(ctx, cluster.Name)
if err != nil {

View File

@@ -12,7 +12,6 @@ import (
"github.com/ccfos/nightingale/v6/center/metas"
"github.com/ccfos/nightingale/v6/conf"
"github.com/ccfos/nightingale/v6/dumper"
"github.com/ccfos/nightingale/v6/ibex"
"github.com/ccfos/nightingale/v6/memsto"
"github.com/ccfos/nightingale/v6/pkg/ctx"
"github.com/ccfos/nightingale/v6/pkg/httpx"
@@ -23,6 +22,8 @@ import (
"github.com/ccfos/nightingale/v6/pushgw/writer"
"github.com/ccfos/nightingale/v6/storage"
"github.com/ccfos/nightingale/v6/tdengine"
"github.com/flashcatcloud/ibex/src/cmd/ibex"
)
func Initialize(configDir string, cryptoKey string) (func(), error) {
@@ -39,6 +40,7 @@ func Initialize(configDir string, cryptoKey string) (func(), error) {
if len(config.CenterApi.Addrs) < 1 {
return nil, errors.New("failed to init config: the CenterApi configuration is missing")
}
ctx := ctx.NewContext(context.Background(), nil, false, config.CenterApi)
var redis storage.Redis
redis, err = storage.NewRedis(config.Redis)
@@ -46,17 +48,17 @@ func Initialize(configDir string, cryptoKey string) (func(), error) {
return nil, err
}
ctx := ctx.NewContext(context.Background(), nil, redis, false, config.CenterApi)
syncStats := memsto.NewSyncStats()
targetCache := memsto.NewTargetCache(ctx, syncStats, redis)
busiGroupCache := memsto.NewBusiGroupCache(ctx, syncStats)
configCvalCache := memsto.NewCvalCache(ctx, syncStats)
idents := idents.New(ctx, redis)
metas := metas.New(redis)
writers := writer.NewWriters(config.Pushgw)
pushgwRouter := pushgwrt.New(config.HTTP, config.Pushgw, config.Alert, targetCache, busiGroupCache, idents, metas, writers, ctx)
r := httpx.GinEngine(config.Global.RunMode, config.HTTP)
r := httpx.GinEngine(config.Global.RunMode, config.HTTP, configCvalCache.PrintBodyPaths, configCvalCache.PrintAccessLog)
pushgwRouter.Config(r)
if !config.Alert.Disable {
@@ -82,12 +84,12 @@ func Initialize(configDir string, cryptoKey string) (func(), error) {
alertrtRouter.Config(r)
if config.Ibex.Enable {
ibex.ServerStart(ctx, false, nil, redis, config.HTTP.APIForService.BasicAuth, config.Alert.Heartbeat, &config.CenterApi, r, nil, config.Ibex, config.HTTP.Port)
ibex.ServerStart(false, nil, redis, config.HTTP.APIForService.BasicAuth, config.Alert.Heartbeat, &config.CenterApi, r, nil, config.Ibex, config.HTTP.Port)
}
}
dumper.ConfigRouter(r)
httpClean := httpx.Init(config.HTTP, context.Background(), r)
httpClean := httpx.Init(config.HTTP, r)
return func() {
logxClean()

View File

@@ -1,119 +0,0 @@
package main
import (
"fmt"
"os"
"github.com/ccfos/nightingale/v6/ibex/agentd"
"github.com/ccfos/nightingale/v6/ibex/server"
"github.com/toolkits/pkg/net/tcpx"
"github.com/toolkits/pkg/runner"
"github.com/urfave/cli/v2"
)
// VERSION go build -ldflags "-X main.VERSION=x.x.x"
var VERSION = "not specified"
func main() {
app := cli.NewApp()
app.Name = "ibex"
app.Version = VERSION
app.Usage = "Ibex, running scripts on large scale machines"
app.Commands = []*cli.Command{
newCenterServerCmd(),
newEdgeServerCmd(),
newAgentdCmd(),
}
app.Run(os.Args)
}
func newCenterServerCmd() *cli.Command {
return &cli.Command{
Name: "server",
Usage: "Run server",
Flags: []cli.Flag{
&cli.StringFlag{
Name: "conf",
Aliases: []string{"c"},
Usage: "specify configuration file(.json,.yaml,.toml)",
},
},
Action: func(c *cli.Context) error {
printEnv()
tcpx.WaitHosts()
var opts []server.ServerOption
if c.String("conf") != "" {
opts = append(opts, server.SetConfigFile(c.String("conf")))
}
opts = append(opts, server.SetVersion(VERSION))
server.Run(true, opts...)
return nil
},
}
}
func newEdgeServerCmd() *cli.Command {
return &cli.Command{
Name: "edge server",
Usage: "Run edge server",
Flags: []cli.Flag{
&cli.StringFlag{
Name: "conf",
Aliases: []string{"c"},
Usage: "specify configuration file(.json,.yaml,.toml)",
},
},
Action: func(c *cli.Context) error {
printEnv()
tcpx.WaitHosts()
var opts []server.ServerOption
if c.String("conf") != "" {
opts = append(opts, server.SetConfigFile(c.String("conf")))
}
opts = append(opts, server.SetVersion(VERSION))
server.Run(false, opts...)
return nil
},
}
}
func newAgentdCmd() *cli.Command {
return &cli.Command{
Name: "agentd",
Usage: "Run agentd",
Flags: []cli.Flag{
&cli.StringFlag{
Name: "conf",
Aliases: []string{"c"},
Usage: "specify configuration file(.json,.yaml,.toml)",
},
},
Action: func(c *cli.Context) error {
printEnv()
var opts []agentd.AgentdOption
if c.String("conf") != "" {
opts = append(opts, agentd.SetConfigFile(c.String("conf")))
}
opts = append(opts, agentd.SetVersion(VERSION))
agentd.Run(opts...)
return nil
},
}
}
func printEnv() {
runner.Init()
fmt.Println("runner.cwd:", runner.Cwd)
fmt.Println("runner.hostname:", runner.Hostname)
fmt.Println("runner.fd_limits:", runner.FdLimits())
fmt.Println("runner.vm_limits:", runner.VMLimits())
}

View File

@@ -561,7 +561,7 @@ CREATE TABLE alert_cur_event (
target_note varchar(191) not null default '' ,
first_trigger_time bigint,
trigger_time bigint not null,
trigger_value varchar(255) not null,
trigger_value varchar(2048) not null,
annotations text not null ,
rule_config text not null ,
tags varchar(1024) not null default '' ,
@@ -621,7 +621,7 @@ CREATE TABLE alert_his_event (
target_note varchar(191) not null default '' ,
first_trigger_time bigint,
trigger_time bigint not null,
trigger_value varchar(255) not null,
trigger_value varchar(2048) not null,
recover_time bigint not null default 0,
last_eval_time bigint not null default 0 ,
tags varchar(1024) not null default '' ,

View File

@@ -363,6 +363,7 @@ CREATE TABLE `target` (
`ident` varchar(191) not null comment 'target id',
`note` varchar(255) not null default '' comment 'append to alert event as field',
`tags` varchar(512) not null default '' comment 'append to series data as tags, split by space, append external space at suffix',
`host_tags` varchar(512) not null default '' comment 'append to series data as tags, split by space, append external space at suffix',
`host_ip` varchar(15) default '' COMMENT 'IPv4 string',
`agent_version` varchar(255) default '' COMMENT 'agent version',
`engine_name` varchar(255) default '' COMMENT 'engine_name',
@@ -453,7 +454,7 @@ CREATE TABLE `alert_cur_event` (
`target_note` varchar(191) not null default '' comment 'target note',
`first_trigger_time` bigint,
`trigger_time` bigint not null,
`trigger_value` varchar(255) not null,
`trigger_value` text not null,
`annotations` text not null comment 'annotations',
`rule_config` text not null comment 'annotations',
`tags` varchar(1024) not null default '' comment 'merge data_tags rule_tags, split by ,,',
@@ -493,7 +494,7 @@ CREATE TABLE `alert_his_event` (
`target_note` varchar(191) not null default '' comment 'target note',
`first_trigger_time` bigint,
`trigger_time` bigint not null,
`trigger_value` varchar(255) not null,
`trigger_value` text not null,
`recover_time` bigint not null default 0,
`last_eval_time` bigint not null default 0 comment 'for time filter',
`tags` varchar(1024) not null default '' comment 'merge data_tags rule_tags, split by ,,',
@@ -528,6 +529,7 @@ CREATE TABLE `builtin_components` (
CREATE TABLE `builtin_payloads` (
`id` bigint(20) NOT NULL AUTO_INCREMENT COMMENT '''unique identifier''',
`component_id` bigint(20) NOT NULL DEFAULT 0 COMMENT 'component_id',
`uuid` bigint(20) NOT NULL COMMENT '''uuid of payload''',
`type` varchar(191) NOT NULL COMMENT '''type of payload''',
`component` varchar(191) NOT NULL COMMENT '''component of payload''',
@@ -724,6 +726,15 @@ CREATE TABLE `metric_filter` (
KEY `idx_name` (`name`)
) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4;
CREATE TABLE `target_busi_group` (
`id` bigint NOT NULL AUTO_INCREMENT,
`target_ident` varchar(191) NOT NULL,
`group_id` bigint NOT NULL,
`update_at` bigint NOT NULL,
PRIMARY KEY (`id`),
UNIQUE KEY `idx_target_group` (`target_ident`,`group_id`)
) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4;
CREATE TABLE `task_meta`
(
`id` bigint unsigned NOT NULL AUTO_INCREMENT,

View File

@@ -103,4 +103,17 @@ CREATE TABLE notification_record (
/* v7.3.0 2024-08-26 */
ALTER TABLE `target` ADD COLUMN `host_tags` TEXT COMMENT 'global labels set in conf file';
ALTER TABLE `target` ADD COLUMN `host_tags` TEXT COMMENT 'global labels set in conf file';
/* v7.3.4 2024-08-28 */
ALTER TABLE `builtin_payloads` ADD COLUMN `component_id` bigint(20) NOT NULL DEFAULT 0 COMMENT 'component_id';
/* v7.4.0 2024-09-20 */
CREATE TABLE `target_busi_group` (
`id` bigint NOT NULL AUTO_INCREMENT,
`target_ident` varchar(191) NOT NULL,
`group_id` bigint NOT NULL,
`update_at` bigint NOT NULL,
PRIMARY KEY (`id`),
UNIQUE KEY `idx_target_group` (`target_ident`,`group_id`)
) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4;

View File

@@ -389,7 +389,7 @@ CREATE TABLE `alert_cur_event` (
`target_note` varchar(191) not null default '',
`first_trigger_time` bigint,
`trigger_time` bigint not null,
`trigger_value` varchar(255) not null,
`trigger_value` varchar(2048) not null,
`annotations` text not null,
`rule_config` text not null,
`tags` varchar(1024) not null default ''
@@ -427,7 +427,7 @@ CREATE TABLE `alert_his_event` (
`target_note` varchar(191) not null default '',
`first_trigger_time` bigint,
`trigger_time` bigint not null,
`trigger_value` varchar(255) not null,
`trigger_value` varchar(2048) not null,
`recover_time` bigint not null default 0,
`last_eval_time` bigint not null default 0,
`tags` varchar(1024) not null default '',

View File

@@ -1,38 +0,0 @@
# debug, release
RunMode = "debug"
# task meta storage dir
MetaDir = "./meta"
[HTTP]
Enable = true
# http listening address
Host = "0.0.0.0"
# http listening port
Port = 2090
# https cert file path
CertFile = ""
# https key file path
KeyFile = ""
# whether print access log
PrintAccessLog = true
# whether enable pprof
PProf = false
# http graceful shutdown timeout, unit: s
ShutdownTimeout = 30
# max content length: 64M
MaxContentLength = 67108864
# http server read timeout, unit: s
ReadTimeout = 20
# http server write timeout, unit: s
WriteTimeout = 40
# http server idle timeout, unit: s
IdleTimeout = 120
[Heartbeat]
# unit: ms
Interval = 1000
# rpc servers
Servers = ["127.0.0.1:20090"]
# $ip or $hostname or specified string
Host = "$hostname"

View File

@@ -1,20 +0,0 @@
[Unit]
Description="ibex-agentd"
After=network.target
[Service]
Type=simple
ExecStart=/root/gopath/ibex/ibex agentd
WorkingDirectory=/root/gopath/ibex
Restart=on-failure
SuccessExitStatus=0
LimitNOFILE=65536
StandardOutput=syslog
StandardError=syslog
SyslogIdentifier=ibex-agentd
[Install]
WantedBy=multi-user.target

View File

@@ -1,20 +0,0 @@
[Unit]
Description="ibex-server"
After=network.target
[Service]
Type=simple
ExecStart=/root/gopath/ibex/ibex server
WorkingDirectory=/root/gopath/ibex
Restart=on-failure
SuccessExitStatus=0
LimitNOFILE=65536
StandardOutput=syslog
StandardError=syslog
SyslogIdentifier=ibex-server
[Install]
WantedBy=multi-user.target

View File

@@ -1,86 +0,0 @@
# debug, release
RunMode = "debug"
[Log]
# log write dir
Dir = "logs-server"
# log level: DEBUG INFO WARNING ERROR
Level = "DEBUG"
# stdout, stderr, file
Output = "stdout"
# # rotate by time
# KeepHours: 4
# # rotate by size
# RotateNum = 3
# # unit: MB
# RotateSize = 256
[HTTP]
Enable = true
# http listening address
Host = "0.0.0.0"
# http listening port
Port = 10090
# https cert file path
CertFile = ""
# https key file path
KeyFile = ""
# whether print access log
PrintAccessLog = true
# whether enable pprof
PProf = false
# http graceful shutdown timeout, unit: s
ShutdownTimeout = 30
# max content length: 64M
MaxContentLength = 67108864
# http server read timeout, unit: s
ReadTimeout = 20
# http server write timeout, unit: s
WriteTimeout = 40
# http server idle timeout, unit: s
IdleTimeout = 120
[BasicAuth]
# using when call apis
ibex = "ibex"
[RPC]
Listen = "0.0.0.0:20090"
[Heartbeat]
# auto detect if blank
IP = ""
# unit: ms
Interval = 1000
[Output]
# database | remote
ComeFrom = "database"
AgtdPort = 2090
[DB]
# postgres: host=%s port=%s user=%s dbname=%s password=%s sslmode=%s
# postgres: DSN="host=127.0.0.1 port=5432 user=root dbname=n9e_v6 password=1234 sslmode=disable"
DSN="root:1234@tcp(127.0.0.1:3306)/ibex?charset=utf8mb4&parseTime=True&loc=Local&allowNativePasswords=true"
# enable debug mode or not
Debug = false
# mysql postgres
DBType = "mysql"
# unit: s
MaxLifetime = 7200
# max open connections
MaxOpenConns = 150
# max idle connections
MaxIdleConns = 50
# table prefix
TablePrefix = ""
[Redis]
# address, ip:port or ip1:port,ip2:port for cluster and sentinel(SentinelAddrs)
Address = "127.0.0.1:6379"
# Username = ""
# Password = ""
# DB = 0
# UseTLS = false
# TLSMinVersion = "1.2"
# standalone cluster sentinel

14
go.mod
View File

@@ -3,11 +3,12 @@ module github.com/ccfos/nightingale/v6
go 1.18
require (
github.com/BurntSushi/toml v1.3.2
github.com/BurntSushi/toml v0.3.1
github.com/coreos/go-oidc v2.2.1+incompatible
github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc
github.com/dgrijalva/jwt-go v3.2.0+incompatible
github.com/expr-lang/expr v1.16.1
github.com/flashcatcloud/ibex v1.3.5
github.com/gin-contrib/pprof v1.4.0
github.com/gin-gonic/gin v1.9.1
github.com/go-ldap/ldap/v3 v3.4.4
@@ -32,8 +33,7 @@ require (
github.com/redis/go-redis/v9 v9.0.2
github.com/spaolacci/murmur3 v1.1.0
github.com/tidwall/gjson v1.14.0
github.com/toolkits/pkg v1.3.6
github.com/urfave/cli/v2 v2.27.4
github.com/toolkits/pkg v1.3.8
golang.org/x/exp v0.0.0-20230713183714-613f0c0eb8a1
golang.org/x/oauth2 v0.10.0
gopkg.in/gomail.v2 v2.0.0-20160411212932-81ebce5c23df
@@ -44,12 +44,6 @@ require (
gorm.io/gorm v1.25.7-0.20240204074919-46816ad31dde
)
require (
github.com/cpuguy83/go-md2man/v2 v2.0.4 // indirect
github.com/russross/blackfriday/v2 v2.1.0 // indirect
github.com/xrash/smetrics v0.0.0-20240521201337-686a1a2994c1 // indirect
)
require (
github.com/Azure/go-ntlmssp v0.0.0-20220621081337-cb9428e4ac1e // indirect
github.com/beorn7/perks v1.0.1 // indirect
@@ -96,7 +90,7 @@ require (
github.com/tidwall/match v1.1.1 // indirect
github.com/tidwall/pretty v1.2.0 // indirect
github.com/twitchyliquid64/golang-asm v0.15.1 // indirect
github.com/ugorji/go/codec v1.2.11
github.com/ugorji/go/codec v1.2.11 // indirect
go.uber.org/atomic v1.11.0 // indirect
go.uber.org/automaxprocs v1.5.2 // indirect
golang.org/x/arch v0.3.0 // indirect

17
go.sum
View File

@@ -5,9 +5,8 @@ github.com/Azure/azure-sdk-for-go/sdk/internal v1.3.0 h1:sXr+ck84g/ZlZUOZiNELInm
github.com/Azure/go-ntlmssp v0.0.0-20220621081337-cb9428e4ac1e h1:NeAW1fUYUEWhft7pkxDf6WoUvEZJ/uOKsvtpjLnn8MU=
github.com/Azure/go-ntlmssp v0.0.0-20220621081337-cb9428e4ac1e/go.mod h1:chxPXzSsl7ZWRAuOIE23GDNzjWuZquvFlgA8xmpunjU=
github.com/AzureAD/microsoft-authentication-library-for-go v1.0.0 h1:OBhqkivkhkMqLPymWEppkm7vgPQY2XsHoEkaMQ0AdZY=
github.com/BurntSushi/toml v0.3.1 h1:WXkYYl6Yr3qBf1K79EBnL4mak0OimBfB0XUf9Vl28OQ=
github.com/BurntSushi/toml v0.3.1/go.mod h1:xHWCNGjB5oqiDr8zfno3MHue2Ht5sIBksp03qcyfWMU=
github.com/BurntSushi/toml v1.3.2 h1:o7IhLm0Msx3BaB+n3Ag7L8EVlByGnpq14C4YWiu/gL8=
github.com/BurntSushi/toml v1.3.2/go.mod h1:CxXYINrC8qIiEnFrOxCa7Jy5BFHlXnUU2pbicEuybxQ=
github.com/Masterminds/semver/v3 v3.1.1 h1:hLg3sBzpNErnxhQtUy/mmLR2I9foDujNK030IGemrRc=
github.com/Masterminds/semver/v3 v3.1.1/go.mod h1:VPu/7SZ7ePZ3QOrcuXROw5FAcLl4a0cBrbBpGY/8hQs=
github.com/alecthomas/units v0.0.0-20211218093645-b94a6e3cc137 h1:s6gZFSlWYmbqAuRjVTiNNhvNRfY2Wxp9nhfyel4rklc=
@@ -30,8 +29,6 @@ github.com/coreos/go-oidc v2.2.1+incompatible h1:mh48q/BqXqgjVHpy2ZY7WnWAbenxRjs
github.com/coreos/go-oidc v2.2.1+incompatible/go.mod h1:CgnwVTmzoESiwO9qyAFEMiHoZ1nMCKZlZ9V6mm3/LKc=
github.com/coreos/go-systemd v0.0.0-20190321100706-95778dfbb74e/go.mod h1:F5haX7vjVVG0kc13fIWeqUViNPyEJxv/OmvnBo0Yme4=
github.com/coreos/go-systemd v0.0.0-20190719114852-fd7a80b32e1f/go.mod h1:F5haX7vjVVG0kc13fIWeqUViNPyEJxv/OmvnBo0Yme4=
github.com/cpuguy83/go-md2man/v2 v2.0.4 h1:wfIWP927BUkWJb2NmU/kNDYIBTh/ziUX91+lVfRxZq4=
github.com/cpuguy83/go-md2man/v2 v2.0.4/go.mod h1:tgQtvFlXSQOSOSIRvRPT7W67SCa46tRHOmNcaadrF8o=
github.com/creack/pty v1.1.7/go.mod h1:lj5s0c3V2DBrqTV7llrYr5NG6My20zk30Fl46Y7DoTY=
github.com/creack/pty v1.1.9/go.mod h1:oKZEueFk5CKHvIhNR5MUki03XCEU+Q6VDXinZuGJ33E=
github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
@@ -50,6 +47,8 @@ github.com/fatih/camelcase v1.0.0 h1:hxNvNX/xYBp0ovncs8WyWZrOrpBNub/JfaMvbURyft8
github.com/fatih/camelcase v1.0.0/go.mod h1:yN2Sb0lFhZJUdVvtELVWefmrXpuZESvPmqwoZc+/fpc=
github.com/fatih/structs v1.1.0 h1:Q7juDM0QtcnhCpeyLGQKyg4TOIghuNXrkL32pHAUMxo=
github.com/fatih/structs v1.1.0/go.mod h1:9NiDSp5zOcgEDl+j00MP/WkGVPOlPRLejGD8Ga6PJ7M=
github.com/flashcatcloud/ibex v1.3.5 h1:8GOOf5+aJT0TP/MC6izz7CO5JKJSdKVFBwL0vQp93Nc=
github.com/flashcatcloud/ibex v1.3.5/go.mod h1:T8hbMUySK2q6cXUaYp0AUVeKkU9Od2LjzwmB5lmTRBM=
github.com/gabriel-vasile/mimetype v1.4.2 h1:w5qFW6JKBz9Y393Y4q372O9A7cUSequkh1Q7OhCmWKU=
github.com/gabriel-vasile/mimetype v1.4.2/go.mod h1:zApsH/mKG4w07erKIaJPFiX0Tsq9BFQgN3qGY5GnNgA=
github.com/garyburd/redigo v1.6.2/go.mod h1:NR3MbYisc3/PwhQ00EMzDiPmrwpPxAn5GI05/YaO1SY=
@@ -263,8 +262,6 @@ github.com/rogpeppe/go-internal v1.10.0 h1:TMyTOH3F/DB16zRVcYyreMH6GnZZrwQVAoYjR
github.com/rs/xid v1.2.1/go.mod h1:+uKXf+4Djp6Md1KODXJxgGQPKngRmWyn10oCKFzNHOQ=
github.com/rs/zerolog v1.13.0/go.mod h1:YbFCdg8HfsridGWAh22vktObvhZbQsZXe4/zB0OKkWU=
github.com/rs/zerolog v1.15.0/go.mod h1:xYTKnLHcpfU2225ny5qZjxnj9NvkumZYjJHlAThCjNc=
github.com/russross/blackfriday/v2 v2.1.0 h1:JIOH55/0cWyOuilr9/qlrm0BSXldqnqwMsf35Ld67mk=
github.com/russross/blackfriday/v2 v2.1.0/go.mod h1:+Rmxgy9KzJVeS9/2gXHxylqXiyQDYRxCVz55jmeOWTM=
github.com/satori/go.uuid v1.2.0/go.mod h1:dA0hQrYB0VpLJoorglMZABFdXlWrHn1NEOzdhQKdks0=
github.com/shopspring/decimal v0.0.0-20180709203117-cd690d0c9e24/go.mod h1:M+9NzErvs504Cn4c5DxATwIqPbtswREoFCre64PpcG4=
github.com/shopspring/decimal v1.2.0 h1:abSATXmQEYyShuxI4/vyW3tV1MrKAJzCZ/0zLUXYbsQ=
@@ -297,18 +294,14 @@ github.com/tidwall/match v1.1.1 h1:+Ho715JplO36QYgwN9PGYNhgZvoUSc9X2c80KVTi+GA=
github.com/tidwall/match v1.1.1/go.mod h1:eRSPERbgtNPcGhD8UCthc6PmLEQXEWd3PRB5JTxsfmM=
github.com/tidwall/pretty v1.2.0 h1:RWIZEg2iJ8/g6fDDYzMpobmaoGh5OLl4AXtGUGPcqCs=
github.com/tidwall/pretty v1.2.0/go.mod h1:ITEVvHYasfjBbM0u2Pg8T2nJnzm8xPwvNhhsoaGGjNU=
github.com/toolkits/pkg v1.3.6 h1:47e1amsY6mJmcnF3Y2lIpkJXfoYY2RmgI09PtwdAEMU=
github.com/toolkits/pkg v1.3.6/go.mod h1:M9ecwFGW1vxCTUFM9sr2ZjXSKb04N+1sTQ6SA3RNAIU=
github.com/toolkits/pkg v1.3.8 h1:2yamC20c5mHRtbcGiLY99Lm/2mVitFn6onE8KKvMT1o=
github.com/toolkits/pkg v1.3.8/go.mod h1:M9ecwFGW1vxCTUFM9sr2ZjXSKb04N+1sTQ6SA3RNAIU=
github.com/twitchyliquid64/golang-asm v0.15.1 h1:SU5vSMR7hnwNxj24w34ZyCi/FmDZTkS4MhqMhdFk5YI=
github.com/twitchyliquid64/golang-asm v0.15.1/go.mod h1:a1lVb/DtPvCB8fslRZhAngC2+aY1QWCk3Cedj/Gdt08=
github.com/ugorji/go v1.2.7/go.mod h1:nF9osbDWLy6bDVv/Rtoh6QgnvNDpmCalQV5urGCCS6M=
github.com/ugorji/go/codec v1.2.7/go.mod h1:WGN1fab3R1fzQlVQTkfxVtIBhWDRqOviHU95kRgeqEY=
github.com/ugorji/go/codec v1.2.11 h1:BMaWp1Bb6fHwEtbplGBGJ498wD+LKlNSl25MjdZY4dU=
github.com/ugorji/go/codec v1.2.11/go.mod h1:UNopzCgEMSXjBc6AOMqYvWC1ktqTAfzJZUZgYf6w6lg=
github.com/urfave/cli/v2 v2.27.4 h1:o1owoI+02Eb+K107p27wEX9Bb8eqIoZCfLXloLUSWJ8=
github.com/urfave/cli/v2 v2.27.4/go.mod h1:m4QzxcD2qpra4z7WhzEGn74WZLViBnMpb1ToCAKdGRQ=
github.com/xrash/smetrics v0.0.0-20240521201337-686a1a2994c1 h1:gEOO8jv9F4OT7lGCjxCBTO/36wtF6j2nSip77qHd4x4=
github.com/xrash/smetrics v0.0.0-20240521201337-686a1a2994c1/go.mod h1:Ohn+xnUBiLI6FVj/9LpzZWtj1/D6lUovWYBkxHVV3aM=
github.com/yuin/goldmark v1.1.27/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74=
github.com/yuin/goldmark v1.2.1/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74=
github.com/yuin/goldmark v1.4.13/go.mod h1:6yULJ656Px+3vBD8DxQVa3kxgyrAnzto9xy5taEt/CY=

View File

@@ -1,117 +0,0 @@
package agentd
import (
"context"
"fmt"
"log"
"os"
"os/signal"
"path/filepath"
"syscall"
"github.com/toolkits/pkg/i18n"
"github.com/ccfos/nightingale/v6/ibex/agentd/config"
"github.com/ccfos/nightingale/v6/ibex/agentd/router"
"github.com/ccfos/nightingale/v6/ibex/agentd/timer"
"github.com/ccfos/nightingale/v6/pkg/httpx"
)
type Agentd struct {
ConfigFile string
Version string
}
type AgentdOption func(*Agentd)
func SetConfigFile(f string) AgentdOption {
return func(s *Agentd) {
s.ConfigFile = f
}
}
func SetVersion(v string) AgentdOption {
return func(s *Agentd) {
s.Version = v
}
}
// Run run agentd
func Run(opts ...AgentdOption) {
code := 1
sc := make(chan os.Signal, 1)
signal.Notify(sc, syscall.SIGHUP, syscall.SIGINT, syscall.SIGTERM, syscall.SIGQUIT)
agentd := Agentd{
ConfigFile: filepath.Join("etc", "ibex", "agentd.toml"),
Version: "not specified",
}
for _, opt := range opts {
opt(&agentd)
}
cleanFunc, err := agentd.initialize()
if err != nil {
fmt.Println("agentd init fail:", err)
os.Exit(code)
}
EXIT:
for {
sig := <-sc
fmt.Println("received signal:", sig.String())
switch sig {
case syscall.SIGQUIT, syscall.SIGTERM, syscall.SIGINT:
code = 0
break EXIT
case syscall.SIGHUP:
// reload configuration?
default:
break EXIT
}
}
cleanFunc()
fmt.Println("agentd exited")
os.Exit(code)
}
func (s Agentd) initialize() (func(), error) {
fns := Functions{}
ctx, cancel := context.WithCancel(context.Background())
fns.Add(cancel)
log.SetFlags(log.Ldate | log.Ltime | log.Lshortfile)
// parse config file
config.MustLoad(s.ConfigFile)
// init i18n
i18n.Init()
// init http server
r := router.New(s.Version)
httpClean := httpx.Init(config.C.HTTP, ctx, r)
fns.Add(httpClean)
go timer.Heartbeat(ctx)
return fns.Ret(), nil
}
type Functions struct {
List []func()
}
func (fs *Functions) Add(f func()) {
fs.List = append(fs.List, f)
}
func (fs *Functions) Ret() func() {
return func() {
for i := 0; i < len(fs.List); i++ {
fs.List[i]()
}
}
}

View File

@@ -1,110 +0,0 @@
package client
import (
"bufio"
"io"
"log"
"net"
"net/rpc"
"reflect"
"time"
"github.com/toolkits/pkg/net/gobrpc"
"github.com/ugorji/go/codec"
"github.com/ccfos/nightingale/v6/ibex/agentd/config"
)
var cli *gobrpc.RPCClient
func getCli() *gobrpc.RPCClient {
if cli != nil {
return cli
}
// detect the fastest server
var (
address string
client *rpc.Client
duration int64 = 999999999999
)
// auto close other slow server
acm := make(map[string]*rpc.Client)
l := len(config.C.Heartbeat.Servers)
for i := 0; i < l; i++ {
addr := config.C.Heartbeat.Servers[i]
begin := time.Now()
conn, err := net.DialTimeout("tcp", addr, time.Second*5)
if err != nil {
log.Printf("W: dial %s fail: %s", addr, err)
continue
}
var bufConn = struct {
io.Closer
*bufio.Reader
*bufio.Writer
}{conn, bufio.NewReader(conn), bufio.NewWriter(conn)}
var mh codec.MsgpackHandle
mh.MapType = reflect.TypeOf(map[string]interface{}(nil))
rpcCodec := codec.MsgpackSpecRpc.ClientCodec(bufConn, &mh)
c := rpc.NewClientWithCodec(rpcCodec)
acm[addr] = c
var out string
err = c.Call("Server.Ping", "", &out)
if err != nil {
log.Printf("W: ping %s fail: %s", addr, err)
continue
}
use := time.Since(begin).Nanoseconds()
if use < duration {
address = addr
client = c
duration = use
}
}
if address == "" {
log.Println("E: no job server found")
return nil
}
log.Printf("I: choose server: %s, duration: %dms", address, duration/1000000)
for addr, c := range acm {
if addr == address {
continue
}
c.Close()
}
cli = gobrpc.NewRPCClient(address, client, 5*time.Second)
return cli
}
// GetCli 探测所有server端的延迟自动选择最快的
func GetCli() *gobrpc.RPCClient {
for {
c := getCli()
if c != nil {
return c
}
time.Sleep(time.Second * 10)
}
}
// CloseCli 关闭客户端连接
func CloseCli() {
if cli != nil {
cli.Close()
cli = nil
}
}

View File

@@ -1,31 +0,0 @@
package client
import (
"fmt"
"log"
"github.com/ccfos/nightingale/v6/ibex/types"
)
// Meta 从Server端获取任务元信息
func Meta(id int64) (script string, args string, account string, stdin string, err error) {
var resp types.TaskMetaResponse
err = GetCli().Call("Server.GetTaskMeta", id, &resp)
if err != nil {
log.Println("E: rpc call Server.GetTaskMeta:", err)
CloseCli()
return
}
if resp.Message != "" {
log.Println("E: rpc call Server.GetTaskMeta:", resp.Message)
err = fmt.Errorf(resp.Message)
return
}
script = resp.Script
args = resp.Args
account = resp.Account
stdin = resp.Stdin
return
}

View File

@@ -1,140 +0,0 @@
package config
import (
"fmt"
"log"
"net"
"os"
"strings"
"sync"
"github.com/koding/multiconfig"
"github.com/toolkits/pkg/file"
"github.com/ccfos/nightingale/v6/pkg/httpx"
)
var (
C = new(Config)
once sync.Once
)
func MustLoad(fpaths ...string) {
once.Do(func() {
loaders := []multiconfig.Loader{
&multiconfig.TagLoader{},
&multiconfig.EnvironmentLoader{},
}
for _, fpath := range fpaths {
handled := false
if strings.HasSuffix(fpath, "toml") {
loaders = append(loaders, &multiconfig.TOMLLoader{Path: fpath})
handled = true
}
if strings.HasSuffix(fpath, "conf") {
loaders = append(loaders, &multiconfig.TOMLLoader{Path: fpath})
handled = true
}
if strings.HasSuffix(fpath, "json") {
loaders = append(loaders, &multiconfig.JSONLoader{Path: fpath})
handled = true
}
if strings.HasSuffix(fpath, "yaml") {
loaders = append(loaders, &multiconfig.YAMLLoader{Path: fpath})
handled = true
}
if !handled {
fmt.Println("config file invalid, valid file exts: .conf,.yaml,.toml,.json")
os.Exit(1)
}
}
m := multiconfig.DefaultLoader{
Loader: multiconfig.MultiLoader(loaders...),
Validator: multiconfig.MultiValidator(&multiconfig.RequiredValidator{}),
}
m.MustLoad(C)
if C.Heartbeat.Host == "" {
fmt.Println("heartbeat.host is blank")
os.Exit(1)
}
if C.Heartbeat.Host == "$ip" {
C.Heartbeat.Endpoint = fmt.Sprint(GetOutboundIP())
if C.Heartbeat.Endpoint == "" {
fmt.Println("ip auto got is blank")
os.Exit(1)
}
fmt.Println("host.ip:", C.Heartbeat.Endpoint)
}
host, err := C.GetHost()
if err != nil {
log.Println("E: failed to GetHost:", err)
os.Exit(1)
}
fmt.Println("host:", host)
if C.MetaDir == "" {
C.MetaDir = "./meta"
}
C.MetaDir, err = file.RealPath(C.MetaDir)
if err != nil {
log.Println("E: failed to get real path of MetaDir:", err)
os.Exit(1)
}
file.EnsureDir(C.MetaDir)
file.EnsureDirRW(C.MetaDir)
})
}
type Config struct {
RunMode string
MetaDir string
Heartbeat Heartbeat
HTTP httpx.Config
}
type Heartbeat struct {
Interval int64
Servers []string
Host string
Endpoint string
}
func (c *Config) IsDebugMode() bool {
return c.RunMode == "debug"
}
func (c *Config) GetHost() (string, error) {
if c.Heartbeat.Host == "$ip" {
return c.Heartbeat.Endpoint, nil
}
if c.Heartbeat.Host == "$hostname" {
return os.Hostname()
}
return c.Heartbeat.Host, nil
}
// Get preferred outbound ip of this machine
func GetOutboundIP() net.IP {
conn, err := net.Dial("udp", "8.8.8.8:80")
if err != nil {
fmt.Println("auto get outbound ip fail:", err)
os.Exit(1)
}
defer conn.Close()
localAddr := conn.LocalAddr().(*net.UDPAddr)
return localAddr.IP
}

View File

@@ -1,60 +0,0 @@
package router
import (
"fmt"
"os"
"strings"
"github.com/gin-contrib/pprof"
"github.com/gin-gonic/gin"
"github.com/ccfos/nightingale/v6/ibex/agentd/config"
"github.com/ccfos/nightingale/v6/pkg/aop"
)
func New(version string) *gin.Engine {
gin.SetMode(config.C.RunMode)
loggerMid := aop.Logger()
recoveryMid := aop.Recovery()
if strings.ToLower(config.C.RunMode) == "release" {
aop.DisableConsoleColor()
}
r := gin.New()
r.Use(recoveryMid)
// whether print access log
if config.C.HTTP.PrintAccessLog {
r.Use(loggerMid)
}
configRoute(r, version)
return r
}
func configRoute(r *gin.Engine, version string) {
if config.C.HTTP.PProf {
pprof.Register(r, "/debug/pprof")
}
r.GET("/ping", func(c *gin.Context) {
c.String(200, "pong")
})
r.GET("/pid", func(c *gin.Context) {
c.String(200, fmt.Sprintf("%d", os.Getpid()))
})
r.GET("/addr", func(c *gin.Context) {
c.String(200, c.Request.RemoteAddr)
})
r.GET("/version", func(c *gin.Context) {
c.String(200, version)
})
}

View File

@@ -1,18 +0,0 @@
//go:build !windows
// +build !windows
package timer
import (
"os/exec"
"syscall"
)
func CmdStart(cmd *exec.Cmd) error {
cmd.SysProcAttr = &syscall.SysProcAttr{Setpgid: true}
return cmd.Start()
}
func CmdKill(cmd *exec.Cmd) error {
return syscall.Kill(-cmd.Process.Pid, syscall.SIGKILL)
}

View File

@@ -1,16 +0,0 @@
//go:build windows
// +build windows
package timer
import (
"os/exec"
)
func CmdStart(cmd *exec.Cmd) error {
return cmd.Start()
}
func CmdKill(cmd *exec.Cmd) error {
return cmd.Process.Kill()
}

View File

@@ -1,74 +0,0 @@
package timer
import (
"context"
"log"
"time"
"github.com/ccfos/nightingale/v6/ibex/agentd/client"
"github.com/ccfos/nightingale/v6/ibex/agentd/config"
"github.com/ccfos/nightingale/v6/ibex/types"
)
func Heartbeat(ctx context.Context) {
interval := time.Duration(config.C.Heartbeat.Interval) * time.Millisecond
for {
select {
case <-ctx.Done():
return
case <-time.After(interval):
heartbeat()
}
}
}
func heartbeat() {
ident, err := config.C.GetHost()
if err != nil {
log.Println("E: GetHost fail:", err)
return
}
req := types.ReportRequest{
Ident: ident,
ReportTasks: Locals.ReportTasks(),
}
var resp types.ReportResponse
err = client.GetCli().Call("Server.Report", req, &resp)
if err != nil {
log.Println("E: rpc call Server.Report fail:", err)
client.CloseCli()
return
}
if resp.Message != "" {
log.Println("E: error from server:", resp.Message)
return
}
assigned := make(map[int64]struct{})
if resp.AssignTasks != nil {
count := len(resp.AssignTasks)
for i := 0; i < count; i++ {
at := resp.AssignTasks[i]
assigned[at.Id] = struct{}{}
Locals.AssignTask(at)
}
}
if len(assigned) > 0 {
log.Println("D: assigned tasks:", mapKeys(assigned))
}
Locals.Clean(assigned)
}
// mapKeys returns the keys of m in unspecified order.
func mapKeys(m map[int64]struct{}) []int64 {
	keys := make([]int64, len(m))
	i := 0
	for id := range m {
		keys[i] = id
		i++
	}
	return keys
}

View File

@@ -1,333 +0,0 @@
package timer
import (
"bytes"
"fmt"
"log"
"os/exec"
"os/user"
"path"
"strings"
"sync"
"github.com/toolkits/pkg/file"
"github.com/toolkits/pkg/runner"
"github.com/toolkits/pkg/sys"
"github.com/ccfos/nightingale/v6/ibex/agentd/client"
"github.com/ccfos/nightingale/v6/ibex/agentd/config"
)
// Task is the local execution state of one assigned task. The embedded
// mutex guards Status, alive and the output buffers, which are written
// by the process goroutine and read by the heartbeat reporter.
type Task struct {
	sync.Mutex
	Id     int64  // task id assigned by the server
	Clock  int64  // assignment timestamp; distinguishes re-runs of the same id
	Action string // "start" or "kill"
	Status string // running / killing / killed / killfailed / failed / success
	alive  bool   // true while a run/kill goroutine is active
	Cmd    *exec.Cmd
	Stdout bytes.Buffer
	Stderr bytes.Buffer
	Stdin  *bytes.Reader
	Args     string // script arguments, ',,'-separated (see start)
	Account  string // system account to run the script as; empty = not prepared yet
	StdinStr string // raw stdin content fed to the script
}
// SetStatus updates the task status under the lock.
func (t *Task) SetStatus(status string) {
	t.Lock()
	defer t.Unlock()
	t.Status = status
}
// GetStatus returns the task status under the lock.
func (t *Task) GetStatus() string {
	t.Lock()
	defer t.Unlock()
	return t.Status
}
// GetAlive reports, under the lock, whether a run/kill goroutine is active.
func (t *Task) GetAlive() bool {
	t.Lock()
	defer t.Unlock()
	return t.alive
}
// SetAlive updates the alive flag under the lock.
func (t *Task) SetAlive(pa bool) {
	t.Lock()
	defer t.Unlock()
	t.alive = pa
}
// GetStdout returns the captured stdout under the lock.
func (t *Task) GetStdout() string {
	t.Lock()
	defer t.Unlock()
	return t.Stdout.String()
}
// GetStderr returns the captured stderr under the lock.
func (t *Task) GetStderr() string {
	t.Lock()
	defer t.Unlock()
	return t.Stderr.String()
}
// ResetBuff clears both captured output buffers under the lock.
func (t *Task) ResetBuff() {
	t.Lock()
	defer t.Unlock()
	t.Stdout.Reset()
	t.Stderr.Reset()
}
// doneBefore reports whether this (id, clock) run already finished in a
// previous agent lifetime, i.e. its done-flag file exists on disk.
func (t *Task) doneBefore() bool {
	flag := path.Join(config.C.MetaDir, fmt.Sprint(t.Id), fmt.Sprintf("%d.done", t.Clock))
	return file.IsExist(flag)
}
// loadResult restores a finished run's status and captured output from
// the meta dir (written earlier by persistResult).
func (t *Task) loadResult() {
	metadir := config.C.MetaDir
	doneFlag := path.Join(metadir, fmt.Sprint(t.Id), fmt.Sprintf("%d.done", t.Clock))
	stdoutFile := path.Join(metadir, fmt.Sprint(t.Id), "stdout")
	stderrFile := path.Join(metadir, fmt.Sprint(t.Id), "stderr")
	var err error
	// Read errors are logged but not fatal: a missing piece just leaves
	// empty status/output.
	t.Status, err = file.ReadStringTrim(doneFlag)
	if err != nil {
		log.Printf("E: read file %s fail %v", doneFlag, err)
	}
	stdout, err := file.ReadString(stdoutFile)
	if err != nil {
		log.Printf("E: read file %s fail %v", stdoutFile, err)
	}
	stderr, err := file.ReadString(stderrFile)
	if err != nil {
		log.Printf("E: read file %s fail %v", stderrFile, err)
	}
	t.Stdout = *bytes.NewBufferString(stdout)
	t.Stderr = *bytes.NewBufferString(stderr)
}
// prepare makes the task runnable on this host: it ensures the per-task
// meta dir exists and fills Args/Account/StdinStr, either from files
// cached by a previous run or by fetching the meta from the server and
// persisting it. A non-empty Account doubles as the "already prepared"
// marker.
func (t *Task) prepare() error {
	if t.Account != "" {
		// already prepared
		return nil
	}
	IdDir := path.Join(config.C.MetaDir, fmt.Sprint(t.Id))
	err := file.EnsureDir(IdDir)
	if err != nil {
		log.Printf("E: mkdir -p %s fail: %v", IdDir, err)
		return err
	}
	writeFlag := path.Join(IdDir, ".write")
	if file.IsExist(writeFlag) {
		// read from disk: a previous run already cached the meta
		argsFile := path.Join(IdDir, "args")
		args, err := file.ReadStringTrim(argsFile)
		if err != nil {
			log.Printf("E: read %s fail %v", argsFile, err)
			return err
		}
		accountFile := path.Join(IdDir, "account")
		account, err := file.ReadStringTrim(accountFile)
		if err != nil {
			log.Printf("E: read %s fail %v", accountFile, err)
			return err
		}
		stdinFile := path.Join(IdDir, "stdin")
		stdin, err := file.ReadStringTrim(stdinFile)
		if err != nil {
			log.Printf("E: read %s fail %v", stdinFile, err)
			return err
		}
		t.Args = args
		t.Account = account
		t.StdinStr = stdin
	} else {
		// fetch from the server, then cache everything on disk
		script, args, account, stdin, err := client.Meta(t.Id)
		if err != nil {
			log.Println("E: query task meta fail:", err)
			return err
		}
		scriptFile := path.Join(IdDir, "script")
		_, err = file.WriteString(scriptFile, script)
		if err != nil {
			log.Printf("E: write script to %s fail: %v", scriptFile, err)
			return err
		}
		out, err := sys.CmdOutTrim("chmod", "+x", scriptFile)
		if err != nil {
			log.Printf("E: chmod +x %s fail %v. output: %s", scriptFile, err, out)
			return err
		}
		argsFile := path.Join(IdDir, "args")
		_, err = file.WriteString(argsFile, args)
		if err != nil {
			log.Printf("E: write args to %s fail: %v", argsFile, err)
			return err
		}
		accountFile := path.Join(IdDir, "account")
		_, err = file.WriteString(accountFile, account)
		if err != nil {
			log.Printf("E: write account to %s fail: %v", accountFile, err)
			return err
		}
		stdinFile := path.Join(IdDir, "stdin")
		_, err = file.WriteString(stdinFile, stdin)
		if err != nil {
			// NOTE(review): message says "tags" but this writes stdin
			log.Printf("E: write tags to %s fail: %v", stdinFile, err)
			return err
		}
		// the .write flag marks the on-disk cache as complete
		_, err = file.WriteString(writeFlag, "")
		if err != nil {
			log.Printf("E: create %s flag file fail: %v", writeFlag, err)
			return err
		}
		t.Args = args
		t.Account = account
		t.StdinStr = stdin
	}
	t.Stdin = bytes.NewReader([]byte(t.StdinStr))
	return nil
}
// start launches the task's script asynchronously. It is a no-op while
// a goroutine for this task is still alive; failures during preparation
// or spawn are logged and leave the task un-started.
func (t *Task) start() {
	if t.GetAlive() {
		return
	}
	err := t.prepare()
	if err != nil {
		return
	}
	// args are stored ',,'-separated; turn them into single-quoted shell
	// words: a,,b -> 'a' 'b'
	args := t.Args
	if args != "" {
		args = strings.Replace(args, ",,", "' '", -1)
		args = "'" + args + "'"
	}
	scriptFile := path.Join(config.C.MetaDir, fmt.Sprint(t.Id), "script")
	if !path.IsAbs(scriptFile) {
		scriptFile = path.Join(runner.Cwd, scriptFile)
	}
	sh := fmt.Sprintf("%s %s", scriptFile, args)
	var cmd *exec.Cmd
	loginUser, err := user.Current()
	if err != nil {
		log.Println("E: cannot get current login user:", err)
		return
	}
	if loginUser.Username == "root" {
		// current login user is root: honor the task's account, switching
		// user via su unless the account is root itself
		if t.Account == "root" {
			cmd = exec.Command("sh", "-c", sh)
			cmd.Dir = loginUser.HomeDir
		} else {
			cmd = exec.Command("su", "-c", sh, "-", t.Account)
		}
	} else {
		// current login user not root: cannot switch account, run as-is
		cmd = exec.Command("sh", "-c", sh)
		cmd.Dir = loginUser.HomeDir
	}
	cmd.Stdout = &t.Stdout
	cmd.Stderr = &t.Stderr
	cmd.Stdin = t.Stdin
	t.Cmd = cmd
	err = CmdStart(cmd)
	if err != nil {
		log.Printf("E: cannot start cmd of task[%d]: %v", t.Id, err)
		return
	}
	go runProcess(t)
}
// kill asynchronously terminates the task's process; status handling
// and result persistence happen in killProcess.
func (t *Task) kill() {
	go killProcess(t)
}
// runProcess waits for the spawned command, maps its exit condition to
// a final status (killed / failed / success) and persists the result.
// It runs in its own goroutine; alive is true for its duration.
func runProcess(t *Task) {
	t.SetAlive(true)
	defer t.SetAlive(false)
	err := t.Cmd.Wait()
	if err != nil {
		// the wait error text is used to tell a kill from an ordinary failure
		if strings.Contains(err.Error(), "signal: killed") {
			t.SetStatus("killed")
			log.Printf("D: process of task[%d] killed", t.Id)
		} else if strings.Contains(err.Error(), "signal: terminated") {
			// kill children process manually
			t.SetStatus("killed")
			log.Printf("D: process of task[%d] terminated", t.Id)
		} else {
			t.SetStatus("failed")
			log.Printf("D: process of task[%d] return error: %v", t.Id, err)
		}
	} else {
		t.SetStatus("success")
		log.Printf("D: process of task[%d] done", t.Id)
	}
	persistResult(t)
}
// persistResult writes the task's captured stdout/stderr and the final
// status ("<clock>.done" flag) under the meta dir, so a restarted agent
// can answer repeat assignments from disk (see doneBefore/loadResult).
// Write failures are logged — there is no caller to return them to —
// instead of being silently dropped as before.
func persistResult(t *Task) {
	metadir := config.C.MetaDir
	stdout := path.Join(metadir, fmt.Sprint(t.Id), "stdout")
	stderr := path.Join(metadir, fmt.Sprint(t.Id), "stderr")
	doneFlag := path.Join(metadir, fmt.Sprint(t.Id), fmt.Sprintf("%d.done", t.Clock))
	if _, err := file.WriteString(stdout, t.GetStdout()); err != nil {
		log.Printf("E: write %s fail: %v", stdout, err)
	}
	if _, err := file.WriteString(stderr, t.GetStderr()); err != nil {
		log.Printf("E: write %s fail: %v", stderr, err)
	}
	if _, err := file.WriteString(doneFlag, t.GetStatus()); err != nil {
		log.Printf("E: write %s fail: %v", doneFlag, err)
	}
}
// killProcess force-kills the task's process (the whole group, on unix)
// and records the outcome. It runs in its own goroutine; alive is true
// for its duration.
func killProcess(t *Task) {
	t.SetAlive(true)
	defer t.SetAlive(false)
	log.Printf("D: begin kill process of task[%d]", t.Id)
	err := CmdKill(t.Cmd)
	if err != nil {
		t.SetStatus("killfailed")
		log.Printf("D: kill process of task[%d] fail: %v", t.Id, err)
	} else {
		t.SetStatus("killed")
		log.Printf("D: process of task[%d] killed", t.Id)
	}
	persistResult(t)
}

View File

@@ -1,120 +0,0 @@
package timer
import (
"log"
"github.com/ccfos/nightingale/v6/ibex/types"
)
// LocalTasksT indexes the tasks this agent currently knows about by id.
// NOTE(review): M is not guarded by a lock; it appears to be touched
// only from the heartbeat goroutine — confirm before adding callers
// from other goroutines.
type LocalTasksT struct {
	M map[int64]*Task
}

// Locals is the agent-wide task registry.
var Locals = &LocalTasksT{M: make(map[int64]*Task)}
// ReportTasks snapshots every locally finished task for the heartbeat
// report. Tasks still "running" or "killing" are skipped — their state
// is not final yet.
func (lt *LocalTasksT) ReportTasks() []types.ReportTask {
	ret := make([]types.ReportTask, 0, len(lt.M))
	for id, t := range lt.M {
		rt := types.ReportTask{Id: id, Clock: t.Clock}
		rt.Status = t.GetStatus()
		if rt.Status == "running" || rt.Status == "killing" {
			// intermediate state
			continue
		}
		rt.Stdout = t.GetStdout()
		rt.Stderr = t.GetStderr()
		stdoutLen := len(rt.Stdout)
		stderrLen := len(rt.Stderr)
		// truncate overly long output (keeping the tail) so the report
		// does not blow up the database
		if stdoutLen > 65535 {
			start := stdoutLen - 65535
			rt.Stdout = rt.Stdout[start:]
		}
		if stderrLen > 65535 {
			start := stderrLen - 65535
			rt.Stderr = rt.Stderr[start:]
		}
		ret = append(ret, rt)
	}
	return ret
}
// GetTask looks up a task by id; the bool reports whether it exists.
func (lt *LocalTasksT) GetTask(id int64) (*Task, bool) {
	task, ok := lt.M[id]
	return task, ok
}
// SetTask registers (or replaces) the task under its id.
func (lt *LocalTasksT) SetTask(t *Task) {
	lt.M[t.Id] = t
}
// AssignTask merges one server assignment into the local registry and
// triggers the corresponding action. A repeat assignment (same clock
// and action) is ignored; a "kill" for an unknown task is a no-op; a
// run that already finished in a previous agent lifetime is answered
// from its on-disk result instead of being re-executed.
func (lt *LocalTasksT) AssignTask(at types.AssignTask) {
	local, found := lt.GetTask(at.Id)
	if found {
		if local.Clock == at.Clock && local.Action == at.Action {
			// ignore repeat task
			return
		}
		local.Clock = at.Clock
		local.Action = at.Action
	} else {
		if at.Action == "kill" {
			// no process in local, no need kill
			return
		}
		local = &Task{
			Id:     at.Id,
			Clock:  at.Clock,
			Action: at.Action,
		}
		lt.SetTask(local)
		if local.doneBefore() {
			local.loadResult()
			return
		}
	}
	if local.Action == "kill" {
		local.SetStatus("killing")
		local.kill()
	} else if local.Action == "start" {
		local.SetStatus("running")
		local.start()
	} else {
		log.Printf("W: unknown action: %s of task %d", at.Action, at.Id)
	}
}
// Clean drops local tasks the server no longer assigns, releasing their
// output buffers and process handles.
func (lt *LocalTasksT) Clean(assigned map[int64]struct{}) {
	del := make(map[int64]struct{})
	for id := range lt.M {
		if _, found := assigned[id]; !found {
			del[id] = struct{}{}
		}
	}
	for id := range del {
		// The server may have stopped assigning a task (e.g. it considers
		// it timed out) while it is still running locally; in that case
		// keep it and keep reporting instead of deleting.
		if lt.M[id].GetStatus() == "running" {
			continue
		}
		lt.M[id].ResetBuff()
		cmd := lt.M[id].Cmd
		delete(lt.M, id)
		if cmd != nil && cmd.Process != nil {
			cmd.Process.Release()
		}
	}
}

View File

@@ -1,82 +0,0 @@
package ibex
import (
"fmt"
"github.com/ccfos/nightingale/v6/pkg/ctx"
"os"
"strings"
"github.com/ccfos/nightingale/v6/ibex/server/config"
"github.com/ccfos/nightingale/v6/ibex/server/router"
"github.com/ccfos/nightingale/v6/ibex/server/rpc"
"github.com/ccfos/nightingale/v6/ibex/server/timer"
"github.com/ccfos/nightingale/v6/storage"
"github.com/ccfos/nightingale/v6/alert/aconf"
n9eRouter "github.com/ccfos/nightingale/v6/center/router"
"github.com/ccfos/nightingale/v6/conf"
n9eConf "github.com/ccfos/nightingale/v6/conf"
"github.com/gin-gonic/gin"
"github.com/redis/go-redis/v9"
"gorm.io/gorm"
)
var (
HttpPort int
)
// ServerStart wires the ibex scheduler into an embedding n9e process:
// it copies the relevant settings into the package config, mounts the
// HTTP routes (optionally on an existing center router), initializes
// the redis-backed id generator, starts the RPC listener and — on a
// center node — the scheduling/cleanup timers. Edge nodes get the
// CenterApi config instead. Exits the process when the id generator
// cannot be initialized.
func ServerStart(ctx *ctx.Context, isCenter bool, db *gorm.DB, rc redis.Cmdable, basicAuth gin.Accounts, heartbeat aconf.HeartbeatConfig,
	api *n9eConf.CenterApi, r *gin.Engine, centerRouter *n9eRouter.Router, ibex conf.Ibex, httpPort int) {
	config.C.IsCenter = isCenter
	config.C.BasicAuth = make(gin.Accounts)
	if len(basicAuth) > 0 {
		config.C.BasicAuth = basicAuth
	}
	config.C.Heartbeat.IP = heartbeat.IP
	config.C.Heartbeat.Interval = heartbeat.Interval
	config.C.Heartbeat.LocalAddr = schedulerAddrGet(ibex.RPCListen)
	HttpPort = httpPort
	config.C.Output.ComeFrom = ibex.Output.ComeFrom
	config.C.Output.AgtdPort = ibex.Output.AgtdPort
	rou := router.NewRouter(ctx)
	if centerRouter != nil {
		rou.ConfigRouter(r, centerRouter)
	} else {
		rou.ConfigRouter(r)
	}
	ctx.Redis = rc
	if err := storage.IdInit(ctx.Redis); err != nil {
		fmt.Println("cannot init id generator: ", err)
		os.Exit(1)
	}
	rpc.Start(ibex.RPCListen, ctx)
	if isCenter {
		go timer.Heartbeat(ctx)
		go timer.Schedule(ctx)
		go timer.CleanLong(ctx)
	} else {
		config.C.CenterApi = *api
	}
	timer.CacheHostDoing(ctx)
	timer.ReportResult(ctx)
}
// schedulerAddrGet builds the local address (outboundIP:port) that
// agents use to reach this scheduler, taking the port from rpcListen.
// It exits the process when the outbound IP cannot be detected or
// rpcListen has no port part (the previous version indexed the split
// result unconditionally and could panic, and picked the wrong element
// for bracketed IPv6 listen addresses).
func schedulerAddrGet(rpcListen string) string {
	ip := fmt.Sprint(config.GetOutboundIP())
	if ip == "" {
		fmt.Println("heartbeat ip auto got is blank")
		os.Exit(1)
	}
	parts := strings.Split(rpcListen, ":")
	if len(parts) < 2 {
		fmt.Printf("rpc listen address %q invalid: missing port\n", rpcListen)
		os.Exit(1)
	}
	port := parts[len(parts)-1]
	return ip + ":" + port
}

View File

@@ -1,135 +0,0 @@
package config
import (
"fmt"
"net"
"os"
"strings"
"sync"
"github.com/ccfos/nightingale/v6/pkg/httpx"
"github.com/ccfos/nightingale/v6/pkg/logx"
"github.com/ccfos/nightingale/v6/conf"
"github.com/ccfos/nightingale/v6/pkg/ormx"
"github.com/ccfos/nightingale/v6/storage"
"github.com/gin-gonic/gin"
"github.com/koding/multiconfig"
)
var (
C = new(Config)
once sync.Once
)
// MustLoad parses the given config files (.toml/.conf/.json/.yaml) plus
// struct tags and environment variables into the package-level C,
// exiting the process on an unsupported file extension. It derives
// Heartbeat.IP (auto-detected) and Heartbeat.LocalAddr (ip:rpc-port)
// when not configured. Safe to call repeatedly; only the first call
// does work.
func MustLoad(fpaths ...string) {
	once.Do(func() {
		loaders := []multiconfig.Loader{
			&multiconfig.TagLoader{},
			&multiconfig.EnvironmentLoader{},
		}
		for _, fpath := range fpaths {
			handled := false
			if strings.HasSuffix(fpath, "toml") {
				loaders = append(loaders, &multiconfig.TOMLLoader{Path: fpath})
				handled = true
			}
			// .conf files are parsed as TOML too
			if strings.HasSuffix(fpath, "conf") {
				loaders = append(loaders, &multiconfig.TOMLLoader{Path: fpath})
				handled = true
			}
			if strings.HasSuffix(fpath, "json") {
				loaders = append(loaders, &multiconfig.JSONLoader{Path: fpath})
				handled = true
			}
			if strings.HasSuffix(fpath, "yaml") {
				loaders = append(loaders, &multiconfig.YAMLLoader{Path: fpath})
				handled = true
			}
			if !handled {
				fmt.Println("config file invalid, valid file exts: .conf,.yaml,.toml,.json")
				os.Exit(1)
			}
		}
		m := multiconfig.DefaultLoader{
			Loader:    multiconfig.MultiLoader(loaders...),
			Validator: multiconfig.MultiValidator(&multiconfig.RequiredValidator{}),
		}
		m.MustLoad(C)
		if C.Heartbeat.IP == "" {
			// auto detect
			C.Heartbeat.IP = fmt.Sprint(GetOutboundIP())
			if C.Heartbeat.IP == "" {
				fmt.Println("heartbeat ip auto got is blank")
				os.Exit(1)
			}
		}
		port := strings.Split(C.RPC.Listen, ":")[1]
		endpoint := C.Heartbeat.IP + ":" + port
		C.Heartbeat.LocalAddr = endpoint
		// Normally the ip will not be 127.0.0.1; it can only happen in a
		// single-machine deployment on a box without network, e.g. local
		// debugging while offline.
		// if C.Heartbeat.IP == "127.0.0.1" {
		// 	fmt.Println("heartbeat ip is 127.0.0.1 and it is useless, so, exit")
		// 	os.Exit(1)
		// }
		fmt.Println("heartbeat.ip:", C.Heartbeat.IP)
		fmt.Printf("heartbeat.interval: %dms\n", C.Heartbeat.Interval)
	})
}
// Config is the full ibex configuration as loaded by MustLoad.
type Config struct {
	RunMode   string // "debug" or "release"
	RPC       RPC
	Heartbeat Heartbeat
	Output    Output
	IsCenter  bool // true when embedded in an n9e center node
	CenterApi conf.CenterApi
	Log       logx.Config
	HTTP      httpx.Config
	BasicAuth gin.Accounts
	DB        ormx.DBConfig
	Redis     storage.RedisConfig
}
// RPC holds the RPC listener settings.
type RPC struct {
	Listen string
}

// Heartbeat controls how the agent reports to the server.
type Heartbeat struct {
	IP        string // reported ip; auto-detected by MustLoad when empty
	Interval  int64  // report interval in milliseconds
	LocalAddr string // ip:port, derived from IP and the RPC listen port
}

// Output controls where task stdout/stderr is served from.
type Output struct {
	ComeFrom string // "database" (or empty) serves from db; otherwise fetched from agentd
	AgtdPort int    // agentd http port used for the remote fetch
}
// IsDebugMode reports whether RunMode is "debug".
func (c *Config) IsDebugMode() bool {
	return c.RunMode == "debug"
}
// GetOutboundIP returns the preferred outbound ip of this machine by
// dialing a UDP socket toward a public address (dialing UDP sends no
// packet; it only makes the kernel pick the local interface that would
// be used). Exits the process when no route is available.
func GetOutboundIP() net.IP {
	conn, err := net.Dial("udp", "8.8.8.8:80")
	if err != nil {
		fmt.Println("auto get outbound ip fail:", err)
		os.Exit(1)
	}
	defer conn.Close()
	localAddr := conn.LocalAddr().(*net.UDPAddr)
	return localAddr.IP
}

View File

@@ -1,144 +0,0 @@
package logic
import (
"github.com/ccfos/nightingale/v6/models"
"github.com/ccfos/nightingale/v6/pkg/ctx"
"github.com/toolkits/pkg/logger"
"github.com/toolkits/pkg/slice"
"github.com/toolkits/pkg/str"
)
// ScheduleTask drives one scheduling round for the task: cleans it up
// when no host is waiting anymore, otherwise dispatches the next batch
// according to the current action.
func ScheduleTask(ctx *ctx.Context, id int64) {
	logger.Debugf("task[%d] scheduling...", id)

	count, err := models.WaitingHostCount(ctx, id)
	if err != nil {
		logger.Errorf("cannot get task[%d] waiting host count: %v", id, err)
		return
	}
	if count == 0 {
		cleanDoneTask(ctx, id)
		return
	}

	action, err := models.TaskActionGet(ctx, "id=?", id)
	if err != nil {
		logger.Errorf("cannot get task[%d] action: %v", id, err)
		return
	}
	if action == nil {
		logger.Errorf("[W] no action found of task[%d]", id)
		return
	}

	switch action.Action {
	case "start":
		startTask(ctx, id, action)
	case "pause", "cancel", "kill":
		// nothing to dispatch in these states
	default:
		logger.Errorf("unknown action: %s of task[%d]", action.Action, id)
	}
}
// cleanDoneTask removes a task's scheduling rows once no host is in an
// in-progress state anymore.
func cleanDoneTask(ctx *ctx.Context, id int64) {
	doing, err := models.IngStatusHostCount(ctx, id)
	if err != nil {
		logger.Errorf("cannot get task[%d] ing status host count: %v", id, err)
		return
	}
	if doing > 0 {
		return
	}

	if err := models.CleanDoneTask(ctx, id); err != nil {
		logger.Errorf("cannot clean done task[%d]: %v", id, err)
	}
	logger.Debugf("task[%d] done", id)
}
// startTask dispatches the next batch of waiting hosts for the task.
// It auto-pauses the task when the number of unexpected (failed) hosts
// exceeds the configured tolerance, or when one of the next hosts is
// listed as a pause point.
func startTask(ctx *ctx.Context, id int64, action *models.TaskAction) {
	meta, err := models.TaskMetaGetByID(ctx, id)
	if err != nil {
		logger.Errorf("cannot get task[%d] meta: %v", id, err)
		return
	}
	if meta == nil {
		logger.Errorf("task[%d] meta lost", id)
		return
	}
	count, err := models.UnexpectedHostCount(ctx, id)
	if err != nil {
		logger.Errorf("cannot get task[%d] unexpected host count: %v", id, err)
		return
	}
	// too many failures: pause instead of dispatching more hosts
	if count > int64(meta.Tolerance) {
		err = action.Update(ctx, "pause")
		if err != nil {
			logger.Errorf("cannot update task[%d] action to 'pause': %v", id, err)
		}
		return
	}
	waitings, err := models.WaitingHostList(ctx, id)
	if err != nil {
		logger.Errorf("cannot get task[%d] waiting host: %v", id, err)
		return
	}
	waitingsCount := len(waitings)
	if waitingsCount == 0 {
		return
	}
	doingsCount, err := models.TableRecordCount(ctx, models.TaskHostDoing{}.TableName(), "id=?", id)
	if err != nil {
		logger.Errorf("cannot get task[%d] doing host count: %v", id, err)
		return
	}
	// need = free slots in the current batch; batch 0 means "all at once"
	need := meta.Batch - int(doingsCount)
	if meta.Batch == 0 {
		need = waitingsCount
	}
	if need <= 0 {
		return
	}
	if need > waitingsCount {
		need = waitingsCount
	}
	// stop the dispatch window at the first configured pause host (it is
	// still included in this round) and pause the task
	arr := str.ParseCommaTrim(meta.Pause)
	end := need
	for i := 0; i < need; i++ {
		if slice.ContainsString(arr, waitings[i].Host) {
			end = i + 1
			err = action.Update(ctx, "pause")
			if err != nil {
				logger.Errorf("cannot update task[%d] action to 'pause': %v", id, err)
				return
			}
			break
		}
	}
	err = models.RunWaitingHosts(ctx, waitings[:end])
	if err != nil {
		logger.Errorf("cannot run waiting hosts: %v", err)
	}
}

View File

@@ -1,45 +0,0 @@
package logic
import (
"github.com/ccfos/nightingale/v6/models"
"github.com/ccfos/nightingale/v6/pkg/ctx"
"time"
"github.com/toolkits/pkg/logger"
)
// CheckTimeout marks every "doing" host of the task as timed out once
// its clock is older than the task timeout (plus dispatch slack).
func CheckTimeout(ctx *ctx.Context, id int64) {
	meta, err := models.TaskMetaGetByID(ctx, id)
	if err != nil {
		logger.Errorf("cannot get task[%d] meta: %v", id, err)
		return
	}
	if meta == nil {
		logger.Errorf("task[%d] meta lost", id)
		return
	}

	doings, err := models.TableRecordGets[[]models.TaskHostDoing](ctx, models.TaskHostDoing{}.TableName(), "id=?", id)
	if err != nil {
		logger.Errorf("cannot get task[%d] doing host list: %v", id, err)
		return
	}
	if len(doings) == 0 {
		return
	}

	// 3s: task dispatch duration: web -> db -> scheduler -> executor
	timeout := int64(meta.Timeout + 3)
	now := time.Now().Unix()
	for _, doing := range doings {
		if now-doing.Clock <= timeout {
			continue
		}
		if err := models.MarkDoneStatus(ctx, doing.Id, doing.Clock, doing.Host, "timeout", "", ""); err != nil {
			logger.Errorf("cannot mark task[%d] done status: %v", id, err)
		}
	}
}

View File

@@ -1,40 +0,0 @@
package router
import (
"github.com/ccfos/nightingale/v6/models"
"github.com/ccfos/nightingale/v6/pkg/ctx"
"net/http"
"strings"
"github.com/toolkits/pkg/errorx"
)
// TaskMeta loads the task meta by id, bombing out with 404 when it does
// not exist and with 500 on query errors.
func TaskMeta(ctx *ctx.Context, id int64) *models.TaskMeta {
	meta, err := models.TaskMetaGet(ctx, "id = ?", id)
	errorx.Dangerous(err)
	if meta == nil {
		errorx.Bomb(http.StatusNotFound, "no such task meta")
	}
	return meta
}
// cleanHosts trims whitespace from each entry and drops blanks and
// '#'-prefixed comment lines.
func cleanHosts(formHosts []string) []string {
	out := make([]string, 0, len(formHosts))
	for _, raw := range formHosts {
		host := strings.TrimSpace(raw)
		if host == "" || strings.HasPrefix(host, "#") {
			continue
		}
		out = append(out, host)
	}
	return out
}

View File

@@ -1,612 +0,0 @@
package router
import (
"fmt"
"github.com/ccfos/nightingale/v6/models"
"github.com/ccfos/nightingale/v6/pkg/ctx"
"strconv"
"io/ioutil"
"net/http"
"time"
"github.com/ccfos/nightingale/v6/ibex/server/config"
"github.com/ccfos/nightingale/v6/storage"
"github.com/gin-gonic/gin"
"github.com/toolkits/pkg/errorx"
"github.com/toolkits/pkg/ginx"
"github.com/toolkits/pkg/logger"
"github.com/toolkits/pkg/slice"
"github.com/toolkits/pkg/str"
)
// taskStdout renders the stdout of every host of the task as JSON.
func (rou *Router) taskStdout(c *gin.Context) {
	meta := TaskMeta(rou.ctx, UrlParamsInt64(c, "id"))
	stdouts, err := meta.Stdouts(rou.ctx)
	ginx.NewRender(c).Data(stdouts, err)
}
// taskStderr renders the stderr of every host of the task as JSON.
func (rou *Router) taskStderr(c *gin.Context) {
	meta := TaskMeta(rou.ctx, UrlParamsInt64(c, "id"))
	stderrs, err := meta.Stderrs(rou.ctx)
	ginx.NewRender(c).Data(stderrs, err)
}
// TODO: should not rely on task_action alone; the per-host execution
// status needs to be considered as well.
// taskState renders the task's current action, or "done" when the
// action row has already been cleaned up.
func (rou *Router) taskState(c *gin.Context) {
	action, err := models.TaskActionGet(rou.ctx, "id=?", UrlParamsInt64(c, "id"))
	if err != nil {
		ginx.NewRender(c).Data("", err)
		return
	}
	state := "done"
	if action != nil {
		state = action.Action
	}
	ginx.NewRender(c).Data(state, err)
}
// taskResult renders the task's hosts grouped by execution status.
func (rou *Router) taskResult(c *gin.Context) {
	id := UrlParamsInt64(c, "id")

	hosts, err := models.TaskHostStatus(rou.ctx, id)
	if err != nil {
		errorx.Bomb(500, "load task hosts of %d occur error %v", id, err)
	}

	byStatus := make(map[string][]string)
	for _, h := range hosts {
		byStatus[h.Status] = append(byStatus[h.Status], h.Host)
	}

	ginx.NewRender(c).Data(byStatus, nil)
}
// taskHostOutput renders the full task-host row (status + output) for
// one host of the task.
func (rou *Router) taskHostOutput(c *gin.Context) {
	obj, err := models.TaskHostGet(rou.ctx, UrlParamsInt64(c, "id"), ginx.UrlParamStr(c, "host"))
	ginx.NewRender(c).Data(obj, err)
}
// taskHostStdout renders one host's stdout, either from the database
// (default) or proxied from the agentd running on the host itself.
// The database branch previously dereferenced obj.Stdout without
// checking err/obj and panicked when the row was missing.
func (rou *Router) taskHostStdout(c *gin.Context) {
	id := UrlParamsInt64(c, "id")
	host := ginx.UrlParamStr(c, "host")
	if config.C.Output.ComeFrom == "database" || config.C.Output.ComeFrom == "" {
		obj, err := models.TaskHostGet(rou.ctx, id, host)
		if err != nil {
			ginx.NewRender(c).Data("", err)
			return
		}
		if obj == nil {
			ginx.NewRender(c).Data("", fmt.Errorf("task: %d, host(%s) not exists", id, host))
			return
		}
		ginx.NewRender(c).Data(obj.Stdout, nil)
		return
	}
	if config.C.Output.AgtdPort <= 0 || config.C.Output.AgtdPort > 65535 {
		ginx.NewRender(c).Message(fmt.Errorf("remotePort(%d) invalid", config.C.Output.AgtdPort))
		return
	}
	url := fmt.Sprintf("http://%s:%d/output/%d/stdout.json", host, config.C.Output.AgtdPort, id)
	// do not follow redirects; return whatever the agentd answers
	client := &http.Client{
		CheckRedirect: func(req *http.Request, via []*http.Request) error {
			return http.ErrUseLastResponse
		},
	}
	resp, err := client.Get(url)
	errorx.Dangerous(err)
	defer resp.Body.Close()
	bs, err := ioutil.ReadAll(resp.Body)
	errorx.Dangerous(err)
	c.Writer.Header().Set("Content-Type", "application/json; charset=UTF-8")
	c.Writer.Write(bs)
}
// taskHostStderr renders one host's stderr, either from the database
// (default) or proxied from the agentd running on the host itself.
// The database branch previously dereferenced obj.Stderr without
// checking err/obj and panicked when the row was missing.
func (rou *Router) taskHostStderr(c *gin.Context) {
	id := UrlParamsInt64(c, "id")
	host := ginx.UrlParamStr(c, "host")
	if config.C.Output.ComeFrom == "database" || config.C.Output.ComeFrom == "" {
		obj, err := models.TaskHostGet(rou.ctx, id, host)
		if err != nil {
			ginx.NewRender(c).Data("", err)
			return
		}
		if obj == nil {
			ginx.NewRender(c).Data("", fmt.Errorf("task: %d, host(%s) not exists", id, host))
			return
		}
		ginx.NewRender(c).Data(obj.Stderr, nil)
		return
	}
	if config.C.Output.AgtdPort <= 0 || config.C.Output.AgtdPort > 65535 {
		ginx.NewRender(c).Message(fmt.Errorf("remotePort(%d) invalid", config.C.Output.AgtdPort))
		return
	}
	url := fmt.Sprintf("http://%s:%d/output/%d/stderr.json", host, config.C.Output.AgtdPort, id)
	// do not follow redirects; return whatever the agentd answers
	client := &http.Client{
		CheckRedirect: func(req *http.Request, via []*http.Request) error {
			return http.ErrUseLastResponse
		},
	}
	resp, err := client.Get(url)
	errorx.Dangerous(err)
	defer resp.Body.Close()
	bs, err := ioutil.ReadAll(resp.Body)
	errorx.Dangerous(err)
	c.Writer.Header().Set("Content-Type", "application/json; charset=UTF-8")
	c.Writer.Write(bs)
}
// taskStdoutTxt streams the stdout of every host as plain text,
// blank-line separated, each section prefixed with "host:".
func (rou *Router) taskStdoutTxt(c *gin.Context) {
	id := UrlParamsInt64(c, "id")

	meta, err := models.TaskMetaGet(rou.ctx, "id = ?", id)
	if err != nil {
		c.String(500, err.Error())
		return
	}
	if meta == nil {
		c.String(404, "no such task")
		return
	}

	stdouts, err := meta.Stdouts(rou.ctx)
	if err != nil {
		c.String(500, err.Error())
		return
	}

	w := c.Writer
	w.Header().Set("Content-Type", "text/plain; charset=utf-8")
	for i, item := range stdouts {
		if i > 0 {
			w.Write([]byte("\n\n"))
		}
		w.Write([]byte(item.Host + ":\n"))
		w.Write([]byte(item.Stdout))
	}
}
// taskStderrTxt streams the stderr of every host as plain text,
// blank-line separated, each section prefixed with "host:".
func (rou *Router) taskStderrTxt(c *gin.Context) {
	id := UrlParamsInt64(c, "id")

	meta, err := models.TaskMetaGet(rou.ctx, "id = ?", id)
	if err != nil {
		c.String(500, err.Error())
		return
	}
	if meta == nil {
		c.String(404, "no such task")
		return
	}

	stderrs, err := meta.Stderrs(rou.ctx)
	if err != nil {
		c.String(500, err.Error())
		return
	}

	w := c.Writer
	w.Header().Set("Content-Type", "text/plain; charset=utf-8")
	for i, item := range stderrs {
		if i > 0 {
			w.Write([]byte("\n\n"))
		}
		w.Write([]byte(item.Host + ":\n"))
		w.Write([]byte(item.Stderr))
	}
}
// TaskStdoutData is one host's stdout in the JSON output API.
type TaskStdoutData struct {
	Host   string `json:"host"`
	Stdout string `json:"stdout"`
}

// TaskStderrData is one host's stderr in the JSON output API.
type TaskStderrData struct {
	Host   string `json:"host"`
	Stderr string `json:"stderr"`
}
// taskStdoutJSON renders stdout as JSON, either for a single host
// (?host=...) or for every host of the task.
// Fixes the "not eixsts" typo in the error message and drops an unused
// length variable.
func (rou *Router) taskStdoutJSON(c *gin.Context) {
	task := TaskMeta(rou.ctx, UrlParamsInt64(c, "id"))
	host := ginx.QueryStr(c, "host", "")

	var ret []TaskStdoutData
	if host != "" {
		obj, err := models.TaskHostGet(rou.ctx, task.Id, host)
		if err != nil {
			ginx.NewRender(c).Data("", err)
			return
		}
		if obj == nil {
			ginx.NewRender(c).Data("", fmt.Errorf("task: %d, host(%s) not exists", task.Id, host))
			return
		}
		ret = append(ret, TaskStdoutData{
			Host:   host,
			Stdout: obj.Stdout,
		})
	} else {
		hosts, err := models.TaskHostGets(rou.ctx, task.Id)
		if err != nil {
			ginx.NewRender(c).Data("", err)
			return
		}
		ret = make([]TaskStdoutData, 0, len(hosts))
		for i := range hosts {
			ret = append(ret, TaskStdoutData{
				Host:   hosts[i].Host,
				Stdout: hosts[i].Stdout,
			})
		}
	}

	ginx.NewRender(c).Data(ret, nil)
}
// taskStderrJSON renders stderr as JSON, either for a single host
// (?host=...) or for every host of the task.
// Fixes the "not eixsts" typo in the error message and drops an unused
// length variable.
func (rou *Router) taskStderrJSON(c *gin.Context) {
	task := TaskMeta(rou.ctx, UrlParamsInt64(c, "id"))
	host := ginx.QueryStr(c, "host", "")

	var ret []TaskStderrData
	if host != "" {
		obj, err := models.TaskHostGet(rou.ctx, task.Id, host)
		if err != nil {
			ginx.NewRender(c).Data("", err)
			return
		}
		if obj == nil {
			ginx.NewRender(c).Data("", fmt.Errorf("task: %d, host(%s) not exists", task.Id, host))
			return
		}
		ret = append(ret, TaskStderrData{
			Host:   host,
			Stderr: obj.Stderr,
		})
	} else {
		hosts, err := models.TaskHostGets(rou.ctx, task.Id)
		if err != nil {
			ginx.NewRender(c).Data("", err)
			return
		}
		ret = make([]TaskStderrData, 0, len(hosts))
		for i := range hosts {
			ret = append(ret, TaskStderrData{
				Host:   hosts[i].Host,
				Stderr: hosts[i].Stderr,
			})
		}
	}

	ginx.NewRender(c).Data(ret, nil)
}
// taskForm is the request body for creating a task.
type taskForm struct {
	Title          string   `json:"title" binding:"required"`
	Account        string   `json:"account" binding:"required"` // system account the script runs as
	Batch          int      `json:"batch"`                      // hosts per dispatch round; 0 = all at once
	Tolerance      int      `json:"tolerance"`                  // max failed hosts before auto-pause
	Timeout        int      `json:"timeout"`                    // per-host timeout in seconds
	Pause          string   `json:"pause"`                      // comma-separated hosts to pause before
	Script         string   `json:"script" binding:"required"`
	Args           string   `json:"args"`
	Stdin          string   `json:"stdin"`
	Action         string   `json:"action" binding:"required"`
	Creator        string   `json:"creator" binding:"required"`
	Hosts          []string `json:"hosts" binding:"required"`
	AlertTriggered bool     `json:"alert_triggered"` // true when created by an alert rule
}
// taskAdd creates a task. Alert-triggered tasks on edge nodes are
// cached in redis and handed straight to agentd; everything else goes
// through the database as before.
func (rou *Router) taskAdd(c *gin.Context) {
	var f taskForm
	ginx.BindJSON(c, &f)
	hosts := cleanHosts(f.Hosts)
	if len(hosts) == 0 {
		errorx.Bomb(http.StatusBadRequest, "arg(hosts) empty")
	}
	taskMeta := &models.TaskMeta{
		Title:     f.Title,
		Account:   f.Account,
		Batch:     f.Batch,
		Tolerance: f.Tolerance,
		Timeout:   f.Timeout,
		Pause:     f.Pause,
		Script:    f.Script,
		Args:      f.Args,
		Stdin:     f.Stdin,
		Creator:   f.Creator,
	}
	err := taskMeta.CleanFields()
	ginx.Dangerous(err)
	taskMeta.HandleFH(hosts[0])
	authUser := c.MustGet(gin.AuthUserKey).(string)
	// Tasks come in two flavors: triggered by an alert rule, or submitted
	// by a user from n9e center. An alert-triggered task in an edge
	// machine room needs no planning, and the room may be cut off from the
	// db, so it is cached in redis and dispatched directly to agentd.
	if !config.C.IsCenter && f.AlertTriggered {
		if err := taskMeta.Create(rou.ctx); err != nil {
			// When the network is down, generate a unique id locally: a
			// redis auto-increment keeps ids distinct between n9e-edge
			// instances in the same room. It cannot prevent collisions
			// across rooms, so these ids are never written back to the
			// database — they only serve the local execution loop.
			taskMeta.Id, err = storage.IdGet(rou.ctx.Redis)
			ginx.Dangerous(err)
		}
		// NOTE(review): this err is the outer one (nil after the Dangerous
		// call above); the err from taskMeta.Create is shadowed inside the
		// if-init, so this branch always runs — confirm that is intended.
		if err == nil {
			taskHost := models.TaskHost{
				Id:     taskMeta.Id,
				Host:   hosts[0],
				Status: "running",
			}
			if err = taskHost.Create(rou.ctx); err != nil {
				logger.Warningf("task_add_fail: authUser=%s title=%s err=%s", authUser, taskMeta.Title, err.Error())
			}
		}
		// cache the task meta and the pending assignment
		err = taskMeta.Cache(rou.ctx, hosts[0])
		ginx.Dangerous(err)
	} else {
		// center deployments keep the original flow
		err = taskMeta.Save(rou.ctx, hosts, f.Action)
		ginx.Dangerous(err)
	}
	logger.Infof("task_add_succ: authUser=%s title=%s", authUser, taskMeta.Title)
	ginx.NewRender(c).Data(taskMeta.Id, err)
}
// taskGet renders one task: its meta, per-host rows and current action.
func (rou *Router) taskGet(c *gin.Context) {
	meta := TaskMeta(rou.ctx, UrlParamsInt64(c, "id"))

	hosts, err := meta.Hosts(rou.ctx)
	errorx.Dangerous(err)

	action, err := meta.Action(rou.ctx)
	errorx.Dangerous(err)

	// a missing action row means the task has finished
	actionStr := ""
	if action == nil {
		meta.Done = true
	} else {
		actionStr = action.Action
	}

	ginx.NewRender(c).Data(gin.H{
		"meta":   meta,
		"hosts":  hosts,
		"action": actionStr,
	}, nil)
}
// doneIds takes a comma-separated list of task ids and returns the ids
// of tasks that are already done (no action row left).
func (rou *Router) doneIds(c *gin.Context) {
	ids := ginx.QueryStr(c, "ids", "")
	if ids == "" {
		errorx.Dangerous("arg(ids) empty")
	}
	idsint64 := str.IdsInt64(ids, ",")
	if len(idsint64) == 0 {
		errorx.Dangerous("arg(ids) empty")
	}
	exists, err := models.TaskActionExistsIds(rou.ctx, idsint64)
	errorx.Dangerous(err)
	// ids with an action row are still in progress; the rest are done
	dones := slice.SubInt64(idsint64, exists)
	ginx.NewRender(c).Data(gin.H{
		"list": dones,
	}, nil)
}
// taskGets lists tasks created within the last N days, filtered by
// query/creator, paginated, with a Done flag derived from whether an
// action row still exists.
func (rou *Router) taskGets(c *gin.Context) {
	query := ginx.QueryStr(c, "query", "")
	limit := ginx.QueryInt(c, "limit", 20)
	creator := ginx.QueryStr(c, "creator", "")
	days := ginx.QueryInt64(c, "days", 7)
	before := time.Unix(time.Now().Unix()-days*24*3600, 0)

	total, err := models.TaskMetaTotal(rou.ctx, creator, query, before)
	errorx.Dangerous(err)

	list, err := models.TaskMetaGets(rou.ctx, creator, query, before, limit, ginx.Offset(c, limit))
	errorx.Dangerous(err)

	ids := make([]int64, len(list))
	for i := range list {
		ids[i] = list[i].Id
	}

	exists, err := models.TaskActionExistsIds(rou.ctx, ids)
	errorx.Dangerous(err)

	// a task is done when its action row has been cleaned up
	for i := range list {
		list[i].Done = !slice.ContainsInt64(exists, list[i].Id)
	}

	ginx.NewRender(c).Data(gin.H{
		"total": total,
		"list":  list,
	}, nil)
}
// actionForm is the request body for task/host action updates.
type actionForm struct {
	Action string `json:"action"`
}
// taskAction updates the task's action; bombs (with http 200) when the
// task has already finished and no action row remains.
func (rou *Router) taskAction(c *gin.Context) {
	meta := TaskMeta(rou.ctx, UrlParamsInt64(c, "id"))
	var f actionForm
	ginx.BindJSON(c, &f)
	action, err := models.TaskActionGet(rou.ctx, "id=?", meta.Id)
	errorx.Dangerous(err)
	if action == nil {
		errorx.Bomb(200, "task already finished, no more action can do")
	}
	ginx.NewRender(c).Message(action.Update(rou.ctx, f.Action))
}
// taskHostAction applies a per-host action (ignore / kill / redo) on a
// running task. "ignore" on a paused task additionally hints the user
// that the task can be restarted.
func (rou *Router) taskHostAction(c *gin.Context) {
	host := ginx.UrlParamStr(c, "host")
	meta := TaskMeta(rou.ctx, UrlParamsInt64(c, "id"))
	// bombs when the task has already finished
	noopWhenDone(rou.ctx, meta.Id)
	var f actionForm
	ginx.BindJSON(c, &f)
	if f.Action == "ignore" {
		errorx.Dangerous(meta.IgnoreHost(rou.ctx, host))
		action, err := models.TaskActionGet(rou.ctx, "id=?", meta.Id)
		errorx.Dangerous(err)
		if action != nil && action.Action == "pause" {
			ginx.NewRender(c).Data("you can click start to run the task", nil)
			return
		}
	}
	if f.Action == "kill" {
		errorx.Dangerous(meta.KillHost(rou.ctx, host))
	}
	if f.Action == "redo" {
		errorx.Dangerous(meta.RedoHost(rou.ctx, host))
	}
	ginx.NewRender(c).Message(nil)
}
// noopWhenDone bombs (with http 200) when the task has no action row
// left, i.e. it is already finished and accepts no further actions.
func noopWhenDone(ctx *ctx.Context, id int64) {
	action, err := models.TaskActionGet(ctx, "id=?", id)
	errorx.Dangerous(err)
	if action == nil {
		errorx.Bomb(200, "task already finished, no more taskAction can do")
	}
}
// sqlCondForm is a generic table/where/args query payload used by the
// raw record endpoints.
type sqlCondForm struct {
	Table string
	Where string
	Args  []interface{}
}
// tableRecordListGet returns raw rows from one of the whitelisted ibex
// tables (task_host_doing, task_meta), filtered by the posted where
// clause; any other table is rejected with 400.
// NOTE(review): f.Args ([]interface{}) is passed as a single value — if
// TableRecordGets takes variadic args (cf. the "id=?", id call sites),
// this likely should be f.Args...; confirm against the model signature.
func (rou *Router) tableRecordListGet(c *gin.Context) {
	var f sqlCondForm
	ginx.BindJSON(c, &f)
	switch f.Table {
	case models.TaskHostDoing{}.TableName():
		lst, err := models.TableRecordGets[[]models.TaskHostDoing](rou.ctx, f.Table, f.Where, f.Args)
		ginx.NewRender(c).Data(lst, err)
	case models.TaskMeta{}.TableName():
		lst, err := models.TableRecordGets[[]models.TaskMeta](rou.ctx, f.Table, f.Where, f.Args)
		ginx.NewRender(c).Data(lst, err)
	default:
		ginx.Bomb(http.StatusBadRequest, "table[%v] not support", f.Table)
	}
}
// tableRecordCount returns the row count for the posted table/where.
// NOTE(review): unlike tableRecordListGet there is no table whitelist
// here, and f.Args is passed as a single value (see the note there).
func (rou *Router) tableRecordCount(c *gin.Context) {
	var f sqlCondForm
	ginx.BindJSON(c, &f)
	ginx.NewRender(c).Data(models.TableRecordCount(rou.ctx, f.Table, f.Where, f.Args))
}
// markDoneForm carries the final status and output of one host's run.
type markDoneForm struct {
	Id     int64
	Clock  int64
	Host   string
	Status string
	Stdout string
	Stderr string
}
// markDone records the posted final status/output for one host's run.
func (rou *Router) markDone(c *gin.Context) {
	var f markDoneForm
	ginx.BindJSON(c, &f)
	ginx.NewRender(c).Message(models.MarkDoneStatus(rou.ctx, f.Id, f.Clock, f.Host, f.Status, f.Stdout, f.Stderr))
}
// taskMetaAdd inserts a raw task meta row and returns its id.
func (rou *Router) taskMetaAdd(c *gin.Context) {
	var f models.TaskMeta
	ginx.BindJSON(c, &f)
	err := f.Create(rou.ctx)
	ginx.NewRender(c).Data(f.Id, err)
}
// taskHostAdd upserts a single task-host row.
func (rou *Router) taskHostAdd(c *gin.Context) {
	var f models.TaskHost
	ginx.BindJSON(c, &f)
	ginx.NewRender(c).Message(f.Upsert(rou.ctx))
}
// taskHostUpsert upserts a batch of task-host rows.
func (rou *Router) taskHostUpsert(c *gin.Context) {
	var f []models.TaskHost
	ginx.BindJSON(c, &f)
	ginx.NewRender(c).Data(models.TaskHostUpserts(rou.ctx, f))
}
// UrlParamsInt64 reads the int64 URL parameter named field. When a
// route reuses the same wildcard name twice (e.g.
// /busi-group/:id/task/:id), gin records both occurrences; in that
// case the second one wins. Bombs with 400 on a missing or
// non-numeric value.
// Generalized: the previous version matched the literal key "id"
// regardless of the field argument.
func UrlParamsInt64(c *gin.Context, field string) int64 {
	var params []gin.Param
	for _, p := range c.Params {
		if p.Key == field {
			params = append(params, p)
		}
	}
	var strval string
	switch len(params) {
	case 1:
		strval = ginx.UrlParamStr(c, field)
	case 2:
		strval = params[1].Value
	default:
		logger.Warningf("url param[%+v] not ok", params)
		errorx.Bomb(http.StatusBadRequest, "url param[%s] is blank", field)
	}
	intval, err := strconv.ParseInt(strval, 10, 64)
	if err != nil {
		errorx.Bomb(http.StatusBadRequest, "cannot convert %s to int64", strval)
	}
	return intval
}

View File

@@ -1,132 +0,0 @@
package router
import (
"fmt"
"github.com/ccfos/nightingale/v6/pkg/ctx"
"os"
"strings"
"github.com/ccfos/nightingale/v6/ibex/server/config"
"github.com/ccfos/nightingale/v6/pkg/aop"
"github.com/ccfos/nightingale/v6/center/router"
"github.com/gin-contrib/pprof"
"github.com/gin-gonic/gin"
)
// New builds the ibex gin engine: recovery middleware (always), the
// access-log middleware (only when enabled in config), the base
// operational endpoints, and the task API routes.
func New(ctx *ctx.Context, version string) *gin.Engine {
	gin.SetMode(config.C.RunMode)

	accessLogMid := aop.Logger()
	panicMid := aop.Recovery()

	if strings.ToLower(config.C.RunMode) == "release" {
		aop.DisableConsoleColor()
	}

	engine := gin.New()
	engine.Use(panicMid)

	// access log is opt-in via config
	if config.C.HTTP.PrintAccessLog {
		engine.Use(accessLogMid)
	}

	rt := NewRouter(ctx)
	rt.configBaseRouter(engine, version)
	rt.ConfigRouter(engine)

	return engine
}
// Router bundles the ibex HTTP handlers with the shared runtime context.
type Router struct {
	ctx *ctx.Context
}

// NewRouter returns a Router bound to the given context.
func NewRouter(ctx *ctx.Context) *Router {
	return &Router{
		ctx: ctx,
	}
}
// configBaseRouter registers the operational endpoints: pprof (when
// enabled in config), liveness ping, process pid, client address echo,
// and the build version.
func (rou *Router) configBaseRouter(r *gin.Engine, version string) {
	if config.C.HTTP.PProf {
		pprof.Register(r, "/debug/pprof")
	}

	r.GET("/ping", func(c *gin.Context) { c.String(200, "pong") })
	r.GET("/pid", func(c *gin.Context) { c.String(200, fmt.Sprintf("%d", os.Getpid())) })
	r.GET("/addr", func(c *gin.Context) { c.String(200, c.Request.RemoteAddr) })
	r.GET("/version", func(c *gin.Context) { c.String(200, version) })
}
// ConfigRouter registers the task API routes.
//
// When a center router is supplied (rts non-empty), the busi-group
// scoped "pages" routes are registered behind its auth/permission/
// busi-group-rw middlewares for browser use. The /ibex/v1 API group is
// always registered for server-to-server use, optionally protected by
// HTTP basic auth when credentials are configured.
func (rou *Router) ConfigRouter(r *gin.Engine, rts ...*router.Router) {
	if len(rts) > 0 {
		rt := rts[0]
		// NOTE: this prefix plus "/task/:id" repeats the :id name; see
		// UrlParamsInt64 for how the duplicate is resolved.
		pagesPrefix := "/api/n9e/busi-group/:id"
		pages := r.Group(pagesPrefix)
		{
			pages.GET("/task/:id", rt.Auth(), rt.User(), rt.Perm("/job-tasks"), rou.taskGet)
			pages.PUT("/task/:id/action", rt.Auth(), rt.User(), rt.Perm("/job-tasks/put"), rt.Bgrw(), rou.taskAction)
			pages.GET("/task/:id/stdout", rt.Auth(), rt.User(), rt.Perm("/job-tasks"), rou.taskStdout)
			pages.GET("/task/:id/stderr", rt.Auth(), rt.User(), rt.Perm("/job-tasks"), rou.taskStderr)
			pages.GET("/task/:id/state", rt.Auth(), rt.User(), rt.Perm("/job-tasks"), rou.taskState)
			pages.GET("/task/:id/result", rt.Auth(), rt.User(), rt.Perm("/job-tasks"), rou.taskResult)
			pages.PUT("/task/:id/host/:host/action", rt.Auth(), rt.User(), rt.Perm("/job-tasks/put"), rt.Bgrw(), rou.taskHostAction)
			pages.GET("/task/:id/host/:host/output", rt.Auth(), rt.User(), rt.Perm("/job-tasks"), rou.taskHostOutput)
			pages.GET("/task/:id/host/:host/stdout", rt.Auth(), rt.User(), rt.Perm("/job-tasks"), rou.taskHostStdout)
			pages.GET("/task/:id/host/:host/stderr", rt.Auth(), rt.User(), rt.Perm("/job-tasks"), rou.taskHostStderr)
			pages.GET("/task/:id/stdout.txt", rt.Auth(), rt.User(), rt.Perm("/job-tasks"), rou.taskStdoutTxt)
			pages.GET("/task/:id/stderr.txt", rt.Auth(), rt.User(), rt.Perm("/job-tasks"), rou.taskStderrTxt)
			pages.GET("/task/:id/stdout.json", rt.Auth(), rt.User(), rt.Perm("/job-tasks"), rou.taskStdoutJSON)
			pages.GET("/task/:id/stderr.json", rt.Auth(), rt.User(), rt.Perm("/job-tasks"), rou.taskStderrJSON)
		}
	}

	// when basic-auth credentials are configured the group is rebuilt
	// with the auth middleware attached
	api := r.Group("/ibex/v1")
	if len(config.C.BasicAuth) > 0 {
		api = r.Group("/ibex/v1", gin.BasicAuth(config.C.BasicAuth))
	}
	{
		api.POST("/tasks", rou.taskAdd)
		api.GET("/tasks", rou.taskGets)
		api.GET("/tasks/done-ids", rou.doneIds)
		api.GET("/task/:id", rou.taskGet)
		api.PUT("/task/:id/action", rou.taskAction)
		api.GET("/task/:id/stdout", rou.taskStdout)
		api.GET("/task/:id/stderr", rou.taskStderr)
		api.GET("/task/:id/state", rou.taskState)
		api.GET("/task/:id/result", rou.taskResult)
		api.PUT("/task/:id/host/:host/action", rou.taskHostAction)
		api.GET("/task/:id/host/:host/output", rou.taskHostOutput)
		api.GET("/task/:id/host/:host/stdout", rou.taskHostStdout)
		api.GET("/task/:id/host/:host/stderr", rou.taskHostStderr)
		api.GET("/task/:id/stdout.txt", rou.taskStdoutTxt)
		api.GET("/task/:id/stderr.txt", rou.taskStderrTxt)
		api.GET("/task/:id/stdout.json", rou.taskStdoutJSON)
		api.GET("/task/:id/stderr.json", rou.taskStderrJSON)
		// api for edge server
		// NOTE(review): "/task/host/" has a trailing slash unlike its
		// siblings — looks unintentional; confirm against edge clients
		// before changing.
		api.POST("/table/record/list", rou.tableRecordListGet)
		api.POST("/table/record/count", rou.tableRecordCount)
		api.POST("/mark/done", rou.markDone)
		api.POST("/task/meta", rou.taskMetaAdd)
		api.POST("/task/host/", rou.taskHostAdd)
		api.POST("/task/hosts/upsert", rou.taskHostUpsert)
	}
}

View File

@@ -1,93 +0,0 @@
package rpc
import (
"fmt"
"github.com/ccfos/nightingale/v6/models"
"os"
"github.com/toolkits/pkg/logger"
"github.com/ccfos/nightingale/v6/ibex/types"
)
// Ping writes "pong" into output; it serves as an RPC liveness check.
// The input argument is ignored.
func (*Server) Ping(input string, output *string) error {
	*output = "pong"
	return nil
}
// GetTaskMeta fills resp with the script/args/account/stdin of the task
// identified by id. Lookup failures are reported via resp.Message
// rather than as an RPC error, so the transport call itself succeeds.
func (*Server) GetTaskMeta(id int64, resp *types.TaskMetaResponse) error {
	taskMeta, lookupErr := models.TaskMetaGetByID(ctxC, id)
	switch {
	case lookupErr != nil:
		resp.Message = lookupErr.Error()
	case taskMeta == nil:
		resp.Message = fmt.Sprintf("task %d not found", id)
	default:
		resp.Script = taskMeta.Script
		resp.Args = taskMeta.Args
		resp.Account = taskMeta.Account
		resp.Stdin = taskMeta.Stdin
	}
	return nil
}
// Report handles one heartbeat from an agent: it persists any task
// results carried in the request, then replies with the tasks currently
// assigned to that agent (from the doing-cache). Persistence errors are
// returned via resp.Message, not as an RPC error.
func (*Server) Report(req types.ReportRequest, resp *types.ReportResponse) error {
	// len() on a nil slice is 0, so the former `!= nil &&` check was
	// redundant (staticcheck S1009).
	if len(req.ReportTasks) > 0 {
		if err := handleDoneTask(req); err != nil {
			resp.Message = err.Error()
			return nil
		}
	}

	doings := models.GetDoingCache(req.Ident)

	tasks := make([]types.AssignTask, 0, len(doings))
	for _, doing := range doings {
		tasks = append(tasks, types.AssignTask{
			Id:     doing.Id,
			Clock:  doing.Clock,
			Action: doing.Action,
		})
	}

	resp.AssignTasks = tasks
	return nil
}
// handleDoneTask persists the per-task results carried in a report.
// Tasks still "running" stream their partial output only when the
// CONTINUOUS_OUTPUT=1 environment variable is set; success/failed tasks
// are marked done unless they have already left the doing-cache (which
// means a duplicate report from the agent). The first persistence error
// aborts the loop and is returned.
func handleDoneTask(req types.ReportRequest) error {
	continuous, ok := os.LookupEnv("CONTINUOUS_OUTPUT")
	streamOutput := ok && continuous == "1"

	for _, t := range req.ReportTasks {
		if streamOutput && t.Status == "running" {
			if err := models.RealTimeUpdateOutput(ctxC, t.Id, req.Ident, t.Stdout, t.Stderr); err != nil {
				logger.Errorf("cannot update output, id:%d, hostname:%s, clock:%d, status:%s, err: %v", t.Id, req.Ident, t.Clock, t.Status, err)
				return err
			}
			continue
		}

		if t.Status != "success" && t.Status != "failed" {
			continue
		}

		exist, isEdgeAlertTriggered := models.CheckExistAndEdgeAlertTriggered(req.Ident, t.Id)
		// agents may re-report results; once a task has left the
		// task_host_doing cache it has already been marked done and
		// needs no further handling
		if !exist {
			continue
		}

		if err := models.MarkDoneStatus(ctxC, t.Id, t.Clock, req.Ident, t.Status, t.Stdout, t.Stderr, isEdgeAlertTriggered); err != nil {
			logger.Errorf("cannot mark task done, id:%d, hostname:%s, clock:%d, status:%s, err: %v", t.Id, req.Ident, t.Clock, t.Status, err)
			return err
		}
	}

	return nil
}

View File

@@ -1,61 +0,0 @@
package rpc
import (
"bufio"
"fmt"
"io"
"net"
"net/rpc"
"os"
"reflect"
"time"
"github.com/ccfos/nightingale/v6/pkg/ctx"
"github.com/toolkits/pkg/logger"
"github.com/ugorji/go/codec"
)
// Server is the receiver type registered with net/rpc; its exported
// methods form the RPC method set.
type Server int

// ctxC holds the runtime context shared by all RPC handlers in this package.
var ctxC *ctx.Context

// Start stores the context and launches the RPC listener in the background.
func Start(listen string, ctx *ctx.Context) {
	ctxC = ctx
	go serve(listen)
}
// serve accepts TCP connections on listen forever, speaking
// msgpack-rpc. A listen failure is fatal (the process exits); accept
// errors are logged and retried after a short pause.
func serve(listen string) {
	server := rpc.NewServer()
	server.Register(new(Server))
	l, err := net.Listen("tcp", listen)
	if err != nil {
		fmt.Printf("fail to listen on: %s, error: %v\n", listen, err)
		os.Exit(1)
	}
	fmt.Println("rpc.listening:", listen)
	// decode msgpack maps into plain map[string]interface{}
	var mh codec.MsgpackHandle
	mh.MapType = reflect.TypeOf(map[string]interface{}(nil))
	duration := time.Duration(100) * time.Millisecond
	for {
		conn, err := l.Accept()
		if err != nil {
			// back off briefly so a persistent accept error does not spin
			logger.Warningf("listener accept error: %v", err)
			time.Sleep(duration)
			continue
		}
		// wrap the raw conn with buffered reader/writer; presumably the
		// codec flushes the bufio.Writer after each response — TODO confirm
		var bufconn = struct {
			io.Closer
			*bufio.Reader
			*bufio.Writer
		}{conn, bufio.NewReader(conn), bufio.NewWriter(conn)}
		// one goroutine per connection
		go server.ServeCodec(codec.MsgpackSpecRpc.ServerCodec(bufconn, &mh))
	}
}

View File

@@ -1,159 +0,0 @@
package server
import (
"context"
"fmt"
"github.com/ccfos/nightingale/v6/pkg/ctx"
"os"
"os/signal"
"path/filepath"
"syscall"
"github.com/ccfos/nightingale/v6/ibex/server/config"
"github.com/ccfos/nightingale/v6/ibex/server/router"
"github.com/ccfos/nightingale/v6/ibex/server/rpc"
"github.com/ccfos/nightingale/v6/ibex/server/timer"
"github.com/ccfos/nightingale/v6/pkg/httpx"
"github.com/ccfos/nightingale/v6/pkg/logx"
"github.com/ccfos/nightingale/v6/storage"
"github.com/toolkits/pkg/i18n"
)
// Server carries the startup parameters for the ibex server process.
type Server struct {
	ConfigFile string
	Version    string
}

// ServerOption mutates a Server before startup.
type ServerOption func(*Server)

// SetConfigFile overrides the default configuration file path.
func SetConfigFile(f string) ServerOption {
	return func(s *Server) { s.ConfigFile = f }
}

// SetVersion sets the version string reported by the server.
func SetVersion(v string) ServerOption {
	return func(s *Server) { s.Version = v }
}
// Run loads configuration, applies the options, initializes the server,
// then blocks until a termination signal arrives. SIGHUP is currently a
// no-op (reserved for config reload); any other unexpected signal also
// exits. Cleanup functions run before the process exits, and the exit
// code is 0 only for a clean INT/TERM/QUIT shutdown.
func Run(isCenter bool, opts ...ServerOption) {
	code := 1
	sc := make(chan os.Signal, 1)
	signal.Notify(sc, syscall.SIGHUP, syscall.SIGINT, syscall.SIGTERM, syscall.SIGQUIT)

	server := Server{
		ConfigFile: filepath.Join("etc", "ibex", "server.toml"),
		Version:    "not specified",
	}

	for _, opt := range opts {
		opt(&server)
	}

	// parse config file; MustLoad aborts on failure
	config.MustLoad(server.ConfigFile)
	config.C.IsCenter = isCenter

	cleanFunc, err := server.initialize()
	if err != nil {
		fmt.Println("server init fail:", err)
		os.Exit(code)
	}

EXIT:
	for {
		sig := <-sc
		fmt.Println("received signal:", sig.String())
		switch sig {
		case syscall.SIGQUIT, syscall.SIGTERM, syscall.SIGINT:
			code = 0
			break EXIT
		case syscall.SIGHUP:
			// reload configuration?
		default:
			break EXIT
		}
	}

	cleanFunc()
	fmt.Println("server exited")
	os.Exit(code)
}
// initialize wires up all subsystems (i18n, logging, redis, optional
// center DB, id generator, background timers, HTTP, RPC) and returns a
// cleanup function that releases them in registration order. On error
// the partial cleanup function is still returned so already-initialized
// resources can be released.
func (s Server) initialize() (func(), error) {
	fns := Functions{}
	bgCtx, cancel := context.WithCancel(context.Background())
	fns.Add(cancel)
	// init i18n
	i18n.Init()
	// init logger
	loggerClean, err := logx.Init(config.C.Log)
	if err != nil {
		return fns.Ret(), err
	} else {
		fns.Add(loggerClean)
	}
	var ctxC *ctx.Context
	var redis storage.Redis
	if redis, err = storage.NewRedis(config.C.Redis); err != nil {
		return fns.Ret(), err
	}
	// init database; only the center node talks to the DB directly,
	// edge nodes get a context without a DB handle
	if config.C.IsCenter {
		db, err := storage.New(config.C.DB)
		if err != nil {
			return fns.Ret(), err
		}
		ctxC = ctx.NewContext(context.Background(), db, redis, true, config.C.CenterApi)
	} else {
		ctxC = ctx.NewContext(context.Background(), nil, redis, false, config.C.CenterApi)
	}
	// a broken id generator is unrecoverable, hence the hard exit
	if err := storage.IdInit(ctxC.Redis); err != nil {
		fmt.Println("cannot init id generator: ", err)
		os.Exit(1)
	}
	timer.CacheHostDoing(ctxC)
	timer.ReportResult(ctxC)
	// scheduling timers run only on the center node
	if config.C.IsCenter {
		go timer.Heartbeat(ctxC)
		go timer.Schedule(ctxC)
		go timer.CleanLong(ctxC)
	}
	// init http server
	r := router.New(ctxC, s.Version)
	httpClean := httpx.Init(config.C.HTTP, bgCtx, r)
	fns.Add(httpClean)
	// start rpc server
	rpc.Start(config.C.RPC.Listen, ctxC)
	// release all the resources
	return fns.Ret(), nil
}
// Functions accumulates cleanup callbacks to be run at shutdown.
type Functions struct {
	List []func()
}

// Add appends one cleanup callback.
func (fs *Functions) Add(f func()) {
	fs.List = append(fs.List, f)
}

// Ret returns a closure that invokes every registered callback in the
// order they were added.
func (fs *Functions) Ret() func() {
	return func() {
		for _, f := range fs.List {
			f()
		}
	}
}

View File

@@ -1,76 +0,0 @@
package timer
import (
"github.com/ccfos/nightingale/v6/models"
"github.com/ccfos/nightingale/v6/pkg/ctx"
"time"
"github.com/ccfos/nightingale/v6/ibex/server/config"
"github.com/toolkits/pkg/logger"
)
// Heartbeat periodically registers this scheduler as alive and reclaims
// tasks from dead schedulers. The interval defaults to 1000ms when the
// config leaves it unset. Runs forever; call in its own goroutine.
func Heartbeat(ctx *ctx.Context) {
	if config.C.Heartbeat.Interval == 0 {
		config.C.Heartbeat.Interval = 1000
	}
	for {
		heartbeat(ctx)
		interval := time.Duration(config.C.Heartbeat.Interval) * time.Millisecond
		time.Sleep(interval)
	}
}
// heartbeat refreshes this scheduler's liveness record, then scans for
// dead schedulers: a dead scheduler with no tasks is deleted, otherwise
// its tasks are taken over by this scheduler. Any error aborts the
// whole pass (it will retry on the next tick).
func heartbeat(ctx *ctx.Context) {
	ident := config.C.Heartbeat.LocalAddr
	err := models.TaskSchedulerHeartbeat(ctx, ident)
	if err != nil {
		logger.Errorf("task scheduler(%s) cannot heartbeat: %v", ident, err)
		return
	}
	dss, err := models.DeadTaskSchedulers(ctx)
	if err != nil {
		logger.Errorf("cannot get dead task schedulers: %v", err)
		return
	}
	cnt := len(dss)
	if cnt == 0 {
		return
	}
	for i := 0; i < cnt; i++ {
		ids, err := models.TasksOfScheduler(ctx, dss[i])
		if err != nil {
			logger.Errorf("cannot get tasks of scheduler(%s): %v", dss[i], err)
			return
		}
		if len(ids) == 0 {
			err = models.DelDeadTaskScheduler(ctx, dss[i])
			if err != nil {
				logger.Errorf("cannot del dead task scheduler(%s): %v", dss[i], err)
				return
			}
		}
		// with empty ids this is a no-op, so falling through after the
		// delete branch above is harmless
		takeOverTasks(ctx, ident, dss[i], ids)
	}
}
// takeOverTasks moves each task in ids from the dead scheduler to the
// alive one; the first DB error aborts the remaining transfers. A false
// result (another scheduler won the race) is silently skipped.
func takeOverTasks(ctx *ctx.Context, alive, dead string, ids []int64) {
	for _, id := range ids {
		success, err := models.TakeOverTask(ctx, id, dead, alive)
		if err != nil {
			logger.Errorf("cannot take over task: %v", err)
			return
		}
		if success {
			logger.Infof("%s take over task[%d] of %s", alive, id, dead)
		}
	}
}

View File

@@ -1,53 +0,0 @@
package timer
import (
"fmt"
"github.com/ccfos/nightingale/v6/models"
"github.com/ccfos/nightingale/v6/pkg/ctx"
"time"
"github.com/toolkits/pkg/logger"
)
// CacheHostDoing primes the in-memory cache of the whole task_host_doing
// table (to lighten DB load) once, synchronously, then keeps refreshing
// it in a background goroutine.
func CacheHostDoing(ctx *ctx.Context) {
	err := cacheHostDoing(ctx)
	if err != nil {
		fmt.Println("cannot cache task_host_doing data: ", err)
	}
	go loopCacheHostDoing(ctx)
}
// loopCacheHostDoing refreshes the doing-cache every 400ms, forever.
func loopCacheHostDoing(ctx *ctx.Context) {
	const refreshEvery = 400 * time.Millisecond
	for {
		time.Sleep(refreshEvery)
		if err := cacheHostDoing(ctx); err != nil {
			logger.Warning("cannot cache task_host_doing data: ", err)
		}
	}
}
// cacheHostDoing rebuilds the doing-cache from both the DB table and
// redis, grouping entries by host. DB-sourced entries get
// AlertTriggered=false, redis-sourced entries true. Whatever data was
// fetched is cached even on partial failure.
//
// Fix: the old code reused one err variable, so a DB read failure was
// silently overwritten by the redis call's result and never returned to
// the caller. We now return the first error encountered (DB before redis).
func cacheHostDoing(ctx *ctx.Context) error {
	doingsFromDb, dbErr := models.TableRecordGets[[]models.TaskHostDoing](ctx, models.TaskHostDoing{}.TableName(), "")
	if dbErr != nil {
		logger.Errorf("models.TableRecordGets fail: %v", dbErr)
	}

	doingsFromRedis, redisErr := models.CacheRecordGets[models.TaskHostDoing](ctx)
	if redisErr != nil {
		logger.Errorf("models.CacheRecordGets fail: %v", redisErr)
	}

	set := make(map[string][]models.TaskHostDoing)
	for _, doing := range doingsFromDb {
		doing.AlertTriggered = false
		set[doing.Host] = append(set[doing.Host], doing)
	}
	for _, doing := range doingsFromRedis {
		doing.AlertTriggered = true
		set[doing.Host] = append(set[doing.Host], doing)
	}

	models.SetDoingCache(set)

	if dbErr != nil {
		return dbErr
	}
	return redisErr
}

View File

@@ -1,27 +0,0 @@
package timer
import (
"fmt"
"github.com/ccfos/nightingale/v6/models"
"github.com/ccfos/nightingale/v6/pkg/ctx"
"time"
"github.com/toolkits/pkg/logger"
)
// ReportResult flushes cached task_host results produced by
// alert-triggered executions once at startup (logging any failure),
// then keeps flushing in a background goroutine.
func ReportResult(ctx *ctx.Context) {
	if err := models.ReportCacheResult(ctx); err != nil {
		// message fixed: "alter" was a typo for "alert"
		fmt.Println("cannot report task_host result from alert trigger: ", err)
	}
	go loopReport(ctx)
}
// loopReport flushes cached task_host results every 2 seconds, forever.
func loopReport(ctx *ctx.Context) {
	const interval = 2 * time.Second
	for {
		time.Sleep(interval)
		if err := models.ReportCacheResult(ctx); err != nil {
			// message fixed: "alter" was a typo for "alert"
			logger.Warning("cannot report task_host result from alert trigger: ", err)
		}
	}
}

View File

@@ -1,79 +0,0 @@
package timer
import (
"github.com/ccfos/nightingale/v6/models"
"github.com/ccfos/nightingale/v6/pkg/ctx"
"time"
"github.com/ccfos/nightingale/v6/ibex/server/config"
"github.com/ccfos/nightingale/v6/ibex/server/logic"
"github.com/toolkits/pkg/logger"
)
// Schedule drives the scheduler loop: once per second it first adopts
// orphan tasks, then advances the tasks this scheduler already owns.
// Runs forever; call in its own goroutine.
func Schedule(ctx *ctx.Context) {
	for {
		scheduleOrphan(ctx)
		scheduleMine(ctx)
		time.Sleep(time.Second)
	}
}
// scheduleMine checks timeouts and advances every task currently owned
// by this scheduler (identified by its heartbeat local address).
func scheduleMine(ctx *ctx.Context) {
	ids, err := models.TasksOfScheduler(ctx, config.C.Heartbeat.LocalAddr)
	if err != nil {
		logger.Errorf("cannot get tasks of scheduler(%s): %v", config.C.Heartbeat.LocalAddr, err)
		return
	}
	for _, id := range ids {
		logic.CheckTimeout(ctx, id)
		logic.ScheduleTask(ctx, id)
	}
}
// scheduleOrphan tries to adopt tasks that currently have no scheduler.
// Paused tasks are skipped, per-task errors skip just that task, and
// losing the take-over race to another scheduler is not an error.
func scheduleOrphan(ctx *ctx.Context) {
	ids, err := models.OrphanTaskIds(ctx)
	if err != nil {
		logger.Errorf("cannot get orphan task ids: %v", err)
		return
	}
	if len(ids) == 0 {
		return
	}
	logger.Debug("orphan task ids:", ids)

	for _, id := range ids {
		action, err := models.TaskActionGet(ctx, "id=?", id)
		if err != nil {
			logger.Errorf("cannot get task[%d] action: %v", id, err)
			continue
		}
		if action == nil || action.Action == "pause" {
			continue
		}

		mine, err := models.TakeOverTask(ctx, id, "", config.C.Heartbeat.LocalAddr)
		if err != nil {
			logger.Errorf("cannot take over task[%d]: %v", id, err)
			continue
		}
		if !mine {
			continue
		}

		logger.Debugf("task[%d] is mine", id)
		logic.ScheduleTask(ctx, id)
	}
}

View File

@@ -1,38 +0,0 @@
package timer
import (
"github.com/ccfos/nightingale/v6/models"
"github.com/ccfos/nightingale/v6/pkg/ctx"
"time"
"github.com/toolkits/pkg/logger"
)
// CleanLong cancels over-long-running tasks once per day, forever.
// Call in its own goroutine.
func CleanLong(ctx *ctx.Context) {
	const period = 24 * time.Hour
	for {
		cleanLongTask(ctx)
		time.Sleep(period)
	}
}
// cleanLongTask issues a cancel action for every long-running task id;
// failures are logged per task and do not stop the sweep.
func cleanLongTask(ctx *ctx.Context) {
	ids, err := models.LongTaskIds(ctx)
	if err != nil {
		logger.Error("LongTaskIds:", err)
		return
	}
	// ranging over a nil slice is a no-op, matching the old nil check
	for _, id := range ids {
		action := models.TaskAction{Id: id}
		if err := action.Update(ctx, "cancel"); err != nil {
			logger.Errorf("cannot cancel long task[%d]: %v", id, err)
		}
	}
}

View File

@@ -1,33 +0,0 @@
package types
// TaskMetaResponse returns one task's executable definition to an
// agent; Message is set (and the rest left zero) when the lookup fails.
type TaskMetaResponse struct {
	Message string
	Script string
	Args string
	Account string
	Stdin string
}

// ReportTask is one task result (or a running-task output snapshot)
// carried in an agent's report.
type ReportTask struct {
	Id int64
	Clock int64
	Status string // e.g. "running", "success", "failed"
	Stdout string
	Stderr string
}

// ReportRequest is an agent heartbeat: its ident plus any task results.
type ReportRequest struct {
	Ident string
	ReportTasks []ReportTask
}

// AssignTask tells an agent which action to take on which task.
type AssignTask struct {
	Id int64
	Clock int64
	Action string
}

// ReportResponse answers a heartbeat with the agent's assigned tasks;
// Message carries a persistence error, if any.
type ReportResponse struct {
	Message string
	AssignTasks []AssignTask
}

View File

Before

Width:  |  Height:  |  Size: 2.8 KiB

After

Width:  |  Height:  |  Size: 2.8 KiB

View File

@@ -584,7 +584,7 @@
"links": [
{
"title": "下钻",
"url": "/dashboards/automq-group-metrics?TSDB=${DS_PROMETHEUS}\u0026cluster_id=${cluster_id}\u0026group_id=${__field.labels.consumer_group}\u0026partition=all\u0026topic=${__field.labels.topic}"
"url": "/built-in-components/dashboard/detail?__uuid__=1717556327172992000&TSDB=${DS_PROMETHEUS}\u0026cluster_id=${cluster_id}\u0026group_id=${__field.labels.consumer_group}\u0026partition=all\u0026topic=${__field.labels.topic}"
}
],
"showHeader": true
@@ -669,7 +669,7 @@
"links": [
{
"title": "下钻",
"url": "/dashboards/automq-topic-metrics?TSDB=${DS_PROMETHEUS}\u0026cluster_id=${cluster_id}\u0026topic=${__field.labels.topic}"
"url": "/built-in-components/dashboard/detail?__uuid__=1717556327174664000&TSDB=${DS_PROMETHEUS}\u0026cluster_id=${cluster_id}\u0026topic=${__field.labels.topic}"
}
],
"showHeader": true
@@ -781,7 +781,7 @@
"links": [
{
"title": "下钻",
"url": "/dashboards/automq-broker-metrics?DS_PROMETHEUS=${DS_PROMETHEUS}\u0026cluster_id=${cluster_id}\u0026node_id=${__field.labels.instance}"
"url": "/built-in-components/dashboard/detail?__uuid__=1717556327159415000&DS_PROMETHEUS=${DS_PROMETHEUS}\u0026cluster_id=${cluster_id}\u0026node_id=${__field.labels.instance}"
}
],
"showHeader": true

View File

@@ -0,0 +1,463 @@
{
"name": "IPMI for Prometheus",
"ident": "",
"configs": {
"version": "2.0.0",
"links": [],
"var": [
{
"name": "node",
"type": "query",
"datasource": {
"cate": "prometheus"
},
"definition": "label_values(ipmi_bmc_info, ident)",
"reg": "",
"multi": false
}
],
"panels": [
{
"type": "gauge",
"id": "f975fded-f57e-4a6e-80b4-50d5be6dd84c",
"layout": {
"h": 7,
"w": 24,
"x": 0,
"y": 0,
"i": "f975fded-f57e-4a6e-80b4-50d5be6dd84c",
"isResizable": true
},
"version": "2.0.0",
"datasourceCate": "prometheus",
"targets": [
{
"refId": "A",
"expr": "ipmi_temperature_celsius{ident='$node'}",
"legend": "{{name}}"
}
],
"transformations": [
{
"id": "organize",
"options": {}
}
],
"name": "Temperatures",
"links": [],
"custom": {
"textMode": "valueAndName",
"calc": "avg"
},
"options": {
"valueMappings": [],
"standardOptions": {
"util": "none"
},
"thresholds": {
"steps": [
{
"color": "green",
"value": null,
"type": "base"
},
{
"color": "red",
"value": 80
}
]
}
}
},
{
"type": "timeseries",
"id": "681f1191-4777-4377-8b77-404d9f036406",
"layout": {
"h": 5,
"w": 12,
"x": 0,
"y": 7,
"i": "681f1191-4777-4377-8b77-404d9f036406",
"isResizable": true
},
"version": "2.0.0",
"datasourceCate": "prometheus",
"targets": [
{
"refId": "A",
"expr": "ipmi_power_watts{ident='$node'}",
"legend": "{{name}}"
}
],
"transformations": [
{
"id": "organize",
"options": {}
}
],
"name": "Power",
"links": [],
"options": {
"tooltip": {
"mode": "all",
"sort": "none"
},
"legend": {
"displayMode": "hidden"
},
"standardOptions": {},
"thresholds": {
"steps": [
{
"color": "#634CD9",
"value": null,
"type": "base"
}
]
}
},
"custom": {
"drawStyle": "lines",
"lineInterpolation": "smooth",
"spanNulls": false,
"lineWidth": 1,
"fillOpacity": 0.5,
"gradientMode": "none",
"stack": "off",
"scaleDistribution": {
"type": "linear"
}
}
},
{
"type": "timeseries",
"id": "feede24c-8296-4127-982e-08cfc4151933",
"layout": {
"h": 5,
"w": 12,
"x": 12,
"y": 7,
"i": "feede24c-8296-4127-982e-08cfc4151933",
"isResizable": true
},
"version": "2.0.0",
"datasourceCate": "prometheus",
"targets": [
{
"refId": "A",
"expr": "ipmi_power_watts{ident='$node'} * 30 * 24 ",
"legend": "{{name}}"
}
],
"transformations": [
{
"id": "organize",
"options": {}
}
],
"name": "Power usage 30d",
"links": [],
"options": {
"tooltip": {
"mode": "all",
"sort": "none"
},
"legend": {
"displayMode": "hidden"
},
"standardOptions": {},
"thresholds": {
"steps": [
{
"color": "#634CD9",
"value": null,
"type": "base"
}
]
}
},
"custom": {
"drawStyle": "lines",
"lineInterpolation": "smooth",
"spanNulls": false,
"lineWidth": 1,
"fillOpacity": 0.5,
"gradientMode": "none",
"stack": "off",
"scaleDistribution": {
"type": "linear"
}
}
},
{
"type": "timeseries",
"id": "9e11e7f5-ed3c-49eb-8a72-ee76c8700c24",
"layout": {
"h": 7,
"w": 12,
"x": 0,
"y": 12,
"i": "9e11e7f5-ed3c-49eb-8a72-ee76c8700c24",
"isResizable": true
},
"version": "2.0.0",
"datasourceCate": "prometheus",
"targets": [
{
"refId": "A",
"expr": "ipmi_temperature_celsius{ident='$node'}",
"legend": "{{name}}"
}
],
"transformations": [
{
"id": "organize",
"options": {}
}
],
"name": "Temperatures",
"links": [],
"description": "",
"options": {
"tooltip": {
"mode": "multi"
},
"legend": {
"displayMode": "list",
"placement": "bottom"
},
"standardOptions": {
"util": "none"
},
"thresholds": {
"steps": [
{
"color": "green",
"value": null,
"type": "base"
},
{
"color": "red",
"value": 80
}
]
}
},
"custom": {
"drawStyle": "lines",
"lineInterpolation": "linear",
"spanNulls": false,
"lineWidth": 1,
"fillOpacity": 0.5,
"gradientMode": "none",
"stack": "off",
"scaleDistribution": {
"type": "linear"
}
}
},
{
"type": "timeseries",
"id": "95c734f7-26cb-41a7-8376-49332cc220c2",
"layout": {
"h": 7,
"w": 12,
"x": 12,
"y": 12,
"i": "95c734f7-26cb-41a7-8376-49332cc220c2",
"isResizable": true
},
"version": "2.0.0",
"datasourceCate": "prometheus",
"targets": [
{
"refId": "A",
"expr": "ipmi_power_watts{ident='$node'}",
"legend": "{{name}}"
}
],
"transformations": [
{
"id": "organize",
"options": {}
}
],
"name": "Power",
"links": [],
"description": "",
"options": {
"tooltip": {
"mode": "multi"
},
"legend": {
"displayMode": "list",
"placement": "bottom"
},
"standardOptions": {
"util": "none"
},
"thresholds": {
"steps": [
{
"color": "green",
"value": null,
"type": "base"
},
{
"color": "red",
"value": 80
}
]
}
},
"custom": {
"drawStyle": "lines",
"lineInterpolation": "linear",
"spanNulls": false,
"lineWidth": 1,
"fillOpacity": 0.01,
"gradientMode": "none",
"stack": "off",
"scaleDistribution": {
"type": "linear"
}
}
},
{
"type": "timeseries",
"id": "0313f34f-afcf-41e9-8f69-9a3dbd4b2e56",
"layout": {
"h": 7,
"w": 12,
"x": 0,
"y": 19,
"i": "0313f34f-afcf-41e9-8f69-9a3dbd4b2e56",
"isResizable": true
},
"version": "2.0.0",
"datasourceCate": "prometheus",
"targets": [
{
"refId": "A",
"expr": "ipmi_fan_speed_rpm{ident='$node'}",
"legend": "{{name}}"
}
],
"transformations": [
{
"id": "organize",
"options": {}
}
],
"name": "Fans",
"links": [],
"description": "",
"options": {
"tooltip": {
"mode": "multi"
},
"legend": {
"displayMode": "list",
"placement": "bottom"
},
"standardOptions": {
"util": "none"
},
"thresholds": {
"steps": [
{
"color": "green",
"value": null,
"type": "base"
},
{
"color": "red",
"value": 80
}
]
}
},
"custom": {
"drawStyle": "lines",
"lineInterpolation": "linear",
"spanNulls": false,
"lineWidth": 1,
"fillOpacity": 0.5,
"gradientMode": "none",
"stack": "off",
"scaleDistribution": {
"type": "linear"
}
}
},
{
"type": "timeseries",
"id": "29ee004d-a95c-405d-97d1-d715fab4e1de",
"layout": {
"h": 7,
"w": 12,
"x": 12,
"y": 19,
"i": "29ee004d-a95c-405d-97d1-d715fab4e1de",
"isResizable": true
},
"version": "2.0.0",
"datasourceCate": "prometheus",
"targets": [
{
"refId": "A",
"expr": "ipmi_voltage_volts{ident='$node',name!~\"Voltage 1|Voltage 2\"}",
"legend": "{{name}}"
}
],
"transformations": [
{
"id": "organize",
"options": {}
}
],
"name": "Voltages",
"links": [],
"description": "",
"options": {
"tooltip": {
"mode": "multi"
},
"legend": {
"displayMode": "list",
"placement": "bottom"
},
"standardOptions": {
"util": "none"
},
"thresholds": {
"steps": [
{
"color": "green",
"value": null,
"type": "base"
},
{
"color": "red",
"value": 80
}
]
}
},
"custom": {
"drawStyle": "lines",
"lineInterpolation": "linear",
"spanNulls": false,
"lineWidth": 1,
"fillOpacity": 0.5,
"gradientMode": "none",
"stack": "off",
"scaleDistribution": {
"type": "linear"
}
}
}
]
},
"uuid": 1727587308068775200
}

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@@ -1425,7 +1425,7 @@
"rule_config": {
"queries": [
{
"prom_ql": "kernel_vmstat_oom_kill != 0",
"prom_ql": "increase(kernel_vmstat_oom_kill[2m]) > 0",
"severity": 2
}
]
@@ -2139,4 +2139,4 @@
"update_by": "",
"uuid": 1717556327737117000
}
]
]

View File

@@ -1,13 +1,6 @@
{
"id": 0,
"group_id": 0,
"name": "机器台账表格视图",
"ident": "",
"tags": "",
"create_at": 0,
"create_by": "",
"update_at": 0,
"update_by": "",
"configs": {
"links": [
{
@@ -28,7 +21,7 @@
"colorRange": [
"thresholds"
],
"detailUrl": "/dashboards-built-in/detail?__built-in-cate=Linux\u0026__built-in-name=Linux%20Host%20by%20Categraf%20v2\u0026ident=${__field.labels.ident}",
"detailUrl": "/built-in-components/dashboard/detail?__uuid__=1717556327744505000&ident=${__field.labels.ident}",
"textMode": "valueAndName",
"valueField": "Value"
},
@@ -98,7 +91,7 @@
"colorRange": [
"thresholds"
],
"detailUrl": "/dashboards-built-in/detail?__built-in-cate=Linux\u0026__built-in-name=Linux%20Host%20by%20Categraf%20v2\u0026ident=${__field.labels.ident}",
"detailUrl": "/built-in-components/dashboard/detail?__uuid__=1717556327744505000&ident=${__field.labels.ident}",
"textMode": "valueAndName",
"valueField": "Value"
},
@@ -171,13 +164,16 @@
"linkMode": "appendLinkColumn",
"links": [
{
"targetBlank": true,
"title": "详情",
"url": "/dashboards-built-in/detail?__built-in-cate=Linux\u0026__built-in-name=Linux%20Host%20by%20Categraf%20v2\u0026ident=${__field.labels.ident}"
"url": "/built-in-components/dashboard/detail?__uuid__=1717556327744505000&ident=${__field.labels.ident}"
}
],
"nowrap": false,
"showHeader": true,
"sortColumn": "ident",
"sortOrder": "ascend"
"sortOrder": "ascend",
"tableLayout": "fixed"
},
"datasourceCate": "prometheus",
"datasourceValue": "${prom}",
@@ -385,10 +381,5 @@
],
"version": "3.0.0"
},
"public": 0,
"public_cate": 0,
"bgids": null,
"built_in": 0,
"hide": 0,
"uuid": 1717556327742611000
}

View File

@@ -259,11 +259,11 @@
"uuid": 1717556327796195000,
"collector": "Categraf",
"typ": "Linux",
"name": "OOM 次数统计",
"name": "1分钟内 OOM 次数统计",
"unit": "none",
"note": "取自 `/proc/vmstat`,需要较高版本的内核,没记错的话应该是 4.13 以上版本",
"lang": "zh_CN",
"expression": "kernel_vmstat_oom_kill",
"expression": "increase(kernel_vmstat_oom_kill[1m])",
"created_at": 0,
"created_by": "",
"updated_at": 0,
@@ -1334,4 +1334,4 @@
"updated_at": 0,
"updated_by": ""
}
]
]

150
memsto/config_cval_cache.go Normal file
View File

@@ -0,0 +1,150 @@
package memsto
import (
"encoding/json"
"log"
"sync"
"time"
"github.com/ccfos/nightingale/v6/dumper"
"github.com/ccfos/nightingale/v6/models"
"github.com/ccfos/nightingale/v6/pkg/ctx"
"github.com/pkg/errors"
"github.com/toolkits/pkg/logger"
)
// CvalCache is an in-memory mirror of the configs table (ckey -> cval),
// refreshed periodically in the background; see NewCvalCache.
type CvalCache struct {
	statTotal int64 // row count at last sync; -1 before the first sync
	statLastUpdated int64 // max update timestamp at last sync; -1 before the first sync
	ctx *ctx.Context
	stats *Stats
	mu sync.RWMutex // guards cvals and the stat fields
	cvals map[string]string // ckey -> cval
}
// NewCvalCache builds the cache, performs a blocking initial sync
// (fatal on failure), and starts the background refresh loop.
func NewCvalCache(ctx *ctx.Context, stats *Stats) *CvalCache {
	c := &CvalCache{
		statTotal:       -1,
		statLastUpdated: -1,
		ctx:             ctx,
		stats:           stats,
		cvals:           make(map[string]string),
	}
	c.initSyncConfigs()
	return c
}
// initSyncConfigs performs the mandatory first sync (the process exits
// if it fails) and then launches the periodic refresh goroutine.
func (c *CvalCache) initSyncConfigs() {
	if err := c.syncConfigs(); err != nil {
		log.Fatalln("failed to sync configs:", err)
	}
	go c.loopSyncConfigs()
}
// loopSyncConfigs re-syncs the cache every 9 seconds, forever.
func (c *CvalCache) loopSyncConfigs() {
	const interval = 9000 * time.Millisecond
	for {
		time.Sleep(interval)
		if err := c.syncConfigs(); err != nil {
			logger.Warning("failed to sync configs:", err)
		}
	}
}
// syncConfigs refreshes the cache from the DB, but only when the
// table's statistics (row count / max updated-at) have changed since
// the previous sync. Outcomes and timings are recorded in the dumper
// sync log and the prometheus gauges.
func (c *CvalCache) syncConfigs() error {
	start := time.Now()
	stat, err := models.ConfigCvalStatistics(c.ctx)
	if err != nil {
		dumper.PutSyncRecord("cvals", start.Unix(), -1, -1, "failed to query statistics: "+err.Error())
		return errors.WithMessage(err, "failed to call ConfigCvalStatistics")
	}
	// cheap change-detection: skip the full fetch when nothing changed
	if !c.statChanged(stat.Total, stat.LastUpdated) {
		c.stats.GaugeCronDuration.WithLabelValues("sync_cvals").Set(0)
		c.stats.GaugeSyncNumber.WithLabelValues("sync_cvals").Set(0)
		dumper.PutSyncRecord("cvals", start.Unix(), -1, -1, "not changed")
		return nil
	}
	cvals, err := models.ConfigsGetAll(c.ctx)
	if err != nil {
		dumper.PutSyncRecord("cvals", start.Unix(), -1, -1, "failed to query records: "+err.Error())
		return errors.WithMessage(err, "failed to call ConfigsGet")
	}
	c.Set(cvals, stat.Total, stat.LastUpdated)
	ms := time.Since(start).Milliseconds()
	c.stats.GaugeCronDuration.WithLabelValues("sync_cvals").Set(float64(ms))
	// NOTE(review): len(c.cvals) is read here without holding c.mu;
	// only this goroutine writes the map (via Set, under lock), so this
	// appears benign, but confirm no other writers exist.
	c.stats.GaugeSyncNumber.WithLabelValues("sync_cvals").Set(float64(len(c.cvals)))
	logger.Infof("timer: sync cvals done, cost: %dms", ms)
	dumper.PutSyncRecord("cvals", start.Unix(), ms, len(c.cvals), "success")
	return nil
}
// statChanged reports whether the table statistics differ from the
// values captured at the previous sync.
func (c *CvalCache) statChanged(total int64, updated int64) bool {
	return c.statTotal != total || c.statLastUpdated != updated
}
// Set replaces the cached key/value pairs with the given snapshot and
// records the table statistics it came from.
//
// Fix: the map is rebuilt from scratch instead of merged in place. The
// old code only overwrote/added keys, so config rows deleted from the
// DB were never evicted and stale values could be served indefinitely
// (cvals comes from ConfigsGetAll, i.e. it is a complete snapshot).
func (c *CvalCache) Set(cvals []*models.Configs, total int64, updated int64) {
	fresh := make(map[string]string, len(cvals))
	for _, cfg := range cvals {
		fresh[cfg.Ckey] = cfg.Cval
	}

	c.mu.Lock()
	defer c.mu.Unlock()
	c.statTotal = total
	c.statLastUpdated = updated
	c.cvals = fresh
}
// Get returns the cached value for ckey, or "" when the key is absent.
func (c *CvalCache) Get(ckey string) string {
	c.mu.RLock()
	defer c.mu.RUnlock()
	return c.cvals[ckey]
}
// GetLastUpdateTime returns the configs table's max updated timestamp
// as of the last successful sync (-1 before the first sync).
func (c *CvalCache) GetLastUpdateTime() int64 {
	c.mu.RLock()
	defer c.mu.RUnlock()
	return c.statLastUpdated
}
// SiteInfo mirrors the JSON document stored under the "site_info"
// config key.
type SiteInfo struct {
	PrintBodyPaths []string `json:"print_body_paths"` // request paths whose bodies should be logged
	PrintAccessLog bool `json:"print_access_log"` // whether HTTP access logging is enabled
}
// GetSiteInfo decodes the "site_info" config entry. A missing key or
// malformed JSON yields a zero-valued SiteInfo (decode errors are
// logged, not returned).
//
// Fix: the previous version took c.mu.RLock here and then called
// c.Get, which RLocks the same mutex again. A recursive RLock on
// sync.RWMutex can deadlock if a writer queues between the two
// acquisitions, so locking is now left entirely to Get.
func (c *CvalCache) GetSiteInfo() *SiteInfo {
	si := SiteInfo{}
	if siteInfoStr := c.Get("site_info"); siteInfoStr != "" {
		if err := json.Unmarshal([]byte(siteInfoStr), &si); err != nil {
			logger.Errorf("Failed to unmarshal site info: %v", err)
		}
	}
	return &si
}
// PrintBodyPaths returns the configured body-logging paths as a set for
// O(1) membership tests.
func (c *CvalCache) PrintBodyPaths() map[string]struct{} {
	paths := c.GetSiteInfo().PrintBodyPaths
	set := make(map[string]struct{}, len(paths))
	for _, path := range paths {
		set[path] = struct{}{}
	}
	return set
}
// PrintAccessLog reports whether HTTP access logging is enabled in the
// site_info config.
func (c *CvalCache) PrintAccessLog() bool {
	return c.GetSiteInfo().PrintAccessLog
}

View File

@@ -160,8 +160,9 @@ func (tc *TargetCacheType) syncTargets() error {
}
m := make(map[string]*models.Target)
if tc.ctx.IsCenter {
metaMap := tc.GetHostMetas(lst)
metaMap := tc.GetHostMetas(lst)
if len(metaMap) > 0 {
for i := 0; i < len(lst); i++ {
if meta, ok := metaMap[lst[i].Ident]; ok {
lst[i].FillMeta(meta)

View File

@@ -67,6 +67,8 @@ type AlertCurEvent struct {
Claimant string `json:"claimant" gorm:"-"`
SubRuleId int64 `json:"sub_rule_id" gorm:"-"`
ExtraInfo []string `json:"extra_info" gorm:"-"`
Target *Target `json:"target" gorm:"-"`
RecoverConfig RecoverConfig `json:"recover_config" gorm:"-"`
}
func (e *AlertCurEvent) TableName() string {
@@ -341,7 +343,7 @@ func (e *AlertCurEvent) DB2Mem() {
continue
}
arr := strings.Split(pair, "=")
arr := strings.SplitN(pair, "=", 2)
if len(arr) != 2 {
continue
}
@@ -418,7 +420,8 @@ func (e *AlertCurEvent) FillNotifyGroups(ctx *ctx.Context, cache map[int64]*User
return nil
}
func AlertCurEventTotal(ctx *ctx.Context, prods []string, bgids []int64, stime, etime int64, severity int, dsIds []int64, cates []string, query string) (int64, error) {
func AlertCurEventTotal(ctx *ctx.Context, prods []string, bgids []int64, stime, etime int64,
severity int, dsIds []int64, cates []string, ruleId int64, query string) (int64, error) {
session := DB(ctx).Model(&AlertCurEvent{})
if stime != 0 && etime != 0 {
session = session.Where("trigger_time between ? and ?", stime, etime)
@@ -443,6 +446,10 @@ func AlertCurEventTotal(ctx *ctx.Context, prods []string, bgids []int64, stime,
session = session.Where("cate in ?", cates)
}
if ruleId > 0 {
session = session.Where("rule_id = ?", ruleId)
}
if query != "" {
arr := strings.Fields(query)
for i := 0; i < len(arr); i++ {
@@ -454,7 +461,9 @@ func AlertCurEventTotal(ctx *ctx.Context, prods []string, bgids []int64, stime,
return Count(session)
}
func AlertCurEventGets(ctx *ctx.Context, prods []string, bgids []int64, stime, etime int64, severity int, dsIds []int64, cates []string, query string, limit, offset int) ([]AlertCurEvent, error) {
func AlertCurEventsGet(ctx *ctx.Context, prods []string, bgids []int64, stime, etime int64,
severity int, dsIds []int64, cates []string, ruleId int64, query string, limit, offset int) (
[]AlertCurEvent, error) {
session := DB(ctx).Model(&AlertCurEvent{})
if stime != 0 && etime != 0 {
session = session.Where("trigger_time between ? and ?", stime, etime)
@@ -479,6 +488,10 @@ func AlertCurEventGets(ctx *ctx.Context, prods []string, bgids []int64, stime, e
session = session.Where("cate in ?", cates)
}
if ruleId > 0 {
session = session.Where("rule_id = ?", ruleId)
}
if query != "" {
arr := strings.Fields(query)
for i := 0; i < len(arr); i++ {
@@ -499,6 +512,26 @@ func AlertCurEventGets(ctx *ctx.Context, prods []string, bgids []int64, stime, e
return lst, err
}
// AlertCurEventCountByRuleId returns a map of rule_id -> number of current
// alert events whose trigger_time falls within [stime, etime].
// When rids is non-empty the query is restricted to those rule ids; an
// empty rids keeps the original behavior of aggregating every rule.
// On query failure the error is logged and nil is returned, so callers
// must tolerate a nil map.
func AlertCurEventCountByRuleId(ctx *ctx.Context, rids []int64, stime, etime int64) map[int64]int64 {
	type Row struct {
		RuleId int64
		Cnt    int64
	}

	session := DB(ctx).Model(&AlertCurEvent{}).Select("rule_id, count(*) as cnt").
		Where("trigger_time between ? and ?", stime, etime)

	// The original implementation ignored rids except for sizing the map;
	// filtering here avoids fetching rows the caller will never look up.
	if len(rids) > 0 {
		session = session.Where("rule_id in ?", rids)
	}

	var rows []Row
	err := session.Group("rule_id").Find(&rows).Error
	if err != nil {
		logger.Errorf("Failed to count group by rule_id: %v", err)
		return nil
	}

	curEventTotalByRid := make(map[int64]int64, len(rows))
	for _, r := range rows {
		curEventTotalByRid[r.RuleId] = r.Cnt
	}

	return curEventTotalByRid
}
func AlertCurEventDel(ctx *ctx.Context, ids []int64) error {
if len(ids) == 0 {
return nil

View File

@@ -121,7 +121,9 @@ func (e *AlertHisEvent) FillNotifyGroups(ctx *ctx.Context, cache map[int64]*User
// }
func AlertHisEventTotal(ctx *ctx.Context, prods []string, bgids []int64, stime, etime int64, severity int, recovered int, dsIds []int64, cates []string, query string) (int64, error) {
func AlertHisEventTotal(
ctx *ctx.Context, prods []string, bgids []int64, stime, etime int64, severity int,
recovered int, dsIds []int64, cates []string, ruleId int64, query string) (int64, error) {
session := DB(ctx).Model(&AlertHisEvent{}).Where("last_eval_time between ? and ?", stime, etime)
if len(prods) > 0 {
@@ -148,6 +150,10 @@ func AlertHisEventTotal(ctx *ctx.Context, prods []string, bgids []int64, stime,
session = session.Where("cate in ?", cates)
}
if ruleId > 0 {
session = session.Where("rule_id = ?", ruleId)
}
if query != "" {
arr := strings.Fields(query)
for i := 0; i < len(arr); i++ {
@@ -159,7 +165,9 @@ func AlertHisEventTotal(ctx *ctx.Context, prods []string, bgids []int64, stime,
return Count(session)
}
func AlertHisEventGets(ctx *ctx.Context, prods []string, bgids []int64, stime, etime int64, severity int, recovered int, dsIds []int64, cates []string, query string, limit, offset int) ([]AlertHisEvent, error) {
func AlertHisEventGets(ctx *ctx.Context, prods []string, bgids []int64, stime, etime int64,
severity int, recovered int, dsIds []int64, cates []string, ruleId int64, query string,
limit, offset int) ([]AlertHisEvent, error) {
session := DB(ctx).Where("last_eval_time between ? and ?", stime, etime)
if len(prods) != 0 {
@@ -186,6 +194,10 @@ func AlertHisEventGets(ctx *ctx.Context, prods []string, bgids []int64, stime, e
session = session.Where("cate in ?", cates)
}
if ruleId > 0 {
session = session.Where("rule_id = ?", ruleId)
}
if query != "" {
arr := strings.Fields(query)
for i := 0; i < len(arr); i++ {

View File

@@ -108,7 +108,7 @@ func AlertMuteGet(ctx *ctx.Context, where string, args ...interface{}) (*AlertMu
return lst[0], err
}
func AlertMuteGets(ctx *ctx.Context, prods []string, bgid int64, query string) (lst []AlertMute, err error) {
func AlertMuteGets(ctx *ctx.Context, prods []string, bgid int64, disabled int, query string) (lst []AlertMute, err error) {
session := DB(ctx)
if bgid != -1 {
@@ -119,6 +119,14 @@ func AlertMuteGets(ctx *ctx.Context, prods []string, bgid int64, query string) (
session = session.Where("prod in (?)", prods)
}
if disabled != -1 {
if disabled == 0 {
session = session.Where("disabled = 0")
} else {
session = session.Where("disabled = 1")
}
}
if query != "" {
arr := strings.Fields(query)
for i := 0; i < len(arr); i++ {
@@ -287,16 +295,9 @@ func AlertMuteStatistics(ctx *ctx.Context) (*Statistics, error) {
return s, err
}
// clean expired first
buf := int64(30)
err := DB(ctx).Where("etime < ? and mute_time_type = 0", time.Now().Unix()-buf).Delete(new(AlertMute)).Error
if err != nil {
return nil, err
}
session := DB(ctx).Model(&AlertMute{}).Select("count(*) as total", "max(update_at) as last_updated")
err = session.Find(&stats).Error
err := session.Find(&stats).Error
if err != nil {
return nil, err
}
@@ -308,7 +309,7 @@ func AlertMuteGetsAll(ctx *ctx.Context) ([]*AlertMute, error) {
// get my cluster's mutes
var lst []*AlertMute
if !ctx.IsCenter {
lst, err := poster.GetByUrls[[]*AlertMute](ctx, "/v1/n9e/alert-mutes")
lst, err := poster.GetByUrls[[]*AlertMute](ctx, "/v1/n9e/alert-mutes?disabled=0")
if err != nil {
return nil, err
}
@@ -318,7 +319,7 @@ func AlertMuteGetsAll(ctx *ctx.Context) ([]*AlertMute, error) {
return lst, err
}
session := DB(ctx).Model(&AlertMute{})
session := DB(ctx).Model(&AlertMute{}).Where("disabled = 0")
err := session.Find(&lst).Error
if err != nil {

View File

@@ -98,6 +98,8 @@ type AlertRule struct {
UpdateAt int64 `json:"update_at"`
UpdateBy string `json:"update_by"`
UUID int64 `json:"uuid" gorm:"-"` // tpl identifier
CurEventCount int64 `json:"cur_event_count" gorm:"-"`
UpdateByNickname string `json:"update_by_nickname" gorm:"-"` // for fe
}
type Tpl struct {
@@ -126,6 +128,19 @@ type PromRuleConfig struct {
AlgoParams interface{} `json:"algo_params"`
}
type RecoverJudge int
const (
Origin RecoverJudge = 0
RecoverWithoutData RecoverJudge = 1
RecoverOnCondition RecoverJudge = 2
)
type RecoverConfig struct {
JudgeType RecoverJudge `json:"judge_type"`
RecoverExp string `json:"recover_exp"`
}
type HostRuleConfig struct {
Queries []HostQuery `json:"queries"`
Triggers []HostTrigger `json:"triggers"`
@@ -133,8 +148,9 @@ type HostRuleConfig struct {
}
type PromQuery struct {
PromQl string `json:"prom_ql"`
Severity int `json:"severity"`
PromQl string `json:"prom_ql"`
Severity int `json:"severity"`
RecoverConfig RecoverConfig `json:"recover_config"`
}
type HostTrigger struct {
@@ -161,10 +177,13 @@ type Trigger struct {
Duration int `json:"duration,omitempty"`
Percent int `json:"percent,omitempty"`
Joins []Join `json:"joins"`
JoinRef string `json:"join_ref"`
RecoverConfig RecoverConfig `json:"recover_config"`
}
type Join struct {
JoinType string `json:"join_type"`
Ref string `json:"ref"`
On []string `json:"on"`
}
@@ -176,9 +195,10 @@ func GetHostsQuery(queries []HostQuery) []map[string]interface{} {
case "group_ids":
ids := ParseInt64(q.Values)
if q.Op == "==" {
m["group_id in (?)"] = ids
m["target_busi_group.group_id in (?)"] = ids
} else {
m["group_id not in (?)"] = ids
m["target.ident not in (select target_ident "+
"from target_busi_group where group_id in (?))"] = ids
}
case "tags":
lst := []string{}
@@ -818,7 +838,8 @@ func AlertRuleGetsAll(ctx *ctx.Context) ([]*AlertRule, error) {
return lst, nil
}
func AlertRulesGetsBy(ctx *ctx.Context, prods []string, query, algorithm, cluster string, cates []string, disabled int) ([]*AlertRule, error) {
func AlertRulesGetsBy(ctx *ctx.Context, prods []string, query, algorithm, cluster string,
cates []string, disabled int) ([]*AlertRule, error) {
session := DB(ctx)
if len(prods) > 0 {

View File

@@ -286,3 +286,53 @@ func BoardSetHide(ctx *ctx.Context, ids []int64) error {
return nil
})
}
// BoardGetsByBids resolves the given board ids to a list of
// {busi_group_name, busi_group_id, board_id, board_name} maps.
// Boards whose business group cannot be found are silently skipped.
// An empty bids yields an empty (non-nil) result without touching the DB.
func BoardGetsByBids(ctx *ctx.Context, bids []int64) ([]map[string]interface{}, error) {
	result := make([]map[string]interface{}, 0)
	if len(bids) == 0 {
		// nothing requested; avoid emitting an "id IN (NULL)" query
		return result, nil
	}

	var boards []Board
	if err := DB(ctx).Where("id IN ?", bids).Find(&boards).Error; err != nil {
		return nil, err
	}

	// collect the distinct group ids referenced by the boards
	groupIDs := make([]int64, 0)
	groupIDSet := make(map[int64]struct{})
	for _, board := range boards {
		if _, exists := groupIDSet[board.GroupId]; !exists {
			groupIDs = append(groupIDs, board.GroupId)
			groupIDSet[board.GroupId] = struct{}{}
		}
	}

	if len(groupIDs) == 0 {
		return result, nil
	}

	// fetch every needed BusiGroup in one query
	var busiGroups []BusiGroup
	if err := DB(ctx).Where("id IN ?", groupIDs).Find(&busiGroups).Error; err != nil {
		return nil, err
	}

	// index groups by id for O(1) lookups below
	groupMap := make(map[int64]BusiGroup, len(busiGroups))
	for _, bg := range busiGroups {
		groupMap[bg.Id] = bg
	}

	for _, board := range boards {
		busiGroup, exists := groupMap[board.GroupId]
		if !exists {
			// board points at a missing business group; skip it
			continue
		}
		result = append(result, map[string]interface{}{
			"busi_group_name": busiGroup.Name,
			"busi_group_id":   busiGroup.Id,
			"board_id":        board.Id,
			"board_name":      board.Name,
		})
	}

	return result, nil
}

View File

@@ -11,7 +11,7 @@ import (
// BuiltinComponent represents a builtin component along with its metadata.
type BuiltinComponent struct {
ID uint64 `json:"id" gorm:"primaryKey;type:bigint;autoIncrement;comment:'unique identifier'"`
Ident string `json:"ident" gorm:"type:varchar(191);not null;index:idx_ident,sort:asc;comment:'identifier of component'"`
Ident string `json:"ident" gorm:"type:varchar(191);not null;uniqueIndex:idx_ident,sort:asc;comment:'identifier of component'"`
Logo string `json:"logo" gorm:"type:varchar(191);not null;comment:'logo of component'"`
Readme string `json:"readme" gorm:"type:text;not null;comment:'readme of component'"`
CreatedAt int64 `json:"created_at" gorm:"type:bigint;not null;default:0;comment:'create time'"`

View File

@@ -2,6 +2,7 @@ package models
import (
"errors"
"fmt"
"strings"
"time"
@@ -217,3 +218,10 @@ func BuiltinMetricCollectors(ctx *ctx.Context, lang, typ, query string) ([]strin
err := session.Select("distinct(collector)").Pluck("collector", &collectors).Error
return collectors, err
}
// BuiltinMetricBatchUpdateColumn rewrites every BuiltinMetric row whose
// column col currently equals old so it holds new instead, stamping
// updated_by. A no-op when old == new.
func BuiltinMetricBatchUpdateColumn(ctx *ctx.Context, col, old, new, updatedBy string) error {
	if old == new {
		return nil
	}
	// col is interpolated into the SQL text below, so restrict it to a
	// plain identifier to rule out SQL injection from a caller-supplied
	// column name.
	if !isPlainColumnIdent(col) {
		return fmt.Errorf("invalid column name: %q", col)
	}
	return DB(ctx).Model(&BuiltinMetric{}).Where(fmt.Sprintf("%s = ?", col), old).Updates(map[string]interface{}{col: new, "updated_by": updatedBy}).Error
}

// isPlainColumnIdent reports whether s is a non-empty ASCII identifier
// (letters, digits, underscore; not starting with a digit).
func isPlainColumnIdent(s string) bool {
	if s == "" {
		return false
	}
	for i, r := range s {
		switch {
		case r == '_', r >= 'a' && r <= 'z', r >= 'A' && r <= 'Z':
			// ok anywhere
		case r >= '0' && r <= '9':
			if i == 0 {
				return false
			}
		default:
			return false
		}
	}
	return true
}

View File

@@ -9,18 +9,19 @@ import (
)
type BuiltinPayload struct {
ID int64 `json:"id" gorm:"primaryKey;type:bigint;autoIncrement;comment:'unique identifier'"`
Type string `json:"type" gorm:"type:varchar(191);not null;index:idx_type,sort:asc;comment:'type of payload'"` // Alert Dashboard Collet
Component string `json:"component" gorm:"type:varchar(191);not null;index:idx_component,sort:asc;comment:'component of payload'"` // Host MySQL Redis
Cate string `json:"cate" gorm:"type:varchar(191);not null;comment:'category of payload'"` // categraf_v1 telegraf_v1
Name string `json:"name" gorm:"type:varchar(191);not null;index:idx_buildinpayload_name,sort:asc;comment:'name of payload'"` //
Tags string `json:"tags" gorm:"type:varchar(191);not null;default:'';comment:'tags of payload'"` // {"host":"
Content string `json:"content" gorm:"type:longtext;not null;comment:'content of payload'"`
UUID int64 `json:"uuid" gorm:"type:bigint;not null;index:idx_uuid;comment:'uuid of payload'"`
CreatedAt int64 `json:"created_at" gorm:"type:bigint;not null;default:0;comment:'create time'"`
CreatedBy string `json:"created_by" gorm:"type:varchar(191);not null;default:'';comment:'creator'"`
UpdatedAt int64 `json:"updated_at" gorm:"type:bigint;not null;default:0;comment:'update time'"`
UpdatedBy string `json:"updated_by" gorm:"type:varchar(191);not null;default:'';comment:'updater'"`
ID int64 `json:"id" gorm:"primaryKey;type:bigint;autoIncrement;comment:'unique identifier'"`
Type string `json:"type" gorm:"type:varchar(191);not null;index:idx_type,sort:asc;comment:'type of payload'"` // Alert Dashboard Collet
Component string `json:"component" gorm:"type:varchar(191);not null;index:idx_component,sort:asc;comment:'component of payload'"` //
ComponentID uint64 `json:"component_id" gorm:"type:bigint;index:idx_component,sort:asc;comment:'component_id of payload'"` // ComponentID which the payload belongs to
Cate string `json:"cate" gorm:"type:varchar(191);not null;comment:'category of payload'"` // categraf_v1 telegraf_v1
Name string `json:"name" gorm:"type:varchar(191);not null;index:idx_buildinpayload_name,sort:asc;comment:'name of payload'"` //
Tags string `json:"tags" gorm:"type:varchar(191);not null;default:'';comment:'tags of payload'"` // {"host":"
Content string `json:"content" gorm:"type:longtext;not null;comment:'content of payload'"`
UUID int64 `json:"uuid" gorm:"type:bigint;not null;index:idx_uuid;comment:'uuid of payload'"`
CreatedAt int64 `json:"created_at" gorm:"type:bigint;not null;default:0;comment:'create time'"`
CreatedBy string `json:"created_by" gorm:"type:varchar(191);not null;default:'';comment:'creator'"`
UpdatedAt int64 `json:"updated_at" gorm:"type:bigint;not null;default:0;comment:'update time'"`
UpdatedBy string `json:"updated_by" gorm:"type:varchar(191);not null;default:'';comment:'updater'"`
}
func (bp *BuiltinPayload) TableName() string {
@@ -33,9 +34,8 @@ func (bp *BuiltinPayload) Verify() error {
return errors.New("type is blank")
}
bp.Component = strings.TrimSpace(bp.Component)
if bp.Component == "" {
return errors.New("component is blank")
if bp.ComponentID == 0 {
return errors.New("component_id is blank")
}
if bp.Name == "" {
@@ -47,7 +47,7 @@ func (bp *BuiltinPayload) Verify() error {
func BuiltinPayloadExists(ctx *ctx.Context, bp *BuiltinPayload) (bool, error) {
var count int64
err := DB(ctx).Model(bp).Where("type = ? AND component = ? AND name = ? AND cate = ?", bp.Type, bp.Component, bp.Name, bp.Cate).Count(&count).Error
err := DB(ctx).Model(bp).Where("type = ? AND component_id = ? AND name = ? AND cate = ?", bp.Type, bp.ComponentID, bp.Name, bp.Cate).Count(&count).Error
if err != nil {
return false, err
}
@@ -78,7 +78,7 @@ func (bp *BuiltinPayload) Update(ctx *ctx.Context, req BuiltinPayload) error {
return err
}
if bp.Type != req.Type || bp.Component != req.Component || bp.Name != req.Name {
if bp.Type != req.Type || bp.ComponentID != req.ComponentID || bp.Name != req.Name {
exists, err := BuiltinPayloadExists(ctx, &req)
if err != nil {
return err
@@ -117,13 +117,13 @@ func BuiltinPayloadGet(ctx *ctx.Context, where string, args ...interface{}) (*Bu
return &bp, nil
}
func BuiltinPayloadGets(ctx *ctx.Context, typ, component, cate, query string) ([]*BuiltinPayload, error) {
func BuiltinPayloadGets(ctx *ctx.Context, componentId uint64, typ, cate, query string) ([]*BuiltinPayload, error) {
session := DB(ctx)
if typ != "" {
session = session.Where("type = ?", typ)
}
if component != "" {
session = session.Where("component = ?", component)
if componentId != 0 {
session = session.Where("component_id = ?", componentId)
}
if cate != "" {
@@ -144,9 +144,9 @@ func BuiltinPayloadGets(ctx *ctx.Context, typ, component, cate, query string) ([
}
// get cates of BuiltinPayload by type and component, return []string
func BuiltinPayloadCates(ctx *ctx.Context, typ, component string) ([]string, error) {
func BuiltinPayloadCates(ctx *ctx.Context, typ string, componentID uint64) ([]string, error) {
var cates []string
err := DB(ctx).Model(new(BuiltinPayload)).Where("type = ? and component = ?", typ, component).Distinct("cate").Pluck("cate", &cates).Error
err := DB(ctx).Model(new(BuiltinPayload)).Where("type = ? and component_id = ?", typ, componentID).Distinct("cate").Pluck("cate", &cates).Error
return cates, err
}
@@ -163,3 +163,37 @@ func BuiltinPayloadComponents(ctx *ctx.Context, typ, cate string) (string, error
}
return components[0], nil
}
// InitBuiltinPayloads migrates legacy BuiltinPayload rows (which only
// carried a component name) to the new component-id format by backfilling
// ComponentID from the BuiltinComponent ident -> id mapping.
// Rows whose component ident no longer exists are left untouched.
func InitBuiltinPayloads(ctx *ctx.Context) error {
	components, err := BuiltinComponentGets(ctx, "")
	if err != nil {
		return err
	}

	// build ident -> id lookup once
	identToId := make(map[string]uint64, len(components))
	for _, component := range components {
		identToId[component.Ident] = component.ID
	}

	var lst []*BuiltinPayload
	err = DB(ctx).Where("component_id = 0 or component_id is NULL").Find(&lst).Error
	if err != nil {
		return err
	}

	// Only persist rows we could actually resolve; the original code saved
	// the whole list, rewriting unmatched rows without changing anything.
	updated := make([]*BuiltinPayload, 0, len(lst))
	for _, bp := range lst {
		componentId, ok := identToId[bp.Component]
		if !ok {
			// component ident has no matching BuiltinComponent; skip
			continue
		}
		bp.ComponentID = componentId
		updated = append(updated, bp)
	}

	if len(updated) == 0 {
		return nil
	}

	return DB(ctx).Save(&updated).Error
}

View File

@@ -134,7 +134,7 @@ func (bg *BusiGroup) Del(ctx *ctx.Context) error {
return errors.New("Some alert subscribes still in the BusiGroup")
}
has, err = Exists(DB(ctx).Model(&Target{}).Where("group_id=?", bg.Id))
has, err = Exists(DB(ctx).Model(&TargetBusiGroup{}).Where("group_id=?", bg.Id))
if err != nil {
return err
}

View File

@@ -106,10 +106,8 @@ func InitRSAPassWord(ctx *ctx.Context) (string, error) {
func ConfigsGet(ctx *ctx.Context, ckey string) (string, error) { //select built-in type configs
if !ctx.IsCenter {
if !ctx.IsCenter {
s, err := poster.GetByUrls[string](ctx, "/v1/n9e/config?key="+ckey)
return s, err
}
s, err := poster.GetByUrls[string](ctx, "/v1/n9e/config?key="+ckey)
return s, err
}
var lst []string
@@ -125,6 +123,22 @@ func ConfigsGet(ctx *ctx.Context, ckey string) (string, error) { //select built-
return "", nil
}
// ConfigsGetAll returns every built-in (non-external) config entry as
// ckey/cval pairs. Edge nodes do not own the config table, so they fetch
// the list from the center via /v1/n9e/all-configs.
func ConfigsGetAll(ctx *ctx.Context) ([]*Configs, error) {
	if !ctx.IsCenter {
		return poster.GetByUrls[[]*Configs](ctx, "/v1/n9e/all-configs")
	}

	var configs []*Configs
	if err := DB(ctx).Model(&Configs{}).Select("ckey, cval").
		Where("ckey!='' and external=? ", 0).Find(&configs).Error; err != nil {
		return nil, errors.WithMessage(err, "failed to query configs")
	}

	return configs, nil
}
func ConfigsSet(ctx *ctx.Context, ckey, cval string) error {
return ConfigsSetWithUname(ctx, ckey, cval, "default")
}
@@ -355,3 +369,19 @@ func ConfigUserVariableGetDecryptMap(context *ctx.Context, privateKey []byte, pa
return ret, nil
}
// ConfigCvalStatistics returns total count and latest update time of the
// built-in (non-external) config entries. Edge nodes proxy the request to
// the center via /v1/n9e/statistic?name=cval.
func ConfigCvalStatistics(context *ctx.Context) (*Statistics, error) {
	if !context.IsCenter {
		return poster.GetByUrls[*Statistics](context, "/v1/n9e/statistic?name=cval")
	}

	session := DB(context).Model(&Configs{}).Select("count(*) as total",
		"max(update_at) as last_updated").Where("ckey!='' and external=? ", 0) // built-in config

	var stats []*Statistics
	err := session.Find(&stats).Error
	if err != nil {
		return nil, err
	}

	// An aggregate query normally yields exactly one row, but guard against
	// an empty result instead of panicking on stats[0].
	if len(stats) == 0 {
		return &Statistics{}, nil
	}

	return stats[0], nil
}

View File

@@ -35,6 +35,7 @@ type Datasource struct {
UpdatedBy string `json:"updated_by"`
IsDefault bool `json:"is_default"`
Transport *http.Transport `json:"-" gorm:"-"`
ForceSave bool `json:"force_save" gorm:"-"`
}
type Auth struct {

View File

@@ -1,69 +0,0 @@
package models
import (
"encoding/json"
"fmt"
"github.com/ccfos/nightingale/v6/ibex/server/config"
"github.com/ccfos/nightingale/v6/pkg/ctx"
"github.com/ccfos/nightingale/v6/pkg/poster"
"gorm.io/gorm"
)
// IbexCount runs COUNT(*) on the query already composed in tx and returns
// the resulting row count.
func IbexCount(tx *gorm.DB) (int64, error) {
	var total int64
	err := tx.Count(&total).Error
	return total, err
}
// tht returns the sharded task-host table name for a task id; rows are
// spread across 100 tables (task_host_0 .. task_host_99) by id modulo 100.
func tht(id int64) string {
	shard := id % 100
	return fmt.Sprint("task_host_", shard)
}
// TableRecordGets fetches rows from an arbitrary ibex table into a value
// of slice type T. On the center node it queries the local database
// directly (unfiltered when where/args are empty); on edge nodes it
// forwards the request to the center over /ibex/v1/table/record/list.
func TableRecordGets[T any](ctx *ctx.Context, table, where string, args ...interface{}) (lst T, err error) {
	if config.C.IsCenter {
		if where == "" || len(args) == 0 {
			err = DB(ctx).Table(table).Find(&lst).Error
		} else {
			err = DB(ctx).Table(table).Where(where, args...).Find(&lst).Error
		}
		return
	}

	return poster.PostByUrlsWithResp[T](ctx, "/ibex/v1/table/record/list", map[string]interface{}{
		"table": table,
		"where": where,
		"args":  args,
	})
}
// TableRecordCount counts rows of an arbitrary ibex table. On the center
// node it counts locally (the whole table when where/args are empty); on
// edge nodes the request is proxied to the center over
// /ibex/v1/table/record/count.
func TableRecordCount(ctx *ctx.Context, table, where string, args ...interface{}) (int64, error) {
	if config.C.IsCenter {
		if where == "" || len(args) == 0 {
			return IbexCount(DB(ctx).Table(table))
		}

		return IbexCount(DB(ctx).Table(table).Where(where, args...))
	}

	return poster.PostByUrlsWithResp[int64](ctx, "/ibex/v1/table/record/count", map[string]interface{}{
		"table": table,
		"where": where,
		"args":  args,
	})
}
var IBEX_HOST_DOING = "ibex-host-doing"
// CacheRecordGets loads all values of the ibex-host-doing redis hash and
// unmarshals each JSON value into a T. A redis read error is deliberately
// ignored (treated as an empty set); a JSON decode error aborts with that
// error.
func CacheRecordGets[T any](ctx *ctx.Context) ([]T, error) {
	lst := make([]T, 0)

	values, _ := ctx.Redis.HVals(ctx.Ctx, IBEX_HOST_DOING).Result()
	for _, val := range values {
		t := new(T)
		if err := json.Unmarshal([]byte(val), t); err != nil {
			return nil, err
		}
		lst = append(lst, *t)
	}

	return lst, nil
}

View File

@@ -1,112 +0,0 @@
package models
import (
"fmt"
"time"
"github.com/ccfos/nightingale/v6/pkg/ctx"
"gorm.io/gorm"
)
// TaskAction records the most recent control action requested for a task
// (start/cancel/kill/pause) together with the unix time it was issued.
type TaskAction struct {
	Id     int64  `gorm:"column:id;primaryKey"`
	Action string `gorm:"column:action;size:32;not null"`
	Clock  int64  `gorm:"column:clock;not null;default:0"`
}

// TableName maps TaskAction onto the task_action table.
func (TaskAction) TableName() string {
	return "task_action"
}
// TaskActionGet fetches the first TaskAction matching the condition;
// it returns (nil, nil) when no row matches.
func TaskActionGet(ctx *ctx.Context, where string, args ...interface{}) (*TaskAction, error) {
	var act TaskAction
	res := DB(ctx).Where(where, args...).Find(&act)
	if res.Error != nil {
		return nil, res.Error
	}
	if res.RowsAffected == 0 {
		return nil, nil
	}
	return &act, nil
}
// TaskActionExistsIds filters ids down to the ones that actually exist in
// the task_action table; an empty input is returned as-is.
func TaskActionExistsIds(ctx *ctx.Context, ids []int64) ([]int64, error) {
	if len(ids) == 0 {
		return ids, nil
	}

	var found []int64
	err := DB(ctx).Model(&TaskAction{}).Where("id in ?", ids).Pluck("id", &found).Error
	return found, err
}
// CancelWaitingHosts flips every still-waiting host of task id to
// "cancelled" so the scheduler will not dispatch them.
func CancelWaitingHosts(ctx *ctx.Context, id int64) error {
	return DB(ctx).Table(tht(id)).Where("id = ? and status = ?", id, "waiting").Update("status", "cancelled").Error
}
// StartTask clears the scheduler assignment of task id so an idle
// scheduler can claim it and begin dispatching.
func StartTask(ctx *ctx.Context, id int64) error {
	return DB(ctx).Model(&TaskScheduler{}).Where("id = ?", id).Update("scheduler", "").Error
}
// CancelTask cancels a task by cancelling all of its waiting hosts;
// hosts already running are left to finish.
func CancelTask(ctx *ctx.Context, id int64) error {
	return CancelWaitingHosts(ctx, id)
}
// KillTask first cancels all waiting hosts of task id, then — in a single
// transaction — marks the in-flight work for killing: doing records get
// action "kill" (skipping rows already set to kill) and "running" rows in
// the sharded task_host table move to status "killing".
func KillTask(ctx *ctx.Context, id int64) error {
	if err := CancelWaitingHosts(ctx, id); err != nil {
		return err
	}

	now := time.Now().Unix()

	return DB(ctx).Transaction(func(tx *gorm.DB) error {
		err := tx.Model(&TaskHostDoing{}).Where("id = ? and action <> ?", id, "kill").Updates(map[string]interface{}{
			"clock":  now,
			"action": "kill",
		}).Error
		if err != nil {
			return err
		}

		return tx.Table(tht(id)).Where("id = ? and status = ?", id, "running").Update("status", "killing").Error
	})
}
// Update validates the requested control action, persists it together
// with the current unix time, and then triggers the matching task state
// transition. "pause" needs no extra work; any other value is rejected.
func (a *TaskAction) Update(ctx *ctx.Context, action string) error {
	switch action {
	case "start", "cancel", "kill", "pause":
		// recognized action
	default:
		return fmt.Errorf("action invalid")
	}

	err := DB(ctx).Model(a).Updates(map[string]interface{}{
		"action": action,
		"clock":  time.Now().Unix(),
	}).Error
	if err != nil {
		return err
	}

	switch action {
	case "start":
		return StartTask(ctx, a.Id)
	case "cancel":
		return CancelTask(ctx, a.Id)
	case "kill":
		return KillTask(ctx, a.Id)
	}

	return nil
}
// LongTaskIds returns ids of task actions whose clock is older than two
// weeks (604800 seconds per week), i.e. long-lived tasks eligible for
// cleanup.
func LongTaskIds(ctx *ctx.Context) ([]int64, error) {
	clock := time.Now().Unix() - 604800*2
	var ids []int64
	err := DB(ctx).Model(&TaskAction{}).Where("clock < ?", clock).Pluck("id", &ids).Error
	return ids, err
}

View File

@@ -1,262 +0,0 @@
package models
import (
"fmt"
"sync"
"time"
"github.com/ccfos/nightingale/v6/ibex/server/config"
"github.com/ccfos/nightingale/v6/pkg/ctx"
"github.com/ccfos/nightingale/v6/pkg/poster"
"github.com/ccfos/nightingale/v6/storage"
"github.com/toolkits/pkg/logger"
"gorm.io/gorm"
"gorm.io/gorm/clause"
)
// TaskHost is one host's execution record of a task, stored in the
// sharded task_host_<id%100> tables. II is the physical auto-increment
// key; (Id, Host) is the logical unique key.
type TaskHost struct {
	II     int64  `gorm:"column:ii;primaryKey;autoIncrement" json:"-"`
	Id     int64  `gorm:"column:id;uniqueIndex:idx_id_host;not null" json:"id"`
	Host   string `gorm:"column:host;uniqueIndex:idx_id_host;size:128;not null" json:"host"`
	Status string `gorm:"column:status;size:32;not null" json:"status"`
	Stdout string `gorm:"column:stdout;type:text" json:"stdout"`
	Stderr string `gorm:"column:stderr;type:text" json:"stderr"`
}
// Upsert inserts the record into its shard table, or — when the (id, host)
// row already exists — refreshes only status/stdout/stderr.
func (taskHost *TaskHost) Upsert(ctx *ctx.Context) error {
	return DB(ctx).Table(tht(taskHost.Id)).Clauses(clause.OnConflict{
		Columns:   []clause.Column{{Name: "id"}, {Name: "host"}},
		DoUpdates: clause.AssignmentColumns([]string{"status", "stdout", "stderr"}),
	}).Create(taskHost).Error
}
// Create inserts the record into its shard table on the center node; edge
// nodes forward it to the center over /ibex/v1/task/host.
func (taskHost *TaskHost) Create(ctx *ctx.Context) error {
	if config.C.IsCenter {
		return DB(ctx).Table(tht(taskHost.Id)).Create(taskHost).Error
	}

	return poster.PostByUrls(ctx, "/ibex/v1/task/host", taskHost)
}
// TaskHostUpserts upserts a batch of task-host records. On the center it
// writes each row locally and collects per-record errors keyed by
// "<id>:<host>"; edge nodes forward the whole batch to the center.
// An empty batch is an error.
func TaskHostUpserts(ctx *ctx.Context, lst []TaskHost) (map[string]error, error) {
	if len(lst) == 0 {
		return nil, fmt.Errorf("empty list")
	}

	if !config.C.IsCenter {
		return poster.PostByUrlsWithResp[map[string]error](ctx, "/ibex/v1/task/hosts/upsert", lst)
	}

	errs := make(map[string]error)
	for _, th := range lst {
		if err := th.Upsert(ctx); err != nil {
			errs[fmt.Sprintf("%d:%s", th.Id, th.Host)] = err
		}
	}

	return errs, nil
}
// TaskHostGet fetches the (id, host) record from the task's shard table;
// (nil, nil) when it does not exist.
func TaskHostGet(ctx *ctx.Context, id int64, host string) (*TaskHost, error) {
	var ret []*TaskHost
	err := DB(ctx).Table(tht(id)).Where("id=? and host=?", id, host).Find(&ret).Error
	if err != nil {
		return nil, err
	}

	if len(ret) == 0 {
		return nil, nil
	}

	return ret[0], nil
}
// MarkDoneStatus records the final status and output of one host of a task.
//
// Routing:
//   - edgeAlertTriggered: the task was generated by an edge-local alert
//     while disconnected from the center, so the result is staged in the
//     local cache (CacheMarkDone) instead of the database.
//   - non-center nodes forward the result to the center via /ibex/v1/mark/done.
//   - on the center: if the matching doing record (id, host, clock) is gone,
//     the host was presumably marked timeout earlier; only a row still in
//     status "timeout" gets its late output stored. Otherwise the shard row
//     is updated and the doing record deleted inside one transaction.
func MarkDoneStatus(ctx *ctx.Context, id, clock int64, host, status, stdout, stderr string, edgeAlertTriggered ...bool) error {
	if len(edgeAlertTriggered) > 0 && edgeAlertTriggered[0] {
		return CacheMarkDone(ctx, TaskHost{
			Id:     id,
			Host:   host,
			Status: status,
			Stdout: stdout,
			Stderr: stderr,
		})
	}

	if !config.C.IsCenter {
		return poster.PostByUrls(ctx, "/ibex/v1/mark/done", map[string]interface{}{
			"id":     id,
			"clock":  clock,
			"host":   host,
			"status": status,
			"stdout": stdout,
			"stderr": stderr,
		})
	}

	count, err := TableRecordCount(ctx, TaskHostDoing{}.TableName(), "id=? and host=? and clock=?", id, host, clock)
	if err != nil {
		return err
	}

	if count == 0 {
		// The host may report results after being marked timeout; store the
		// late stdout/stderr anyway so users can still see them.
		count, err = TableRecordCount(ctx, tht(id), "id=? and host=? and status=?", id, host, "timeout")
		if err != nil {
			return err
		}

		if count == 1 {
			return DB(ctx).Table(tht(id)).Where("id=? and host=?", id, host).Updates(map[string]interface{}{
				"status": status,
				"stdout": stdout,
				"stderr": stderr,
			}).Error
		}

		return nil
	}

	return DB(ctx).Transaction(func(tx *gorm.DB) error {
		err = tx.Table(tht(id)).Where("id=? and host=?", id, host).Updates(map[string]interface{}{
			"status": status,
			"stdout": stdout,
			"stderr": stderr,
		}).Error
		if err != nil {
			return err
		}

		if err = tx.Where("id=? and host=?", id, host).Delete(&TaskHostDoing{}).Error; err != nil {
			return err
		}

		return nil
	})
}
// RealTimeUpdateOutput streams intermediate stdout/stderr of a still
// running host into its shard row without touching the status column.
func RealTimeUpdateOutput(ctx *ctx.Context, id int64, host, stdout, stderr string) error {
	return DB(ctx).Transaction(func(tx *gorm.DB) error {
		err := tx.Table(tht(id)).Where("id=? and host=?", id, host).Updates(map[string]interface{}{
			"stdout": stdout,
			"stderr": stderr,
		}).Error
		if err != nil {
			return err
		}

		return nil
	})
}
// CacheMarkDone removes the host's entry from the redis doing-hash and
// pushes the finished record onto the in-memory result cache, from where
// ReportCacheResult later reports it.
func CacheMarkDone(ctx *ctx.Context, taskHost TaskHost) error {
	if err := ctx.Redis.HDel(ctx.Ctx, IBEX_HOST_DOING, hostDoingCacheKey(taskHost.Id, taskHost.Host)).Err(); err != nil {
		return err
	}

	TaskHostCachePush(taskHost)

	return nil
}
// WaitingHostList returns the waiting hosts of task id ordered by
// insertion (ii); an optional limit caps the number of rows returned.
func WaitingHostList(ctx *ctx.Context, id int64, limit ...int) ([]TaskHost, error) {
	query := DB(ctx).Table(tht(id)).Where("id = ? and status = 'waiting'", id).Order("ii")
	if len(limit) > 0 {
		query = query.Limit(limit[0])
	}

	var waiting []TaskHost
	err := query.Find(&waiting).Error
	return waiting, err
}
// WaitingHostCount counts hosts of task id still waiting to run.
func WaitingHostCount(ctx *ctx.Context, id int64) (int64, error) {
	return TableRecordCount(ctx, tht(id), "id=? and status='waiting'", id)
}
// UnexpectedHostCount counts hosts of task id that ended abnormally
// (failed, timeout, or killfailed).
func UnexpectedHostCount(ctx *ctx.Context, id int64) (int64, error) {
	return TableRecordCount(ctx, tht(id), "id=? and status in ('failed', 'timeout', 'killfailed')", id)
}
// IngStatusHostCount counts hosts of task id that are still in progress
// (waiting, running, or killing).
func IngStatusHostCount(ctx *ctx.Context, id int64) (int64, error) {
	return TableRecordCount(ctx, tht(id), "id=? and status in ('waiting', 'running', 'killing')", id)
}
// RunWaitingHosts transitions the given hosts from waiting to running:
// in one transaction each shard row is set to "running" and a matching
// TaskHostDoing record with action "start" is created. A no-op on an
// empty slice.
func RunWaitingHosts(ctx *ctx.Context, taskHosts []TaskHost) error {
	count := len(taskHosts)
	if count == 0 {
		return nil
	}

	now := time.Now().Unix()

	return DB(ctx).Transaction(func(tx *gorm.DB) error {
		for i := 0; i < count; i++ {
			if err := tx.Table(tht(taskHosts[i].Id)).Where("id=? and host=?", taskHosts[i].Id, taskHosts[i].Host).Update("status", "running").Error; err != nil {
				return err
			}

			err := tx.Create(&TaskHostDoing{Id: taskHosts[i].Id, Host: taskHosts[i].Host, Clock: now, Action: "start"}).Error
			if err != nil {
				return err
			}
		}

		return nil
	})
}
// TaskHostStatus returns only (id, host, status) of every host of task id,
// ordered by insertion — a lightweight projection for progress views.
func TaskHostStatus(ctx *ctx.Context, id int64) ([]TaskHost, error) {
	var ret []TaskHost
	err := DB(ctx).Table(tht(id)).Select("id", "host", "status").Where("id=?", id).Order("ii").Find(&ret).Error
	return ret, err
}
// TaskHostGets returns the full records of every host of task id,
// ordered by insertion.
func TaskHostGets(ctx *ctx.Context, id int64) ([]TaskHost, error) {
	var ret []TaskHost
	err := DB(ctx).Table(tht(id)).Where("id=?", id).Order("ii").Find(&ret).Error
	return ret, err
}
var (
	// taskHostCache buffers finished edge-side results until they are
	// reported by ReportCacheResult; guarded by taskHostLock.
	taskHostCache = make([]TaskHost, 0, 128)
	taskHostLock  sync.RWMutex
)

// TaskHostCachePush appends one finished record to the result cache.
func TaskHostCachePush(taskHost TaskHost) {
	taskHostLock.Lock()
	defer taskHostLock.Unlock()

	taskHostCache = append(taskHostCache, taskHost)
}

// TaskHostCachePopAll atomically takes the whole cache and leaves a fresh
// empty buffer behind.
func TaskHostCachePopAll() []TaskHost {
	taskHostLock.Lock()
	defer taskHostLock.Unlock()

	all := taskHostCache
	taskHostCache = make([]TaskHost, 0, 128)

	return all
}
// ReportCacheResult drains the local result cache and reports the records
// to the database. Records whose id is >= storage.IDINITIAL were generated
// locally on an edge node while disconnected from the center (self-healing
// scripts fired by local alert rules); such ids could collide across edge
// sites, so they are only logged and never written back. Per-record upsert
// errors are logged as warnings rather than failing the whole batch.
func ReportCacheResult(ctx *ctx.Context) error {
	result := TaskHostCachePopAll()

	reports := make([]TaskHost, 0)
	for _, th := range result {
		// Ids at or above the redis initial id come from edge-local alerting
		// while disconnected from the center; skip reporting them to avoid
		// id collisions between different edge sites.
		if th.Id >= storage.IDINITIAL {
			logger.Infof("task[%d] host[%s] done, result:[%v]", th.Id, th.Host, th)
		} else {
			reports = append(reports, th)
		}
	}

	if len(reports) == 0 {
		return nil
	}

	errs, err := TaskHostUpserts(ctx, reports)
	if err != nil {
		return err
	}

	for key, err := range errs {
		logger.Warningf("report task_host_cache[%s] result error: %v", key, err)
	}

	return nil
}

Some files were not shown because too many files have changed in this diff. Show More