Compare commits

..

1 Commits

Author SHA1 Message Date
Xu Bin
0292779711 refactor: embedded ibex (#2136) 2024-09-03 15:32:35 +08:00
104 changed files with 4482 additions and 5158 deletions

View File

@@ -88,13 +88,12 @@
- 报告Bug优先推荐提交[夜莺GitHub Issue](https://github.com/ccfos/nightingale/issues/new?assignees=&labels=kind%2Fbug&projects=&template=bug_report.yml)
- 推荐完整浏览[夜莺文档站点](https://flashcat.cloud/docs/content/flashcat-monitor/nightingale-v7/introduction/),了解更多信息
- 推荐搜索关注夜莺公众号,第一时间获取社区动态:`夜莺监控Nightingale`
- 日常问题交流
- QQ群730841964
- [加入微信群](https://download.flashcat.cloud/ulric/20241008153952.png),如果二维码过期了,可以联系我(我的微信:`picobyte`)拉群,备注: `夜莺互助群`
- 日常问题交流推荐加入[知识星球](https://download.flashcat.cloud/ulric/20240319095409.png),也可以加我微信 `picobyte`,备注:`夜莺加群-<公司>-<姓名>` 拉入微信群,不过研发人员主要是关注 github issue 和星球,微信群关注较少
## 广受关注
[![Stargazers over time](https://api.star-history.com/svg?repos=ccfos/nightingale&type=Date)](https://star-history.com/#ccfos/nightingale&Date)
## 社区共建
- ❇️ 请阅读浏览[夜莺开源项目和社区治理架构草案](./doc/community-governance.md),真诚欢迎每一位用户、开发者、公司以及组织,使用夜莺监控、积极反馈 Bug、提交功能需求、分享最佳实践共建专业、活跃的夜莺开源社区。
- ❤️ 夜莺贡献者

View File

@@ -60,6 +60,10 @@ func (a *Alert) PreCheck(configDir string) {
a.Heartbeat.Interval = 1000
}
if a.Heartbeat.EngineName == "" {
a.Heartbeat.EngineName = "default"
}
if a.EngineDelay == 0 {
a.EngineDelay = 30
}

View File

@@ -16,6 +16,7 @@ import (
"github.com/ccfos/nightingale/v6/alert/sender"
"github.com/ccfos/nightingale/v6/conf"
"github.com/ccfos/nightingale/v6/dumper"
"github.com/ccfos/nightingale/v6/ibex"
"github.com/ccfos/nightingale/v6/memsto"
"github.com/ccfos/nightingale/v6/models"
"github.com/ccfos/nightingale/v6/pkg/ctx"
@@ -26,8 +27,6 @@ import (
"github.com/ccfos/nightingale/v6/pushgw/writer"
"github.com/ccfos/nightingale/v6/storage"
"github.com/ccfos/nightingale/v6/tdengine"
"github.com/flashcatcloud/ibex/src/cmd/ibex"
)
func Initialize(configDir string, cryptoKey string) (func(), error) {
@@ -41,14 +40,14 @@ func Initialize(configDir string, cryptoKey string) (func(), error) {
return nil, err
}
ctx := ctx.NewContext(context.Background(), nil, false, config.CenterApi)
var redis storage.Redis
redis, err = storage.NewRedis(config.Redis)
if err != nil {
return nil, err
}
ctx := ctx.NewContext(context.Background(), nil, redis, false, config.CenterApi)
syncStats := memsto.NewSyncStats()
alertStats := astats.NewSyncStats()
@@ -74,13 +73,13 @@ func Initialize(configDir string, cryptoKey string) (func(), error) {
rt := router.New(config.HTTP, config.Alert, alertMuteCache, targetCache, busiGroupCache, alertStats, ctx, externalProcessors)
if config.Ibex.Enable {
ibex.ServerStart(false, nil, redis, config.HTTP.APIForService.BasicAuth, config.Alert.Heartbeat, &config.CenterApi, r, nil, config.Ibex, config.HTTP.Port)
ibex.ServerStart(ctx, false, nil, redis, config.HTTP.APIForService.BasicAuth, config.Alert.Heartbeat, &config.CenterApi, r, nil, config.Ibex, config.HTTP.Port)
}
rt.Config(r)
dumper.ConfigRouter(r)
httpClean := httpx.Init(config.HTTP, r)
httpClean := httpx.Init(config.HTTP, context.Background(), r)
return func() {
logxClean()

View File

@@ -286,7 +286,6 @@ func (e *Dispatch) SendCallbacks(rule *models.AlertRule, notifyTarget *NotifyTar
uids := notifyTarget.ToUidList()
urls := notifyTarget.ToCallbackList()
whMap := notifyTarget.ToWebhookMap()
for _, urlStr := range urls {
if len(urlStr) == 0 {
continue
@@ -294,11 +293,6 @@ func (e *Dispatch) SendCallbacks(rule *models.AlertRule, notifyTarget *NotifyTar
cbCtx := sender.BuildCallBackContext(e.ctx, urlStr, rule, []*models.AlertCurEvent{event}, uids, e.userCache, e.alerting.WebhookBatchSend, e.Astats)
if wh, ok := whMap[cbCtx.CallBackURL]; ok && wh.Enable {
logger.Debugf("SendCallbacks: webhook[%s] is in global conf.", cbCtx.CallBackURL)
continue
}
if strings.HasPrefix(urlStr, "${ibex}") {
e.CallBacks[models.IbexDomain].CallBack(cbCtx)
continue

View File

@@ -100,32 +100,8 @@ func (s *NotifyTarget) ToWebhookList() []*models.Webhook {
return webhooks
}
func (s *NotifyTarget) ToWebhookMap() map[string]*models.Webhook {
webhookMap := make(map[string]*models.Webhook, len(s.webhooks))
for _, wh := range s.webhooks {
if wh.Batch == 0 {
wh.Batch = 1000
}
if wh.Timeout == 0 {
wh.Timeout = 10
}
if wh.RetryCount == 0 {
wh.RetryCount = 10
}
if wh.RetryInterval == 0 {
wh.RetryInterval = 10
}
webhookMap[wh.Url] = wh
}
return webhookMap
}
func (s *NotifyTarget) ToUidList() []int64 {
uids := make([]int64, 0, len(s.userMap))
uids := make([]int64, len(s.userMap))
for uid, _ := range s.userMap {
uids = append(uids, uid)
}

View File

@@ -5,7 +5,6 @@ import (
"encoding/json"
"fmt"
"math"
"reflect"
"sort"
"strings"
"time"
@@ -268,7 +267,7 @@ func (arw *AlertRuleWorker) GetTdengineAnomalyPoint(rule *models.AlertRule, dsId
if len(ruleQuery.Queries) > 0 {
seriesStore := make(map[uint64]models.DataResp)
// 将不同查询的 hash 索引分组存放
seriesTagIndexes := make(map[string]map[uint64][]uint64)
seriesTagIndexes := make([]map[uint64][]uint64, 0)
for _, query := range ruleQuery.Queries {
seriesTagIndex := make(map[uint64][]uint64)
@@ -293,13 +292,7 @@ func (arw *AlertRuleWorker) GetTdengineAnomalyPoint(rule *models.AlertRule, dsId
// 此条日志很重要,是告警判断的现场值
logger.Debugf("rule_eval rid:%d req:%+v resp:%+v", rule.Id, query, series)
MakeSeriesMap(series, seriesTagIndex, seriesStore)
ref, err := GetQueryRef(query)
if err != nil {
logger.Warningf("rule_eval rid:%d query ref error: %v query:%+v", rule.Id, err, query)
arw.processor.Stats.CounterRuleEvalErrorTotal.WithLabelValues(fmt.Sprintf("%v", arw.processor.DatasourceId()), GET_RULE_CONFIG).Inc()
continue
}
seriesTagIndexes[ref] = seriesTagIndex
seriesTagIndexes = append(seriesTagIndexes, seriesTagIndex)
}
points, recoverPoints = GetAnomalyPoint(rule.Id, ruleQuery, seriesTagIndexes, seriesStore)
@@ -376,6 +369,11 @@ func (arw *AlertRuleWorker) GetHostAnomalyPoint(ruleConfig string) []common.Anom
}
m["ident"] = target.Ident
bg := arw.processor.BusiGroupCache.GetByBusiGroupId(target.GroupId)
if bg != nil && bg.LabelEnable == 1 {
m["busigroup"] = bg.LabelValue
}
lst = append(lst, common.NewAnomalyPoint(trigger.Type, m, now, float64(now-target.UpdateAt), trigger.Severity))
}
case "offset":
@@ -424,6 +422,11 @@ func (arw *AlertRuleWorker) GetHostAnomalyPoint(ruleConfig string) []common.Anom
}
m["ident"] = host
bg := arw.processor.BusiGroupCache.GetByBusiGroupId(target.GroupId)
if bg != nil && bg.LabelEnable == 1 {
m["busigroup"] = bg.LabelValue
}
lst = append(lst, common.NewAnomalyPoint(trigger.Type, m, now, float64(offset), trigger.Severity))
}
case "pct_target_miss":
@@ -452,7 +455,7 @@ func (arw *AlertRuleWorker) GetHostAnomalyPoint(ruleConfig string) []common.Anom
return lst
}
func GetAnomalyPoint(ruleId int64, ruleQuery models.RuleQuery, seriesTagIndexes map[string]map[uint64][]uint64, seriesStore map[uint64]models.DataResp) ([]common.AnomalyPoint, []common.AnomalyPoint) {
func GetAnomalyPoint(ruleId int64, ruleQuery models.RuleQuery, seriesTagIndexes []map[uint64][]uint64, seriesStore map[uint64]models.DataResp) ([]common.AnomalyPoint, []common.AnomalyPoint) {
points := []common.AnomalyPoint{}
recoverPoints := []common.AnomalyPoint{}
@@ -460,13 +463,61 @@ func GetAnomalyPoint(ruleId int64, ruleQuery models.RuleQuery, seriesTagIndexes
return points, recoverPoints
}
if len(seriesTagIndexes) == 0 {
return points, recoverPoints
}
for _, trigger := range ruleQuery.Triggers {
// seriesTagIndex 的 key 仅做分组使用value 为每组 series 的 hash
seriesTagIndex := ProcessJoins(ruleId, trigger, seriesTagIndexes, seriesStore)
seriesTagIndex := make(map[uint64][]uint64)
if len(trigger.Joins) == 0 {
// 没有 join 条件,走原逻辑
last := seriesTagIndexes[0]
for i := 1; i < len(seriesTagIndexes); i++ {
last = originalJoin(last, seriesTagIndexes[i])
}
seriesTagIndex = last
} else {
// 有 join 条件,按条件依次合并
if len(seriesTagIndexes) != len(trigger.Joins)+1 {
logger.Errorf("rule_eval rid:%d queries' count: %d not match join condition's count: %d", ruleId, len(seriesTagIndexes), len(trigger.Joins))
continue
}
last := seriesTagIndexes[0]
lastRehashed := rehashSet(last, seriesStore, trigger.Joins[0].On)
for i := range trigger.Joins {
cur := seriesTagIndexes[i+1]
switch trigger.Joins[i].JoinType {
case "original":
last = originalJoin(last, cur)
case "none":
last = noneJoin(last, cur)
case "cartesian":
last = cartesianJoin(last, cur)
case "inner_join":
curRehashed := rehashSet(cur, seriesStore, trigger.Joins[i].On)
lastRehashed = onJoin(lastRehashed, curRehashed, Inner)
last = flatten(lastRehashed)
case "left_join":
curRehashed := rehashSet(cur, seriesStore, trigger.Joins[i].On)
lastRehashed = onJoin(lastRehashed, curRehashed, Left)
last = flatten(lastRehashed)
case "right_join":
curRehashed := rehashSet(cur, seriesStore, trigger.Joins[i].On)
lastRehashed = onJoin(curRehashed, lastRehashed, Right)
last = flatten(lastRehashed)
case "left_exclude":
curRehashed := rehashSet(cur, seriesStore, trigger.Joins[i].On)
lastRehashed = exclude(lastRehashed, curRehashed)
last = flatten(lastRehashed)
case "right_exclude":
curRehashed := rehashSet(cur, seriesStore, trigger.Joins[i].On)
lastRehashed = exclude(curRehashed, lastRehashed)
last = flatten(lastRehashed)
default:
logger.Warningf("rule_eval rid:%d join type:%s not support", ruleId, trigger.Joins[i].JoinType)
}
}
seriesTagIndex = last
}
for _, seriesHash := range seriesTagIndex {
sort.Slice(seriesHash, func(i, j int) bool {
@@ -562,7 +613,7 @@ func flatten(rehashed map[uint64][][]uint64) map[uint64][]uint64 {
// [[A3{data_base=2, table=board}B2{data_base=2, table=alert}][A4{data_base=2, table=alert}B2{data_base=2, table=alert}]]
func onJoin(reHashTagIndex1 map[uint64][][]uint64, reHashTagIndex2 map[uint64][][]uint64, joinType JoinType) map[uint64][][]uint64 {
reHashTagIndex := make(map[uint64][][]uint64)
for rehash := range reHashTagIndex1 {
for rehash, _ := range reHashTagIndex1 {
if _, ok := reHashTagIndex2[rehash]; ok {
// 若有 rehash 相同的记录,两两合并
for i1 := range reHashTagIndex1[rehash] {
@@ -605,7 +656,6 @@ func rehashSet(seriesTagIndex1 map[uint64][]uint64, seriesStore map[uint64]model
if !exists {
continue
}
rehash := hash.GetTargetTagHash(series.Metric, on)
if _, ok := reHashTagIndex[rehash]; !ok {
reHashTagIndex[rehash] = make([][]uint64, 0)
@@ -697,100 +747,3 @@ func mergeNewArray(arg ...[]uint64) []uint64 {
}
return res
}
func ProcessJoins(ruleId int64, trigger models.Trigger, seriesTagIndexes map[string]map[uint64][]uint64, seriesStore map[uint64]models.DataResp) map[uint64][]uint64 {
last := make(map[uint64][]uint64)
if len(seriesTagIndexes) == 0 {
return last
}
if len(trigger.Joins) == 0 {
idx := 0
for _, seriesTagIndex := range seriesTagIndexes {
if idx == 0 {
last = seriesTagIndex
} else {
last = originalJoin(last, seriesTagIndex)
}
idx++
}
return last
}
// 有 join 条件,按条件依次合并
if len(seriesTagIndexes) < len(trigger.Joins)+1 {
logger.Errorf("rule_eval rid:%d queries' count: %d not match join condition's count: %d", ruleId, len(seriesTagIndexes), len(trigger.Joins))
return nil
}
last = seriesTagIndexes[trigger.JoinRef]
lastRehashed := rehashSet(last, seriesStore, trigger.Joins[0].On)
for i := range trigger.Joins {
cur := seriesTagIndexes[trigger.Joins[i].Ref]
switch trigger.Joins[i].JoinType {
case "original":
last = originalJoin(last, cur)
case "none":
last = noneJoin(last, cur)
case "cartesian":
last = cartesianJoin(last, cur)
case "inner_join":
curRehashed := rehashSet(cur, seriesStore, trigger.Joins[i].On)
lastRehashed = onJoin(lastRehashed, curRehashed, Inner)
last = flatten(lastRehashed)
case "left_join":
curRehashed := rehashSet(cur, seriesStore, trigger.Joins[i].On)
lastRehashed = onJoin(lastRehashed, curRehashed, Left)
last = flatten(lastRehashed)
case "right_join":
curRehashed := rehashSet(cur, seriesStore, trigger.Joins[i].On)
lastRehashed = onJoin(curRehashed, lastRehashed, Right)
last = flatten(lastRehashed)
case "left_exclude":
curRehashed := rehashSet(cur, seriesStore, trigger.Joins[i].On)
lastRehashed = exclude(lastRehashed, curRehashed)
last = flatten(lastRehashed)
case "right_exclude":
curRehashed := rehashSet(cur, seriesStore, trigger.Joins[i].On)
lastRehashed = exclude(curRehashed, lastRehashed)
last = flatten(lastRehashed)
default:
logger.Warningf("rule_eval rid:%d join type:%s not support", ruleId, trigger.Joins[i].JoinType)
}
}
return last
}
func GetQueryRef(query interface{}) (string, error) {
// 首先检查是否为 map
if m, ok := query.(map[string]interface{}); ok {
if ref, exists := m["ref"]; exists {
if refStr, ok := ref.(string); ok {
return refStr, nil
}
return "", fmt.Errorf("ref 字段不是字符串类型")
}
return "", fmt.Errorf("query 中没有找到 ref 字段")
}
// 如果不是 map则按原来的方式处理结构体
v := reflect.ValueOf(query)
if v.Kind() == reflect.Ptr {
v = v.Elem()
}
if v.Kind() != reflect.Struct {
return "", fmt.Errorf("query not a struct or map")
}
refField := v.FieldByName("Ref")
if !refField.IsValid() {
return "", fmt.Errorf("not find ref field")
}
if refField.Kind() != reflect.String {
return "", fmt.Errorf("ref not a string")
}
return refField.String(), nil
}

View File

@@ -114,7 +114,7 @@ func BgNotMatchMuteStrategy(rule *models.AlertRule, event *models.AlertCurEvent,
target, exists := targetCache.Get(ident)
// 对于包含ident的告警事件check一下ident所属bg和rule所属bg是否相同
// 如果告警规则选择了只在本BG生效那其他BG的机器就不能因此规则产生告警
if exists && !target.MatchGroupId(rule.GroupId) {
if exists && target.GroupId != rule.GroupId {
logger.Debugf("[%s] mute: rule_eval:%d cluster:%s", "BgNotMatchMuteStrategy", rule.Id, event.Cluster)
return true
}

View File

@@ -212,14 +212,6 @@ func (p *Processor) BuildEvent(anomalyPoint common.AnomalyPoint, from string, no
event.ExtraConfig = p.rule.ExtraConfigJSON
event.PromQl = anomalyPoint.Query
if p.target != "" {
if pt, exist := p.TargetCache.Get(p.target); exist {
event.Target = pt
} else {
logger.Infof("Target[ident: %s] doesn't exist in cache.", p.target)
}
}
if event.TriggerValues != "" && strings.Count(event.TriggerValues, "$") > 1 {
// TriggerValues 有多个变量,将多个变量都放到 TriggerValue 中
event.TriggerValue = event.TriggerValues
@@ -505,7 +497,6 @@ func (p *Processor) pushEventToQueue(e *models.AlertCurEvent) {
func (p *Processor) RecoverAlertCurEventFromDb() {
p.pendings = NewAlertCurEventMap(nil)
p.pendingsUseByRecover = NewAlertCurEventMap(nil)
curEvents, err := models.AlertCurEventGetByRuleIdAndDsId(p.ctx, p.rule.Id, p.datasourceId)
if err != nil {
@@ -527,11 +518,6 @@ func (p *Processor) RecoverAlertCurEventFromDb() {
}
event.DB2Mem()
target, exists := p.TargetCache.Get(event.TargetIdent)
if exists {
event.Target = target
}
fireMap[event.Hash] = event
e := *event
pendingsUseByRecoverMap[event.Hash] = &e

View File

@@ -56,12 +56,11 @@ func (rrc *RecordRuleContext) Key() string {
}
func (rrc *RecordRuleContext) Hash() string {
return str.MD5(fmt.Sprintf("%d_%s_%s_%d_%s",
return str.MD5(fmt.Sprintf("%d_%s_%s_%d",
rrc.rule.Id,
rrc.rule.CronPattern,
rrc.rule.PromQl,
rrc.datasourceId,
rrc.rule.AppendTags,
))
}

View File

@@ -34,7 +34,7 @@ func (rt *Router) pushEventToQueue(c *gin.Context) {
continue
}
arr := strings.SplitN(pair, "=", 2)
arr := strings.Split(pair, "=")
if len(arr) != 2 {
continue
}

View File

@@ -56,8 +56,8 @@ const (
Triggered = "triggered"
)
func createFeishuCardBody() feishuCard {
return feishuCard{
var (
body = feishuCard{
feishu: feishu{Msgtype: "interactive"},
Card: Cards{
Config: Conf{
@@ -90,7 +90,7 @@ func createFeishuCardBody() feishuCard {
},
},
}
}
)
func (fs *FeishuCardSender) CallBack(ctx CallBackContext) {
if len(ctx.Events) == 0 || len(ctx.CallBackURL) == 0 {
@@ -121,7 +121,6 @@ func (fs *FeishuCardSender) CallBack(ctx CallBackContext) {
}
SendTitle := fmt.Sprintf("🔔 %s", ctx.Events[0].RuleName)
body := createFeishuCardBody()
body.Card.Header.Title.Content = SendTitle
body.Card.Header.Template = color
body.Card.Elements[0].Text.Content = message
@@ -154,7 +153,6 @@ func (fs *FeishuCardSender) Send(ctx MessageContext) {
}
SendTitle := fmt.Sprintf("🔔 %s", ctx.Events[0].RuleName)
body := createFeishuCardBody()
body.Card.Header.Title.Content = SendTitle
body.Card.Header.Template = color
body.Card.Elements[0].Text.Content = message

View File

@@ -12,8 +12,7 @@ import (
"github.com/ccfos/nightingale/v6/memsto"
"github.com/ccfos/nightingale/v6/models"
"github.com/ccfos/nightingale/v6/pkg/ctx"
imodels "github.com/flashcatcloud/ibex/src/models"
"github.com/flashcatcloud/ibex/src/storage"
"github.com/ccfos/nightingale/v6/storage"
"github.com/toolkits/pkg/logger"
)
@@ -43,7 +42,7 @@ func (c *IbexCallBacker) CallBack(ctx CallBackContext) {
}
func (c *IbexCallBacker) handleIbex(ctx *ctx.Context, url string, event *models.AlertCurEvent) {
if imodels.DB() == nil && ctx.IsCenter {
if models.DB(ctx) == nil && ctx.IsCenter {
logger.Warning("event_callback_ibex: db is nil")
return
}
@@ -108,7 +107,7 @@ func CallIbex(ctx *ctx.Context, id int64, host string,
continue
}
arr := strings.SplitN(pair, "=", 2)
arr := strings.Split(pair, "=")
if len(arr) != 2 {
continue
}
@@ -142,7 +141,7 @@ func CallIbex(ctx *ctx.Context, id int64, host string,
AlertTriggered: true,
}
id, err = TaskAdd(in, tpl.UpdateBy, ctx.IsCenter)
id, err = TaskAdd(ctx, in, tpl.UpdateBy, ctx.IsCenter)
if err != nil {
logger.Errorf("event_callback_ibex: call ibex fail: %v", err)
return
@@ -181,16 +180,16 @@ func canDoIbex(username string, tpl *models.TaskTpl, host string, targetCache *m
return false, nil
}
return target.MatchGroupId(tpl.GroupId), nil
return target.GroupId == tpl.GroupId, nil
}
func TaskAdd(f models.TaskForm, authUser string, isCenter bool) (int64, error) {
func TaskAdd(ctx *ctx.Context, f models.TaskForm, authUser string, isCenter bool) (int64, error) {
hosts := cleanHosts(f.Hosts)
if len(hosts) == 0 {
return 0, fmt.Errorf("arg(hosts) empty")
}
taskMeta := &imodels.TaskMeta{
taskMeta := &models.TaskMeta{
Title: f.Title,
Account: f.Account,
Batch: f.Batch,
@@ -213,34 +212,34 @@ func TaskAdd(f models.TaskForm, authUser string, isCenter bool) (int64, error) {
// 任务类型分为"告警规则触发"和"n9e center用户下发"两种;
// 边缘机房"告警规则触发"的任务不需要规划并且它可能是失联的无法使用db资源所以放入redis缓存中直接下发给agentd执行
if !isCenter && f.AlertTriggered {
if err := taskMeta.Create(); err != nil {
if err := taskMeta.Create(ctx); err != nil {
// 当网络不连通时生成唯一的id防止边缘机房中不同任务的id相同
// 方法是redis自增id去防止同一个机房的不同n9e edge生成的id相同
// 但没法防止不同边缘机房生成同样的id所以生成id的数据不会上报存入数据库只用于闭环执行。
taskMeta.Id, err = storage.IdGet()
taskMeta.Id, err = storage.IdGet(ctx.Redis)
if err != nil {
return 0, err
}
}
taskHost := imodels.TaskHost{
taskHost := models.TaskHost{
Id: taskMeta.Id,
Host: hosts[0],
Status: "running",
}
if err = taskHost.Create(); err != nil {
if err = taskHost.Create(ctx); err != nil {
logger.Warningf("task_add_fail: authUser=%s title=%s err=%s", authUser, taskMeta.Title, err.Error())
}
// 缓存任务元信息和待下发的任务
err = taskMeta.Cache(hosts[0])
err = taskMeta.Cache(ctx, hosts[0])
if err != nil {
return 0, err
}
} else {
// 如果是中心机房,还是保持之前的逻辑
err = taskMeta.Save(hosts, f.Action)
err = taskMeta.Save(ctx, hosts, f.Action)
if err != nil {
return 0, err
}

View File

@@ -42,7 +42,6 @@ func (fs *LarkCardSender) CallBack(ctx CallBackContext) {
}
SendTitle := fmt.Sprintf("🔔 %s", ctx.Events[0].RuleName)
body := createFeishuCardBody()
body.Card.Header.Title.Content = SendTitle
body.Card.Header.Template = color
body.Card.Elements[0].Text.Content = message
@@ -75,7 +74,6 @@ func (fs *LarkCardSender) Send(ctx MessageContext) {
}
SendTitle := fmt.Sprintf("🔔 %s", ctx.Events[0].RuleName)
body := createFeishuCardBody()
body.Card.Header.Title.Content = SendTitle
body.Card.Header.Template = color
body.Card.Elements[0].Text.Content = message

View File

@@ -166,10 +166,10 @@ func StartConsumer(ctx *ctx.Context, queue *WebhookQueue, popSize int, webhook *
retryCount := 0
for retryCount < webhook.RetryCount {
needRetry, res, err := sendWebhook(webhook, events, stats)
go RecordEvents(ctx, webhook, events, stats, res, err)
if !needRetry {
break
}
go RecordEvents(ctx, webhook, events, stats, res, err)
retryCount++
time.Sleep(time.Second * time.Duration(webhook.RetryInterval) * time.Duration(retryCount))
}

View File

@@ -14,7 +14,6 @@ type Center struct {
FlashDuty FlashDuty
EventHistoryGroupView bool
CleanNotifyRecordDay int
MigrateBusiGroupLabel bool
}
type Plugin struct {

View File

@@ -18,6 +18,7 @@ import (
"github.com/ccfos/nightingale/v6/conf"
"github.com/ccfos/nightingale/v6/cron"
"github.com/ccfos/nightingale/v6/dumper"
"github.com/ccfos/nightingale/v6/ibex"
"github.com/ccfos/nightingale/v6/memsto"
"github.com/ccfos/nightingale/v6/models"
"github.com/ccfos/nightingale/v6/models/migrate"
@@ -33,8 +34,6 @@ import (
"github.com/ccfos/nightingale/v6/pushgw/writer"
"github.com/ccfos/nightingale/v6/storage"
"github.com/ccfos/nightingale/v6/tdengine"
"github.com/flashcatcloud/ibex/src/cmd/ibex"
)
func Initialize(configDir string, cryptoKey string) (func(), error) {
@@ -61,7 +60,14 @@ func Initialize(configDir string, cryptoKey string) (func(), error) {
if err != nil {
return nil, err
}
ctx := ctx.NewContext(context.Background(), db, true)
var redis storage.Redis
redis, err = storage.NewRedis(config.Redis)
if err != nil {
return nil, err
}
ctx := ctx.NewContext(context.Background(), db, redis, true)
migrate.Migrate(db)
models.InitRoot(ctx)
@@ -72,12 +78,7 @@ func Initialize(configDir string, cryptoKey string) (func(), error) {
return nil, err
}
go integration.Init(ctx, config.Center.BuiltinIntegrationsDir)
var redis storage.Redis
redis, err = storage.NewRedis(config.Redis)
if err != nil {
return nil, err
}
integration.Init(ctx, config.Center.BuiltinIntegrationsDir)
metas := metas.New(redis)
idents := idents.New(ctx, redis)
@@ -110,13 +111,10 @@ func Initialize(configDir string, cryptoKey string) (func(), error) {
go cron.CleanNotifyRecord(ctx, config.Center.CleanNotifyRecordDay)
alertrtRouter := alertrt.New(config.HTTP, config.Alert, alertMuteCache, targetCache, busiGroupCache, alertStats, ctx, externalProcessors)
centerRouter := centerrt.New(config.HTTP, config.Center, config.Alert, config.Ibex,
cconf.Operations, dsCache, notifyConfigCache, promClients, tdengineClients,
centerRouter := centerrt.New(config.HTTP, config.Center, config.Alert, config.Ibex, cconf.Operations, dsCache, notifyConfigCache, promClients, tdengineClients,
redis, sso, ctx, metas, idents, targetCache, userCache, userGroupCache)
pushgwRouter := pushgwrt.New(config.HTTP, config.Pushgw, config.Alert, targetCache, busiGroupCache, idents, metas, writers, ctx)
go models.MigrateBg(ctx, pushgwRouter.Pushgw.BusiGroupLabelKey)
r := httpx.GinEngine(config.Global.RunMode, config.HTTP)
centerRouter.Config(r)
@@ -126,10 +124,10 @@ func Initialize(configDir string, cryptoKey string) (func(), error) {
if config.Ibex.Enable {
migrate.MigrateIbexTables(db)
ibex.ServerStart(true, db, redis, config.HTTP.APIForService.BasicAuth, config.Alert.Heartbeat, &config.CenterApi, r, centerRouter, config.Ibex, config.HTTP.Port)
ibex.ServerStart(ctx, true, db, redis, config.HTTP.APIForService.BasicAuth, config.Alert.Heartbeat, &config.CenterApi, r, centerRouter, config.Ibex, config.HTTP.Port)
}
httpClean := httpx.Init(config.HTTP, r)
httpClean := httpx.Init(config.HTTP, context.Background(), r)
return func() {
logxClean()

View File

@@ -16,12 +16,6 @@ import (
const SYSTEM = "system"
func Init(ctx *ctx.Context, builtinIntegrationsDir string) {
err := models.InitBuiltinPayloads(ctx)
if err != nil {
logger.Warning("init old builtinPayloads fail ", err)
return
}
fp := builtinIntegrationsDir
if fp == "" {
fp = path.Join(runner.Cwd, "integrations")
@@ -98,7 +92,6 @@ func Init(ctx *ctx.Context, builtinIntegrationsDir string) {
logger.Warning("update builtin component fail ", old, err)
}
}
component.ID = old.ID
}
// delete uuid is emtpy
@@ -148,13 +141,13 @@ func Init(ctx *ctx.Context, builtinIntegrationsDir string) {
cate := strings.Replace(f, ".json", "", -1)
builtinAlert := models.BuiltinPayload{
ComponentID: component.ID,
Type: "alert",
Cate: cate,
Name: alert.Name,
Tags: alert.AppendTags,
Content: string(content),
UUID: alert.UUID,
Component: component.Ident,
Type: "alert",
Cate: cate,
Name: alert.Name,
Tags: alert.AppendTags,
Content: string(content),
UUID: alert.UUID,
}
old, err := models.BuiltinPayloadGet(ctx, "uuid = ?", alert.UUID)
@@ -172,7 +165,6 @@ func Init(ctx *ctx.Context, builtinIntegrationsDir string) {
}
if old.UpdatedBy == SYSTEM {
old.ComponentID = component.ID
old.Content = string(content)
old.Name = alert.Name
old.Tags = alert.AppendTags
@@ -239,13 +231,13 @@ func Init(ctx *ctx.Context, builtinIntegrationsDir string) {
}
builtinDashboard := models.BuiltinPayload{
ComponentID: component.ID,
Type: "dashboard",
Cate: "",
Name: dashboard.Name,
Tags: dashboard.Tags,
Content: string(content),
UUID: dashboard.UUID,
Component: component.Ident,
Type: "dashboard",
Cate: "",
Name: dashboard.Name,
Tags: dashboard.Tags,
Content: string(content),
UUID: dashboard.UUID,
}
old, err := models.BuiltinPayloadGet(ctx, "uuid = ?", dashboard.UUID)
@@ -263,7 +255,6 @@ func Init(ctx *ctx.Context, builtinIntegrationsDir string) {
}
if old.UpdatedBy == SYSTEM {
old.ComponentID = component.ID
old.Content = string(content)
old.Name = dashboard.Name
old.Tags = dashboard.Tags

View File

@@ -16,7 +16,6 @@ import (
"github.com/ccfos/nightingale/v6/conf"
_ "github.com/ccfos/nightingale/v6/front/statik"
"github.com/ccfos/nightingale/v6/memsto"
"github.com/ccfos/nightingale/v6/models"
"github.com/ccfos/nightingale/v6/pkg/aop"
"github.com/ccfos/nightingale/v6/pkg/ctx"
"github.com/ccfos/nightingale/v6/pkg/httpx"
@@ -52,14 +51,9 @@ type Router struct {
UserGroupCache *memsto.UserGroupCacheType
Ctx *ctx.Context
HeartbeatHook HeartbeatHookFunc
TargetDeleteHook models.TargetDeleteHookFunc
}
func New(httpConfig httpx.Config, center cconf.Center, alert aconf.Alert, ibex conf.Ibex,
operations cconf.Operation, ds *memsto.DatasourceCacheType, ncc *memsto.NotifyConfigCacheType,
pc *prom.PromClientMap, tdendgineClients *tdengine.TdengineClientMap, redis storage.Redis,
sso *sso.SsoClient, ctx *ctx.Context, metaSet *metas.Set, idents *idents.Set,
tc *memsto.TargetCacheType, uc *memsto.UserCacheType, ugc *memsto.UserGroupCacheType) *Router {
func New(httpConfig httpx.Config, center cconf.Center, alert aconf.Alert, ibex conf.Ibex, operations cconf.Operation, ds *memsto.DatasourceCacheType, ncc *memsto.NotifyConfigCacheType, pc *prom.PromClientMap, tdendgineClients *tdengine.TdengineClientMap, redis storage.Redis, sso *sso.SsoClient, ctx *ctx.Context, metaSet *metas.Set, idents *idents.Set, tc *memsto.TargetCacheType, uc *memsto.UserCacheType, ugc *memsto.UserGroupCacheType) *Router {
return &Router{
HTTP: httpConfig,
Center: center,
@@ -79,14 +73,9 @@ func New(httpConfig httpx.Config, center cconf.Center, alert aconf.Alert, ibex c
UserGroupCache: ugc,
Ctx: ctx,
HeartbeatHook: func(ident string) map[string]interface{} { return nil },
TargetDeleteHook: emptyDeleteHook,
}
}
func emptyDeleteHook(ctx *ctx.Context, idents []string) error {
return nil
}
func stat() gin.HandlerFunc {
return func(c *gin.Context) {
start := time.Now()
@@ -287,7 +276,7 @@ func (rt *Router) Config(r *gin.Engine) {
pages.POST("/targets/tags", rt.auth(), rt.user(), rt.perm("/targets/put"), rt.targetBindTagsByFE)
pages.DELETE("/targets/tags", rt.auth(), rt.user(), rt.perm("/targets/put"), rt.targetUnbindTagsByFE)
pages.PUT("/targets/note", rt.auth(), rt.user(), rt.perm("/targets/put"), rt.targetUpdateNote)
pages.PUT("/targets/bgids", rt.auth(), rt.user(), rt.perm("/targets/put"), rt.targetBindBgids)
pages.PUT("/targets/bgid", rt.auth(), rt.user(), rt.perm("/targets/put"), rt.targetUpdateBgid)
pages.POST("/builtin-cate-favorite", rt.auth(), rt.user(), rt.builtinCateFavoriteAdd)
pages.DELETE("/builtin-cate-favorite/:name", rt.auth(), rt.user(), rt.builtinCateFavoriteDel)
@@ -308,7 +297,6 @@ func (rt *Router) Config(r *gin.Engine) {
pages.POST("/busi-group/:id/board/:bid/clone", rt.auth(), rt.user(), rt.perm("/dashboards/add"), rt.bgrw(), rt.boardClone)
pages.POST("/busi-groups/boards/clones", rt.auth(), rt.user(), rt.perm("/dashboards/add"), rt.boardBatchClone)
pages.GET("/boards", rt.auth(), rt.user(), rt.boardGetsByBids)
pages.GET("/board/:bid", rt.boardGet)
pages.GET("/board/:bid/pure", rt.boardPureGet)
pages.PUT("/board/:bid", rt.auth(), rt.user(), rt.perm("/dashboards/put"), rt.boardPut)
@@ -336,7 +324,7 @@ func (rt *Router) Config(r *gin.Engine) {
pages.GET("/alert-rule/:arid/pure", rt.auth(), rt.user(), rt.perm("/alert-rules"), rt.alertRulePureGet)
pages.PUT("/busi-group/alert-rule/validate", rt.auth(), rt.user(), rt.perm("/alert-rules/put"), rt.alertRuleValidation)
pages.POST("/relabel-test", rt.auth(), rt.user(), rt.relabelTest)
pages.POST("/busi-group/:id/alert-rules/clone", rt.auth(), rt.user(), rt.perm("/alert-rules/add"), rt.bgrw(), rt.cloneToMachine)
pages.POST("/busi-group/:id/alert-rules/clone", rt.auth(), rt.user(), rt.perm("/alert-rules/post"), rt.bgrw(), rt.cloneToMachine)
pages.GET("/busi-groups/recording-rules", rt.auth(), rt.user(), rt.perm("/recording-rules"), rt.recordingRuleGetsByGids)
pages.GET("/busi-group/:id/recording-rules", rt.auth(), rt.user(), rt.perm("/recording-rules"), rt.recordingRuleGets)
@@ -475,7 +463,6 @@ func (rt *Router) Config(r *gin.Engine) {
pages.GET("/builtin-payload/:id", rt.auth(), rt.user(), rt.perm("/built-in-components"), rt.builtinPayloadGet)
pages.PUT("/builtin-payloads", rt.auth(), rt.user(), rt.perm("/built-in-components/put"), rt.builtinPayloadsPut)
pages.DELETE("/builtin-payloads", rt.auth(), rt.user(), rt.perm("/built-in-components/del"), rt.builtinPayloadsDel)
pages.GET("/builtin-payload", rt.auth(), rt.user(), rt.builtinPayloadsGetByUUIDOrID)
}
r.GET("/api/n9e/versions", func(c *gin.Context) {
@@ -572,8 +559,6 @@ func (rt *Router) Config(r *gin.Engine) {
service.POST("/notify-record", rt.notificationRecordAdd)
service.GET("/alert-cur-events-del-by-hash", rt.alertCurEventDelByHash)
service.POST("/center/heartbeat", rt.heartbeat)
}
}

View File

@@ -65,8 +65,7 @@ func (rt *Router) alertCurEventsCard(c *gin.Context) {
ginx.Dangerous(err)
// 最多获取50000个获取太多也没啥意义
list, err := models.AlertCurEventsGet(rt.Ctx, prods, bgids, stime, etime, severity, dsIds,
cates, 0, query, 50000, 0)
list, err := models.AlertCurEventGets(rt.Ctx, prods, bgids, stime, etime, severity, dsIds, cates, query, 50000, 0)
ginx.Dangerous(err)
cardmap := make(map[string]*AlertCard)
@@ -163,17 +162,13 @@ func (rt *Router) alertCurEventsList(c *gin.Context) {
cates = strings.Split(cate, ",")
}
ruleId := ginx.QueryInt64(c, "rid", 0)
bgids, err := GetBusinessGroupIds(c, rt.Ctx, rt.Center.EventHistoryGroupView)
ginx.Dangerous(err)
total, err := models.AlertCurEventTotal(rt.Ctx, prods, bgids, stime, etime, severity, dsIds,
cates, ruleId, query)
total, err := models.AlertCurEventTotal(rt.Ctx, prods, bgids, stime, etime, severity, dsIds, cates, query)
ginx.Dangerous(err)
list, err := models.AlertCurEventsGet(rt.Ctx, prods, bgids, stime, etime, severity, dsIds,
cates, ruleId, query, limit, ginx.Offset(c, limit))
list, err := models.AlertCurEventGets(rt.Ctx, prods, bgids, stime, etime, severity, dsIds, cates, query, limit, ginx.Offset(c, limit))
ginx.Dangerous(err)
cache := make(map[int64]*models.UserGroup)
@@ -206,9 +201,7 @@ func (rt *Router) checkCurEventBusiGroupRWPermission(c *gin.Context, ids []int64
for i := 0; i < len(ids); i++ {
event, err := models.AlertCurEventGetById(rt.Ctx, ids[i])
ginx.Dangerous(err)
if event == nil {
continue
}
if _, has := set[event.GroupId]; !has {
rt.bgrwCheck(c, event.GroupId)
set[event.GroupId] = struct{}{}
@@ -234,7 +227,6 @@ func (rt *Router) alertCurEventGet(c *gin.Context) {
event.RuleConfigJson = ruleConfig
}
event.LastEvalTime = event.TriggerTime
ginx.NewRender(c).Data(event, nil)
}

View File

@@ -54,17 +54,13 @@ func (rt *Router) alertHisEventsList(c *gin.Context) {
cates = strings.Split(cate, ",")
}
ruleId := ginx.QueryInt64(c, "rid", 0)
bgids, err := GetBusinessGroupIds(c, rt.Ctx, rt.Center.EventHistoryGroupView)
ginx.Dangerous(err)
total, err := models.AlertHisEventTotal(rt.Ctx, prods, bgids, stime, etime, severity,
recovered, dsIds, cates, ruleId, query)
total, err := models.AlertHisEventTotal(rt.Ctx, prods, bgids, stime, etime, severity, recovered, dsIds, cates, query)
ginx.Dangerous(err)
list, err := models.AlertHisEventGets(rt.Ctx, prods, bgids, stime, etime, severity, recovered,
dsIds, cates, ruleId, query, limit, ginx.Offset(c, limit))
list, err := models.AlertHisEventGets(rt.Ctx, prods, bgids, stime, etime, severity, recovered, dsIds, cates, query, limit, ginx.Offset(c, limit))
ginx.Dangerous(err)
cache := make(map[int64]*models.UserGroup)

View File

@@ -37,18 +37,6 @@ func (rt *Router) alertRuleGets(c *gin.Context) {
ginx.NewRender(c).Data(ars, err)
}
func getAlertCueEventTimeRange(c *gin.Context) (stime, etime int64) {
stime = ginx.QueryInt64(c, "stime", 0)
etime = ginx.QueryInt64(c, "etime", 0)
if etime == 0 {
etime = time.Now().Unix()
}
if stime == 0 || stime >= etime {
stime = etime - 30*24*int64(time.Hour.Seconds())
}
return
}
func (rt *Router) alertRuleGetsByGids(c *gin.Context) {
gids := str.IdsInt64(ginx.QueryStr(c, "gids", ""), ",")
if len(gids) > 0 {
@@ -72,30 +60,9 @@ func (rt *Router) alertRuleGetsByGids(c *gin.Context) {
ars, err := models.AlertRuleGetsByBGIds(rt.Ctx, gids)
if err == nil {
cache := make(map[int64]*models.UserGroup)
rids := make([]int64, 0, len(ars))
names := make([]string, 0, len(ars))
for i := 0; i < len(ars); i++ {
ars[i].FillNotifyGroups(rt.Ctx, cache)
ars[i].FillSeverities()
rids = append(rids, ars[i].Id)
names = append(names, ars[i].UpdateBy)
}
stime, etime := getAlertCueEventTimeRange(c)
cnt := models.AlertCurEventCountByRuleId(rt.Ctx, rids, stime, etime)
if cnt != nil {
for i := 0; i < len(ars); i++ {
ars[i].CurEventCount = cnt[ars[i].Id]
}
}
users := models.UserMapGet(rt.Ctx, "username in (?)", names)
if users != nil {
for i := 0; i < len(ars); i++ {
if user, exist := users[ars[i].UpdateBy]; exist {
ars[i].UpdateByNickname = user.Nickname
}
}
}
}
ginx.NewRender(c).Data(ars, err)
@@ -525,7 +492,7 @@ func (rt *Router) relabelTest(c *gin.Context) {
labels := make([]prompb.Label, len(f.Tags))
for i, tag := range f.Tags {
label := strings.SplitN(tag, "=", 2)
label := strings.Split(tag, "=")
if len(label) != 2 {
ginx.Bomb(http.StatusBadRequest, "tag:%s format error", tag)
}
@@ -562,15 +529,6 @@ type identListForm struct {
IdentList []string `json:"ident_list"`
}
func containsIdentOperator(s string) bool {
pattern := `ident\s*(!=|!~|=~)`
matched, err := regexp.MatchString(pattern, s)
if err != nil {
return false
}
return matched
}
func (rt *Router) cloneToMachine(c *gin.Context) {
var f identListForm
ginx.BindJSON(c, &f)
@@ -592,17 +550,10 @@ func (rt *Router) cloneToMachine(c *gin.Context) {
reterr := make(map[string]map[string]string)
for i := range alertRules {
errMsg := make(map[string]string)
reterr[alertRules[i].Name] = make(map[string]string)
if alertRules[i].Cate != "prometheus" {
errMsg["all"] = "Only Prometheus rule can be cloned to machines"
reterr[alertRules[i].Name] = errMsg
continue
}
if containsIdentOperator(alertRules[i].RuleConfig) {
errMsg["all"] = "promql is missing ident"
reterr[alertRules[i].Name] = errMsg
reterr[alertRules[i].Name]["all"] = "Only Prometheus rules can be cloned to machines"
continue
}
@@ -611,7 +562,7 @@ func (rt *Router) cloneToMachine(c *gin.Context) {
newRule := &models.AlertRule{}
if err := copier.Copy(newRule, alertRules[i]); err != nil {
errMsg[f.IdentList[j]] = fmt.Sprintf("fail to clone rule, err: %s", err)
reterr[alertRules[i].Name][f.IdentList[j]] = fmt.Sprintf("fail to clone rule, err: %s", err)
continue
}
@@ -625,21 +576,17 @@ func (rt *Router) cloneToMachine(c *gin.Context) {
exist, err := models.AlertRuleExists(rt.Ctx, 0, newRule.GroupId, newRule.DatasourceIdsJson, newRule.Name)
if err != nil {
errMsg[f.IdentList[j]] = err.Error()
reterr[alertRules[i].Name][f.IdentList[j]] = err.Error()
continue
}
if exist {
errMsg[f.IdentList[j]] = fmt.Sprintf("rule already exists, ruleName: %s", newRule.Name)
reterr[alertRules[i].Name][f.IdentList[j]] = fmt.Sprintf("rule already exists, ruleName: %s", newRule.Name)
continue
}
newRules = append(newRules, newRule)
}
if len(errMsg) > 0 {
reterr[alertRules[i].Name] = errMsg
}
}
ginx.NewRender(c).Data(reterr, models.InsertAlertRule(rt.Ctx, newRules))

View File

@@ -94,14 +94,6 @@ func (rt *Router) boardGet(c *gin.Context) {
ginx.NewRender(c).Data(board, nil)
}
// 根据 bids 参数,获取多个 board
func (rt *Router) boardGetsByBids(c *gin.Context) {
bids := str.IdsInt64(ginx.QueryStr(c, "bids", ""), ",")
boards, err := models.BoardGetsByBids(rt.Ctx, bids)
ginx.Dangerous(err)
ginx.NewRender(c).Data(boards, err)
}
func (rt *Router) boardPureGet(c *gin.Context) {
board, err := models.BoardGetByID(rt.Ctx, ginx.UrlParamInt64(c, "bid"))
ginx.Dangerous(err)

View File

@@ -4,15 +4,10 @@ import (
"net/http"
"github.com/ccfos/nightingale/v6/models"
"github.com/ccfos/nightingale/v6/pkg/ctx"
"github.com/gin-gonic/gin"
"github.com/toolkits/pkg/ginx"
"gorm.io/gorm"
)
const SYSTEM = "system"
func (rt *Router) builtinComponentsAdd(c *gin.Context) {
var lst []models.BuiltinComponent
ginx.BindJSON(c, &lst)
@@ -55,31 +50,10 @@ func (rt *Router) builtinComponentsPut(c *gin.Context) {
return
}
if bc.CreatedBy == SYSTEM {
req.Ident = bc.Ident
}
username := Username(c)
req.UpdatedBy = username
err = models.DB(rt.Ctx).Transaction(func(tx *gorm.DB) error {
tCtx := &ctx.Context{
DB: tx,
}
txErr := models.BuiltinMetricBatchUpdateColumn(tCtx, "typ", bc.Ident, req.Ident, req.UpdatedBy)
if txErr != nil {
return txErr
}
txErr = bc.Update(tCtx, req)
if txErr != nil {
return txErr
}
return nil
})
ginx.NewRender(c).Message(err)
ginx.NewRender(c).Message(bc.Update(rt.Ctx, req))
}
func (rt *Router) builtinComponentsDel(c *gin.Context) {

View File

@@ -6,7 +6,6 @@ import (
"strings"
"time"
"github.com/BurntSushi/toml"
"github.com/ccfos/nightingale/v6/models"
"github.com/gin-gonic/gin"
"github.com/toolkits/pkg/ginx"
@@ -53,15 +52,15 @@ func (rt *Router) builtinPayloadsAdd(c *gin.Context) {
}
bp := models.BuiltinPayload{
Type: lst[i].Type,
ComponentID: lst[i].ComponentID,
Cate: lst[i].Cate,
Name: rule.Name,
Tags: rule.AppendTags,
UUID: rule.UUID,
Content: string(contentBytes),
CreatedBy: username,
UpdatedBy: username,
Type: lst[i].Type,
Component: lst[i].Component,
Cate: lst[i].Cate,
Name: rule.Name,
Tags: rule.AppendTags,
UUID: rule.UUID,
Content: string(contentBytes),
CreatedBy: username,
UpdatedBy: username,
}
if err := bp.Add(rt.Ctx, username); err != nil {
@@ -82,15 +81,15 @@ func (rt *Router) builtinPayloadsAdd(c *gin.Context) {
}
bp := models.BuiltinPayload{
Type: lst[i].Type,
ComponentID: lst[i].ComponentID,
Cate: lst[i].Cate,
Name: alertRule.Name,
Tags: alertRule.AppendTags,
UUID: alertRule.UUID,
Content: lst[i].Content,
CreatedBy: username,
UpdatedBy: username,
Type: lst[i].Type,
Component: lst[i].Component,
Cate: lst[i].Cate,
Name: alertRule.Name,
Tags: alertRule.AppendTags,
UUID: alertRule.UUID,
Content: lst[i].Content,
CreatedBy: username,
UpdatedBy: username,
}
if err := bp.Add(rt.Ctx, username); err != nil {
@@ -116,15 +115,15 @@ func (rt *Router) builtinPayloadsAdd(c *gin.Context) {
}
bp := models.BuiltinPayload{
Type: lst[i].Type,
ComponentID: lst[i].ComponentID,
Cate: lst[i].Cate,
Name: dashboard.Name,
Tags: dashboard.Tags,
UUID: dashboard.UUID,
Content: string(contentBytes),
CreatedBy: username,
UpdatedBy: username,
Type: lst[i].Type,
Component: lst[i].Component,
Cate: lst[i].Cate,
Name: dashboard.Name,
Tags: dashboard.Tags,
UUID: dashboard.UUID,
Content: string(contentBytes),
CreatedBy: username,
UpdatedBy: username,
}
if err := bp.Add(rt.Ctx, username); err != nil {
@@ -145,29 +144,21 @@ func (rt *Router) builtinPayloadsAdd(c *gin.Context) {
}
bp := models.BuiltinPayload{
Type: lst[i].Type,
ComponentID: lst[i].ComponentID,
Cate: lst[i].Cate,
Name: dashboard.Name,
Tags: dashboard.Tags,
UUID: dashboard.UUID,
Content: lst[i].Content,
CreatedBy: username,
UpdatedBy: username,
Type: lst[i].Type,
Component: lst[i].Component,
Cate: lst[i].Cate,
Name: dashboard.Name,
Tags: dashboard.Tags,
UUID: dashboard.UUID,
Content: lst[i].Content,
CreatedBy: username,
UpdatedBy: username,
}
if err := bp.Add(rt.Ctx, username); err != nil {
reterr[bp.Name] = i18n.Sprintf(c.GetHeader("X-Language"), err.Error())
}
} else {
if lst[i].Type == "collect" {
c := make(map[string]interface{})
if _, err := toml.Decode(lst[i].Content, &c); err != nil {
reterr[lst[i].Name] = err.Error()
continue
}
}
if err := lst[i].Add(rt.Ctx, username); err != nil {
reterr[lst[i].Name] = i18n.Sprintf(c.GetHeader("X-Language"), err.Error())
}
@@ -180,20 +171,19 @@ func (rt *Router) builtinPayloadsAdd(c *gin.Context) {
func (rt *Router) builtinPayloadsGets(c *gin.Context) {
typ := ginx.QueryStr(c, "type", "")
ComponentID := ginx.QueryInt64(c, "component_id", 0)
component := ginx.QueryStr(c, "component", "")
cate := ginx.QueryStr(c, "cate", "")
query := ginx.QueryStr(c, "query", "")
lst, err := models.BuiltinPayloadGets(rt.Ctx, uint64(ComponentID), typ, cate, query)
lst, err := models.BuiltinPayloadGets(rt.Ctx, typ, component, cate, query)
ginx.NewRender(c).Data(lst, err)
}
func (rt *Router) builtinPayloadcatesGet(c *gin.Context) {
typ := ginx.QueryStr(c, "type", "")
ComponentID := ginx.QueryInt64(c, "component_id", 0)
component := ginx.QueryStr(c, "component", "")
cates, err := models.BuiltinPayloadCates(rt.Ctx, typ, uint64(ComponentID))
cates, err := models.BuiltinPayloadCates(rt.Ctx, typ, component)
ginx.NewRender(c).Data(cates, err)
}
@@ -239,11 +229,6 @@ func (rt *Router) builtinPayloadsPut(c *gin.Context) {
req.Name = dashboard.Name
req.Tags = dashboard.Tags
} else if req.Type == "collect" {
c := make(map[string]interface{})
if _, err := toml.Decode(req.Content, &c); err != nil {
ginx.Bomb(http.StatusBadRequest, err.Error())
}
}
username := Username(c)
@@ -260,15 +245,3 @@ func (rt *Router) builtinPayloadsDel(c *gin.Context) {
ginx.NewRender(c).Message(models.BuiltinPayloadDels(rt.Ctx, req.Ids))
}
func (rt *Router) builtinPayloadsGetByUUIDOrID(c *gin.Context) {
uuid := ginx.QueryInt64(c, "uuid", 0)
// 优先以 uuid 为准
if uuid != 0 {
ginx.NewRender(c).Data(models.BuiltinPayloadGet(rt.Ctx, "uuid = ?", uuid))
return
}
id := ginx.QueryInt64(c, "id", 0)
ginx.NewRender(c).Data(models.BuiltinPayloadGet(rt.Ctx, "id = ?", id))
}

View File

@@ -92,12 +92,10 @@ func (rt *Router) datasourceUpsert(c *gin.Context) {
var err error
var count int64
if !req.ForceSave {
err = DatasourceCheck(req)
if err != nil {
Dangerous(c, err)
return
}
err = DatasourceCheck(req)
if err != nil {
Dangerous(c, err)
return
}
if req.Id == 0 {

View File

@@ -65,23 +65,6 @@ func queryDatasourceIds(c *gin.Context) []int64 {
return ids
}
func queryStrListField(c *gin.Context, fieldName string, sep ...string) []string {
str := ginx.QueryStr(c, fieldName, "")
if str == "" {
return nil
}
lst := []string{str}
for _, s := range sep {
var newLst []string
for _, str := range lst {
newLst = append(newLst, strings.Split(str, s)...)
}
lst = newLst
}
return lst
}
type idsForm struct {
Ids []int64 `json:"ids"`
IsSyncToFlashDuty bool `json:"is_sync_to_flashduty"`

View File

@@ -6,7 +6,6 @@ import (
"errors"
"io/ioutil"
"sort"
"strconv"
"strings"
"time"
@@ -81,48 +80,16 @@ func HandleHeartbeat(c *gin.Context, ctx *ctx.Context, engineName string, metaSe
identSet.MSet(items)
if target, has := targetCache.Get(req.Hostname); has && target != nil {
gidsStr := ginx.QueryStr(c, "gid", "")
overwriteGids := ginx.QueryBool(c, "overwrite_gids", false)
gid := ginx.QueryInt64(c, "gid", 0)
hostIp := strings.TrimSpace(req.HostIp)
gids := strings.Split(gidsStr, ",")
if overwriteGids {
groupIds := make([]int64, 0)
for i := range gids {
if gids[i] == "" {
continue
}
groupId, err := strconv.ParseInt(gids[i], 10, 64)
if err != nil {
logger.Warningf("update target:%s group ids failed, err: %v", req.Hostname, err)
continue
}
groupIds = append(groupIds, groupId)
}
err := models.TargetOverrideBgids(ctx, []string{target.Ident}, groupIds)
if err != nil {
logger.Warningf("update target:%s group ids failed, err: %v", target.Ident, err)
}
} else if gidsStr != "" {
for i := range gids {
groupId, err := strconv.ParseInt(gids[i], 10, 64)
if err != nil {
logger.Warningf("update target:%s group ids failed, err: %v", req.Hostname, err)
continue
}
if !target.MatchGroupId(groupId) {
err := models.TargetBindBgids(ctx, []string{target.Ident}, []int64{groupId})
if err != nil {
logger.Warningf("update target:%s group ids failed, err: %v", target.Ident, err)
}
}
}
}
newTarget := models.Target{}
targetNeedUpdate := false
if gid != 0 && gid != target.GroupId {
newTarget.GroupId = gid
targetNeedUpdate = true
}
if hostIp != "" && hostIp != target.HostIp {
newTarget.HostIp = hostIp
targetNeedUpdate = true

View File

@@ -138,7 +138,7 @@ func (rt *Router) notifyTplPreview(c *gin.Context) {
continue
}
arr := strings.SplitN(pair, "=", 2)
arr := strings.Split(pair, "=")
if len(arr) != 2 {
continue
}

View File

@@ -52,8 +52,6 @@ func (rt *Router) targetGets(c *gin.Context) {
order := ginx.QueryStr(c, "order", "ident")
desc := ginx.QueryBool(c, "desc", false)
hosts := queryStrListField(c, "hosts", ",", " ", "\n")
var err error
if len(bgids) == 0 {
user := c.MustGet("user").(*models.User)
@@ -67,13 +65,11 @@ func (rt *Router) targetGets(c *gin.Context) {
bgids = append(bgids, 0)
}
}
options := []models.BuildTargetWhereOption{
models.BuildTargetWhereWithBgids(bgids),
models.BuildTargetWhereWithDsIds(dsIds),
models.BuildTargetWhereWithQuery(query),
models.BuildTargetWhereWithDowntime(downtime),
models.BuildTargetWhereWithHosts(hosts),
}
total, err := models.TargetTotal(rt.Ctx, options...)
ginx.Dangerous(err)
@@ -82,13 +78,6 @@ func (rt *Router) targetGets(c *gin.Context) {
ginx.Offset(c, limit), order, desc, options...)
ginx.Dangerous(err)
tgs, err := models.TargetBusiGroupsGetAll(rt.Ctx)
ginx.Dangerous(err)
for _, t := range list {
t.GroupIds = tgs[t.Ident]
}
if err == nil {
now := time.Now()
cache := make(map[int64]*models.BusiGroup)
@@ -393,15 +382,8 @@ type targetBgidForm struct {
Bgid int64 `json:"bgid"`
}
type targetBgidsForm struct {
Idents []string `json:"idents" binding:"required_without=HostIps"`
HostIps []string `json:"host_ips" binding:"required_without=Idents"`
Bgids []int64 `json:"bgids"`
Action string `json:"action"` // add del reset
}
func (rt *Router) targetBindBgids(c *gin.Context) {
var f targetBgidsForm
func (rt *Router) targetUpdateBgid(c *gin.Context) {
var f targetBgidForm
var err error
var failedResults = make(map[string]string)
ginx.BindJSON(c, &f)
@@ -417,24 +399,35 @@ func (rt *Router) targetBindBgids(c *gin.Context) {
}
user := c.MustGet("user").(*models.User)
if !user.IsAdmin() {
// 普通用户,检查用户是否有权限操作所有请求的业务组
existing, _, err := models.SeparateTargetIdents(rt.Ctx, f.Idents)
if user.IsAdmin() {
ginx.NewRender(c).Data(failedResults, models.TargetUpdateBgid(rt.Ctx, f.Idents, f.Bgid, false))
return
}
if f.Bgid > 0 {
// 把要操作的机器分成两部分一部分是bgid为0需要管理员分配另一部分bgid>0说明是业务组内部想调整
// 比如原来分配给didiyun的机器didiyun的管理员想把部分机器调整到didiyun-ceph下
// 对于调整的这种情况当前登录用户要对这批机器有操作权限同时还要对目标BG有操作权限
orphans, err := models.IdentsFilter(rt.Ctx, f.Idents, "group_id = ?", 0)
ginx.Dangerous(err)
rt.checkTargetPerm(c, existing)
var groupIds []int64
if f.Action == "reset" {
// 如果是复写,则需要检查用户是否有权限操作机器之前的业务组
bgids, err := models.TargetGroupIdsGetByIdents(rt.Ctx, f.Idents)
// 机器里边存在未归组的登录用户就需要是admin
if len(orphans) > 0 && !user.IsAdmin() {
can, err := user.CheckPerm(rt.Ctx, "/targets/bind")
ginx.Dangerous(err)
groupIds = append(groupIds, bgids...)
if !can {
ginx.Bomb(http.StatusForbidden, "No permission. Only admin can assign BG")
}
}
groupIds = append(groupIds, f.Bgids...)
for _, bgid := range groupIds {
bg := BusiGroup(rt.Ctx, bgid)
reBelongs, err := models.IdentsFilter(rt.Ctx, f.Idents, "group_id > ?", 0)
ginx.Dangerous(err)
if len(reBelongs) > 0 {
// 对于这些要重新分配的机器操作者要对这些机器本身有权限同时要对目标bgid有权限
rt.checkTargetPerm(c, f.Idents)
bg := BusiGroup(rt.Ctx, f.Bgid)
can, err := user.CanDoBusiGroup(rt.Ctx, bg, "rw")
ginx.Dangerous(err)
@@ -442,24 +435,14 @@ func (rt *Router) targetBindBgids(c *gin.Context) {
ginx.Bomb(http.StatusForbidden, "No permission. You are not admin of BG(%s)", bg.Name)
}
}
can, err := user.CheckPerm(rt.Ctx, "/targets/bind")
ginx.Dangerous(err)
if !can {
ginx.Bomb(http.StatusForbidden, "No permission. Only admin can assign BG")
}
} else if f.Bgid == 0 {
// 退还机器
rt.checkTargetPerm(c, f.Idents)
} else {
ginx.Bomb(http.StatusBadRequest, "invalid bgid")
}
switch f.Action {
case "add":
ginx.NewRender(c).Data(failedResults, models.TargetBindBgids(rt.Ctx, f.Idents, f.Bgids))
case "del":
ginx.NewRender(c).Data(failedResults, models.TargetUnbindBgids(rt.Ctx, f.Idents, f.Bgids))
case "reset":
ginx.NewRender(c).Data(failedResults, models.TargetOverrideBgids(rt.Ctx, f.Idents, f.Bgids))
default:
ginx.Bomb(http.StatusBadRequest, "invalid action")
}
ginx.NewRender(c).Data(failedResults, models.TargetUpdateBgid(rt.Ctx, f.Idents, f.Bgid, false))
}
func (rt *Router) targetUpdateBgidByService(c *gin.Context) {
@@ -478,7 +461,7 @@ func (rt *Router) targetUpdateBgidByService(c *gin.Context) {
ginx.Bomb(http.StatusBadRequest, err.Error())
}
ginx.NewRender(c).Data(failedResults, models.TargetOverrideBgids(rt.Ctx, f.Idents, []int64{f.Bgid}))
ginx.NewRender(c).Data(failedResults, models.TargetUpdateBgid(rt.Ctx, f.Idents, f.Bgid, false))
}
type identsForm struct {
@@ -502,7 +485,7 @@ func (rt *Router) targetDel(c *gin.Context) {
ginx.Bomb(http.StatusBadRequest, err.Error())
}
ginx.NewRender(c).Data(failedResults, models.TargetDel(rt.Ctx, f.Idents, rt.TargetDeleteHook))
ginx.NewRender(c).Data(failedResults, models.TargetDel(rt.Ctx, f.Idents))
}
func (rt *Router) targetDelByService(c *gin.Context) {
@@ -521,7 +504,7 @@ func (rt *Router) targetDelByService(c *gin.Context) {
ginx.Bomb(http.StatusBadRequest, err.Error())
}
ginx.NewRender(c).Data(failedResults, models.TargetDel(rt.Ctx, f.Idents, rt.TargetDeleteHook))
ginx.NewRender(c).Data(failedResults, models.TargetDel(rt.Ctx, f.Idents))
}
func (rt *Router) checkTargetPerm(c *gin.Context, idents []string) {

View File

@@ -126,7 +126,7 @@ func (rt *Router) taskAdd(c *gin.Context) {
rt.checkTargetPerm(c, f.Hosts)
// call ibex
taskId, err := sender.TaskAdd(f, user.Username, rt.Ctx.IsCenter)
taskId, err := sender.TaskAdd(rt.Ctx, f, user.Username, rt.Ctx.IsCenter)
ginx.Dangerous(err)
if taskId <= 0 {

View File

@@ -18,7 +18,7 @@ func Upgrade(configFile string) error {
return err
}
ctx := ctx.NewContext(context.Background(), db, true)
ctx := ctx.NewContext(context.Background(), db, nil, true)
for _, cluster := range config.Clusters {
count, err := models.GetDatasourcesCountByName(ctx, cluster.Name)
if err != nil {

View File

@@ -12,6 +12,7 @@ import (
"github.com/ccfos/nightingale/v6/center/metas"
"github.com/ccfos/nightingale/v6/conf"
"github.com/ccfos/nightingale/v6/dumper"
"github.com/ccfos/nightingale/v6/ibex"
"github.com/ccfos/nightingale/v6/memsto"
"github.com/ccfos/nightingale/v6/pkg/ctx"
"github.com/ccfos/nightingale/v6/pkg/httpx"
@@ -22,8 +23,6 @@ import (
"github.com/ccfos/nightingale/v6/pushgw/writer"
"github.com/ccfos/nightingale/v6/storage"
"github.com/ccfos/nightingale/v6/tdengine"
"github.com/flashcatcloud/ibex/src/cmd/ibex"
)
func Initialize(configDir string, cryptoKey string) (func(), error) {
@@ -40,7 +39,6 @@ func Initialize(configDir string, cryptoKey string) (func(), error) {
if len(config.CenterApi.Addrs) < 1 {
return nil, errors.New("failed to init config: the CenterApi configuration is missing")
}
ctx := ctx.NewContext(context.Background(), nil, false, config.CenterApi)
var redis storage.Redis
redis, err = storage.NewRedis(config.Redis)
@@ -48,6 +46,8 @@ func Initialize(configDir string, cryptoKey string) (func(), error) {
return nil, err
}
ctx := ctx.NewContext(context.Background(), nil, redis, false, config.CenterApi)
syncStats := memsto.NewSyncStats()
targetCache := memsto.NewTargetCache(ctx, syncStats, redis)
@@ -82,12 +82,12 @@ func Initialize(configDir string, cryptoKey string) (func(), error) {
alertrtRouter.Config(r)
if config.Ibex.Enable {
ibex.ServerStart(false, nil, redis, config.HTTP.APIForService.BasicAuth, config.Alert.Heartbeat, &config.CenterApi, r, nil, config.Ibex, config.HTTP.Port)
ibex.ServerStart(ctx, false, nil, redis, config.HTTP.APIForService.BasicAuth, config.Alert.Heartbeat, &config.CenterApi, r, nil, config.Ibex, config.HTTP.Port)
}
}
dumper.ConfigRouter(r)
httpClean := httpx.Init(config.HTTP, r)
httpClean := httpx.Init(config.HTTP, context.Background(), r)
return func() {
logxClean()

119
cmd/ibex/main.go Normal file
View File

@@ -0,0 +1,119 @@
package main
import (
"fmt"
"os"
"github.com/ccfos/nightingale/v6/ibex/agentd"
"github.com/ccfos/nightingale/v6/ibex/server"
"github.com/toolkits/pkg/net/tcpx"
"github.com/toolkits/pkg/runner"
"github.com/urfave/cli/v2"
)
// VERSION go build -ldflags "-X main.VERSION=x.x.x"
var VERSION = "not specified"
// main wires up the ibex CLI with its three sub-commands
// (center server, edge server, agentd) and dispatches on os.Args.
func main() {
	app := cli.NewApp()
	app.Name = "ibex"
	app.Version = VERSION
	app.Usage = "Ibex, running scripts on large scale machines"
	app.Commands = []*cli.Command{
		newCenterServerCmd(),
		newEdgeServerCmd(),
		newAgentdCmd(),
	}

	// The error from app.Run was previously discarded, so flag-parse and
	// startup failures exited with status 0. Report it and exit non-zero
	// so supervisors (systemd, scripts) can detect the failure.
	if err := app.Run(os.Args); err != nil {
		fmt.Fprintln(os.Stderr, err)
		os.Exit(1)
	}
}
// newCenterServerCmd builds the "server" sub-command, which starts the
// ibex center server (server.Run with isCenter=true).
func newCenterServerCmd() *cli.Command {
	confFlag := &cli.StringFlag{
		Name:    "conf",
		Aliases: []string{"c"},
		Usage:   "specify configuration file(.json,.yaml,.toml)",
	}

	action := func(c *cli.Context) error {
		printEnv()
		tcpx.WaitHosts()

		var opts []server.ServerOption
		if cf := c.String("conf"); cf != "" {
			opts = append(opts, server.SetConfigFile(cf))
		}
		opts = append(opts, server.SetVersion(VERSION))

		// isCenter=true: run as the center server.
		server.Run(true, opts...)
		return nil
	}

	return &cli.Command{
		Name:   "server",
		Usage:  "Run server",
		Flags:  []cli.Flag{confFlag},
		Action: action,
	}
}
// newEdgeServerCmd builds the "edge-server" sub-command, which starts the
// ibex server in edge mode (server.Run with isCenter=false).
func newEdgeServerCmd() *cli.Command {
	return &cli.Command{
		// NOTE: the name used to be "edge server". urfave/cli matches a
		// sub-command name against a single argv token, so a name that
		// contains a space could never be invoked from the shell. Use a
		// single hyphenated token instead.
		Name:  "edge-server",
		Usage: "Run edge server",
		Flags: []cli.Flag{
			&cli.StringFlag{
				Name:    "conf",
				Aliases: []string{"c"},
				Usage:   "specify configuration file(.json,.yaml,.toml)",
			},
		},
		Action: func(c *cli.Context) error {
			printEnv()
			tcpx.WaitHosts()

			var opts []server.ServerOption
			if c.String("conf") != "" {
				opts = append(opts, server.SetConfigFile(c.String("conf")))
			}
			opts = append(opts, server.SetVersion(VERSION))

			// isCenter=false: run in edge mode.
			server.Run(false, opts...)
			return nil
		},
	}
}
// newAgentdCmd builds the "agentd" sub-command, which runs the ibex agent
// daemon on a target machine.
func newAgentdCmd() *cli.Command {
	action := func(c *cli.Context) error {
		printEnv()

		var opts []agentd.AgentdOption
		if cf := c.String("conf"); cf != "" {
			opts = append(opts, agentd.SetConfigFile(cf))
		}
		opts = append(opts, agentd.SetVersion(VERSION))

		agentd.Run(opts...)
		return nil
	}

	return &cli.Command{
		Name:  "agentd",
		Usage: "Run agentd",
		Flags: []cli.Flag{
			&cli.StringFlag{
				Name:    "conf",
				Aliases: []string{"c"},
				Usage:   "specify configuration file(.json,.yaml,.toml)",
			},
		},
		Action: action,
	}
}
// printEnv initializes the runner helper and prints basic process facts
// (working dir, hostname, fd and vm limits) to stdout for diagnostics.
func printEnv() {
	runner.Init()
	fmt.Printf("runner.cwd: %v\n", runner.Cwd)
	fmt.Printf("runner.hostname: %v\n", runner.Hostname)
	fmt.Printf("runner.fd_limits: %v\n", runner.FdLimits())
	fmt.Printf("runner.vm_limits: %v\n", runner.VMLimits())
}

View File

@@ -561,7 +561,7 @@ CREATE TABLE alert_cur_event (
target_note varchar(191) not null default '' ,
first_trigger_time bigint,
trigger_time bigint not null,
trigger_value varchar(2048) not null,
trigger_value varchar(255) not null,
annotations text not null ,
rule_config text not null ,
tags varchar(1024) not null default '' ,
@@ -621,7 +621,7 @@ CREATE TABLE alert_his_event (
target_note varchar(191) not null default '' ,
first_trigger_time bigint,
trigger_time bigint not null,
trigger_value varchar(2048) not null,
trigger_value varchar(255) not null,
recover_time bigint not null default 0,
last_eval_time bigint not null default 0 ,
tags varchar(1024) not null default '' ,

View File

@@ -453,7 +453,7 @@ CREATE TABLE `alert_cur_event` (
`target_note` varchar(191) not null default '' comment 'target note',
`first_trigger_time` bigint,
`trigger_time` bigint not null,
`trigger_value` text not null,
`trigger_value` varchar(255) not null,
`annotations` text not null comment 'annotations',
`rule_config` text not null comment 'annotations',
`tags` varchar(1024) not null default '' comment 'merge data_tags rule_tags, split by ,,',
@@ -493,7 +493,7 @@ CREATE TABLE `alert_his_event` (
`target_note` varchar(191) not null default '' comment 'target note',
`first_trigger_time` bigint,
`trigger_time` bigint not null,
`trigger_value` text not null,
`trigger_value` varchar(255) not null,
`recover_time` bigint not null default 0,
`last_eval_time` bigint not null default 0 comment 'for time filter',
`tags` varchar(1024) not null default '' comment 'merge data_tags rule_tags, split by ,,',
@@ -528,7 +528,6 @@ CREATE TABLE `builtin_components` (
CREATE TABLE `builtin_payloads` (
`id` bigint(20) NOT NULL AUTO_INCREMENT COMMENT '''unique identifier''',
`component_id` bigint(20) NOT NULL DEFAULT 0 COMMENT 'component_id',
`uuid` bigint(20) NOT NULL COMMENT '''uuid of payload''',
`type` varchar(191) NOT NULL COMMENT '''type of payload''',
`component` varchar(191) NOT NULL COMMENT '''component of payload''',

View File

@@ -389,7 +389,7 @@ CREATE TABLE `alert_cur_event` (
`target_note` varchar(191) not null default '',
`first_trigger_time` bigint,
`trigger_time` bigint not null,
`trigger_value` varchar(2048) not null,
`trigger_value` varchar(255) not null,
`annotations` text not null,
`rule_config` text not null,
`tags` varchar(1024) not null default ''
@@ -427,7 +427,7 @@ CREATE TABLE `alert_his_event` (
`target_note` varchar(191) not null default '',
`first_trigger_time` bigint,
`trigger_time` bigint not null,
`trigger_value` varchar(2048) not null,
`trigger_value` varchar(255) not null,
`recover_time` bigint not null default 0,
`last_eval_time` bigint not null default 0,
`tags` varchar(1024) not null default '',

View File

@@ -103,17 +103,4 @@ CREATE TABLE notification_record (
/* v7.3.0 2024-08-26 */
ALTER TABLE `target` ADD COLUMN `host_tags` TEXT COMMENT 'global labels set in conf file';
/* v7.3.4 2024-08-28 */
ALTER TABLE `builtin_payloads` ADD COLUMN `component_id` bigint(20) NOT NULL DEFAULT 0 COMMENT 'component_id';
/* v7.4.0 2024-09-20 */
CREATE TABLE `target_busi_group` (
`id` bigint NOT NULL AUTO_INCREMENT,
`target_ident` varchar(191) NOT NULL,
`group_id` bigint NOT NULL,
`update_at` bigint NOT NULL,
PRIMARY KEY (`id`),
UNIQUE KEY `idx_target_group` (`target_ident`,`group_id`)
) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4;
ALTER TABLE `target` ADD COLUMN `host_tags` TEXT COMMENT 'global labels set in conf file';

38
etc/ibex/agentd.toml Normal file
View File

@@ -0,0 +1,38 @@
# debug, release
RunMode = "debug"
# task meta storage dir
MetaDir = "./meta"
[HTTP]
Enable = true
# http listening address
Host = "0.0.0.0"
# http listening port
Port = 2090
# https cert file path
CertFile = ""
# https key file path
KeyFile = ""
# whether print access log
PrintAccessLog = true
# whether enable pprof
PProf = false
# http graceful shutdown timeout, unit: s
ShutdownTimeout = 30
# max content length: 64M
MaxContentLength = 67108864
# http server read timeout, unit: s
ReadTimeout = 20
# http server write timeout, unit: s
WriteTimeout = 40
# http server idle timeout, unit: s
IdleTimeout = 120
[Heartbeat]
# unit: ms
Interval = 1000
# rpc servers
Servers = ["127.0.0.1:20090"]
# $ip or $hostname or specified string
Host = "$hostname"

View File

@@ -0,0 +1,20 @@
[Unit]
Description="ibex-agentd"
After=network.target
[Service]
Type=simple
ExecStart=/root/gopath/ibex/ibex agentd
WorkingDirectory=/root/gopath/ibex
Restart=on-failure
SuccessExitStatus=0
LimitNOFILE=65536
StandardOutput=syslog
StandardError=syslog
SyslogIdentifier=ibex-agentd
[Install]
WantedBy=multi-user.target

View File

@@ -0,0 +1,20 @@
[Unit]
Description="ibex-server"
After=network.target
[Service]
Type=simple
ExecStart=/root/gopath/ibex/ibex server
WorkingDirectory=/root/gopath/ibex
Restart=on-failure
SuccessExitStatus=0
LimitNOFILE=65536
StandardOutput=syslog
StandardError=syslog
SyslogIdentifier=ibex-server
[Install]
WantedBy=multi-user.target

86
etc/ibex/server.toml Normal file
View File

@@ -0,0 +1,86 @@
# debug, release
RunMode = "debug"
[Log]
# log write dir
Dir = "logs-server"
# log level: DEBUG INFO WARNING ERROR
Level = "DEBUG"
# stdout, stderr, file
Output = "stdout"
# # rotate by time
# KeepHours = 4
# # rotate by size
# RotateNum = 3
# # unit: MB
# RotateSize = 256
[HTTP]
Enable = true
# http listening address
Host = "0.0.0.0"
# http listening port
Port = 10090
# https cert file path
CertFile = ""
# https key file path
KeyFile = ""
# whether print access log
PrintAccessLog = true
# whether enable pprof
PProf = false
# http graceful shutdown timeout, unit: s
ShutdownTimeout = 30
# max content length: 64M
MaxContentLength = 67108864
# http server read timeout, unit: s
ReadTimeout = 20
# http server write timeout, unit: s
WriteTimeout = 40
# http server idle timeout, unit: s
IdleTimeout = 120
[BasicAuth]
# using when call apis
ibex = "ibex"
[RPC]
Listen = "0.0.0.0:20090"
[Heartbeat]
# auto detect if blank
IP = ""
# unit: ms
Interval = 1000
[Output]
# database | remote
ComeFrom = "database"
AgtdPort = 2090
[DB]
# postgres: host=%s port=%s user=%s dbname=%s password=%s sslmode=%s
# postgres: DSN="host=127.0.0.1 port=5432 user=root dbname=n9e_v6 password=1234 sslmode=disable"
DSN="root:1234@tcp(127.0.0.1:3306)/ibex?charset=utf8mb4&parseTime=True&loc=Local&allowNativePasswords=true"
# enable debug mode or not
Debug = false
# mysql postgres
DBType = "mysql"
# unit: s
MaxLifetime = 7200
# max open connections
MaxOpenConns = 150
# max idle connections
MaxIdleConns = 50
# table prefix
TablePrefix = ""
[Redis]
# address, ip:port or ip1:port,ip2:port for cluster and sentinel(SentinelAddrs)
Address = "127.0.0.1:6379"
# Username = ""
# Password = ""
# DB = 0
# UseTLS = false
# TLSMinVersion = "1.2"
# standalone cluster sentinel

12
go.mod
View File

@@ -3,12 +3,11 @@ module github.com/ccfos/nightingale/v6
go 1.18
require (
github.com/BurntSushi/toml v0.3.1
github.com/BurntSushi/toml v1.3.2
github.com/coreos/go-oidc v2.2.1+incompatible
github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc
github.com/dgrijalva/jwt-go v3.2.0+incompatible
github.com/expr-lang/expr v1.16.1
github.com/flashcatcloud/ibex v1.3.5
github.com/gin-contrib/pprof v1.4.0
github.com/gin-gonic/gin v1.9.1
github.com/go-ldap/ldap/v3 v3.4.4
@@ -34,6 +33,7 @@ require (
github.com/spaolacci/murmur3 v1.1.0
github.com/tidwall/gjson v1.14.0
github.com/toolkits/pkg v1.3.6
github.com/urfave/cli/v2 v2.27.4
golang.org/x/exp v0.0.0-20230713183714-613f0c0eb8a1
golang.org/x/oauth2 v0.10.0
gopkg.in/gomail.v2 v2.0.0-20160411212932-81ebce5c23df
@@ -44,6 +44,12 @@ require (
gorm.io/gorm v1.25.7-0.20240204074919-46816ad31dde
)
require (
github.com/cpuguy83/go-md2man/v2 v2.0.4 // indirect
github.com/russross/blackfriday/v2 v2.1.0 // indirect
github.com/xrash/smetrics v0.0.0-20240521201337-686a1a2994c1 // indirect
)
require (
github.com/Azure/go-ntlmssp v0.0.0-20220621081337-cb9428e4ac1e // indirect
github.com/beorn7/perks v1.0.1 // indirect
@@ -90,7 +96,7 @@ require (
github.com/tidwall/match v1.1.1 // indirect
github.com/tidwall/pretty v1.2.0 // indirect
github.com/twitchyliquid64/golang-asm v0.15.1 // indirect
github.com/ugorji/go/codec v1.2.11 // indirect
github.com/ugorji/go/codec v1.2.11
go.uber.org/atomic v1.11.0 // indirect
go.uber.org/automaxprocs v1.5.2 // indirect
golang.org/x/arch v0.3.0 // indirect

13
go.sum
View File

@@ -5,8 +5,9 @@ github.com/Azure/azure-sdk-for-go/sdk/internal v1.3.0 h1:sXr+ck84g/ZlZUOZiNELInm
github.com/Azure/go-ntlmssp v0.0.0-20220621081337-cb9428e4ac1e h1:NeAW1fUYUEWhft7pkxDf6WoUvEZJ/uOKsvtpjLnn8MU=
github.com/Azure/go-ntlmssp v0.0.0-20220621081337-cb9428e4ac1e/go.mod h1:chxPXzSsl7ZWRAuOIE23GDNzjWuZquvFlgA8xmpunjU=
github.com/AzureAD/microsoft-authentication-library-for-go v1.0.0 h1:OBhqkivkhkMqLPymWEppkm7vgPQY2XsHoEkaMQ0AdZY=
github.com/BurntSushi/toml v0.3.1 h1:WXkYYl6Yr3qBf1K79EBnL4mak0OimBfB0XUf9Vl28OQ=
github.com/BurntSushi/toml v0.3.1/go.mod h1:xHWCNGjB5oqiDr8zfno3MHue2Ht5sIBksp03qcyfWMU=
github.com/BurntSushi/toml v1.3.2 h1:o7IhLm0Msx3BaB+n3Ag7L8EVlByGnpq14C4YWiu/gL8=
github.com/BurntSushi/toml v1.3.2/go.mod h1:CxXYINrC8qIiEnFrOxCa7Jy5BFHlXnUU2pbicEuybxQ=
github.com/Masterminds/semver/v3 v3.1.1 h1:hLg3sBzpNErnxhQtUy/mmLR2I9foDujNK030IGemrRc=
github.com/Masterminds/semver/v3 v3.1.1/go.mod h1:VPu/7SZ7ePZ3QOrcuXROw5FAcLl4a0cBrbBpGY/8hQs=
github.com/alecthomas/units v0.0.0-20211218093645-b94a6e3cc137 h1:s6gZFSlWYmbqAuRjVTiNNhvNRfY2Wxp9nhfyel4rklc=
@@ -29,6 +30,8 @@ github.com/coreos/go-oidc v2.2.1+incompatible h1:mh48q/BqXqgjVHpy2ZY7WnWAbenxRjs
github.com/coreos/go-oidc v2.2.1+incompatible/go.mod h1:CgnwVTmzoESiwO9qyAFEMiHoZ1nMCKZlZ9V6mm3/LKc=
github.com/coreos/go-systemd v0.0.0-20190321100706-95778dfbb74e/go.mod h1:F5haX7vjVVG0kc13fIWeqUViNPyEJxv/OmvnBo0Yme4=
github.com/coreos/go-systemd v0.0.0-20190719114852-fd7a80b32e1f/go.mod h1:F5haX7vjVVG0kc13fIWeqUViNPyEJxv/OmvnBo0Yme4=
github.com/cpuguy83/go-md2man/v2 v2.0.4 h1:wfIWP927BUkWJb2NmU/kNDYIBTh/ziUX91+lVfRxZq4=
github.com/cpuguy83/go-md2man/v2 v2.0.4/go.mod h1:tgQtvFlXSQOSOSIRvRPT7W67SCa46tRHOmNcaadrF8o=
github.com/creack/pty v1.1.7/go.mod h1:lj5s0c3V2DBrqTV7llrYr5NG6My20zk30Fl46Y7DoTY=
github.com/creack/pty v1.1.9/go.mod h1:oKZEueFk5CKHvIhNR5MUki03XCEU+Q6VDXinZuGJ33E=
github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
@@ -47,8 +50,6 @@ github.com/fatih/camelcase v1.0.0 h1:hxNvNX/xYBp0ovncs8WyWZrOrpBNub/JfaMvbURyft8
github.com/fatih/camelcase v1.0.0/go.mod h1:yN2Sb0lFhZJUdVvtELVWefmrXpuZESvPmqwoZc+/fpc=
github.com/fatih/structs v1.1.0 h1:Q7juDM0QtcnhCpeyLGQKyg4TOIghuNXrkL32pHAUMxo=
github.com/fatih/structs v1.1.0/go.mod h1:9NiDSp5zOcgEDl+j00MP/WkGVPOlPRLejGD8Ga6PJ7M=
github.com/flashcatcloud/ibex v1.3.5 h1:8GOOf5+aJT0TP/MC6izz7CO5JKJSdKVFBwL0vQp93Nc=
github.com/flashcatcloud/ibex v1.3.5/go.mod h1:T8hbMUySK2q6cXUaYp0AUVeKkU9Od2LjzwmB5lmTRBM=
github.com/gabriel-vasile/mimetype v1.4.2 h1:w5qFW6JKBz9Y393Y4q372O9A7cUSequkh1Q7OhCmWKU=
github.com/gabriel-vasile/mimetype v1.4.2/go.mod h1:zApsH/mKG4w07erKIaJPFiX0Tsq9BFQgN3qGY5GnNgA=
github.com/garyburd/redigo v1.6.2/go.mod h1:NR3MbYisc3/PwhQ00EMzDiPmrwpPxAn5GI05/YaO1SY=
@@ -262,6 +263,8 @@ github.com/rogpeppe/go-internal v1.10.0 h1:TMyTOH3F/DB16zRVcYyreMH6GnZZrwQVAoYjR
github.com/rs/xid v1.2.1/go.mod h1:+uKXf+4Djp6Md1KODXJxgGQPKngRmWyn10oCKFzNHOQ=
github.com/rs/zerolog v1.13.0/go.mod h1:YbFCdg8HfsridGWAh22vktObvhZbQsZXe4/zB0OKkWU=
github.com/rs/zerolog v1.15.0/go.mod h1:xYTKnLHcpfU2225ny5qZjxnj9NvkumZYjJHlAThCjNc=
github.com/russross/blackfriday/v2 v2.1.0 h1:JIOH55/0cWyOuilr9/qlrm0BSXldqnqwMsf35Ld67mk=
github.com/russross/blackfriday/v2 v2.1.0/go.mod h1:+Rmxgy9KzJVeS9/2gXHxylqXiyQDYRxCVz55jmeOWTM=
github.com/satori/go.uuid v1.2.0/go.mod h1:dA0hQrYB0VpLJoorglMZABFdXlWrHn1NEOzdhQKdks0=
github.com/shopspring/decimal v0.0.0-20180709203117-cd690d0c9e24/go.mod h1:M+9NzErvs504Cn4c5DxATwIqPbtswREoFCre64PpcG4=
github.com/shopspring/decimal v1.2.0 h1:abSATXmQEYyShuxI4/vyW3tV1MrKAJzCZ/0zLUXYbsQ=
@@ -302,6 +305,10 @@ github.com/ugorji/go v1.2.7/go.mod h1:nF9osbDWLy6bDVv/Rtoh6QgnvNDpmCalQV5urGCCS6
github.com/ugorji/go/codec v1.2.7/go.mod h1:WGN1fab3R1fzQlVQTkfxVtIBhWDRqOviHU95kRgeqEY=
github.com/ugorji/go/codec v1.2.11 h1:BMaWp1Bb6fHwEtbplGBGJ498wD+LKlNSl25MjdZY4dU=
github.com/ugorji/go/codec v1.2.11/go.mod h1:UNopzCgEMSXjBc6AOMqYvWC1ktqTAfzJZUZgYf6w6lg=
github.com/urfave/cli/v2 v2.27.4 h1:o1owoI+02Eb+K107p27wEX9Bb8eqIoZCfLXloLUSWJ8=
github.com/urfave/cli/v2 v2.27.4/go.mod h1:m4QzxcD2qpra4z7WhzEGn74WZLViBnMpb1ToCAKdGRQ=
github.com/xrash/smetrics v0.0.0-20240521201337-686a1a2994c1 h1:gEOO8jv9F4OT7lGCjxCBTO/36wtF6j2nSip77qHd4x4=
github.com/xrash/smetrics v0.0.0-20240521201337-686a1a2994c1/go.mod h1:Ohn+xnUBiLI6FVj/9LpzZWtj1/D6lUovWYBkxHVV3aM=
github.com/yuin/goldmark v1.1.27/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74=
github.com/yuin/goldmark v1.2.1/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74=
github.com/yuin/goldmark v1.4.13/go.mod h1:6yULJ656Px+3vBD8DxQVa3kxgyrAnzto9xy5taEt/CY=

117
ibex/agentd/agentd.go Normal file
View File

@@ -0,0 +1,117 @@
package agentd
import (
"context"
"fmt"
"log"
"os"
"os/signal"
"path/filepath"
"syscall"
"github.com/toolkits/pkg/i18n"
"github.com/ccfos/nightingale/v6/ibex/agentd/config"
"github.com/ccfos/nightingale/v6/ibex/agentd/router"
"github.com/ccfos/nightingale/v6/ibex/agentd/timer"
"github.com/ccfos/nightingale/v6/pkg/httpx"
)
// Agentd holds the runtime settings for the agentd process.
type Agentd struct {
	ConfigFile string // path to the agentd configuration file
	Version    string // build version string reported by the HTTP API
}

// AgentdOption mutates an Agentd during construction (functional options).
type AgentdOption func(*Agentd)

// SetConfigFile returns an option that overrides the configuration file path.
func SetConfigFile(f string) AgentdOption {
	return func(a *Agentd) { a.ConfigFile = f }
}

// SetVersion returns an option that overrides the reported version.
func SetVersion(v string) AgentdOption {
	return func(a *Agentd) { a.Version = v }
}
// Run run agentd
func Run(opts ...AgentdOption) {
code := 1
sc := make(chan os.Signal, 1)
signal.Notify(sc, syscall.SIGHUP, syscall.SIGINT, syscall.SIGTERM, syscall.SIGQUIT)
agentd := Agentd{
ConfigFile: filepath.Join("etc", "ibex", "agentd.toml"),
Version: "not specified",
}
for _, opt := range opts {
opt(&agentd)
}
cleanFunc, err := agentd.initialize()
if err != nil {
fmt.Println("agentd init fail:", err)
os.Exit(code)
}
EXIT:
for {
sig := <-sc
fmt.Println("received signal:", sig.String())
switch sig {
case syscall.SIGQUIT, syscall.SIGTERM, syscall.SIGINT:
code = 0
break EXIT
case syscall.SIGHUP:
// reload configuration?
default:
break EXIT
}
}
cleanFunc()
fmt.Println("agentd exited")
os.Exit(code)
}
// initialize loads configuration, sets up i18n, starts the HTTP server
// and launches the heartbeat loop. It returns a cleanup function that
// cancels the heartbeat context and shuts down the HTTP server.
func (s Agentd) initialize() (func(), error) {
	fns := Functions{}
	ctx, cancel := context.WithCancel(context.Background())
	fns.Add(cancel)

	log.SetFlags(log.Ldate | log.Ltime | log.Lshortfile)

	// parse config file
	config.MustLoad(s.ConfigFile)

	// init i18n
	i18n.Init()

	// init http server
	r := router.New(s.Version)
	httpClean := httpx.Init(config.C.HTTP, ctx, r)
	fns.Add(httpClean)

	// periodic report to the job servers
	go timer.Heartbeat(ctx)

	return fns.Ret(), nil
}
// Functions collects cleanup callbacks and replays them in insertion order.
type Functions struct {
	List []func()
}

// Add appends one cleanup callback.
func (fs *Functions) Add(f func()) {
	fs.List = append(fs.List, f)
}

// Ret returns a closure that invokes every registered callback,
// first-added first.
func (fs *Functions) Ret() func() {
	return func() {
		for _, fn := range fs.List {
			fn()
		}
	}
}

110
ibex/agentd/client/cli.go Normal file
View File

@@ -0,0 +1,110 @@
package client
import (
"bufio"
"io"
"log"
"net"
"net/rpc"
"reflect"
"time"
"github.com/toolkits/pkg/net/gobrpc"
"github.com/ugorji/go/codec"
"github.com/ccfos/nightingale/v6/ibex/agentd/config"
)
// cli caches the RPC client selected by getCli; nil until a server has
// been chosen (or after CloseCli drops it).
var cli *gobrpc.RPCClient

// getCli returns the cached RPC client if one exists. Otherwise it
// probes every configured heartbeat server, picks the one with the
// lowest ping round-trip time, closes the connections to the others,
// caches a client for the winner and returns it. Returns nil when no
// server responds.
func getCli() *gobrpc.RPCClient {
	if cli != nil {
		return cli
	}

	// detect the fastest server
	var (
		address  string
		client   *rpc.Client
		duration int64 = 999999999999 // best RTT seen so far, in nanoseconds
	)

	// auto close other slow server
	acm := make(map[string]*rpc.Client)

	l := len(config.C.Heartbeat.Servers)
	for i := 0; i < l; i++ {
		addr := config.C.Heartbeat.Servers[i]

		begin := time.Now()

		conn, err := net.DialTimeout("tcp", addr, time.Second*5)
		if err != nil {
			log.Printf("W: dial %s fail: %s", addr, err)
			continue
		}

		// buffered connection fed to the msgpack RPC codec
		var bufConn = struct {
			io.Closer
			*bufio.Reader
			*bufio.Writer
		}{conn, bufio.NewReader(conn), bufio.NewWriter(conn)}

		var mh codec.MsgpackHandle
		mh.MapType = reflect.TypeOf(map[string]interface{}(nil))

		rpcCodec := codec.MsgpackSpecRpc.ClientCodec(bufConn, &mh)
		c := rpc.NewClientWithCodec(rpcCodec)

		acm[addr] = c

		// measure the RTT with a ping; servers failing the ping are skipped
		var out string
		err = c.Call("Server.Ping", "", &out)
		if err != nil {
			log.Printf("W: ping %s fail: %s", addr, err)
			continue
		}

		use := time.Since(begin).Nanoseconds()

		if use < duration {
			address = addr
			client = c
			duration = use
		}
	}

	if address == "" {
		log.Println("E: no job server found")
		return nil
	}

	log.Printf("I: choose server: %s, duration: %dms", address, duration/1000000)

	// close every probed connection except the chosen one
	for addr, c := range acm {
		if addr == address {
			continue
		}
		c.Close()
	}

	cli = gobrpc.NewRPCClient(address, client, 5*time.Second)
	return cli
}
// GetCli blocks until a usable job-server client is available, retrying
// every 10 seconds while getCli keeps failing to find a server.
func GetCli() *gobrpc.RPCClient {
	for {
		if c := getCli(); c != nil {
			return c
		}
		time.Sleep(10 * time.Second)
	}
}
// CloseCli tears down the cached RPC client, if any, so that the next
// GetCli call re-probes the server list.
func CloseCli() {
	if cli == nil {
		return
	}
	cli.Close()
	cli = nil
}

View File

@@ -0,0 +1,31 @@
package client
import (
"fmt"
"log"
"github.com/ccfos/nightingale/v6/ibex/types"
)
// Meta fetches the metadata of task id (script body, arguments, run-as
// account and stdin payload) from the server via RPC. On RPC failure
// the cached client is dropped so the next call re-selects a server.
func Meta(id int64) (script string, args string, account string, stdin string, err error) {
	var resp types.TaskMetaResponse
	err = GetCli().Call("Server.GetTaskMeta", id, &resp)
	if err != nil {
		log.Println("E: rpc call Server.GetTaskMeta:", err)
		CloseCli()
		return
	}

	if resp.Message != "" {
		log.Println("E: rpc call Server.GetTaskMeta:", resp.Message)
		// use a constant format string: resp.Message may contain '%' verbs,
		// and fmt.Errorf(resp.Message) is flagged by go vet
		err = fmt.Errorf("%s", resp.Message)
		return
	}

	script = resp.Script
	args = resp.Args
	account = resp.Account
	stdin = resp.Stdin
	return
}

View File

@@ -0,0 +1,140 @@
package config
import (
"fmt"
"log"
"net"
"os"
"strings"
"sync"
"github.com/koding/multiconfig"
"github.com/toolkits/pkg/file"
"github.com/ccfos/nightingale/v6/pkg/httpx"
)
var (
C = new(Config)
once sync.Once
)
// MustLoad parses the given configuration files into the package-level
// C, resolves the heartbeat identity and prepares the meta directory.
// It runs at most once (sync.Once) and exits the process on any error.
func MustLoad(fpaths ...string) {
	once.Do(func() {
		loaders := []multiconfig.Loader{
			&multiconfig.TagLoader{},
			&multiconfig.EnvironmentLoader{},
		}

		// pick a loader per file based on its extension
		for _, fpath := range fpaths {
			handled := false

			if strings.HasSuffix(fpath, "toml") {
				loaders = append(loaders, &multiconfig.TOMLLoader{Path: fpath})
				handled = true
			}
			// .conf files are parsed as TOML as well
			if strings.HasSuffix(fpath, "conf") {
				loaders = append(loaders, &multiconfig.TOMLLoader{Path: fpath})
				handled = true
			}
			if strings.HasSuffix(fpath, "json") {
				loaders = append(loaders, &multiconfig.JSONLoader{Path: fpath})
				handled = true
			}
			if strings.HasSuffix(fpath, "yaml") {
				loaders = append(loaders, &multiconfig.YAMLLoader{Path: fpath})
				handled = true
			}

			if !handled {
				fmt.Println("config file invalid, valid file exts: .conf,.yaml,.toml,.json")
				os.Exit(1)
			}
		}

		m := multiconfig.DefaultLoader{
			Loader:    multiconfig.MultiLoader(loaders...),
			Validator: multiconfig.MultiValidator(&multiconfig.RequiredValidator{}),
		}
		m.MustLoad(C)

		if C.Heartbeat.Host == "" {
			fmt.Println("heartbeat.host is blank")
			os.Exit(1)
		}

		// "$ip" means: auto-detect the outbound IP and report that
		if C.Heartbeat.Host == "$ip" {
			C.Heartbeat.Endpoint = fmt.Sprint(GetOutboundIP())
			if C.Heartbeat.Endpoint == "" {
				fmt.Println("ip auto got is blank")
				os.Exit(1)
			}
			fmt.Println("host.ip:", C.Heartbeat.Endpoint)
		}

		host, err := C.GetHost()
		if err != nil {
			log.Println("E: failed to GetHost:", err)
			os.Exit(1)
		}
		fmt.Println("host:", host)

		if C.MetaDir == "" {
			C.MetaDir = "./meta"
		}

		C.MetaDir, err = file.RealPath(C.MetaDir)
		if err != nil {
			log.Println("E: failed to get real path of MetaDir:", err)
			os.Exit(1)
		}

		// NOTE(review): EnsureDir followed by EnsureDirRW looks redundant —
		// presumably EnsureDirRW alone suffices; confirm before simplifying.
		file.EnsureDir(C.MetaDir)
		file.EnsureDirRW(C.MetaDir)
	})
}
// Config is the agentd configuration tree loaded by MustLoad.
type Config struct {
	RunMode   string // gin run mode, e.g. "debug" or "release"
	MetaDir   string // directory holding per-task scripts and results
	Heartbeat Heartbeat
	HTTP      httpx.Config
}

// Heartbeat describes how agentd reports itself to the job servers.
type Heartbeat struct {
	Interval int64    // report interval in milliseconds
	Servers  []string // candidate job server addresses (ip:port)
	Host     string   // identity: literal host, "$ip" or "$hostname"
	Endpoint string   // resolved IP when Host is "$ip"
}

// IsDebugMode reports whether agentd runs with RunMode "debug".
func (c *Config) IsDebugMode() bool {
	if c.RunMode == "debug" {
		return true
	}
	return false
}

// GetHost resolves the identity this agent reports in heartbeats:
// "$ip" yields the detected endpoint IP, "$hostname" the OS hostname,
// anything else is taken verbatim.
func (c *Config) GetHost() (string, error) {
	switch c.Heartbeat.Host {
	case "$ip":
		return c.Heartbeat.Endpoint, nil
	case "$hostname":
		return os.Hostname()
	default:
		return c.Heartbeat.Host, nil
	}
}
// GetOutboundIP returns the local IP the kernel would pick to reach the
// internet, discovered by opening a connectionless UDP socket towards a
// public address (no packets are sent). Exits the process on failure.
func GetOutboundIP() net.IP {
	conn, err := net.Dial("udp", "8.8.8.8:80")
	if err != nil {
		fmt.Println("auto get outbound ip fail:", err)
		os.Exit(1)
	}
	defer conn.Close()

	return conn.LocalAddr().(*net.UDPAddr).IP
}

View File

@@ -0,0 +1,60 @@
package router
import (
"fmt"
"os"
"strings"
"github.com/gin-contrib/pprof"
"github.com/gin-gonic/gin"
"github.com/ccfos/nightingale/v6/ibex/agentd/config"
"github.com/ccfos/nightingale/v6/pkg/aop"
)
// New builds the agentd gin engine: recovery middleware always, console
// color disabled in release mode, access logging only when enabled in
// the HTTP config. version is exposed via the /version route.
func New(version string) *gin.Engine {
	gin.SetMode(config.C.RunMode)

	loggerMid := aop.Logger()
	recoveryMid := aop.Recovery()

	if strings.ToLower(config.C.RunMode) == "release" {
		aop.DisableConsoleColor()
	}

	r := gin.New()

	r.Use(recoveryMid)

	// whether print access log
	if config.C.HTTP.PrintAccessLog {
		r.Use(loggerMid)
	}

	configRoute(r, version)

	return r
}
// configRoute wires the built-in diagnostic endpoints and, when enabled
// in the HTTP config, the pprof handlers.
func configRoute(r *gin.Engine, version string) {
	if config.C.HTTP.PProf {
		pprof.Register(r, "/debug/pprof")
	}

	// liveness probe
	r.GET("/ping", func(c *gin.Context) { c.String(200, "pong") })

	// process id, handy for ops tooling
	r.GET("/pid", func(c *gin.Context) { c.String(200, fmt.Sprintf("%d", os.Getpid())) })

	// echo the caller's remote address
	r.GET("/addr", func(c *gin.Context) { c.String(200, c.Request.RemoteAddr) })

	// build version injected at start-up
	r.GET("/version", func(c *gin.Context) { c.String(200, version) })
}

View File

@@ -0,0 +1,18 @@
//go:build !windows
// +build !windows
package timer
import (
"os/exec"
"syscall"
)
// CmdStart launches cmd in its own process group (Setpgid) so that
// CmdKill can later signal the whole group, including any children the
// script forks.
func CmdStart(cmd *exec.Cmd) error {
	cmd.SysProcAttr = &syscall.SysProcAttr{Setpgid: true}
	return cmd.Start()
}

// CmdKill force-kills cmd's entire process group: the negative pid
// addresses the group created by CmdStart.
func CmdKill(cmd *exec.Cmd) error {
	return syscall.Kill(-cmd.Process.Pid, syscall.SIGKILL)
}

View File

@@ -0,0 +1,16 @@
//go:build windows
// +build windows
package timer
import (
"os/exec"
)
// CmdStart starts cmd directly; no POSIX process group is created on
// Windows.
func CmdStart(cmd *exec.Cmd) error {
	return cmd.Start()
}

// CmdKill kills only cmd's own process. Children spawned by the script
// are not addressed here (no process group on Windows).
func CmdKill(cmd *exec.Cmd) error {
	return cmd.Process.Kill()
}

View File

@@ -0,0 +1,74 @@
package timer
import (
"context"
"log"
"time"
"github.com/ccfos/nightingale/v6/ibex/agentd/client"
"github.com/ccfos/nightingale/v6/ibex/agentd/config"
"github.com/ccfos/nightingale/v6/ibex/types"
)
// Heartbeat periodically reports local task state to the job server
// until ctx is cancelled. The interval is measured from the end of one
// report to the start of the next (same semantics as the original
// per-iteration time.After).
func Heartbeat(ctx context.Context) {
	interval := time.Duration(config.C.Heartbeat.Interval) * time.Millisecond

	// reuse one timer instead of allocating a fresh one per iteration
	// with time.After
	timer := time.NewTimer(interval)
	defer timer.Stop()

	for {
		select {
		case <-ctx.Done():
			return
		case <-timer.C:
			heartbeat()
			timer.Reset(interval)
		}
	}
}
// heartbeat sends one report to the job server: the local task results,
// keyed by this agent's identity. The response may assign new tasks
// (start/kill); local tasks no longer mentioned by the server are
// cleaned up afterwards.
func heartbeat() {
	ident, err := config.C.GetHost()
	if err != nil {
		log.Println("E: GetHost fail:", err)
		return
	}

	req := types.ReportRequest{
		Ident:       ident,
		ReportTasks: Locals.ReportTasks(),
	}

	var resp types.ReportResponse
	err = client.GetCli().Call("Server.Report", req, &resp)
	if err != nil {
		log.Println("E: rpc call Server.Report fail:", err)
		// drop the cached client so the next heartbeat re-selects a server
		client.CloseCli()
		return
	}

	if resp.Message != "" {
		log.Println("E: error from server:", resp.Message)
		return
	}

	// apply the server's assignments and remember their ids
	assigned := make(map[int64]struct{})
	if resp.AssignTasks != nil {
		count := len(resp.AssignTasks)
		for i := 0; i < count; i++ {
			at := resp.AssignTasks[i]
			assigned[at.Id] = struct{}{}
			Locals.AssignTask(at)
		}
	}

	if len(assigned) > 0 {
		log.Println("D: assigned tasks:", mapKeys(assigned))
	}

	// drop finished local tasks the server no longer tracks
	Locals.Clean(assigned)
}
// mapKeys flattens the key set of m into a slice; ordering follows Go's
// randomized map iteration and is therefore unspecified.
func mapKeys(m map[int64]struct{}) []int64 {
	out := make([]int64, len(m))
	i := 0
	for k := range m {
		out[i] = k
		i++
	}
	return out
}

333
ibex/agentd/timer/task.go Normal file
View File

@@ -0,0 +1,333 @@
package timer
import (
"bytes"
"fmt"
"log"
"os/exec"
"os/user"
"path"
"strings"
"sync"
"github.com/toolkits/pkg/file"
"github.com/toolkits/pkg/runner"
"github.com/toolkits/pkg/sys"
"github.com/ccfos/nightingale/v6/ibex/agentd/client"
"github.com/ccfos/nightingale/v6/ibex/agentd/config"
)
// Task mirrors one server-assigned task on this agent: its identity,
// desired action, execution state and the captured process I/O.
// The embedded mutex guards the fields accessed from multiple goroutines.
type Task struct {
	sync.Mutex
	Id       int64
	Clock    int64
	Action   string
	Status   string
	alive    bool
	Cmd      *exec.Cmd
	Stdout   bytes.Buffer
	Stderr   bytes.Buffer
	Stdin    *bytes.Reader
	Args     string
	Account  string
	StdinStr string
}

// SetStatus records the task's current status under the lock.
func (t *Task) SetStatus(status string) {
	t.Lock()
	defer t.Unlock()
	t.Status = status
}

// GetStatus returns the task's current status under the lock.
func (t *Task) GetStatus() string {
	t.Lock()
	defer t.Unlock()
	return t.Status
}

// GetAlive reports whether a goroutine is currently driving the process.
func (t *Task) GetAlive() bool {
	t.Lock()
	defer t.Unlock()
	return t.alive
}

// SetAlive flips the alive flag under the lock.
func (t *Task) SetAlive(pa bool) {
	t.Lock()
	defer t.Unlock()
	t.alive = pa
}

// GetStdout snapshots the captured standard output.
func (t *Task) GetStdout() string {
	t.Lock()
	defer t.Unlock()
	return t.Stdout.String()
}

// GetStderr snapshots the captured standard error.
func (t *Task) GetStderr() string {
	t.Lock()
	defer t.Unlock()
	return t.Stderr.String()
}

// ResetBuff discards both captured output buffers.
func (t *Task) ResetBuff() {
	t.Lock()
	defer t.Unlock()
	t.Stdout.Reset()
	t.Stderr.Reset()
}
// doneBefore reports whether this (task, clock) run already finished on
// this host: a "<clock>.done" flag file exists in the task's meta dir.
func (t *Task) doneBefore() bool {
	doneFlag := path.Join(config.C.MetaDir, fmt.Sprint(t.Id), fmt.Sprintf("%d.done", t.Clock))
	return file.IsExist(doneFlag)
}
// loadResult restores a previously persisted run from the meta dir:
// the final status from the done-flag file plus captured stdout/stderr.
// Read failures are logged and leave the corresponding field empty.
func (t *Task) loadResult() {
	metadir := config.C.MetaDir

	doneFlag := path.Join(metadir, fmt.Sprint(t.Id), fmt.Sprintf("%d.done", t.Clock))
	stdoutFile := path.Join(metadir, fmt.Sprint(t.Id), "stdout")
	stderrFile := path.Join(metadir, fmt.Sprint(t.Id), "stderr")

	var err error

	t.Status, err = file.ReadStringTrim(doneFlag)
	if err != nil {
		log.Printf("E: read file %s fail %v", doneFlag, err)
	}
	stdout, err := file.ReadString(stdoutFile)
	if err != nil {
		log.Printf("E: read file %s fail %v", stdoutFile, err)
	}
	stderr, err := file.ReadString(stderrFile)
	if err != nil {
		log.Printf("E: read file %s fail %v", stderrFile, err)
	}

	t.Stdout = *bytes.NewBufferString(stdout)
	t.Stderr = *bytes.NewBufferString(stderr)
}
// prepare makes the task runnable on this host: it ensures the task's
// meta directory exists and fills Args/Account/StdinStr either from the
// local cache (when the ".write" flag file exists) or from the server,
// in which case script/args/account/stdin are persisted for later runs.
func (t *Task) prepare() error {
	if t.Account != "" {
		// already prepared
		return nil
	}

	IdDir := path.Join(config.C.MetaDir, fmt.Sprint(t.Id))
	err := file.EnsureDir(IdDir)
	if err != nil {
		log.Printf("E: mkdir -p %s fail: %v", IdDir, err)
		return err
	}

	writeFlag := path.Join(IdDir, ".write")

	if file.IsExist(writeFlag) {
		// read the cached meta from disk
		argsFile := path.Join(IdDir, "args")
		args, err := file.ReadStringTrim(argsFile)
		if err != nil {
			log.Printf("E: read %s fail %v", argsFile, err)
			return err
		}

		accountFile := path.Join(IdDir, "account")
		account, err := file.ReadStringTrim(accountFile)
		if err != nil {
			log.Printf("E: read %s fail %v", accountFile, err)
			return err
		}

		stdinFile := path.Join(IdDir, "stdin")
		stdin, err := file.ReadStringTrim(stdinFile)
		if err != nil {
			log.Printf("E: read %s fail %v", stdinFile, err)
			return err
		}

		t.Args = args
		t.Account = account
		t.StdinStr = stdin
	} else {
		// fetch the meta from the server, then persist it to disk
		script, args, account, stdin, err := client.Meta(t.Id)
		if err != nil {
			log.Println("E: query task meta fail:", err)
			return err
		}

		scriptFile := path.Join(IdDir, "script")
		_, err = file.WriteString(scriptFile, script)
		if err != nil {
			log.Printf("E: write script to %s fail: %v", scriptFile, err)
			return err
		}

		out, err := sys.CmdOutTrim("chmod", "+x", scriptFile)
		if err != nil {
			log.Printf("E: chmod +x %s fail %v. output: %s", scriptFile, err, out)
			return err
		}

		argsFile := path.Join(IdDir, "args")
		_, err = file.WriteString(argsFile, args)
		if err != nil {
			log.Printf("E: write args to %s fail: %v", argsFile, err)
			return err
		}

		accountFile := path.Join(IdDir, "account")
		_, err = file.WriteString(accountFile, account)
		if err != nil {
			log.Printf("E: write account to %s fail: %v", accountFile, err)
			return err
		}

		stdinFile := path.Join(IdDir, "stdin")
		_, err = file.WriteString(stdinFile, stdin)
		if err != nil {
			log.Printf("E: write tags to %s fail: %v", stdinFile, err)
			return err
		}

		// the .write flag marks the on-disk cache as complete
		_, err = file.WriteString(writeFlag, "")
		if err != nil {
			log.Printf("E: create %s flag file fail: %v", writeFlag, err)
			return err
		}

		t.Args = args
		t.Account = account
		t.StdinStr = stdin
	}

	t.Stdin = bytes.NewReader([]byte(t.StdinStr))

	return nil
}
// start launches the task's script as an asynchronous process. The
// script runs via "sh -c"; when the agent runs as root and the task's
// account is not root, it is wrapped in "su - <account>". runProcess
// waits for completion in a separate goroutine.
// NOTE(review): args are spliced into a shell command line with ad-hoc
// quoting (",," becomes "' '") — confirm upstream sanitizes task args,
// otherwise this is shell-injectable by whoever defines tasks.
func (t *Task) start() {
	if t.GetAlive() {
		return
	}

	err := t.prepare()
	if err != nil {
		return
	}

	args := t.Args
	if args != "" {
		// ",," separates arguments; wrap the whole list in single quotes
		args = strings.Replace(args, ",,", "' '", -1)
		args = "'" + args + "'"
	}

	scriptFile := path.Join(config.C.MetaDir, fmt.Sprint(t.Id), "script")
	if !path.IsAbs(scriptFile) {
		scriptFile = path.Join(runner.Cwd, scriptFile)
	}

	sh := fmt.Sprintf("%s %s", scriptFile, args)
	var cmd *exec.Cmd

	loginUser, err := user.Current()
	if err != nil {
		log.Println("E: cannot get current login user:", err)
		return
	}

	if loginUser.Username == "root" {
		// current login user is root
		if t.Account == "root" {
			cmd = exec.Command("sh", "-c", sh)
			cmd.Dir = loginUser.HomeDir
		} else {
			cmd = exec.Command("su", "-c", sh, "-", t.Account)
		}
	} else {
		// current login user not root
		cmd = exec.Command("sh", "-c", sh)
		cmd.Dir = loginUser.HomeDir
	}

	cmd.Stdout = &t.Stdout
	cmd.Stderr = &t.Stderr
	cmd.Stdin = t.Stdin

	t.Cmd = cmd

	err = CmdStart(cmd)
	if err != nil {
		log.Printf("E: cannot start cmd of task[%d]: %v", t.Id, err)
		return
	}

	go runProcess(t)
}
// kill asynchronously terminates the task's running process; progress
// is reflected via the status set by killProcess.
func (t *Task) kill() {
	go killProcess(t)
}
// runProcess waits for the task's already-started command, maps its
// exit condition onto a status (killed / failed / success) and persists
// the result to disk. Runs in its own goroutine; alive is held true
// while waiting.
func runProcess(t *Task) {
	t.SetAlive(true)
	defer t.SetAlive(false)

	err := t.Cmd.Wait()
	if err != nil {
		// distinguish kill/terminate signals from real failures by the
		// error text produced by os/exec
		if strings.Contains(err.Error(), "signal: killed") {
			t.SetStatus("killed")
			log.Printf("D: process of task[%d] killed", t.Id)
		} else if strings.Contains(err.Error(), "signal: terminated") {
			// kill children process manually
			t.SetStatus("killed")
			log.Printf("D: process of task[%d] terminated", t.Id)
		} else {
			t.SetStatus("failed")
			log.Printf("D: process of task[%d] return error: %v", t.Id, err)
		}
	} else {
		t.SetStatus("success")
		log.Printf("D: process of task[%d] done", t.Id)
	}

	persistResult(t)
}
// persistResult writes the task's stdout, stderr and final status to
// the meta directory so results survive an agent restart (the reverse
// of loadResult).
func persistResult(t *Task) {
	metadir := config.C.MetaDir

	stdout := path.Join(metadir, fmt.Sprint(t.Id), "stdout")
	stderr := path.Join(metadir, fmt.Sprint(t.Id), "stderr")
	doneFlag := path.Join(metadir, fmt.Sprint(t.Id), fmt.Sprintf("%d.done", t.Clock))

	// surface write failures instead of dropping them silently: a lost
	// done-flag makes the task re-run after an agent restart
	if _, err := file.WriteString(stdout, t.GetStdout()); err != nil {
		log.Printf("E: write %s fail: %v", stdout, err)
	}
	if _, err := file.WriteString(stderr, t.GetStderr()); err != nil {
		log.Printf("E: write %s fail: %v", stderr, err)
	}
	if _, err := file.WriteString(doneFlag, t.GetStatus()); err != nil {
		log.Printf("E: write %s fail: %v", doneFlag, err)
	}
}
// killProcess kills the task's process (group) via CmdKill, records
// either "killed" or "killfailed", and persists the result.
func killProcess(t *Task) {
	t.SetAlive(true)
	defer t.SetAlive(false)

	log.Printf("D: begin kill process of task[%d]", t.Id)
	err := CmdKill(t.Cmd)
	if err != nil {
		t.SetStatus("killfailed")
		log.Printf("D: kill process of task[%d] fail: %v", t.Id, err)
	} else {
		t.SetStatus("killed")
		log.Printf("D: process of task[%d] killed", t.Id)
	}

	persistResult(t)
}

120
ibex/agentd/timer/tasks.go Normal file
View File

@@ -0,0 +1,120 @@
package timer
import (
"log"
"github.com/ccfos/nightingale/v6/ibex/types"
)
// LocalTasksT indexes the tasks this agent currently tracks, by task id.
// NOTE(review): there is no lock here — in the visible code the map is
// only touched from the heartbeat path; confirm before adding other
// concurrent callers.
type LocalTasksT struct {
	M map[int64]*Task
}

// Locals is the process-wide task registry.
var Locals = &LocalTasksT{M: make(map[int64]*Task)}
// ReportTasks snapshots every locally finished task for the heartbeat
// report. Tasks in an intermediate state (running/killing) are skipped;
// stdout/stderr are truncated to their last 64 KiB so the server-side
// database is not flooded.
func (lt *LocalTasksT) ReportTasks() []types.ReportTask {
	ret := make([]types.ReportTask, 0, len(lt.M))
	for id, t := range lt.M {
		rt := types.ReportTask{Id: id, Clock: t.Clock}

		rt.Status = t.GetStatus()
		if rt.Status == "running" || rt.Status == "killing" {
			// intermediate state
			continue
		}

		rt.Stdout = t.GetStdout()
		rt.Stderr = t.GetStderr()

		stdoutLen := len(rt.Stdout)
		stderrLen := len(rt.Stderr)

		// keep only the tail of oversized output
		if stdoutLen > 65535 {
			start := stdoutLen - 65535
			rt.Stdout = rt.Stdout[start:]
		}

		if stderrLen > 65535 {
			start := stderrLen - 65535
			rt.Stderr = rt.Stderr[start:]
		}

		ret = append(ret, rt)
	}

	return ret
}
// GetTask looks up a tracked task by id.
func (lt *LocalTasksT) GetTask(id int64) (*Task, bool) {
	t, found := lt.M[id]
	return t, found
}

// SetTask registers (or replaces) a task under its id.
func (lt *LocalTasksT) SetTask(t *Task) {
	lt.M[t.Id] = t
}
// AssignTask applies one server assignment: it deduplicates repeats of
// the same (clock, action), replays persisted results for runs finished
// before a restart, and dispatches "start"/"kill" actions.
func (lt *LocalTasksT) AssignTask(at types.AssignTask) {
	local, found := lt.GetTask(at.Id)
	if found {
		if local.Clock == at.Clock && local.Action == at.Action {
			// ignore repeat task
			return
		}

		local.Clock = at.Clock
		local.Action = at.Action
	} else {
		if at.Action == "kill" {
			// no process in local, no need kill
			return
		}

		local = &Task{
			Id:     at.Id,
			Clock:  at.Clock,
			Action: at.Action,
		}

		lt.SetTask(local)

		// this run already completed (e.g. before an agent restart):
		// reuse the persisted result instead of executing again
		if local.doneBefore() {
			local.loadResult()
			return
		}
	}

	if local.Action == "kill" {
		local.SetStatus("killing")
		local.kill()
	} else if local.Action == "start" {
		local.SetStatus("running")
		local.start()
	} else {
		log.Printf("W: unknown action: %s of task %d", at.Action, at.Id)
	}
}
// Clean drops local tasks the server no longer mentions in its
// heartbeat response, releasing their buffers and process handles.
// Still-running tasks are kept so their result keeps being reported.
func (lt *LocalTasksT) Clean(assigned map[int64]struct{}) {
	del := make(map[int64]struct{})
	for id := range lt.M {
		if _, found := assigned[id]; !found {
			del[id] = struct{}{}
		}
	}

	for id := range del {
		// The server stopped tracking this task but it is still running
		// locally (the server may consider it timed out). Keep it and
		// continue reporting instead of deleting it.
		if lt.M[id].GetStatus() == "running" {
			continue
		}

		lt.M[id].ResetBuff()
		cmd := lt.M[id].Cmd
		delete(lt.M, id)
		if cmd != nil && cmd.Process != nil {
			cmd.Process.Release()
		}
	}
}

82
ibex/ibex.go Normal file
View File

@@ -0,0 +1,82 @@
package ibex
import (
"fmt"
"github.com/ccfos/nightingale/v6/pkg/ctx"
"os"
"strings"
"github.com/ccfos/nightingale/v6/ibex/server/config"
"github.com/ccfos/nightingale/v6/ibex/server/router"
"github.com/ccfos/nightingale/v6/ibex/server/rpc"
"github.com/ccfos/nightingale/v6/ibex/server/timer"
"github.com/ccfos/nightingale/v6/storage"
"github.com/ccfos/nightingale/v6/alert/aconf"
n9eRouter "github.com/ccfos/nightingale/v6/center/router"
"github.com/ccfos/nightingale/v6/conf"
n9eConf "github.com/ccfos/nightingale/v6/conf"
"github.com/gin-gonic/gin"
"github.com/redis/go-redis/v9"
"gorm.io/gorm"
)
var (
	// HttpPort is set once in ServerStart from its httpPort argument.
	// NOTE(review): its consumers are outside this file chunk.
	HttpPort int
)
// ServerStart boots the embedded ibex server inside n9e: it copies the
// relevant n9e settings into the ibex config, mounts the ibex routes on
// the given gin engine, initializes the redis-backed id generator,
// starts the RPC listener and — on the center node — the scheduling
// timers. Non-center nodes only record the center API settings.
// NOTE(review): the db parameter is unused in this function body.
func ServerStart(ctx *ctx.Context, isCenter bool, db *gorm.DB, rc redis.Cmdable, basicAuth gin.Accounts, heartbeat aconf.HeartbeatConfig,
	api *n9eConf.CenterApi, r *gin.Engine, centerRouter *n9eRouter.Router, ibex conf.Ibex, httpPort int) {
	config.C.IsCenter = isCenter
	config.C.BasicAuth = make(gin.Accounts)
	if len(basicAuth) > 0 {
		config.C.BasicAuth = basicAuth
	}
	config.C.Heartbeat.IP = heartbeat.IP
	config.C.Heartbeat.Interval = heartbeat.Interval
	config.C.Heartbeat.LocalAddr = schedulerAddrGet(ibex.RPCListen)

	HttpPort = httpPort
	config.C.Output.ComeFrom = ibex.Output.ComeFrom
	config.C.Output.AgtdPort = ibex.Output.AgtdPort

	rou := router.NewRouter(ctx)
	if centerRouter != nil {
		rou.ConfigRouter(r, centerRouter)
	} else {
		rou.ConfigRouter(r)
	}

	ctx.Redis = rc
	if err := storage.IdInit(ctx.Redis); err != nil {
		fmt.Println("cannot init id generator: ", err)
		os.Exit(1)
	}

	rpc.Start(ibex.RPCListen, ctx)

	if isCenter {
		// scheduling loops run only on the center node
		go timer.Heartbeat(ctx)
		go timer.Schedule(ctx)
		go timer.CleanLong(ctx)
	} else {
		config.C.CenterApi = *api
	}

	timer.CacheHostDoing(ctx)
	timer.ReportResult(ctx)
}
// schedulerAddrGet builds the scheduler's advertised address from the
// auto-detected outbound IP and the port of the RPC listen address.
// Exits the process when either part cannot be determined, matching the
// startup-failure style of the surrounding code.
func schedulerAddrGet(rpcListen string) string {
	ip := fmt.Sprint(config.GetOutboundIP())
	if ip == "" {
		fmt.Println("heartbeat ip auto got is blank")
		os.Exit(1)
	}

	// guard the split: a listen address without ":" would otherwise
	// panic with index out of range
	parts := strings.Split(rpcListen, ":")
	if len(parts) < 2 || parts[1] == "" {
		fmt.Println("invalid rpc listen address:", rpcListen)
		os.Exit(1)
	}
	port := parts[1]

	localAddr := ip + ":" + port
	return localAddr
}

View File

@@ -0,0 +1,135 @@
package config
import (
"fmt"
"net"
"os"
"strings"
"sync"
"github.com/ccfos/nightingale/v6/pkg/httpx"
"github.com/ccfos/nightingale/v6/pkg/logx"
"github.com/ccfos/nightingale/v6/conf"
"github.com/ccfos/nightingale/v6/pkg/ormx"
"github.com/ccfos/nightingale/v6/storage"
"github.com/gin-gonic/gin"
"github.com/koding/multiconfig"
)
var (
C = new(Config)
once sync.Once
)
// MustLoad parses configuration files into the package-level C,
// resolves the advertised heartbeat IP and derives Heartbeat.LocalAddr
// from the RPC listen port. Runs at most once; exits the process on any
// error.
func MustLoad(fpaths ...string) {
	once.Do(func() {
		loaders := []multiconfig.Loader{
			&multiconfig.TagLoader{},
			&multiconfig.EnvironmentLoader{},
		}

		// pick a loader per file based on its extension
		for _, fpath := range fpaths {
			handled := false

			if strings.HasSuffix(fpath, "toml") {
				loaders = append(loaders, &multiconfig.TOMLLoader{Path: fpath})
				handled = true
			}
			// .conf files are parsed as TOML as well
			if strings.HasSuffix(fpath, "conf") {
				loaders = append(loaders, &multiconfig.TOMLLoader{Path: fpath})
				handled = true
			}
			if strings.HasSuffix(fpath, "json") {
				loaders = append(loaders, &multiconfig.JSONLoader{Path: fpath})
				handled = true
			}
			if strings.HasSuffix(fpath, "yaml") {
				loaders = append(loaders, &multiconfig.YAMLLoader{Path: fpath})
				handled = true
			}

			if !handled {
				fmt.Println("config file invalid, valid file exts: .conf,.yaml,.toml,.json")
				os.Exit(1)
			}
		}

		m := multiconfig.DefaultLoader{
			Loader:    multiconfig.MultiLoader(loaders...),
			Validator: multiconfig.MultiValidator(&multiconfig.RequiredValidator{}),
		}
		m.MustLoad(C)

		if C.Heartbeat.IP == "" {
			// auto detect
			C.Heartbeat.IP = fmt.Sprint(GetOutboundIP())

			if C.Heartbeat.IP == "" {
				fmt.Println("heartbeat ip auto got is blank")
				os.Exit(1)
			}
		}

		// NOTE(review): this indexing panics if RPC.Listen contains no
		// ":" — consider validating the listen address first.
		port := strings.Split(C.RPC.Listen, ":")[1]
		endpoint := C.Heartbeat.IP + ":" + port
		C.Heartbeat.LocalAddr = endpoint

		// Normally this is never 127.0.0.1, but on a single offline
		// machine (e.g. local debugging without network) it can be —
		// hence the check below stays disabled.
		// if C.Heartbeat.IP == "127.0.0.1" {
		// 	fmt.Println("heartbeat ip is 127.0.0.1 and it is useless, so, exit")
		// 	os.Exit(1)
		// }

		fmt.Println("heartbeat.ip:", C.Heartbeat.IP)
		fmt.Printf("heartbeat.interval: %dms\n", C.Heartbeat.Interval)
	})
}
// Config is the embedded ibex server configuration, populated by
// MustLoad and/or directly by ServerStart when running inside n9e.
type Config struct {
	RunMode   string
	RPC       RPC
	Heartbeat Heartbeat
	Output    Output
	IsCenter  bool
	CenterApi conf.CenterApi
	Log       logx.Config
	HTTP      httpx.Config
	BasicAuth gin.Accounts
	DB        ormx.DBConfig
	Redis     storage.RedisConfig
}

// RPC holds the RPC listener settings.
type RPC struct {
	Listen string // listen address, "ip:port"
}

// Heartbeat identifies this scheduler instance.
type Heartbeat struct {
	IP        string // advertised IP; auto-detected when blank
	Interval  int64  // heartbeat interval in milliseconds
	LocalAddr string // "IP:rpc-port", derived in MustLoad / ServerStart
}

// Output controls task-output related settings.
// NOTE(review): the semantics of ComeFrom/AgtdPort are not visible in
// this chunk — confirm against their consumers before documenting more.
type Output struct {
	ComeFrom string
	AgtdPort int
}
// IsDebugMode reports whether the server runs with RunMode "debug".
func (c *Config) IsDebugMode() bool {
	return c.RunMode == "debug"
}
// GetOutboundIP returns the local IP the kernel would pick to reach the
// internet, discovered by opening a connectionless UDP socket towards a
// public address (no packets are sent). Exits the process on failure.
func GetOutboundIP() net.IP {
	conn, err := net.Dial("udp", "8.8.8.8:80")
	if err != nil {
		fmt.Println("auto get outbound ip fail:", err)
		os.Exit(1)
	}
	defer conn.Close()

	return conn.LocalAddr().(*net.UDPAddr).IP
}

View File

@@ -0,0 +1,144 @@
package logic
import (
"github.com/ccfos/nightingale/v6/models"
"github.com/ccfos/nightingale/v6/pkg/ctx"
"github.com/toolkits/pkg/logger"
"github.com/toolkits/pkg/slice"
"github.com/toolkits/pkg/str"
)
// ScheduleTask advances one task through its lifecycle: when no host is
// waiting it tries to finalize the task, otherwise it acts on the
// task's current action record ("start" dispatches the next batch;
// "pause", "cancel" and "kill" leave scheduling alone here).
func ScheduleTask(ctx *ctx.Context, id int64) {
	logger.Debugf("task[%d] scheduling...", id)
	count, err := models.WaitingHostCount(ctx, id)
	if err != nil {
		logger.Errorf("cannot get task[%d] waiting host count: %v", id, err)
		return
	}

	// no waiting host: the task may be finished, try to clean it up
	if count == 0 {
		cleanDoneTask(ctx, id)
		return
	}

	action, err := models.TaskActionGet(ctx, "id=?", id)
	if err != nil {
		logger.Errorf("cannot get task[%d] action: %v", id, err)
		return
	}

	if action == nil {
		logger.Errorf("[W] no action found of task[%d]", id)
		return
	}

	switch action.Action {
	case "start":
		startTask(ctx, id, action)
	case "pause":
		return
	case "cancel":
		return
	case "kill":
		return
	default:
		logger.Errorf("unknown action: %s of task[%d]", action.Action, id)
	}
}
// cleanDoneTask finalizes task id once no host remains in an
// intermediate ("ing") status by removing its scheduling rows.
func cleanDoneTask(ctx *ctx.Context, id int64) {
	ingCount, err := models.IngStatusHostCount(ctx, id)
	if err != nil {
		logger.Errorf("cannot get task[%d] ing status host count: %v", id, err)
		return
	}

	// some hosts are still in flight; try again on a later tick
	if ingCount > 0 {
		return
	}

	if err = models.CleanDoneTask(ctx, id); err != nil {
		logger.Errorf("cannot clean done task[%d]: %v", id, err)
		// do not claim completion when the cleanup failed
		return
	}

	logger.Debugf("task[%d] done", id)
}
// startTask dispatches the next batch of waiting hosts for task id.
// It pauses the task when too many hosts already failed (tolerance
// exceeded) or when a configured pause-host is reached in the batch.
func startTask(ctx *ctx.Context, id int64, action *models.TaskAction) {
	meta, err := models.TaskMetaGetByID(ctx, id)
	if err != nil {
		logger.Errorf("cannot get task[%d] meta: %v", id, err)
		return
	}

	if meta == nil {
		logger.Errorf("task[%d] meta lost", id)
		return
	}

	count, err := models.UnexpectedHostCount(ctx, id)
	if err != nil {
		logger.Errorf("cannot get task[%d] unexpected host count: %v", id, err)
		return
	}

	// too many failed hosts: pause the task instead of rolling on
	if count > int64(meta.Tolerance) {
		err = action.Update(ctx, "pause")
		if err != nil {
			logger.Errorf("cannot update task[%d] action to 'pause': %v", id, err)
		}
		return
	}

	waitings, err := models.WaitingHostList(ctx, id)
	if err != nil {
		logger.Errorf("cannot get task[%d] waiting host: %v", id, err)
		return
	}

	waitingsCount := len(waitings)
	if waitingsCount == 0 {
		return
	}

	doingsCount, err := models.TableRecordCount(ctx, models.TaskHostDoing{}.TableName(), "id=?", id)
	if err != nil {
		logger.Errorf("cannot get task[%d] doing host count: %v", id, err)
		return
	}

	// Batch == 0 means no batching: dispatch every waiting host at once
	need := meta.Batch - int(doingsCount)
	if meta.Batch == 0 {
		need = waitingsCount
	}

	if need <= 0 {
		return
	}

	if need > waitingsCount {
		need = waitingsCount
	}

	// pause-hosts: when one appears in this batch, run up to and
	// including it, then pause the task
	arr := str.ParseCommaTrim(meta.Pause)
	end := need
	for i := 0; i < need; i++ {
		if slice.ContainsString(arr, waitings[i].Host) {
			end = i + 1
			err = action.Update(ctx, "pause")
			if err != nil {
				logger.Errorf("cannot update task[%d] action to 'pause': %v", id, err)
				return
			}
			break
		}
	}

	err = models.RunWaitingHosts(ctx, waitings[:end])
	if err != nil {
		logger.Errorf("cannot run waiting hosts: %v", err)
	}
}

View File

@@ -0,0 +1,45 @@
package logic
import (
"github.com/ccfos/nightingale/v6/models"
"github.com/ccfos/nightingale/v6/pkg/ctx"
"time"
"github.com/toolkits/pkg/logger"
)
// CheckTimeout marks doing-hosts of task id as "timeout" when they have
// been in the doing state longer than the task's timeout plus a 3s
// dispatch allowance.
func CheckTimeout(ctx *ctx.Context, id int64) {
	meta, err := models.TaskMetaGetByID(ctx, id)
	if err != nil {
		logger.Errorf("cannot get task[%d] meta: %v", id, err)
		return
	}

	if meta == nil {
		logger.Errorf("task[%d] meta lost", id)
		return
	}

	hosts, err := models.TableRecordGets[[]models.TaskHostDoing](ctx, models.TaskHostDoing{}.TableName(), "id=?", id)
	if err != nil {
		logger.Errorf("cannot get task[%d] doing host list: %v", id, err)
		return
	}

	count := len(hosts)
	if count == 0 {
		return
	}

	// 3s: task dispatch duration: web -> db -> scheduler -> executor
	timeout := int64(meta.Timeout + 3)
	now := time.Now().Unix()

	for i := 0; i < count; i++ {
		if now-hosts[i].Clock > timeout {
			err = models.MarkDoneStatus(ctx, hosts[i].Id, hosts[i].Clock, hosts[i].Host, "timeout", "", "")
			if err != nil {
				logger.Errorf("cannot mark task[%d] done status: %v", id, err)
			}
		}
	}
}

View File

@@ -0,0 +1,40 @@
package router
import (
"github.com/ccfos/nightingale/v6/models"
"github.com/ccfos/nightingale/v6/pkg/ctx"
"net/http"
"strings"
"github.com/toolkits/pkg/errorx"
)
// TaskMeta loads the task meta by id, aborting the request with 500 on a DB
// error and 404 when the task does not exist.
func TaskMeta(ctx *ctx.Context, id int64) *models.TaskMeta {
	meta, err := models.TaskMetaGet(ctx, "id = ?", id)
	errorx.Dangerous(err)

	if meta == nil {
		errorx.Bomb(http.StatusNotFound, "no such task meta")
	}

	return meta
}
// cleanHosts trims whitespace from the submitted host list and drops blank
// entries as well as '#'-prefixed comment lines.
func cleanHosts(formHosts []string) []string {
	hosts := make([]string, 0, len(formHosts))
	for _, raw := range formHosts {
		host := strings.TrimSpace(raw)
		if host == "" || strings.HasPrefix(host, "#") {
			continue
		}
		hosts = append(hosts, host)
	}
	return hosts
}

View File

@@ -0,0 +1,612 @@
package router
import (
"fmt"
"github.com/ccfos/nightingale/v6/models"
"github.com/ccfos/nightingale/v6/pkg/ctx"
"strconv"
"io/ioutil"
"net/http"
"time"
"github.com/ccfos/nightingale/v6/ibex/server/config"
"github.com/ccfos/nightingale/v6/storage"
"github.com/gin-gonic/gin"
"github.com/toolkits/pkg/errorx"
"github.com/toolkits/pkg/ginx"
"github.com/toolkits/pkg/logger"
"github.com/toolkits/pkg/slice"
"github.com/toolkits/pkg/str"
)
// taskStdout returns the stdout of every host of the task.
func (rou *Router) taskStdout(c *gin.Context) {
	meta := TaskMeta(rou.ctx, UrlParamsInt64(c, "id"))
	stdouts, err := meta.Stdouts(rou.ctx)
	ginx.NewRender(c).Data(stdouts, err)
}

// taskStderr returns the stderr of every host of the task.
func (rou *Router) taskStderr(c *gin.Context) {
	meta := TaskMeta(rou.ctx, UrlParamsInt64(c, "id"))
	stderrs, err := meta.Stderrs(rou.ctx)
	ginx.NewRender(c).Data(stderrs, err)
}

// TODO: should not only look at task_action, but also at the execution state of every host
// taskState reports the task's current action; when no task_action row
// remains the task is considered "done".
func (rou *Router) taskState(c *gin.Context) {
	action, err := models.TaskActionGet(rou.ctx, "id=?", UrlParamsInt64(c, "id"))
	if err != nil {
		ginx.NewRender(c).Data("", err)
		return
	}

	state := "done"
	if action != nil {
		state = action.Action
	}

	ginx.NewRender(c).Data(state, err)
}

// taskResult groups the hosts of the task by their status,
// e.g. {"success": ["host1"], "failed": ["host2"]}.
func (rou *Router) taskResult(c *gin.Context) {
	id := UrlParamsInt64(c, "id")

	hosts, err := models.TaskHostStatus(rou.ctx, id)
	if err != nil {
		errorx.Bomb(500, "load task hosts of %d occur error %v", id, err)
	}

	ss := make(map[string][]string)

	total := len(hosts)
	for i := 0; i < total; i++ {
		s := hosts[i].Status
		ss[s] = append(ss[s], hosts[i].Host)
	}

	ginx.NewRender(c).Data(ss, nil)
}

// taskHostOutput returns the whole task_host record (including stdout and
// stderr) of one host of the task.
func (rou *Router) taskHostOutput(c *gin.Context) {
	obj, err := models.TaskHostGet(rou.ctx, UrlParamsInt64(c, "id"), ginx.UrlParamStr(c, "host"))
	ginx.NewRender(c).Data(obj, err)
}
// taskHostStdout returns the stdout of a single host of a task.
//
// When Output.ComeFrom is "database" (or unset) the output is read from the
// task_host table; otherwise it is proxied from the agentd process listening
// on Output.AgtdPort on the target host.
func (rou *Router) taskHostStdout(c *gin.Context) {
	id := UrlParamsInt64(c, "id")
	host := ginx.UrlParamStr(c, "host")

	if config.C.Output.ComeFrom == "database" || config.C.Output.ComeFrom == "" {
		obj, err := models.TaskHostGet(rou.ctx, id, host)
		errorx.Dangerous(err)
		// bugfix: obj was dereferenced without a nil check, panicking when the
		// host is not part of the task
		if obj == nil {
			errorx.Bomb(http.StatusNotFound, "no such task host: %d %s", id, host)
		}
		ginx.NewRender(c).Data(obj.Stdout, nil)
		return
	}

	if config.C.Output.AgtdPort <= 0 || config.C.Output.AgtdPort > 65535 {
		ginx.NewRender(c).Message(fmt.Errorf("remotePort(%d) invalid", config.C.Output.AgtdPort))
		return
	}

	url := fmt.Sprintf("http://%s:%d/output/%d/stdout.json", host, config.C.Output.AgtdPort, id)
	client := &http.Client{
		// do not follow redirects: return the agentd response as-is
		CheckRedirect: func(req *http.Request, via []*http.Request) error {
			return http.ErrUseLastResponse
		},
	}

	resp, err := client.Get(url)
	errorx.Dangerous(err)
	defer resp.Body.Close()

	bs, err := ioutil.ReadAll(resp.Body)
	errorx.Dangerous(err)

	c.Writer.Header().Set("Content-Type", "application/json; charset=UTF-8")
	c.Writer.Write(bs)
}
// taskHostStderr returns the stderr of a single host of a task.
//
// When Output.ComeFrom is "database" (or unset) the output is read from the
// task_host table; otherwise it is proxied from the agentd process listening
// on Output.AgtdPort on the target host.
func (rou *Router) taskHostStderr(c *gin.Context) {
	id := UrlParamsInt64(c, "id")
	host := ginx.UrlParamStr(c, "host")

	if config.C.Output.ComeFrom == "database" || config.C.Output.ComeFrom == "" {
		obj, err := models.TaskHostGet(rou.ctx, id, host)
		errorx.Dangerous(err)
		// bugfix: obj was dereferenced without a nil check, panicking when the
		// host is not part of the task
		if obj == nil {
			errorx.Bomb(http.StatusNotFound, "no such task host: %d %s", id, host)
		}
		ginx.NewRender(c).Data(obj.Stderr, nil)
		return
	}

	if config.C.Output.AgtdPort <= 0 || config.C.Output.AgtdPort > 65535 {
		ginx.NewRender(c).Message(fmt.Errorf("remotePort(%d) invalid", config.C.Output.AgtdPort))
		return
	}

	url := fmt.Sprintf("http://%s:%d/output/%d/stderr.json", host, config.C.Output.AgtdPort, id)
	client := &http.Client{
		// do not follow redirects: return the agentd response as-is
		CheckRedirect: func(req *http.Request, via []*http.Request) error {
			return http.ErrUseLastResponse
		},
	}

	resp, err := client.Get(url)
	errorx.Dangerous(err)
	defer resp.Body.Close()

	bs, err := ioutil.ReadAll(resp.Body)
	errorx.Dangerous(err)

	c.Writer.Header().Set("Content-Type", "application/json; charset=UTF-8")
	c.Writer.Write(bs)
}
// taskStdoutTxt renders the stdout of every host as plain text, one
// "host:\n<stdout>" section per host, separated by blank lines.
func (rou *Router) taskStdoutTxt(c *gin.Context) {
	id := UrlParamsInt64(c, "id")

	meta, err := models.TaskMetaGet(rou.ctx, "id = ?", id)
	if err != nil {
		c.String(500, err.Error())
		return
	}

	if meta == nil {
		c.String(404, "no such task")
		return
	}

	stdouts, err := meta.Stdouts(rou.ctx)
	if err != nil {
		c.String(500, err.Error())
		return
	}

	w := c.Writer
	w.Header().Set("Content-Type", "text/plain; charset=utf-8")

	count := len(stdouts)
	for i := 0; i < count; i++ {
		if i != 0 {
			w.Write([]byte("\n\n"))
		}
		w.Write([]byte(stdouts[i].Host + ":\n"))
		w.Write([]byte(stdouts[i].Stdout))
	}
}

// taskStderrTxt renders the stderr of every host as plain text, same layout
// as taskStdoutTxt.
func (rou *Router) taskStderrTxt(c *gin.Context) {
	id := UrlParamsInt64(c, "id")

	meta, err := models.TaskMetaGet(rou.ctx, "id = ?", id)
	if err != nil {
		c.String(500, err.Error())
		return
	}

	if meta == nil {
		c.String(404, "no such task")
		return
	}

	stderrs, err := meta.Stderrs(rou.ctx)
	if err != nil {
		c.String(500, err.Error())
		return
	}

	w := c.Writer
	w.Header().Set("Content-Type", "text/plain; charset=utf-8")

	count := len(stderrs)
	for i := 0; i < count; i++ {
		if i != 0 {
			w.Write([]byte("\n\n"))
		}
		w.Write([]byte(stderrs[i].Host + ":\n"))
		w.Write([]byte(stderrs[i].Stderr))
	}
}

// TaskStdoutData is one host's entry in the stdout.json response.
type TaskStdoutData struct {
	Host   string `json:"host"`
	Stdout string `json:"stdout"`
}

// TaskStderrData is one host's entry in the stderr.json response.
type TaskStderrData struct {
	Host   string `json:"host"`
	Stderr string `json:"stderr"`
}
// taskStdoutJSON returns stdout as JSON. With the optional query arg `host`
// only that host's stdout is returned; otherwise one entry per host.
func (rou *Router) taskStdoutJSON(c *gin.Context) {
	task := TaskMeta(rou.ctx, UrlParamsInt64(c, "id"))
	host := ginx.QueryStr(c, "host", "")

	var ret []TaskStdoutData

	if host != "" {
		obj, err := models.TaskHostGet(rou.ctx, task.Id, host)
		if err != nil {
			ginx.NewRender(c).Data("", err)
			return
		}
		if obj == nil {
			// bugfix: error message typo "eixsts" -> "exists"
			ginx.NewRender(c).Data("", fmt.Errorf("task: %d, host(%s) not exists", task.Id, host))
			return
		}
		ret = append(ret, TaskStdoutData{
			Host:   host,
			Stdout: obj.Stdout,
		})
	} else {
		hosts, err := models.TaskHostGets(rou.ctx, task.Id)
		if err != nil {
			ginx.NewRender(c).Data("", err)
			return
		}
		ret = make([]TaskStdoutData, 0, len(hosts))
		for i := range hosts {
			ret = append(ret, TaskStdoutData{
				Host:   hosts[i].Host,
				Stdout: hosts[i].Stdout,
			})
		}
	}

	ginx.NewRender(c).Data(ret, nil)
}
// taskStderrJSON returns stderr as JSON. With the optional query arg `host`
// only that host's stderr is returned; otherwise one entry per host.
func (rou *Router) taskStderrJSON(c *gin.Context) {
	task := TaskMeta(rou.ctx, UrlParamsInt64(c, "id"))
	host := ginx.QueryStr(c, "host", "")

	var ret []TaskStderrData

	if host != "" {
		obj, err := models.TaskHostGet(rou.ctx, task.Id, host)
		if err != nil {
			ginx.NewRender(c).Data("", err)
			return
		}
		if obj == nil {
			// bugfix: error message typo "eixsts" -> "exists"
			ginx.NewRender(c).Data("", fmt.Errorf("task: %d, host(%s) not exists", task.Id, host))
			return
		}
		ret = append(ret, TaskStderrData{
			Host:   host,
			Stderr: obj.Stderr,
		})
	} else {
		hosts, err := models.TaskHostGets(rou.ctx, task.Id)
		if err != nil {
			ginx.NewRender(c).Data("", err)
			return
		}
		ret = make([]TaskStderrData, 0, len(hosts))
		for i := range hosts {
			ret = append(ret, TaskStderrData{
				Host:   hosts[i].Host,
				Stderr: hosts[i].Stderr,
			})
		}
	}

	ginx.NewRender(c).Data(ret, nil)
}
// taskForm is the payload accepted by taskAdd when creating a task.
type taskForm struct {
	Title          string   `json:"title" binding:"required"`
	Account        string   `json:"account" binding:"required"` // account the script runs as on the target host
	Batch          int      `json:"batch"`                      // how many hosts run at a time; 0 means all waiting hosts at once
	Tolerance      int      `json:"tolerance"`                  // presumably the number of tolerated host failures -- TODO confirm against scheduler
	Timeout        int      `json:"timeout"`                    // per-host execution timeout, in seconds
	Pause          string   `json:"pause"`                      // comma-separated host names the rollout pauses at
	Script         string   `json:"script" binding:"required"`
	Args           string   `json:"args"`
	Stdin          string   `json:"stdin"`
	Action         string   `json:"action" binding:"required"` // initial task action (e.g. "start" / "pause")
	Creator        string   `json:"creator" binding:"required"`
	Hosts          []string `json:"hosts" binding:"required"` // target hosts; blank and '#'-prefixed entries are dropped
	AlertTriggered bool     `json:"alert_triggered"`          // true when the task was created by an alert rule (edge flow)
}
// taskAdd creates a task. Depending on deployment role and origin, the task
// is either persisted to the database (center flow) or cached in redis and
// dispatched directly to agentd (edge alert-triggered flow).
func (rou *Router) taskAdd(c *gin.Context) {
	var f taskForm
	ginx.BindJSON(c, &f)

	hosts := cleanHosts(f.Hosts)
	if len(hosts) == 0 {
		errorx.Bomb(http.StatusBadRequest, "arg(hosts) empty")
	}

	taskMeta := &models.TaskMeta{
		Title:     f.Title,
		Account:   f.Account,
		Batch:     f.Batch,
		Tolerance: f.Tolerance,
		Timeout:   f.Timeout,
		Pause:     f.Pause,
		Script:    f.Script,
		Args:      f.Args,
		Stdin:     f.Stdin,
		Creator:   f.Creator,
	}

	err := taskMeta.CleanFields()
	ginx.Dangerous(err)
	taskMeta.HandleFH(hosts[0])

	authUser := c.MustGet(gin.AuthUserKey).(string)
	// Tasks are either triggered by an alert rule or submitted by a user via n9e center.
	// An edge-zone alert-triggered task needs no scheduling, and the edge may be cut off
	// from the DB, so it is cached in redis and dispatched straight to agentd.
	if !config.C.IsCenter && f.AlertTriggered {
		if err := taskMeta.Create(rou.ctx); err != nil {
			// The DB is unreachable: generate a unique id so different edge tasks don't collide.
			// A redis auto-increment id keeps different n9e-edge instances in the same zone from
			// producing the same id; it cannot prevent collisions across different edge zones,
			// so these ids are never reported back into the database -- they are only used for
			// the local closed-loop execution.
			taskMeta.Id, err = storage.IdGet(rou.ctx.Redis)
			ginx.Dangerous(err)
		}
		// NOTE(review): this err is the outer one from CleanFields (the Create error above is
		// shadowed), so this branch always runs here -- confirm that is the intent.
		if err == nil {
			taskHost := models.TaskHost{
				Id:     taskMeta.Id,
				Host:   hosts[0],
				Status: "running",
			}
			if err = taskHost.Create(rou.ctx); err != nil {
				logger.Warningf("task_add_fail: authUser=%s title=%s err=%s", authUser, taskMeta.Title, err.Error())
			}
		}
		// cache the task meta and the to-be-dispatched task
		err = taskMeta.Cache(rou.ctx, hosts[0])
		ginx.Dangerous(err)
	} else {
		// center zone: keep the original logic
		err = taskMeta.Save(rou.ctx, hosts, f.Action)
		ginx.Dangerous(err)
	}

	logger.Infof("task_add_succ: authUser=%s title=%s", authUser, taskMeta.Title)
	ginx.NewRender(c).Data(taskMeta.Id, err)
}
// taskGet returns the task meta, its hosts, and the current action; when no
// task_action row remains the meta is flagged Done.
func (rou *Router) taskGet(c *gin.Context) {
	meta := TaskMeta(rou.ctx, UrlParamsInt64(c, "id"))

	hosts, err := meta.Hosts(rou.ctx)
	errorx.Dangerous(err)

	action, err := meta.Action(rou.ctx)
	errorx.Dangerous(err)

	actionStr := ""
	if action != nil {
		actionStr = action.Action
	} else {
		meta.Done = true
	}

	ginx.NewRender(c).Data(gin.H{
		"meta":   meta,
		"hosts":  hosts,
		"action": actionStr,
	}, nil)
}

// doneIds: given a batch of ids, return the ids of tasks that are already
// done (i.e. no task_action row exists for them any more).
func (rou *Router) doneIds(c *gin.Context) {
	ids := ginx.QueryStr(c, "ids", "")
	if ids == "" {
		errorx.Dangerous("arg(ids) empty")
	}

	idsint64 := str.IdsInt64(ids, ",")
	if len(idsint64) == 0 {
		errorx.Dangerous("arg(ids) empty")
	}

	exists, err := models.TaskActionExistsIds(rou.ctx, idsint64)
	errorx.Dangerous(err)

	dones := slice.SubInt64(idsint64, exists)

	ginx.NewRender(c).Data(gin.H{
		"list": dones,
	}, nil)
}
// taskGets lists task metas created within the last `days` days, filtered by
// creator and query with pagination, and annotates each with its done state.
func (rou *Router) taskGets(c *gin.Context) {
	query := ginx.QueryStr(c, "query", "")
	limit := ginx.QueryInt(c, "limit", 20)
	creator := ginx.QueryStr(c, "creator", "")
	days := ginx.QueryInt64(c, "days", 7)

	before := time.Unix(time.Now().Unix()-days*24*3600, 0)

	total, err := models.TaskMetaTotal(rou.ctx, creator, query, before)
	errorx.Dangerous(err)

	list, err := models.TaskMetaGets(rou.ctx, creator, query, before, limit, ginx.Offset(c, limit))
	errorx.Dangerous(err)

	ids := make([]int64, len(list))
	for i := range list {
		ids[i] = list[i].Id
	}

	// a task that still has a task_action row is in-flight; the rest are done
	exists, err := models.TaskActionExistsIds(rou.ctx, ids)
	errorx.Dangerous(err)

	for i := range list {
		list[i].Done = !slice.ContainsInt64(exists, list[i].Id)
	}

	ginx.NewRender(c).Data(gin.H{
		"total": total,
		"list":  list,
	}, nil)
}
// actionForm carries the requested action, e.g. "start", "pause", "kill",
// "ignore", "redo".
type actionForm struct {
	Action string `json:"action"`
}

// taskAction updates the task-level action; refused once the task has
// finished (its task_action row is gone).
func (rou *Router) taskAction(c *gin.Context) {
	meta := TaskMeta(rou.ctx, UrlParamsInt64(c, "id"))

	var f actionForm
	ginx.BindJSON(c, &f)

	action, err := models.TaskActionGet(rou.ctx, "id=?", meta.Id)
	errorx.Dangerous(err)

	if action == nil {
		errorx.Bomb(200, "task already finished, no more action can do")
	}

	ginx.NewRender(c).Message(action.Update(rou.ctx, f.Action))
}

// taskHostAction applies a per-host action (ignore/kill/redo) to one host of
// the task; aborts if the task already finished.
func (rou *Router) taskHostAction(c *gin.Context) {
	host := ginx.UrlParamStr(c, "host")
	meta := TaskMeta(rou.ctx, UrlParamsInt64(c, "id"))

	noopWhenDone(rou.ctx, meta.Id)

	var f actionForm
	ginx.BindJSON(c, &f)

	if f.Action == "ignore" {
		errorx.Dangerous(meta.IgnoreHost(rou.ctx, host))

		action, err := models.TaskActionGet(rou.ctx, "id=?", meta.Id)
		errorx.Dangerous(err)

		// a paused task does not resume automatically after ignoring a host
		if action != nil && action.Action == "pause" {
			ginx.NewRender(c).Data("you can click start to run the task", nil)
			return
		}
	}

	if f.Action == "kill" {
		errorx.Dangerous(meta.KillHost(rou.ctx, host))
	}

	if f.Action == "redo" {
		errorx.Dangerous(meta.RedoHost(rou.ctx, host))
	}

	ginx.NewRender(c).Message(nil)
}

// noopWhenDone aborts the request when the task has already finished.
func noopWhenDone(ctx *ctx.Context, id int64) {
	action, err := models.TaskActionGet(ctx, "id=?", id)
	errorx.Dangerous(err)

	if action == nil {
		errorx.Bomb(200, "task already finished, no more taskAction can do")
	}
}
// sqlCondForm is a generic table/where/args condition used by the edge-server
// table proxy endpoints.
type sqlCondForm struct {
	Table string
	Where string
	Args  []interface{}
}

// tableRecordListGet proxies record-list queries for edge servers that have
// no direct DB access; only a whitelist of tables is supported.
// NOTE(review): f.Args is forwarded as a single value -- confirm that
// TableRecordGets expands it into the individual query placeholders.
func (rou *Router) tableRecordListGet(c *gin.Context) {
	var f sqlCondForm
	ginx.BindJSON(c, &f)

	switch f.Table {
	case models.TaskHostDoing{}.TableName():
		lst, err := models.TableRecordGets[[]models.TaskHostDoing](rou.ctx, f.Table, f.Where, f.Args)
		ginx.NewRender(c).Data(lst, err)
	case models.TaskMeta{}.TableName():
		lst, err := models.TableRecordGets[[]models.TaskMeta](rou.ctx, f.Table, f.Where, f.Args)
		ginx.NewRender(c).Data(lst, err)
	default:
		ginx.Bomb(http.StatusBadRequest, "table[%v] not support", f.Table)
	}
}

// tableRecordCount proxies record-count queries for edge servers.
func (rou *Router) tableRecordCount(c *gin.Context) {
	var f sqlCondForm
	ginx.BindJSON(c, &f)
	ginx.NewRender(c).Data(models.TableRecordCount(rou.ctx, f.Table, f.Where, f.Args))
}

// markDoneForm is the payload of the mark-done proxy endpoint.
type markDoneForm struct {
	Id     int64
	Clock  int64
	Host   string
	Status string
	Stdout string
	Stderr string
}

// markDone lets an edge server mark one host of a task as finished.
func (rou *Router) markDone(c *gin.Context) {
	var f markDoneForm
	ginx.BindJSON(c, &f)
	ginx.NewRender(c).Message(models.MarkDoneStatus(rou.ctx, f.Id, f.Clock, f.Host, f.Status, f.Stdout, f.Stderr))
}

// taskMetaAdd lets an edge server persist a task meta into the center DB.
func (rou *Router) taskMetaAdd(c *gin.Context) {
	var f models.TaskMeta
	ginx.BindJSON(c, &f)
	err := f.Create(rou.ctx)
	ginx.NewRender(c).Data(f.Id, err)
}

// taskHostAdd upserts a single task_host record on behalf of an edge server.
func (rou *Router) taskHostAdd(c *gin.Context) {
	var f models.TaskHost
	ginx.BindJSON(c, &f)
	ginx.NewRender(c).Message(f.Upsert(rou.ctx))
}

// taskHostUpsert bulk-upserts task_host records on behalf of an edge server.
func (rou *Router) taskHostUpsert(c *gin.Context) {
	var f []models.TaskHost
	ginx.BindJSON(c, &f)
	ginx.NewRender(c).Data(models.TaskHostUpserts(rou.ctx, f))
}
// UrlParamsInt64 reads url param `field` as int64.
//
// Routes such as /busi-group/:id/task/:id declare the same param name twice;
// gin keeps both entries in c.Params, so when the name occurs twice the
// second (innermost) value wins. The previous implementation hard-coded "id"
// when scanning c.Params, which broke lookups for any other duplicated field
// name; it now honors the requested field.
func UrlParamsInt64(c *gin.Context, field string) int64 {
	var params []gin.Param
	for _, p := range c.Params {
		if p.Key == field {
			params = append(params, p)
		}
	}

	var strval string
	switch len(params) {
	case 1:
		strval = ginx.UrlParamStr(c, field)
	case 2:
		strval = params[1].Value
	default:
		logger.Warningf("url param[%+v] not ok", params)
		errorx.Bomb(http.StatusBadRequest, "url param[%s] is blank", field)
	}

	intval, err := strconv.ParseInt(strval, 10, 64)
	if err != nil {
		errorx.Bomb(http.StatusBadRequest, "cannot convert %s to int64", strval)
	}

	return intval
}

View File

@@ -0,0 +1,132 @@
package router
import (
"fmt"
"github.com/ccfos/nightingale/v6/pkg/ctx"
"os"
"strings"
"github.com/ccfos/nightingale/v6/ibex/server/config"
"github.com/ccfos/nightingale/v6/pkg/aop"
"github.com/ccfos/nightingale/v6/center/router"
"github.com/gin-contrib/pprof"
"github.com/gin-gonic/gin"
)
// New builds the gin engine for the embedded ibex HTTP server: recovery
// middleware always, access logging only when configured, plus the base and
// business routes.
func New(ctx *ctx.Context, version string) *gin.Engine {
	gin.SetMode(config.C.RunMode)

	loggerMid := aop.Logger()
	recoveryMid := aop.Recovery()

	if strings.ToLower(config.C.RunMode) == "release" {
		aop.DisableConsoleColor()
	}

	r := gin.New()

	r.Use(recoveryMid)

	// whether print access log
	if config.C.HTTP.PrintAccessLog {
		r.Use(loggerMid)
	}

	rou := NewRouter(ctx)
	rou.configBaseRouter(r, version)
	rou.ConfigRouter(r)

	return r
}

// Router bundles the shared context used by all ibex HTTP handlers.
type Router struct {
	ctx *ctx.Context
}

// NewRouter returns a Router bound to the given context.
func NewRouter(ctx *ctx.Context) *Router {
	return &Router{
		ctx: ctx,
	}
}

// configBaseRouter registers the unauthenticated infrastructure endpoints
// (ping/pid/addr/version and, optionally, pprof).
func (rou *Router) configBaseRouter(r *gin.Engine, version string) {
	if config.C.HTTP.PProf {
		pprof.Register(r, "/debug/pprof")
	}

	r.GET("/ping", func(c *gin.Context) {
		c.String(200, "pong")
	})

	r.GET("/pid", func(c *gin.Context) {
		c.String(200, fmt.Sprintf("%d", os.Getpid()))
	})

	r.GET("/addr", func(c *gin.Context) {
		c.String(200, c.Request.RemoteAddr)
	})

	r.GET("/version", func(c *gin.Context) {
		c.String(200, version)
	})
}
// ConfigRouter registers the task API twice: once under the n9e busi-group
// pages (with auth/permission middleware, only when a center router is passed
// in), and once under /ibex/v1 (optionally behind HTTP basic auth) for
// programmatic and edge-server access.
func (rou *Router) ConfigRouter(r *gin.Engine, rts ...*router.Router) {
	if len(rts) > 0 {
		rt := rts[0]
		pagesPrefix := "/api/n9e/busi-group/:id"
		pages := r.Group(pagesPrefix)
		{
			pages.GET("/task/:id", rt.Auth(), rt.User(), rt.Perm("/job-tasks"), rou.taskGet)
			pages.PUT("/task/:id/action", rt.Auth(), rt.User(), rt.Perm("/job-tasks/put"), rt.Bgrw(), rou.taskAction)
			pages.GET("/task/:id/stdout", rt.Auth(), rt.User(), rt.Perm("/job-tasks"), rou.taskStdout)
			pages.GET("/task/:id/stderr", rt.Auth(), rt.User(), rt.Perm("/job-tasks"), rou.taskStderr)
			pages.GET("/task/:id/state", rt.Auth(), rt.User(), rt.Perm("/job-tasks"), rou.taskState)
			pages.GET("/task/:id/result", rt.Auth(), rt.User(), rt.Perm("/job-tasks"), rou.taskResult)
			pages.PUT("/task/:id/host/:host/action", rt.Auth(), rt.User(), rt.Perm("/job-tasks/put"), rt.Bgrw(), rou.taskHostAction)
			pages.GET("/task/:id/host/:host/output", rt.Auth(), rt.User(), rt.Perm("/job-tasks"), rou.taskHostOutput)
			pages.GET("/task/:id/host/:host/stdout", rt.Auth(), rt.User(), rt.Perm("/job-tasks"), rou.taskHostStdout)
			pages.GET("/task/:id/host/:host/stderr", rt.Auth(), rt.User(), rt.Perm("/job-tasks"), rou.taskHostStderr)
			pages.GET("/task/:id/stdout.txt", rt.Auth(), rt.User(), rt.Perm("/job-tasks"), rou.taskStdoutTxt)
			pages.GET("/task/:id/stderr.txt", rt.Auth(), rt.User(), rt.Perm("/job-tasks"), rou.taskStderrTxt)
			pages.GET("/task/:id/stdout.json", rt.Auth(), rt.User(), rt.Perm("/job-tasks"), rou.taskStdoutJSON)
			pages.GET("/task/:id/stderr.json", rt.Auth(), rt.User(), rt.Perm("/job-tasks"), rou.taskStderrJSON)
		}
	}

	api := r.Group("/ibex/v1")
	// when basic-auth credentials are configured, rebuild the group with the
	// BasicAuth middleware attached
	if len(config.C.BasicAuth) > 0 {
		api = r.Group("/ibex/v1", gin.BasicAuth(config.C.BasicAuth))
	}
	{
		api.POST("/tasks", rou.taskAdd)
		api.GET("/tasks", rou.taskGets)
		api.GET("/tasks/done-ids", rou.doneIds)
		api.GET("/task/:id", rou.taskGet)
		api.PUT("/task/:id/action", rou.taskAction)
		api.GET("/task/:id/stdout", rou.taskStdout)
		api.GET("/task/:id/stderr", rou.taskStderr)
		api.GET("/task/:id/state", rou.taskState)
		api.GET("/task/:id/result", rou.taskResult)
		api.PUT("/task/:id/host/:host/action", rou.taskHostAction)
		api.GET("/task/:id/host/:host/output", rou.taskHostOutput)
		api.GET("/task/:id/host/:host/stdout", rou.taskHostStdout)
		api.GET("/task/:id/host/:host/stderr", rou.taskHostStderr)
		api.GET("/task/:id/stdout.txt", rou.taskStdoutTxt)
		api.GET("/task/:id/stderr.txt", rou.taskStderrTxt)
		api.GET("/task/:id/stdout.json", rou.taskStdoutJSON)
		api.GET("/task/:id/stderr.json", rou.taskStderrJSON)

		// api for edge server
		api.POST("/table/record/list", rou.tableRecordListGet)
		api.POST("/table/record/count", rou.tableRecordCount)
		api.POST("/mark/done", rou.markDone)
		api.POST("/task/meta", rou.taskMetaAdd)
		api.POST("/task/host/", rou.taskHostAdd)
		api.POST("/task/hosts/upsert", rou.taskHostUpsert)
	}
}

93
ibex/server/rpc/method.go Normal file
View File

@@ -0,0 +1,93 @@
package rpc
import (
"fmt"
"github.com/ccfos/nightingale/v6/models"
"os"
"github.com/toolkits/pkg/logger"
"github.com/ccfos/nightingale/v6/ibex/types"
)
// Ping returns the string "pong"; used for connectivity tests.
func (*Server) Ping(input string, output *string) error {
	*output = "pong"
	return nil
}

// GetTaskMeta fills resp with the script/args/account/stdin of task id.
// RPC convention here: the returned error stays nil; failures are carried in
// resp.Message instead.
func (*Server) GetTaskMeta(id int64, resp *types.TaskMetaResponse) error {
	meta, err := models.TaskMetaGetByID(ctxC, id)
	if err != nil {
		resp.Message = err.Error()
		return nil
	}

	if meta == nil {
		resp.Message = fmt.Sprintf("task %d not found", id)
		return nil
	}

	resp.Script = meta.Script
	resp.Args = meta.Args
	resp.Account = meta.Account
	resp.Stdin = meta.Stdin

	return nil
}
// Report is the agentd report RPC: it persists the results the agent sends
// up and hands back the tasks currently assigned to that host. Failures are
// carried in resp.Message; the returned error stays nil.
func (*Server) Report(req types.ReportRequest, resp *types.ReportResponse) error {
	// simplified: the nil check was redundant, len(nil slice) is 0 (staticcheck S1009)
	if len(req.ReportTasks) > 0 {
		if err := handleDoneTask(req); err != nil {
			resp.Message = err.Error()
			return nil
		}
	}

	doings := models.GetDoingCache(req.Ident)

	tasks := make([]types.AssignTask, 0, len(doings))
	for _, doing := range doings {
		tasks = append(tasks, types.AssignTask{
			Id:     doing.Id,
			Clock:  doing.Clock,
			Action: doing.Action,
		})
	}

	resp.AssignTasks = tasks
	return nil
}
// handleDoneTask persists the statuses the agent reported.
//
// With env CONTINUOUS_OUTPUT=1, "running" reports stream partial
// stdout/stderr into the task_host record; otherwise only terminal statuses
// ("success"/"failed") are processed and marked done.
func handleDoneTask(req types.ReportRequest) error {
	count := len(req.ReportTasks)
	val, ok := os.LookupEnv("CONTINUOUS_OUTPUT")
	for i := 0; i < count; i++ {
		t := req.ReportTasks[i]
		if ok && val == "1" && t.Status == "running" {
			err := models.RealTimeUpdateOutput(ctxC, t.Id, req.Ident, t.Stdout, t.Stderr)
			if err != nil {
				logger.Errorf("cannot update output, id:%d, hostname:%s, clock:%d, status:%s, err: %v", t.Id, req.Ident, t.Clock, t.Status, err)
				return err
			}
		} else {
			if t.Status == "success" || t.Status == "failed" {
				exist, isEdgeAlertTriggered := models.CheckExistAndEdgeAlertTriggered(req.Ident, t.Id)
				// the ibex agent may report a result more than once; if the task is no
				// longer in the task_host_doing cache it has already been marked done
				// and needs no further handling
				if !exist {
					continue
				}
				err := models.MarkDoneStatus(ctxC, t.Id, t.Clock, req.Ident, t.Status, t.Stdout, t.Stderr, isEdgeAlertTriggered)
				if err != nil {
					logger.Errorf("cannot mark task done, id:%d, hostname:%s, clock:%d, status:%s, err: %v", t.Id, req.Ident, t.Clock, t.Status, err)
					return err
				}
			}
		}
	}
	return nil
}

61
ibex/server/rpc/rpc.go Normal file
View File

@@ -0,0 +1,61 @@
package rpc
import (
"bufio"
"fmt"
"io"
"net"
"net/rpc"
"os"
"reflect"
"time"
"github.com/ccfos/nightingale/v6/pkg/ctx"
"github.com/toolkits/pkg/logger"
"github.com/ugorji/go/codec"
)
// Server is the receiver type registered with net/rpc; its exported methods
// (Ping, GetTaskMeta, Report) form the agentd-facing RPC API.
type Server int

// ctxC is the shared context injected by Start and used by all RPC methods.
var ctxC *ctx.Context

// Start stores the context and serves the RPC listener in the background.
func Start(listen string, ctx *ctx.Context) {
	ctxC = ctx
	go serve(listen)
}

// serve accepts TCP connections forever and speaks msgpack-rpc on each one;
// a failed Listen is fatal for the whole process.
func serve(listen string) {
	server := rpc.NewServer()
	server.Register(new(Server))

	l, err := net.Listen("tcp", listen)
	if err != nil {
		fmt.Printf("fail to listen on: %s, error: %v\n", listen, err)
		os.Exit(1)
	}

	fmt.Println("rpc.listening:", listen)

	var mh codec.MsgpackHandle
	mh.MapType = reflect.TypeOf(map[string]interface{}(nil))

	duration := time.Duration(100) * time.Millisecond

	for {
		conn, err := l.Accept()
		if err != nil {
			logger.Warningf("listener accept error: %v", err)
			// brief backoff so a persistent accept error does not spin the CPU
			time.Sleep(duration)
			continue
		}

		// wrap the conn with buffered reader/writer for the codec
		var bufconn = struct {
			io.Closer
			*bufio.Reader
			*bufio.Writer
		}{conn, bufio.NewReader(conn), bufio.NewWriter(conn)}

		go server.ServeCodec(codec.MsgpackSpecRpc.ServerCodec(bufconn, &mh))
	}
}

159
ibex/server/server.go Normal file
View File

@@ -0,0 +1,159 @@
package server
import (
"context"
"fmt"
"github.com/ccfos/nightingale/v6/pkg/ctx"
"os"
"os/signal"
"path/filepath"
"syscall"
"github.com/ccfos/nightingale/v6/ibex/server/config"
"github.com/ccfos/nightingale/v6/ibex/server/router"
"github.com/ccfos/nightingale/v6/ibex/server/rpc"
"github.com/ccfos/nightingale/v6/ibex/server/timer"
"github.com/ccfos/nightingale/v6/pkg/httpx"
"github.com/ccfos/nightingale/v6/pkg/logx"
"github.com/ccfos/nightingale/v6/storage"
"github.com/toolkits/pkg/i18n"
)
// Server holds the startup options of the embedded ibex server.
type Server struct {
	ConfigFile string
	Version    string
}

// ServerOption mutates a Server before it is started.
type ServerOption func(*Server)

// SetConfigFile overrides the default config file path (etc/ibex/server.toml).
func SetConfigFile(f string) ServerOption {
	return func(s *Server) {
		s.ConfigFile = f
	}
}

// SetVersion sets the version string exposed by the /version endpoint.
func SetVersion(v string) ServerOption {
	return func(s *Server) {
		s.Version = v
	}
}

// Run starts the ibex server in center or edge role, then blocks until a
// terminating signal arrives, runs the cleanup chain and exits.
func Run(isCenter bool, opts ...ServerOption) {
	code := 1
	sc := make(chan os.Signal, 1)
	signal.Notify(sc, syscall.SIGHUP, syscall.SIGINT, syscall.SIGTERM, syscall.SIGQUIT)

	server := Server{
		ConfigFile: filepath.Join("etc", "ibex", "server.toml"),
		Version:    "not specified",
	}

	for _, opt := range opts {
		opt(&server)
	}

	// parse config file
	config.MustLoad(server.ConfigFile)
	config.C.IsCenter = isCenter

	cleanFunc, err := server.initialize()
	if err != nil {
		fmt.Println("server init fail:", err)
		os.Exit(code)
	}

EXIT:
	for {
		sig := <-sc
		fmt.Println("received signal:", sig.String())

		switch sig {
		case syscall.SIGQUIT, syscall.SIGTERM, syscall.SIGINT:
			code = 0
			break EXIT
		case syscall.SIGHUP:
			// reload configuration?
		default:
			break EXIT
		}
	}

	cleanFunc()
	fmt.Println("server exited")
	os.Exit(code)
}
// initialize wires up every subsystem (logging, storage, caches, timers,
// HTTP and RPC servers) and returns the aggregated cleanup function.
func (s Server) initialize() (func(), error) {
	fns := Functions{}
	bgCtx, cancel := context.WithCancel(context.Background())
	fns.Add(cancel)

	// init i18n
	i18n.Init()

	// init logger
	loggerClean, err := logx.Init(config.C.Log)
	if err != nil {
		return fns.Ret(), err
	} else {
		fns.Add(loggerClean)
	}

	var ctxC *ctx.Context
	var redis storage.Redis
	if redis, err = storage.NewRedis(config.C.Redis); err != nil {
		return fns.Ret(), err
	}

	// init database; only the center role talks to the DB directly,
	// edge instances work through redis and the center API
	if config.C.IsCenter {
		db, err := storage.New(config.C.DB)
		if err != nil {
			return fns.Ret(), err
		}
		ctxC = ctx.NewContext(context.Background(), db, redis, true, config.C.CenterApi)
	} else {
		ctxC = ctx.NewContext(context.Background(), nil, redis, false, config.C.CenterApi)
	}

	if err := storage.IdInit(ctxC.Redis); err != nil {
		fmt.Println("cannot init id generator: ", err)
		os.Exit(1)
	}

	timer.CacheHostDoing(ctxC)
	timer.ReportResult(ctxC)
	// scheduling/heartbeat/cleanup loops only run in the center role
	if config.C.IsCenter {
		go timer.Heartbeat(ctxC)
		go timer.Schedule(ctxC)
		go timer.CleanLong(ctxC)
	}

	// init http server
	r := router.New(ctxC, s.Version)
	httpClean := httpx.Init(config.C.HTTP, bgCtx, r)
	fns.Add(httpClean)

	// start rpc server
	rpc.Start(config.C.RPC.Listen, ctxC)

	// release all the resources
	return fns.Ret(), nil
}
// Functions collects cleanup callbacks; Ret returns a closure that runs them
// in registration order.
type Functions struct {
	List []func()
}

// Add registers one cleanup callback.
func (fs *Functions) Add(f func()) {
	fs.List = append(fs.List, f)
}

// Ret returns a function that invokes every registered callback in order.
func (fs *Functions) Ret() func() {
	return func() {
		for _, f := range fs.List {
			f()
		}
	}
}

View File

@@ -0,0 +1,76 @@
package timer
import (
"github.com/ccfos/nightingale/v6/models"
"github.com/ccfos/nightingale/v6/pkg/ctx"
"time"
"github.com/ccfos/nightingale/v6/ibex/server/config"
"github.com/toolkits/pkg/logger"
)
// Heartbeat keeps this scheduler's liveness record fresh, looping every
// Heartbeat.Interval milliseconds (defaulting to 1000 when unset).
func Heartbeat(ctx *ctx.Context) {
	if config.C.Heartbeat.Interval == 0 {
		config.C.Heartbeat.Interval = 1000
	}

	for {
		heartbeat(ctx)
		time.Sleep(time.Duration(config.C.Heartbeat.Interval) * time.Millisecond)
	}
}
// heartbeat refreshes this scheduler's heartbeat record, then scans for dead
// schedulers: schedulers that own no tasks are garbage-collected, the tasks
// of the others are taken over by this instance.
func heartbeat(ctx *ctx.Context) {
	ident := config.C.Heartbeat.LocalAddr

	err := models.TaskSchedulerHeartbeat(ctx, ident)
	if err != nil {
		logger.Errorf("task scheduler(%s) cannot heartbeat: %v", ident, err)
		return
	}

	dss, err := models.DeadTaskSchedulers(ctx)
	if err != nil {
		logger.Errorf("cannot get dead task schedulers: %v", err)
		return
	}

	for i := 0; i < len(dss); i++ {
		ids, err := models.TasksOfScheduler(ctx, dss[i])
		if err != nil {
			logger.Errorf("cannot get tasks of scheduler(%s): %v", dss[i], err)
			return
		}

		if len(ids) == 0 {
			// dead scheduler owns nothing: drop its heartbeat record and move on
			if err = models.DelDeadTaskScheduler(ctx, dss[i]); err != nil {
				logger.Errorf("cannot del dead task scheduler(%s): %v", dss[i], err)
				return
			}
			// bugfix: previously fell through and called takeOverTasks with an
			// empty id list
			continue
		}

		takeOverTasks(ctx, ident, dss[i], ids)
	}
}
// takeOverTasks reassigns each task in ids from the dead scheduler to the
// alive one; a failed takeover aborts the remaining ids.
func takeOverTasks(ctx *ctx.Context, alive, dead string, ids []int64) {
	for _, id := range ids {
		ok, err := models.TakeOverTask(ctx, id, dead, alive)
		if err != nil {
			logger.Errorf("cannot take over task: %v", err)
			return
		}
		if ok {
			logger.Infof("%s take over task[%d] of %s", alive, id, dead)
		}
	}
}

View File

@@ -0,0 +1,53 @@
package timer
import (
"fmt"
"github.com/ccfos/nightingale/v6/models"
"github.com/ccfos/nightingale/v6/pkg/ctx"
"time"
"github.com/toolkits/pkg/logger"
)
// CacheHostDoing 缓存task_host_doing表全部内容减轻DB压力
func CacheHostDoing(ctx *ctx.Context) {
if err := cacheHostDoing(ctx); err != nil {
fmt.Println("cannot cache task_host_doing data: ", err)
}
go loopCacheHostDoing(ctx)
}
func loopCacheHostDoing(ctx *ctx.Context) {
for {
time.Sleep(time.Millisecond * 400)
if err := cacheHostDoing(ctx); err != nil {
logger.Warning("cannot cache task_host_doing data: ", err)
}
}
}
// cacheHostDoing rebuilds the in-memory doing cache, merging both sources:
// task_host_doing rows from the DB (center-dispatched tasks, AlertTriggered
// false) and redis records (edge alert-triggered tasks, AlertTriggered true),
// grouped by host.
//
// bugfix: previously a failed read was only logged (the first error was then
// overwritten by the second) and the cache was rebuilt from partial data;
// now any read error leaves the existing cache untouched.
func cacheHostDoing(ctx *ctx.Context) error {
	doingsFromDb, err := models.TableRecordGets[[]models.TaskHostDoing](ctx, models.TaskHostDoing{}.TableName(), "")
	if err != nil {
		logger.Errorf("models.TableRecordGets fail: %v", err)
		return err
	}

	doingsFromRedis, err := models.CacheRecordGets[models.TaskHostDoing](ctx)
	if err != nil {
		logger.Errorf("models.CacheRecordGets fail: %v", err)
		return err
	}

	set := make(map[string][]models.TaskHostDoing, len(doingsFromDb)+len(doingsFromRedis))

	for _, doing := range doingsFromDb {
		doing.AlertTriggered = false
		set[doing.Host] = append(set[doing.Host], doing)
	}

	for _, doing := range doingsFromRedis {
		doing.AlertTriggered = true
		set[doing.Host] = append(set[doing.Host], doing)
	}

	models.SetDoingCache(set)
	return nil
}

View File

@@ -0,0 +1,27 @@
package timer
import (
"fmt"
"github.com/ccfos/nightingale/v6/models"
"github.com/ccfos/nightingale/v6/pkg/ctx"
"time"
"github.com/toolkits/pkg/logger"
)
// ReportResult flushes the cached task_host results produced by
// alert-triggered (edge) tasks: once synchronously at startup, then every
// two seconds in the background.
func ReportResult(ctx *ctx.Context) {
	// bugfix: message typo "alter trigger" -> "alert trigger"
	if err := models.ReportCacheResult(ctx); err != nil {
		fmt.Println("cannot report task_host result from alert trigger: ", err)
	}

	go loopReport(ctx)
}

// loopReport periodically re-runs the cache flush; errors are logged and
// retried on the next tick.
func loopReport(ctx *ctx.Context) {
	d := time.Duration(2) * time.Second
	for {
		time.Sleep(d)
		if err := models.ReportCacheResult(ctx); err != nil {
			logger.Warning("cannot report task_host result from alert trigger: ", err)
		}
	}
}

View File

@@ -0,0 +1,79 @@
package timer
import (
"github.com/ccfos/nightingale/v6/models"
"github.com/ccfos/nightingale/v6/pkg/ctx"
"time"
"github.com/ccfos/nightingale/v6/ibex/server/config"
"github.com/ccfos/nightingale/v6/ibex/server/logic"
"github.com/toolkits/pkg/logger"
)
// Schedule is the main scheduler loop: every second it first adopts orphan
// tasks, then drives timeout checking and host dispatch for its own tasks.
func Schedule(ctx *ctx.Context) {
	for {
		scheduleOrphan(ctx)
		scheduleMine(ctx)
		time.Sleep(time.Second)
	}
}

// scheduleMine checks timeouts and dispatches waiting hosts for every task
// currently owned by this scheduler instance.
func scheduleMine(ctx *ctx.Context) {
	ids, err := models.TasksOfScheduler(ctx, config.C.Heartbeat.LocalAddr)
	if err != nil {
		logger.Errorf("cannot get tasks of scheduler(%s): %v", config.C.Heartbeat.LocalAddr, err)
		return
	}

	count := len(ids)
	for i := 0; i < count; i++ {
		logic.CheckTimeout(ctx, ids[i])
		logic.ScheduleTask(ctx, ids[i])
	}
}
// scheduleOrphan adopts tasks that currently have no scheduler: finished
// (no action row) and paused tasks are left alone, every other orphan is
// claimed and scheduled immediately.
func scheduleOrphan(ctx *ctx.Context) {
	ids, err := models.OrphanTaskIds(ctx)
	if err != nil {
		logger.Errorf("cannot get orphan task ids: %v", err)
		return
	}

	count := len(ids)
	if count == 0 {
		return
	}

	logger.Debug("orphan task ids:", ids)

	for i := 0; i < count; i++ {
		action, err := models.TaskActionGet(ctx, "id=?", ids[i])
		if err != nil {
			logger.Errorf("cannot get task[%d] action: %v", ids[i], err)
			continue
		}

		// no action row: the task is finished, nothing to schedule
		if action == nil {
			continue
		}

		if action.Action == "pause" {
			continue
		}

		// try to claim the orphan; another scheduler may win the race,
		// in which case mine is false and we simply skip it
		mine, err := models.TakeOverTask(ctx, ids[i], "", config.C.Heartbeat.LocalAddr)
		if err != nil {
			logger.Errorf("cannot take over task[%d]: %v", ids[i], err)
			continue
		}

		if !mine {
			continue
		}

		logger.Debugf("task[%d] is mine", ids[i])

		logic.ScheduleTask(ctx, ids[i])
	}
}

View File

@@ -0,0 +1,38 @@
package timer
import (
"github.com/ccfos/nightingale/v6/models"
"github.com/ccfos/nightingale/v6/pkg/ctx"
"time"
"github.com/toolkits/pkg/logger"
)
// CleanLong cancels tasks that have been running for too long, checking once
// per day.
func CleanLong(ctx *ctx.Context) {
	d := time.Duration(24) * time.Hour
	for {
		cleanLongTask(ctx)
		time.Sleep(d)
	}
}

// cleanLongTask sets the action "cancel" on every task id returned by
// LongTaskIds; failures are logged per task and do not stop the sweep.
func cleanLongTask(ctx *ctx.Context) {
	ids, err := models.LongTaskIds(ctx)
	if err != nil {
		logger.Error("LongTaskIds:", err)
		return
	}

	if ids == nil {
		return
	}

	count := len(ids)
	for i := 0; i < count; i++ {
		action := models.TaskAction{Id: ids[i]}
		err = action.Update(ctx, "cancel")
		if err != nil {
			logger.Errorf("cannot cancel long task[%d]: %v", ids[i], err)
		}
	}
}

33
ibex/types/types.go Normal file
View File

@@ -0,0 +1,33 @@
package types
// TaskMetaResponse is the GetTaskMeta RPC reply: Message is set on failure,
// otherwise the script and its execution parameters are filled in.
type TaskMetaResponse struct {
	Message string
	Script  string
	Args    string
	Account string
	Stdin   string
}

// ReportTask is one task result (or, with continuous output enabled, a
// progress snapshot) reported by the agent.
type ReportTask struct {
	Id     int64
	Clock  int64
	Status string
	Stdout string
	Stderr string
}

// ReportRequest is the agent's Report call: its identity plus the results it
// has accumulated.
type ReportRequest struct {
	Ident       string
	ReportTasks []ReportTask
}

// AssignTask is one task the server assigns back to the reporting agent.
type AssignTask struct {
	Id     int64
	Clock  int64
	Action string
}

// ReportResponse is the Report reply: an optional error message and the
// tasks currently assigned to the reporting host.
type ReportResponse struct {
	Message     string
	AssignTasks []AssignTask
}

View File

@@ -1,463 +0,0 @@
{
"name": "IPMI for Prometheus",
"ident": "",
"configs": {
"version": "2.0.0",
"links": [],
"var": [
{
"name": "node",
"type": "query",
"datasource": {
"cate": "prometheus"
},
"definition": "label_values(ipmi_bmc_info, ident)",
"reg": "",
"multi": false
}
],
"panels": [
{
"type": "gauge",
"id": "f975fded-f57e-4a6e-80b4-50d5be6dd84c",
"layout": {
"h": 7,
"w": 24,
"x": 0,
"y": 0,
"i": "f975fded-f57e-4a6e-80b4-50d5be6dd84c",
"isResizable": true
},
"version": "2.0.0",
"datasourceCate": "prometheus",
"targets": [
{
"refId": "A",
"expr": "ipmi_temperature_celsius{ident='$node'}",
"legend": "{{name}}"
}
],
"transformations": [
{
"id": "organize",
"options": {}
}
],
"name": "Temperatures",
"links": [],
"custom": {
"textMode": "valueAndName",
"calc": "avg"
},
"options": {
"valueMappings": [],
"standardOptions": {
"util": "none"
},
"thresholds": {
"steps": [
{
"color": "green",
"value": null,
"type": "base"
},
{
"color": "red",
"value": 80
}
]
}
}
},
{
"type": "timeseries",
"id": "681f1191-4777-4377-8b77-404d9f036406",
"layout": {
"h": 5,
"w": 12,
"x": 0,
"y": 7,
"i": "681f1191-4777-4377-8b77-404d9f036406",
"isResizable": true
},
"version": "2.0.0",
"datasourceCate": "prometheus",
"targets": [
{
"refId": "A",
"expr": "ipmi_power_watts{ident='$node'}",
"legend": "{{name}}"
}
],
"transformations": [
{
"id": "organize",
"options": {}
}
],
"name": "Power",
"links": [],
"options": {
"tooltip": {
"mode": "all",
"sort": "none"
},
"legend": {
"displayMode": "hidden"
},
"standardOptions": {},
"thresholds": {
"steps": [
{
"color": "#634CD9",
"value": null,
"type": "base"
}
]
}
},
"custom": {
"drawStyle": "lines",
"lineInterpolation": "smooth",
"spanNulls": false,
"lineWidth": 1,
"fillOpacity": 0.5,
"gradientMode": "none",
"stack": "off",
"scaleDistribution": {
"type": "linear"
}
}
},
{
"type": "timeseries",
"id": "feede24c-8296-4127-982e-08cfc4151933",
"layout": {
"h": 5,
"w": 12,
"x": 12,
"y": 7,
"i": "feede24c-8296-4127-982e-08cfc4151933",
"isResizable": true
},
"version": "2.0.0",
"datasourceCate": "prometheus",
"targets": [
{
"refId": "A",
"expr": "ipmi_power_watts{ident='$node'} * 30 * 24 ",
"legend": "{{name}}"
}
],
"transformations": [
{
"id": "organize",
"options": {}
}
],
"name": "Power usage 30d",
"links": [],
"options": {
"tooltip": {
"mode": "all",
"sort": "none"
},
"legend": {
"displayMode": "hidden"
},
"standardOptions": {},
"thresholds": {
"steps": [
{
"color": "#634CD9",
"value": null,
"type": "base"
}
]
}
},
"custom": {
"drawStyle": "lines",
"lineInterpolation": "smooth",
"spanNulls": false,
"lineWidth": 1,
"fillOpacity": 0.5,
"gradientMode": "none",
"stack": "off",
"scaleDistribution": {
"type": "linear"
}
}
},
{
"type": "timeseries",
"id": "9e11e7f5-ed3c-49eb-8a72-ee76c8700c24",
"layout": {
"h": 7,
"w": 12,
"x": 0,
"y": 12,
"i": "9e11e7f5-ed3c-49eb-8a72-ee76c8700c24",
"isResizable": true
},
"version": "2.0.0",
"datasourceCate": "prometheus",
"targets": [
{
"refId": "A",
"expr": "ipmi_temperature_celsius{ident='$node'}",
"legend": "{{name}}"
}
],
"transformations": [
{
"id": "organize",
"options": {}
}
],
"name": "Temperatures",
"links": [],
"description": "",
"options": {
"tooltip": {
"mode": "multi"
},
"legend": {
"displayMode": "list",
"placement": "bottom"
},
"standardOptions": {
"util": "none"
},
"thresholds": {
"steps": [
{
"color": "green",
"value": null,
"type": "base"
},
{
"color": "red",
"value": 80
}
]
}
},
"custom": {
"drawStyle": "lines",
"lineInterpolation": "linear",
"spanNulls": false,
"lineWidth": 1,
"fillOpacity": 0.5,
"gradientMode": "none",
"stack": "off",
"scaleDistribution": {
"type": "linear"
}
}
},
{
"type": "timeseries",
"id": "95c734f7-26cb-41a7-8376-49332cc220c2",
"layout": {
"h": 7,
"w": 12,
"x": 12,
"y": 12,
"i": "95c734f7-26cb-41a7-8376-49332cc220c2",
"isResizable": true
},
"version": "2.0.0",
"datasourceCate": "prometheus",
"targets": [
{
"refId": "A",
"expr": "ipmi_power_watts{ident='$node'}",
"legend": "{{name}}"
}
],
"transformations": [
{
"id": "organize",
"options": {}
}
],
"name": "Power",
"links": [],
"description": "",
"options": {
"tooltip": {
"mode": "multi"
},
"legend": {
"displayMode": "list",
"placement": "bottom"
},
"standardOptions": {
"util": "none"
},
"thresholds": {
"steps": [
{
"color": "green",
"value": null,
"type": "base"
},
{
"color": "red",
"value": 80
}
]
}
},
"custom": {
"drawStyle": "lines",
"lineInterpolation": "linear",
"spanNulls": false,
"lineWidth": 1,
"fillOpacity": 0.01,
"gradientMode": "none",
"stack": "off",
"scaleDistribution": {
"type": "linear"
}
}
},
{
"type": "timeseries",
"id": "0313f34f-afcf-41e9-8f69-9a3dbd4b2e56",
"layout": {
"h": 7,
"w": 12,
"x": 0,
"y": 19,
"i": "0313f34f-afcf-41e9-8f69-9a3dbd4b2e56",
"isResizable": true
},
"version": "2.0.0",
"datasourceCate": "prometheus",
"targets": [
{
"refId": "A",
"expr": "ipmi_fan_speed_rpm{ident='$node'}",
"legend": "{{name}}"
}
],
"transformations": [
{
"id": "organize",
"options": {}
}
],
"name": "Fans",
"links": [],
"description": "",
"options": {
"tooltip": {
"mode": "multi"
},
"legend": {
"displayMode": "list",
"placement": "bottom"
},
"standardOptions": {
"util": "none"
},
"thresholds": {
"steps": [
{
"color": "green",
"value": null,
"type": "base"
},
{
"color": "red",
"value": 80
}
]
}
},
"custom": {
"drawStyle": "lines",
"lineInterpolation": "linear",
"spanNulls": false,
"lineWidth": 1,
"fillOpacity": 0.5,
"gradientMode": "none",
"stack": "off",
"scaleDistribution": {
"type": "linear"
}
}
},
{
"type": "timeseries",
"id": "29ee004d-a95c-405d-97d1-d715fab4e1de",
"layout": {
"h": 7,
"w": 12,
"x": 12,
"y": 19,
"i": "29ee004d-a95c-405d-97d1-d715fab4e1de",
"isResizable": true
},
"version": "2.0.0",
"datasourceCate": "prometheus",
"targets": [
{
"refId": "A",
"expr": "ipmi_voltage_volts{ident='$node',name!~\"Voltage 1|Voltage 2\"}",
"legend": "{{name}}"
}
],
"transformations": [
{
"id": "organize",
"options": {}
}
],
"name": "Voltages",
"links": [],
"description": "",
"options": {
"tooltip": {
"mode": "multi"
},
"legend": {
"displayMode": "list",
"placement": "bottom"
},
"standardOptions": {
"util": "none"
},
"thresholds": {
"steps": [
{
"color": "green",
"value": null,
"type": "base"
},
{
"color": "red",
"value": 80
}
]
}
},
"custom": {
"drawStyle": "lines",
"lineInterpolation": "linear",
"spanNulls": false,
"lineWidth": 1,
"fillOpacity": 0.5,
"gradientMode": "none",
"stack": "off",
"scaleDistribution": {
"type": "linear"
}
}
}
]
},
"uuid": 1727587308068775200
}

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@@ -1425,7 +1425,7 @@
"rule_config": {
"queries": [
{
"prom_ql": "increase(kernel_vmstat_oom_kill[2m]) > 0",
"prom_ql": "kernel_vmstat_oom_kill != 0",
"severity": 2
}
]
@@ -2139,4 +2139,4 @@
"update_by": "",
"uuid": 1717556327737117000
}
]
]

View File

@@ -259,11 +259,11 @@
"uuid": 1717556327796195000,
"collector": "Categraf",
"typ": "Linux",
"name": "1分钟内 OOM 次数统计",
"name": "OOM 次数统计",
"unit": "none",
"note": "取自 `/proc/vmstat`,需要较高版本的内核,没记错的话应该是 4.13 以上版本",
"lang": "zh_CN",
"expression": "increase(kernel_vmstat_oom_kill[1m])",
"expression": "kernel_vmstat_oom_kill",
"created_at": 0,
"created_by": "",
"updated_at": 0,
@@ -1334,4 +1334,4 @@
"updated_at": 0,
"updated_by": ""
}
]
]

View File

@@ -160,9 +160,8 @@ func (tc *TargetCacheType) syncTargets() error {
}
m := make(map[string]*models.Target)
metaMap := tc.GetHostMetas(lst)
if len(metaMap) > 0 {
if tc.ctx.IsCenter {
metaMap := tc.GetHostMetas(lst)
for i := 0; i < len(lst); i++ {
if meta, ok := metaMap[lst[i].Ident]; ok {
lst[i].FillMeta(meta)

View File

@@ -67,7 +67,6 @@ type AlertCurEvent struct {
Claimant string `json:"claimant" gorm:"-"`
SubRuleId int64 `json:"sub_rule_id" gorm:"-"`
ExtraInfo []string `json:"extra_info" gorm:"-"`
Target *Target `json:"target" gorm:"-"`
}
func (e *AlertCurEvent) TableName() string {
@@ -342,7 +341,7 @@ func (e *AlertCurEvent) DB2Mem() {
continue
}
arr := strings.SplitN(pair, "=", 2)
arr := strings.Split(pair, "=")
if len(arr) != 2 {
continue
}
@@ -419,8 +418,7 @@ func (e *AlertCurEvent) FillNotifyGroups(ctx *ctx.Context, cache map[int64]*User
return nil
}
func AlertCurEventTotal(ctx *ctx.Context, prods []string, bgids []int64, stime, etime int64,
severity int, dsIds []int64, cates []string, ruleId int64, query string) (int64, error) {
func AlertCurEventTotal(ctx *ctx.Context, prods []string, bgids []int64, stime, etime int64, severity int, dsIds []int64, cates []string, query string) (int64, error) {
session := DB(ctx).Model(&AlertCurEvent{})
if stime != 0 && etime != 0 {
session = session.Where("trigger_time between ? and ?", stime, etime)
@@ -445,10 +443,6 @@ func AlertCurEventTotal(ctx *ctx.Context, prods []string, bgids []int64, stime,
session = session.Where("cate in ?", cates)
}
if ruleId > 0 {
session = session.Where("rule_id = ?", ruleId)
}
if query != "" {
arr := strings.Fields(query)
for i := 0; i < len(arr); i++ {
@@ -460,9 +454,7 @@ func AlertCurEventTotal(ctx *ctx.Context, prods []string, bgids []int64, stime,
return Count(session)
}
func AlertCurEventsGet(ctx *ctx.Context, prods []string, bgids []int64, stime, etime int64,
severity int, dsIds []int64, cates []string, ruleId int64, query string, limit, offset int) (
[]AlertCurEvent, error) {
func AlertCurEventGets(ctx *ctx.Context, prods []string, bgids []int64, stime, etime int64, severity int, dsIds []int64, cates []string, query string, limit, offset int) ([]AlertCurEvent, error) {
session := DB(ctx).Model(&AlertCurEvent{})
if stime != 0 && etime != 0 {
session = session.Where("trigger_time between ? and ?", stime, etime)
@@ -487,10 +479,6 @@ func AlertCurEventsGet(ctx *ctx.Context, prods []string, bgids []int64, stime, e
session = session.Where("cate in ?", cates)
}
if ruleId > 0 {
session = session.Where("rule_id = ?", ruleId)
}
if query != "" {
arr := strings.Fields(query)
for i := 0; i < len(arr); i++ {
@@ -511,26 +499,6 @@ func AlertCurEventsGet(ctx *ctx.Context, prods []string, bgids []int64, stime, e
return lst, err
}
func AlertCurEventCountByRuleId(ctx *ctx.Context, rids []int64, stime, etime int64) map[int64]int64 {
type Row struct {
RuleId int64
Cnt int64
}
var rows []Row
err := DB(ctx).Model(&AlertCurEvent{}).Select("rule_id, count(*) as cnt").
Where("trigger_time between ? and ?", stime, etime).Group("rule_id").Find(&rows).Error
if err != nil {
logger.Errorf("Failed to count group by rule_id: %v", err)
return nil
}
curEventTotalByRid := make(map[int64]int64, len(rids))
for _, r := range rows {
curEventTotalByRid[r.RuleId] = r.Cnt
}
return curEventTotalByRid
}
func AlertCurEventDel(ctx *ctx.Context, ids []int64) error {
if len(ids) == 0 {
return nil

View File

@@ -121,9 +121,7 @@ func (e *AlertHisEvent) FillNotifyGroups(ctx *ctx.Context, cache map[int64]*User
// }
func AlertHisEventTotal(
ctx *ctx.Context, prods []string, bgids []int64, stime, etime int64, severity int,
recovered int, dsIds []int64, cates []string, ruleId int64, query string) (int64, error) {
func AlertHisEventTotal(ctx *ctx.Context, prods []string, bgids []int64, stime, etime int64, severity int, recovered int, dsIds []int64, cates []string, query string) (int64, error) {
session := DB(ctx).Model(&AlertHisEvent{}).Where("last_eval_time between ? and ?", stime, etime)
if len(prods) > 0 {
@@ -150,10 +148,6 @@ func AlertHisEventTotal(
session = session.Where("cate in ?", cates)
}
if ruleId > 0 {
session = session.Where("rule_id = ?", ruleId)
}
if query != "" {
arr := strings.Fields(query)
for i := 0; i < len(arr); i++ {
@@ -165,9 +159,7 @@ func AlertHisEventTotal(
return Count(session)
}
func AlertHisEventGets(ctx *ctx.Context, prods []string, bgids []int64, stime, etime int64,
severity int, recovered int, dsIds []int64, cates []string, ruleId int64, query string,
limit, offset int) ([]AlertHisEvent, error) {
func AlertHisEventGets(ctx *ctx.Context, prods []string, bgids []int64, stime, etime int64, severity int, recovered int, dsIds []int64, cates []string, query string, limit, offset int) ([]AlertHisEvent, error) {
session := DB(ctx).Where("last_eval_time between ? and ?", stime, etime)
if len(prods) != 0 {
@@ -194,10 +186,6 @@ func AlertHisEventGets(ctx *ctx.Context, prods []string, bgids []int64, stime, e
session = session.Where("cate in ?", cates)
}
if ruleId > 0 {
session = session.Where("rule_id = ?", ruleId)
}
if query != "" {
arr := strings.Fields(query)
for i := 0; i < len(arr); i++ {

View File

@@ -98,8 +98,6 @@ type AlertRule struct {
UpdateAt int64 `json:"update_at"`
UpdateBy string `json:"update_by"`
UUID int64 `json:"uuid" gorm:"-"` // tpl identifier
CurEventCount int64 `json:"cur_event_count" gorm:"-"`
UpdateByNickname string `json:"update_by_nickname" gorm:"-"` // for fe
}
type Tpl struct {
@@ -163,12 +161,10 @@ type Trigger struct {
Duration int `json:"duration,omitempty"`
Percent int `json:"percent,omitempty"`
Joins []Join `json:"joins"`
JoinRef string `json:"join_ref"`
}
type Join struct {
JoinType string `json:"join_type"`
Ref string `json:"ref"`
On []string `json:"on"`
}
@@ -180,10 +176,9 @@ func GetHostsQuery(queries []HostQuery) []map[string]interface{} {
case "group_ids":
ids := ParseInt64(q.Values)
if q.Op == "==" {
m["target_busi_group.group_id in (?)"] = ids
m["group_id in (?)"] = ids
} else {
m["target.ident not in (select target_ident "+
"from target_busi_group where group_id in (?))"] = ids
m["group_id not in (?)"] = ids
}
case "tags":
lst := []string{}
@@ -823,8 +818,7 @@ func AlertRuleGetsAll(ctx *ctx.Context) ([]*AlertRule, error) {
return lst, nil
}
func AlertRulesGetsBy(ctx *ctx.Context, prods []string, query, algorithm, cluster string,
cates []string, disabled int) ([]*AlertRule, error) {
func AlertRulesGetsBy(ctx *ctx.Context, prods []string, query, algorithm, cluster string, cates []string, disabled int) ([]*AlertRule, error) {
session := DB(ctx)
if len(prods) > 0 {

View File

@@ -286,53 +286,3 @@ func BoardSetHide(ctx *ctx.Context, ids []int64) error {
return nil
})
}
func BoardGetsByBids(ctx *ctx.Context, bids []int64) ([]map[string]interface{}, error) {
var boards []Board
err := DB(ctx).Where("id IN ?", bids).Find(&boards).Error
if err != nil {
return nil, err
}
// 收集所有唯一的 group_id
groupIDs := make([]int64, 0)
groupIDSet := make(map[int64]struct{})
for _, board := range boards {
if _, exists := groupIDSet[board.GroupId]; !exists {
groupIDs = append(groupIDs, board.GroupId)
groupIDSet[board.GroupId] = struct{}{}
}
}
// 一次性查询所有需要的 BusiGroup
var busiGroups []BusiGroup
err = DB(ctx).Where("id IN ?", groupIDs).Find(&busiGroups).Error
if err != nil {
return nil, err
}
// 创建 group_id 到 BusiGroup 的映射
groupMap := make(map[int64]BusiGroup)
for _, bg := range busiGroups {
groupMap[bg.Id] = bg
}
result := make([]map[string]interface{}, 0, len(boards))
for _, board := range boards {
busiGroup, exists := groupMap[board.GroupId]
if !exists {
// 处理找不到对应 BusiGroup 的情况
continue
}
item := map[string]interface{}{
"busi_group_name": busiGroup.Name,
"busi_group_id": busiGroup.Id,
"board_id": board.Id,
"board_name": board.Name,
}
result = append(result, item)
}
return result, nil
}

View File

@@ -11,7 +11,7 @@ import (
// BuiltinComponent represents a builtin component along with its metadata.
type BuiltinComponent struct {
ID uint64 `json:"id" gorm:"primaryKey;type:bigint;autoIncrement;comment:'unique identifier'"`
Ident string `json:"ident" gorm:"type:varchar(191);not null;uniqueIndex:idx_ident,sort:asc;comment:'identifier of component'"`
Ident string `json:"ident" gorm:"type:varchar(191);not null;index:idx_ident,sort:asc;comment:'identifier of component'"`
Logo string `json:"logo" gorm:"type:varchar(191);not null;comment:'logo of component'"`
Readme string `json:"readme" gorm:"type:text;not null;comment:'readme of component'"`
CreatedAt int64 `json:"created_at" gorm:"type:bigint;not null;default:0;comment:'create time'"`

View File

@@ -2,7 +2,6 @@ package models
import (
"errors"
"fmt"
"strings"
"time"
@@ -218,10 +217,3 @@ func BuiltinMetricCollectors(ctx *ctx.Context, lang, typ, query string) ([]strin
err := session.Select("distinct(collector)").Pluck("collector", &collectors).Error
return collectors, err
}
func BuiltinMetricBatchUpdateColumn(ctx *ctx.Context, col, old, new, updatedBy string) error {
if old == new {
return nil
}
return DB(ctx).Model(&BuiltinMetric{}).Where(fmt.Sprintf("%s = ?", col), old).Updates(map[string]interface{}{col: new, "updated_by": updatedBy}).Error
}

View File

@@ -9,19 +9,18 @@ import (
)
type BuiltinPayload struct {
ID int64 `json:"id" gorm:"primaryKey;type:bigint;autoIncrement;comment:'unique identifier'"`
Type string `json:"type" gorm:"type:varchar(191);not null;index:idx_type,sort:asc;comment:'type of payload'"` // Alert Dashboard Collet
Component string `json:"component" gorm:"type:varchar(191);not null;index:idx_component,sort:asc;comment:'component of payload'"` //
ComponentID uint64 `json:"component_id" gorm:"type:bigint;index:idx_component,sort:asc;comment:'component_id of payload'"` // ComponentID which the payload belongs to
Cate string `json:"cate" gorm:"type:varchar(191);not null;comment:'category of payload'"` // categraf_v1 telegraf_v1
Name string `json:"name" gorm:"type:varchar(191);not null;index:idx_buildinpayload_name,sort:asc;comment:'name of payload'"` //
Tags string `json:"tags" gorm:"type:varchar(191);not null;default:'';comment:'tags of payload'"` // {"host":"
Content string `json:"content" gorm:"type:longtext;not null;comment:'content of payload'"`
UUID int64 `json:"uuid" gorm:"type:bigint;not null;index:idx_uuid;comment:'uuid of payload'"`
CreatedAt int64 `json:"created_at" gorm:"type:bigint;not null;default:0;comment:'create time'"`
CreatedBy string `json:"created_by" gorm:"type:varchar(191);not null;default:'';comment:'creator'"`
UpdatedAt int64 `json:"updated_at" gorm:"type:bigint;not null;default:0;comment:'update time'"`
UpdatedBy string `json:"updated_by" gorm:"type:varchar(191);not null;default:'';comment:'updater'"`
ID int64 `json:"id" gorm:"primaryKey;type:bigint;autoIncrement;comment:'unique identifier'"`
Type string `json:"type" gorm:"type:varchar(191);not null;index:idx_type,sort:asc;comment:'type of payload'"` // Alert Dashboard Collet
Component string `json:"component" gorm:"type:varchar(191);not null;index:idx_component,sort:asc;comment:'component of payload'"` // Host MySQL Redis
Cate string `json:"cate" gorm:"type:varchar(191);not null;comment:'category of payload'"` // categraf_v1 telegraf_v1
Name string `json:"name" gorm:"type:varchar(191);not null;index:idx_buildinpayload_name,sort:asc;comment:'name of payload'"` //
Tags string `json:"tags" gorm:"type:varchar(191);not null;default:'';comment:'tags of payload'"` // {"host":"
Content string `json:"content" gorm:"type:longtext;not null;comment:'content of payload'"`
UUID int64 `json:"uuid" gorm:"type:bigint;not null;index:idx_uuid;comment:'uuid of payload'"`
CreatedAt int64 `json:"created_at" gorm:"type:bigint;not null;default:0;comment:'create time'"`
CreatedBy string `json:"created_by" gorm:"type:varchar(191);not null;default:'';comment:'creator'"`
UpdatedAt int64 `json:"updated_at" gorm:"type:bigint;not null;default:0;comment:'update time'"`
UpdatedBy string `json:"updated_by" gorm:"type:varchar(191);not null;default:'';comment:'updater'"`
}
func (bp *BuiltinPayload) TableName() string {
@@ -34,8 +33,9 @@ func (bp *BuiltinPayload) Verify() error {
return errors.New("type is blank")
}
if bp.ComponentID == 0 {
return errors.New("component_id is blank")
bp.Component = strings.TrimSpace(bp.Component)
if bp.Component == "" {
return errors.New("component is blank")
}
if bp.Name == "" {
@@ -47,7 +47,7 @@ func (bp *BuiltinPayload) Verify() error {
func BuiltinPayloadExists(ctx *ctx.Context, bp *BuiltinPayload) (bool, error) {
var count int64
err := DB(ctx).Model(bp).Where("type = ? AND component_id = ? AND name = ? AND cate = ?", bp.Type, bp.ComponentID, bp.Name, bp.Cate).Count(&count).Error
err := DB(ctx).Model(bp).Where("type = ? AND component = ? AND name = ? AND cate = ?", bp.Type, bp.Component, bp.Name, bp.Cate).Count(&count).Error
if err != nil {
return false, err
}
@@ -78,7 +78,7 @@ func (bp *BuiltinPayload) Update(ctx *ctx.Context, req BuiltinPayload) error {
return err
}
if bp.Type != req.Type || bp.ComponentID != req.ComponentID || bp.Name != req.Name {
if bp.Type != req.Type || bp.Component != req.Component || bp.Name != req.Name {
exists, err := BuiltinPayloadExists(ctx, &req)
if err != nil {
return err
@@ -117,13 +117,13 @@ func BuiltinPayloadGet(ctx *ctx.Context, where string, args ...interface{}) (*Bu
return &bp, nil
}
func BuiltinPayloadGets(ctx *ctx.Context, componentId uint64, typ, cate, query string) ([]*BuiltinPayload, error) {
func BuiltinPayloadGets(ctx *ctx.Context, typ, component, cate, query string) ([]*BuiltinPayload, error) {
session := DB(ctx)
if typ != "" {
session = session.Where("type = ?", typ)
}
if componentId != 0 {
session = session.Where("component_id = ?", componentId)
if component != "" {
session = session.Where("component = ?", component)
}
if cate != "" {
@@ -144,9 +144,9 @@ func BuiltinPayloadGets(ctx *ctx.Context, componentId uint64, typ, cate, query s
}
// get cates of BuiltinPayload by type and component, return []string
func BuiltinPayloadCates(ctx *ctx.Context, typ string, componentID uint64) ([]string, error) {
func BuiltinPayloadCates(ctx *ctx.Context, typ, component string) ([]string, error) {
var cates []string
err := DB(ctx).Model(new(BuiltinPayload)).Where("type = ? and component_id = ?", typ, componentID).Distinct("cate").Pluck("cate", &cates).Error
err := DB(ctx).Model(new(BuiltinPayload)).Where("type = ? and component = ?", typ, component).Distinct("cate").Pluck("cate", &cates).Error
return cates, err
}
@@ -163,37 +163,3 @@ func BuiltinPayloadComponents(ctx *ctx.Context, typ, cate string) (string, error
}
return components[0], nil
}
// InitBuiltinPayloads 兼容新旧 BuiltinPayload 格式
func InitBuiltinPayloads(ctx *ctx.Context) error {
var lst []*BuiltinPayload
components, err := BuiltinComponentGets(ctx, "")
if err != nil {
return err
}
identToId := make(map[string]uint64)
for _, component := range components {
identToId[component.Ident] = component.ID
}
err = DB(ctx).Where("component_id = 0 or component_id is NULL").Find(&lst).Error
if err != nil {
return err
}
for _, bp := range lst {
componentId, ok := identToId[bp.Component]
if !ok {
continue
}
bp.ComponentID = componentId
}
if len(lst) == 0 {
return nil
}
return DB(ctx).Save(&lst).Error
}

View File

@@ -134,7 +134,7 @@ func (bg *BusiGroup) Del(ctx *ctx.Context) error {
return errors.New("Some alert subscribes still in the BusiGroup")
}
has, err = Exists(DB(ctx).Model(&TargetBusiGroup{}).Where("group_id=?", bg.Id))
has, err = Exists(DB(ctx).Model(&Target{}).Where("group_id=?", bg.Id))
if err != nil {
return err
}

View File

@@ -35,7 +35,6 @@ type Datasource struct {
UpdatedBy string `json:"updated_by"`
IsDefault bool `json:"is_default"`
Transport *http.Transport `json:"-" gorm:"-"`
ForceSave bool `json:"force_save" gorm:"-"`
}
type Auth struct {

69
models/ibex_models.go Normal file
View File

@@ -0,0 +1,69 @@
package models
import (
"encoding/json"
"fmt"
"github.com/ccfos/nightingale/v6/ibex/server/config"
"github.com/ccfos/nightingale/v6/pkg/ctx"
"github.com/ccfos/nightingale/v6/pkg/poster"
"gorm.io/gorm"
)
// IbexCount returns the row count for the query represented by tx.
func IbexCount(tx *gorm.DB) (total int64, err error) {
	err = tx.Count(&total).Error
	return total, err
}
// tht maps a task id to its shard table name. task_host rows are split
// across 100 tables (task_host_0 .. task_host_99) by id modulo 100.
func tht(id int64) string {
	shard := id % 100
	return fmt.Sprintf("task_host_%d", shard)
}
// TableRecordGets loads rows from table into lst (T is expected to be a
// slice type). On the center node it queries the database directly;
// otherwise it forwards the query to the center via the ibex HTTP API.
//
// NOTE(review): when `where` is non-empty but `args` is empty, the filter
// is silently dropped by the `len(args) == 0` branch — confirm this is
// intended rather than applying `Where(where)` without arguments.
func TableRecordGets[T any](ctx *ctx.Context, table, where string, args ...interface{}) (lst T, err error) {
	if config.C.IsCenter {
		if where == "" || len(args) == 0 {
			err = DB(ctx).Table(table).Find(&lst).Error
		} else {
			err = DB(ctx).Table(table).Where(where, args...).Find(&lst).Error
		}
		return
	}

	return poster.PostByUrlsWithResp[T](ctx, "/ibex/v1/table/record/list", map[string]interface{}{
		"table": table,
		"where": where,
		"args":  args,
	})
}
// TableRecordCount counts rows in table matching the condition. On the
// center node it queries the database directly; otherwise it asks the
// center via the ibex HTTP API.
//
// NOTE(review): as in TableRecordGets, a non-empty `where` with empty
// `args` is ignored by the `len(args) == 0` branch — confirm intended.
func TableRecordCount(ctx *ctx.Context, table, where string, args ...interface{}) (int64, error) {
	if config.C.IsCenter {
		if where == "" || len(args) == 0 {
			return IbexCount(DB(ctx).Table(table))
		}
		return IbexCount(DB(ctx).Table(table).Where(where, args...))
	}

	return poster.PostByUrlsWithResp[int64](ctx, "/ibex/v1/table/record/count", map[string]interface{}{
		"table": table,
		"where": where,
		"args":  args,
	})
}
// IBEX_HOST_DOING is the redis hash that tracks in-flight task-host pairs
// while running in edge (alert-triggered) mode.
var IBEX_HOST_DOING = "ibex-host-doing"

// CacheRecordGets returns all in-flight records stored in the
// IBEX_HOST_DOING redis hash, JSON-decoded into T.
func CacheRecordGets[T any](ctx *ctx.Context) ([]T, error) {
	lst := make([]T, 0)

	// Propagate redis failures instead of silently treating them as an
	// empty result (the HVals error was previously discarded).
	values, err := ctx.Redis.HVals(ctx.Ctx, IBEX_HOST_DOING).Result()
	if err != nil {
		return nil, err
	}

	for _, val := range values {
		t := new(T)
		if err := json.Unmarshal([]byte(val), t); err != nil {
			return nil, err
		}
		lst = append(lst, *t)
	}

	return lst, nil
}

112
models/ibex_task_action.go Normal file
View File

@@ -0,0 +1,112 @@
package models
import (
"fmt"
"time"
"github.com/ccfos/nightingale/v6/pkg/ctx"
"gorm.io/gorm"
)
// TaskAction records the most recent control action requested for a task
// (start/cancel/kill/pause) and the unix timestamp it was set.
type TaskAction struct {
	Id     int64  `gorm:"column:id;primaryKey"`
	Action string `gorm:"column:action;size:32;not null"`
	Clock  int64  `gorm:"column:clock;not null;default:0"`
}

// TableName maps TaskAction to the task_action table.
func (TaskAction) TableName() string {
	return "task_action"
}
// TaskActionGet fetches one TaskAction matching the condition; it returns
// (nil, nil) when no row matches.
func TaskActionGet(ctx *ctx.Context, where string, args ...interface{}) (*TaskAction, error) {
	var act TaskAction
	result := DB(ctx).Where(where, args...).Find(&act)
	switch {
	case result.Error != nil:
		return nil, result.Error
	case result.RowsAffected == 0:
		return nil, nil
	default:
		return &act, nil
	}
}
// TaskActionExistsIds filters ids down to those present in task_action.
// An empty input is returned as-is without touching the database.
func TaskActionExistsIds(ctx *ctx.Context, ids []int64) ([]int64, error) {
	if len(ids) == 0 {
		return ids, nil
	}

	var existing []int64
	err := DB(ctx).Model(&TaskAction{}).Where("id in ?", ids).Pluck("id", &existing).Error
	return existing, err
}
// CancelWaitingHosts flips every still-waiting host of the task to
// "cancelled" so agents never pick them up.
func CancelWaitingHosts(ctx *ctx.Context, id int64) error {
	return DB(ctx).Table(tht(id)).Where("id = ? and status = ?", id, "waiting").Update("status", "cancelled").Error
}
// StartTask clears the task's scheduler field — presumably so that a
// scheduler instance can (re)claim and run it; confirm against the
// scheduler's takeover logic.
func StartTask(ctx *ctx.Context, id int64) error {
	return DB(ctx).Model(&TaskScheduler{}).Where("id = ?", id).Update("scheduler", "").Error
}
// CancelTask cancels a task by cancelling all of its waiting hosts.
// Hosts already running are left untouched (use KillTask for those).
func CancelTask(ctx *ctx.Context, id int64) error {
	return CancelWaitingHosts(ctx, id)
}
// KillTask stops a task: waiting hosts are cancelled first, then, in one
// transaction, every in-flight host record is switched to the "kill"
// action (for agents to act on) and running rows are marked "killing".
func KillTask(ctx *ctx.Context, id int64) error {
	if err := CancelWaitingHosts(ctx, id); err != nil {
		return err
	}

	now := time.Now().Unix()

	return DB(ctx).Transaction(func(tx *gorm.DB) error {
		// skip hosts already being killed so their kill clock is not reset
		err := tx.Model(&TaskHostDoing{}).Where("id = ? and action <> ?", id, "kill").Updates(map[string]interface{}{
			"clock":  now,
			"action": "kill",
		}).Error
		if err != nil {
			return err
		}

		return tx.Table(tht(id)).Where("id = ? and status = ?", id, "running").Update("status", "killing").Error
	})
}
// Update validates and persists a new action for the task, then performs
// the matching state transition (start/cancel/kill). "pause" only records
// the action; no immediate transition is needed.
func (a *TaskAction) Update(ctx *ctx.Context, action string) error {
	// switch is the idiomatic form of the previous chained equality checks
	switch action {
	case "start", "cancel", "kill", "pause":
		// valid
	default:
		return fmt.Errorf("action invalid")
	}

	err := DB(ctx).Model(a).Updates(map[string]interface{}{
		"action": action,
		"clock":  time.Now().Unix(),
	}).Error
	if err != nil {
		return err
	}

	switch action {
	case "start":
		return StartTask(ctx, a.Id)
	case "cancel":
		return CancelTask(ctx, a.Id)
	case "kill":
		return KillTask(ctx, a.Id)
	}

	return nil
}
// LongTaskIds returns ids of tasks whose last action is older than two weeks.
func LongTaskIds(ctx *ctx.Context) ([]int64, error) {
	// same cutoff as the previous `now - 604800*2` arithmetic
	cutoff := time.Now().Add(-14 * 24 * time.Hour).Unix()

	var ids []int64
	err := DB(ctx).Model(&TaskAction{}).Where("clock < ?", cutoff).Pluck("id", &ids).Error
	return ids, err
}

262
models/ibex_task_host.go Normal file
View File

@@ -0,0 +1,262 @@
package models
import (
"fmt"
"sync"
"time"
"github.com/ccfos/nightingale/v6/ibex/server/config"
"github.com/ccfos/nightingale/v6/pkg/ctx"
"github.com/ccfos/nightingale/v6/pkg/poster"
"github.com/ccfos/nightingale/v6/storage"
"github.com/toolkits/pkg/logger"
"gorm.io/gorm"
"gorm.io/gorm/clause"
)
// TaskHost is one host's row of a task: execution status plus captured
// stdout/stderr. Rows live in 100 sharded tables (see tht), keyed uniquely
// by (id, host); II is the shard-local auto-increment used for ordering.
type TaskHost struct {
	II     int64  `gorm:"column:ii;primaryKey;autoIncrement" json:"-"`
	Id     int64  `gorm:"column:id;uniqueIndex:idx_id_host;not null" json:"id"`
	Host   string `gorm:"column:host;uniqueIndex:idx_id_host;size:128;not null" json:"host"`
	Status string `gorm:"column:status;size:32;not null" json:"status"`
	Stdout string `gorm:"column:stdout;type:text" json:"stdout"`
	Stderr string `gorm:"column:stderr;type:text" json:"stderr"`
}
// Upsert inserts the row into its shard table, or — when (id, host)
// already exists — refreshes status/stdout/stderr in place.
func (taskHost *TaskHost) Upsert(ctx *ctx.Context) error {
	return DB(ctx).Table(tht(taskHost.Id)).Clauses(clause.OnConflict{
		Columns:   []clause.Column{{Name: "id"}, {Name: "host"}},
		DoUpdates: clause.AssignmentColumns([]string{"status", "stdout", "stderr"}),
	}).Create(taskHost).Error
}
// Create inserts the row directly when running on the center node, and
// forwards it to the center's HTTP API when running on an edge node.
func (taskHost *TaskHost) Create(ctx *ctx.Context) error {
	if !config.C.IsCenter {
		return poster.PostByUrls(ctx, "/ibex/v1/task/host", taskHost)
	}
	return DB(ctx).Table(tht(taskHost.Id)).Create(taskHost).Error
}
// TaskHostUpserts upserts a batch of task-host results. On the center it
// writes each row locally and returns per-row errors keyed by "id:host";
// on an edge node it forwards the whole batch to the center.
func TaskHostUpserts(ctx *ctx.Context, lst []TaskHost) (map[string]error, error) {
	if len(lst) == 0 {
		return nil, fmt.Errorf("empty list")
	}

	if !config.C.IsCenter {
		return poster.PostByUrlsWithResp[map[string]error](ctx, "/ibex/v1/task/hosts/upsert", lst)
	}

	// make(map, 0) was a no-op size hint; plain make is the idiomatic form
	errs := make(map[string]error)
	for _, taskHost := range lst {
		if err := taskHost.Upsert(ctx); err != nil {
			errs[fmt.Sprintf("%d:%s", taskHost.Id, taskHost.Host)] = err
		}
	}

	return errs, nil
}
// TaskHostGet returns the (id, host) row, or nil when it does not exist.
func TaskHostGet(ctx *ctx.Context, id int64, host string) (*TaskHost, error) {
	var rows []*TaskHost
	if err := DB(ctx).Table(tht(id)).Where("id=? and host=?", id, host).Find(&rows).Error; err != nil {
		return nil, err
	}

	if len(rows) == 0 {
		return nil, nil
	}

	return rows[0], nil
}
// MarkDoneStatus records a host's final status and output for a task.
//
// With edgeAlertTriggered set, the result is only staged locally (edge
// mode, see CacheMarkDone). On a non-center node the call is forwarded to
// the center. On the center, the row update and the removal of the
// matching "doing" marker happen in one transaction; a late result for a
// host already marked timeout still gets its output persisted.
func MarkDoneStatus(ctx *ctx.Context, id, clock int64, host, status, stdout, stderr string, edgeAlertTriggered ...bool) error {
	if len(edgeAlertTriggered) > 0 && edgeAlertTriggered[0] {
		return CacheMarkDone(ctx, TaskHost{
			Id:     id,
			Host:   host,
			Status: status,
			Stdout: stdout,
			Stderr: stderr,
		})
	}

	if !config.C.IsCenter {
		return poster.PostByUrls(ctx, "/ibex/v1/mark/done", map[string]interface{}{
			"id":     id,
			"clock":  clock,
			"host":   host,
			"status": status,
			"stdout": stdout,
			"stderr": stderr,
		})
	}

	count, err := TableRecordCount(ctx, TaskHostDoing{}.TableName(), "id=? and host=? and clock=?", id, host, clock)
	if err != nil {
		return err
	}

	if count == 0 {
		// The matching doing-record is gone. If the host already timed out
		// and the result arrives later, still store stdout/stderr so the
		// user can see the output.
		count, err = TableRecordCount(ctx, tht(id), "id=? and host=? and status=?", id, host, "timeout")
		if err != nil {
			return err
		}

		if count == 1 {
			return DB(ctx).Table(tht(id)).Where("id=? and host=?", id, host).Updates(map[string]interface{}{
				"status": status,
				"stdout": stdout,
				"stderr": stderr,
			}).Error
		}

		return nil
	}

	return DB(ctx).Transaction(func(tx *gorm.DB) error {
		err = tx.Table(tht(id)).Where("id=? and host=?", id, host).Updates(map[string]interface{}{
			"status": status,
			"stdout": stdout,
			"stderr": stderr,
		}).Error
		if err != nil {
			return err
		}

		if err = tx.Where("id=? and host=?", id, host).Delete(&TaskHostDoing{}).Error; err != nil {
			return err
		}

		return nil
	})
}
// RealTimeUpdateOutput streams interim stdout/stderr for a still-running
// host into its task_host shard row.
func RealTimeUpdateOutput(ctx *ctx.Context, id int64, host, stdout, stderr string) error {
	// A single UPDATE statement needs no explicit transaction wrapper;
	// the previous Transaction added BEGIN/COMMIT round-trips for nothing.
	return DB(ctx).Table(tht(id)).Where("id=? and host=?", id, host).Updates(map[string]interface{}{
		"stdout": stdout,
		"stderr": stderr,
	}).Error
}
// CacheMarkDone records a finished task-host in edge mode: it removes the
// in-flight marker from the IBEX_HOST_DOING redis hash, then queues the
// result in the local buffer for later upload (see ReportCacheResult).
func CacheMarkDone(ctx *ctx.Context, taskHost TaskHost) error {
	if err := ctx.Redis.HDel(ctx.Ctx, IBEX_HOST_DOING, hostDoingCacheKey(taskHost.Id, taskHost.Host)).Err(); err != nil {
		return err
	}

	TaskHostCachePush(taskHost)

	return nil
}
// WaitingHostList returns the task's waiting hosts in insertion order,
// optionally capped by an explicit limit.
func WaitingHostList(ctx *ctx.Context, id int64, limit ...int) ([]TaskHost, error) {
	query := DB(ctx).Table(tht(id)).Where("id = ? and status = 'waiting'", id).Order("ii")
	if len(limit) > 0 {
		query = query.Limit(limit[0])
	}

	var waiting []TaskHost
	err := query.Find(&waiting).Error
	return waiting, err
}
// WaitingHostCount counts the task's hosts still waiting to run.
func WaitingHostCount(ctx *ctx.Context, id int64) (int64, error) {
	return TableRecordCount(ctx, tht(id), "id=? and status='waiting'", id)
}
// UnexpectedHostCount counts hosts that ended abnormally
// (failed, timeout, or killfailed).
func UnexpectedHostCount(ctx *ctx.Context, id int64) (int64, error) {
	return TableRecordCount(ctx, tht(id), "id=? and status in ('failed', 'timeout', 'killfailed')", id)
}
// IngStatusHostCount counts hosts still in flight
// (waiting, running, or killing).
func IngStatusHostCount(ctx *ctx.Context, id int64) (int64, error) {
	return TableRecordCount(ctx, tht(id), "id=? and status in ('waiting', 'running', 'killing')", id)
}
// RunWaitingHosts promotes the given waiting task-hosts to "running" in a
// single transaction: each host's status row is updated and a matching
// TaskHostDoing row (action "start") is created so agents pick the work up.
func RunWaitingHosts(ctx *ctx.Context, taskHosts []TaskHost) error {
	count := len(taskHosts)
	if count == 0 {
		return nil
	}

	// One shared clock for the whole batch.
	now := time.Now().Unix()
	return DB(ctx).Transaction(func(tx *gorm.DB) error {
		for i := 0; i < count; i++ {
			// Note: each entry may belong to a different task id, hence the
			// per-entry sharded table lookup via tht().
			if err := tx.Table(tht(taskHosts[i].Id)).Where("id=? and host=?", taskHosts[i].Id, taskHosts[i].Host).Update("status", "running").Error; err != nil {
				return err
			}

			err := tx.Create(&TaskHostDoing{Id: taskHosts[i].Id, Host: taskHosts[i].Host, Clock: now, Action: "start"}).Error
			if err != nil {
				return err
			}
		}
		return nil
	})
}
// TaskHostStatus returns (id, host, status) for every host of task id,
// in insertion order.
func TaskHostStatus(ctx *ctx.Context, id int64) ([]TaskHost, error) {
	var hosts []TaskHost
	err := DB(ctx).Table(tht(id)).Where("id=?", id).Select("id", "host", "status").Order("ii").Find(&hosts).Error
	return hosts, err
}
// TaskHostGets returns every host row of task id, in insertion order.
func TaskHostGets(ctx *ctx.Context, id int64) ([]TaskHost, error) {
	var hosts []TaskHost
	err := DB(ctx).Table(tht(id)).Where("id=?", id).Order("ii").Find(&hosts).Error
	return hosts, err
}
var (
	// taskHostCache buffers finished task-host results until
	// ReportCacheResult drains them in batch; guarded by taskHostLock.
	taskHostCache = make([]TaskHost, 0, 128)
	taskHostLock  sync.RWMutex
)
// TaskHostCachePush appends one finished task-host result to the in-memory
// buffer; ReportCacheResult later drains it in batch.
func TaskHostCachePush(taskHost TaskHost) {
	taskHostLock.Lock()
	taskHostCache = append(taskHostCache, taskHost)
	taskHostLock.Unlock()
}
// TaskHostCachePopAll atomically takes every buffered result and resets the
// buffer to a fresh slice of the same initial capacity.
func TaskHostCachePopAll() []TaskHost {
	taskHostLock.Lock()
	defer taskHostLock.Unlock()

	drained := taskHostCache
	taskHostCache = make([]TaskHost, 0, 128)
	return drained
}
// ReportCacheResult drains the local task-host result buffer and upserts the
// results into the database. Per-row upsert errors are logged but do not fail
// the whole batch.
func ReportCacheResult(ctx *ctx.Context) error {
	result := TaskHostCachePopAll()
	reports := make([]TaskHost, 0)
	for _, th := range result {
		// Ids >= storage.IDINITIAL were generated locally by edge-triggered
		// self-healing scripts while disconnected from the center. Different
		// edge rooms may generate the same id, so those results are only
		// logged and never written back to the database.
		if th.Id >= storage.IDINITIAL {
			logger.Infof("task[%d] host[%s] done, result:[%v]", th.Id, th.Host, th)
		} else {
			reports = append(reports, th)
		}
	}

	if len(reports) == 0 {
		return nil
	}

	errs, err := TaskHostUpserts(ctx, reports)
	if err != nil {
		return err
	}

	for key, err := range errs {
		logger.Warningf("report task_host_cache[%s] result error: %v", key, err)
	}
	return nil
}

View File

@@ -0,0 +1,65 @@
package models
import (
"encoding/json"
"fmt"
"sync"
)
// TaskHostDoing is one in-flight (not yet finished) task execution on a
// single host; rows live in the task_host_doing table and mirror entries in
// the redis IBEX_HOST_DOING hash.
type TaskHostDoing struct {
	Id     int64  `gorm:"column:id;index"`                    // task id
	Host   string `gorm:"column:host;size:128;not null;index"` // target host ident
	Clock  int64  `gorm:"column:clock;not null;default:0"`     // unix seconds when the action was queued
	Action string `gorm:"column:action;size:16;not null"`      // e.g. "start" or "kill"

	// AlertTriggered is in-memory only (gorm:"-"); presumably set when the
	// task was spawned by an edge alert rule — confirm with callers.
	AlertTriggered bool `gorm:"-"`
}
// TableName maps TaskHostDoing records to the task_host_doing table.
func (TaskHostDoing) TableName() string {
	return "task_host_doing"
}
// MarshalBinary serializes the struct as JSON so redis clients can store it.
func (doing *TaskHostDoing) MarshalBinary() ([]byte, error) {
	return json.Marshal(doing)
}
// UnmarshalBinary restores the struct from its JSON form read back from redis.
func (doing *TaskHostDoing) UnmarshalBinary(data []byte) error {
	return json.Unmarshal(data, doing)
}
// hostDoingCacheKey builds the redis hash field name ("<host>:<id>") under
// which a host's in-flight task entry is stored.
func hostDoingCacheKey(id int64, host string) string {
	field := fmt.Sprintf("%s:%d", host, id)
	return field
}
var (
	// doingLock guards doingMaps, the host -> in-flight-task snapshot that
	// SetDoingCache replaces wholesale.
	doingLock sync.RWMutex
	doingMaps map[string][]TaskHostDoing
)
// SetDoingCache replaces the whole host -> doing-entries snapshot.
func SetDoingCache(v map[string][]TaskHostDoing) {
	doingLock.Lock()
	defer doingLock.Unlock()
	doingMaps = v
}
// GetDoingCache returns the cached in-flight entries for host (nil if none).
func GetDoingCache(host string) []TaskHostDoing {
	doingLock.RLock()
	entries := doingMaps[host]
	doingLock.RUnlock()
	return entries
}
// CheckExistAndEdgeAlertTriggered reports whether task id is in-flight on
// host, and if so whether its entry carries the AlertTriggered flag.
func CheckExistAndEdgeAlertTriggered(host string, id int64) (exist, isAlertTriggered bool) {
	doingLock.RLock()
	defer doingLock.RUnlock()

	for _, entry := range doingMaps[host] {
		if entry.Id != id {
			continue
		}
		return true, entry.AlertTriggered
	}
	return false, false
}

364
models/ibex_task_meta.go Normal file
View File

@@ -0,0 +1,364 @@
package models
import (
"context"
"encoding/json"
"fmt"
"strings"
"time"
"github.com/ccfos/nightingale/v6/ibex/server/config"
"github.com/ccfos/nightingale/v6/pkg/ctx"
"github.com/ccfos/nightingale/v6/pkg/poster"
"github.com/ccfos/nightingale/v6/storage"
"github.com/toolkits/pkg/str"
"gorm.io/gorm"
)
// TaskMeta is the definition of a script-execution task: what to run, how to
// run it, and who created it. Per-host state lives in the sharded task_host
// tables; scheduling state in task_scheduler / task_action.
type TaskMeta struct {
	Id        int64     `gorm:"column:id;primaryKey;autoIncrement" json:"id"`
	Title     string    `gorm:"column:title;size:255;not null;default:''" json:"title"`
	Account   string    `gorm:"column:account;size:64;not null" json:"account"`
	Batch     int       `gorm:"column:batch;not null;default:0" json:"batch"` // presumably hosts per rollout batch — confirm with scheduler
	Tolerance int       `gorm:"column:tolerance;not null;default:0" json:"tolerance"`
	Timeout   int       `gorm:"column:timeout;not null;default:0" json:"timeout"` // seconds; 0 is replaced by 30 in CleanFields
	Pause     string    `gorm:"column:pause;size:255;not null;default:''" json:"pause"`
	Script    string    `gorm:"column:script;type:text;not null" json:"script"`
	Args      string    `gorm:"column:args;size:512;not null;default:''" json:"args"`
	Stdin     string    `gorm:"column:stdin;size:1024;not null;default:''" json:"stdin"`
	Creator   string    `gorm:"column:creator;size:64;not null;default:'';index" json:"creator"`
	Created   time.Time `gorm:"column:created;not null;default:CURRENT_TIMESTAMP;type:timestamp;index" json:"created"`

	// Done is computed, never persisted.
	Done bool `json:"done" gorm:"-"`
}
// TableName maps TaskMeta records to the task_meta table.
func (TaskMeta) TableName() string {
	return "task_meta"
}
// MarshalBinary serializes the struct as JSON so redis clients can store it.
func (taskMeta *TaskMeta) MarshalBinary() ([]byte, error) {
	return json.Marshal(taskMeta)
}
// UnmarshalBinary restores the struct from its JSON form read back from redis.
func (taskMeta *TaskMeta) UnmarshalBinary(data []byte) error {
	return json.Unmarshal(data, taskMeta)
}
// Create inserts the task meta. On the center node it writes the database
// directly; on an edge node it forwards to the center's HTTP API and adopts
// the id the center assigned.
func (taskMeta *TaskMeta) Create(ctx *ctx.Context) error {
	if config.C.IsCenter {
		return DB(ctx).Create(taskMeta).Error
	}

	id, err := poster.PostByUrlsWithResp[int64](ctx, "/ibex/v1/task/meta", taskMeta)
	if err == nil {
		taskMeta.Id = id
	}
	return err
}
// taskMetaCacheKey builds the redis key ("task:meta:<id>") under which a
// task's meta is cached.
func taskMetaCacheKey(id int64) string {
	key := fmt.Sprintf("task:meta:%d", id)
	return key
}
// TaskMetaGet returns the first task_meta row matching the where clause, or
// (nil, nil) when nothing matches.
func TaskMetaGet(ctx *ctx.Context, where string, args ...interface{}) (*TaskMeta, error) {
	lst, err := TableRecordGets[[]*TaskMeta](ctx, TaskMeta{}.TableName(), where, args...)
	if err != nil || len(lst) == 0 {
		return nil, err
	}
	return lst[0], nil
}
// TaskMetaGetByID fetches a task's meta by id, consulting the redis cache
// first and falling back to the database on a miss; a database hit is written
// back to the cache before returning.
func TaskMetaGetByID(ctx *ctx.Context, id int64) (*TaskMeta, error) {
	meta, err := TaskMetaCacheGet(ctx, id)
	if err == nil {
		return meta, nil
	}

	// Cache miss (or scan failure): read from the database.
	meta, err = TaskMetaGet(ctx, "id=?", id)
	if err != nil {
		return nil, err
	}
	if meta == nil {
		return nil, nil
	}

	// Backfill the cache; a cache write error is surfaced to the caller.
	_, err = ctx.Redis.Set(context.Background(), taskMetaCacheKey(id), meta, storage.DEFAULT).Result()
	return meta, err
}
// TaskMetaCacheGet reads a task's meta from the redis cache. The error is
// non-nil on a cache miss or when the stored value cannot be scanned.
func TaskMetaCacheGet(ctx *ctx.Context, id int64) (*TaskMeta, error) {
	res := ctx.Redis.Get(context.Background(), taskMetaCacheKey(id))
	meta := new(TaskMeta)
	err := res.Scan(meta)
	return meta, err
}
// CleanFields validates and normalizes a TaskMeta before it is stored:
// numeric fields must be nonnegative, timeout is capped at five days and
// defaulted to 30s, commas are normalized, and title/args/pause are screened
// for dangerous characters. Returns a descriptive error on the first failed
// check; on success the receiver has been normalized in place.
func (m *TaskMeta) CleanFields() error {
	if m.Batch < 0 {
		return fmt.Errorf("arg(batch) should be nonnegative")
	}

	if m.Tolerance < 0 {
		return fmt.Errorf("arg(tolerance) should be nonnegative")
	}

	if m.Timeout < 0 {
		return fmt.Errorf("arg(timeout) should be nonnegative")
	}

	if m.Timeout > 3600*24*5 {
		return fmt.Errorf("arg(timeout) longer than five days")
	}

	if m.Timeout == 0 {
		m.Timeout = 30
	}

	// Normalize full-width (CJK) commas to ASCII commas and strip spaces.
	// BUG FIX: the previous code passed an empty string as the old substring,
	// which makes strings.Replace insert a comma between every pair of
	// characters, corrupting Pause/Args instead of normalizing them.
	m.Pause = strings.ReplaceAll(m.Pause, "，", ",")
	m.Pause = strings.ReplaceAll(m.Pause, " ", "")
	m.Args = strings.ReplaceAll(m.Args, "，", ",")

	if m.Title == "" {
		return fmt.Errorf("arg(title) is required")
	}

	if str.Dangerous(m.Title) {
		return fmt.Errorf("arg(title) is dangerous")
	}

	if m.Script == "" {
		return fmt.Errorf("arg(script) is required")
	}

	if str.Dangerous(m.Args) {
		return fmt.Errorf("arg(args) is dangerous")
	}

	if str.Dangerous(m.Pause) {
		return fmt.Errorf("arg(pause) is dangerous")
	}

	return nil
}
// HandleFH rewrites the " FH: <host>" suffix of the title, dropping any
// previous marker before appending the new first-host value.
func (m *TaskMeta) HandleFH(fh string) {
	if idx := strings.Index(m.Title, " FH: "); idx > 0 {
		m.Title = m.Title[:idx]
	}
	m.Title += " FH: " + fh
}
// Cache writes the task meta plus a "start" doing-entry for host into redis
// in one MULTI/EXEC pipeline, so both keys appear atomically.
func (taskMeta *TaskMeta) Cache(ctx *ctx.Context, host string) error {
	tx := ctx.Redis.TxPipeline()
	tx.Set(ctx.Ctx, taskMetaCacheKey(taskMeta.Id), taskMeta, storage.DEFAULT)
	tx.HSet(ctx.Ctx, IBEX_HOST_DOING, hostDoingCacheKey(taskMeta.Id, host), &TaskHostDoing{
		Id:     taskMeta.Id,
		Host:   host,
		Clock:  time.Now().Unix(),
		Action: "start",
	})
	_, err := tx.Exec(ctx.Ctx)
	return err
}
// Save persists a brand-new task in one transaction: the meta row (whose
// auto-assigned id is read back from taskMeta), a scheduler placeholder row,
// the initial action row, and one "waiting" row per non-blank host in the
// sharded task_host table.
func (taskMeta *TaskMeta) Save(ctx *ctx.Context, hosts []string, action string) error {
	return DB(ctx).Transaction(func(tx *gorm.DB) error {
		if err := tx.Create(taskMeta).Error; err != nil {
			return err
		}

		id := taskMeta.Id

		if err := tx.Create(&TaskScheduler{Id: id}).Error; err != nil {
			return err
		}

		if err := tx.Create(&TaskAction{Id: id, Action: action, Clock: time.Now().Unix()}).Error; err != nil {
			return err
		}

		for i := 0; i < len(hosts); i++ {
			host := strings.TrimSpace(hosts[i])
			if host == "" {
				continue
			}

			// Raw SQL because the target table name is sharded by task id.
			err := tx.Exec("INSERT INTO "+tht(id)+" (id, host, status) VALUES (?, ?, ?)", id, host, "waiting").Error
			if err != nil {
				return err
			}
		}

		return nil
	})
}
// Action returns this task's current action row, if any.
func (m *TaskMeta) Action(ctx *ctx.Context) (*TaskAction, error) {
	return TaskActionGet(ctx, "id=?", m.Id)
}
// Hosts returns (id, host, status) for every host of this task, in
// insertion order.
func (m *TaskMeta) Hosts(ctx *ctx.Context) ([]TaskHost, error) {
	var hosts []TaskHost
	err := DB(ctx).Table(tht(m.Id)).Select("id", "host", "status").Where("id=?", m.Id).Order("ii").Find(&hosts).Error
	return hosts, err
}
// KillHost requests that the task be killed on host. Only hosts currently in
// "running" or "timeout" may be killed; the kill is queued via the doing
// table and the host's status moves to "killing".
func (m *TaskMeta) KillHost(ctx *ctx.Context, host string) error {
	bean, err := TaskHostGet(ctx, m.Id, host)
	if err != nil {
		return err
	}

	if bean == nil {
		return fmt.Errorf("no such host")
	}

	if !(bean.Status == "running" || bean.Status == "timeout") {
		return fmt.Errorf("current status cannot kill")
	}

	if err := redoHost(ctx, m.Id, host, "kill"); err != nil {
		return err
	}

	return statusSet(ctx, m.Id, host, "killing")
}
// IgnoreHost marks host as "ignored" so it no longer participates in the task.
func (m *TaskMeta) IgnoreHost(ctx *ctx.Context, host string) error {
	return statusSet(ctx, m.Id, host, "ignored")
}
// RedoHost re-runs the task on host: a "start" action is queued via the doing
// table and the host's status moves back to "running".
func (m *TaskMeta) RedoHost(ctx *ctx.Context, host string) error {
	bean, err := TaskHostGet(ctx, m.Id, host)
	if err != nil {
		return err
	}

	if bean == nil {
		return fmt.Errorf("no such host")
	}

	if err := redoHost(ctx, m.Id, host, "start"); err != nil {
		return err
	}

	return statusSet(ctx, m.Id, host, "running")
}
// statusSet updates the status column of (id, host) in the sharded task_host table.
func statusSet(ctx *ctx.Context, id int64, host, status string) error {
	return DB(ctx).Table(tht(id)).Where("id=? and host=?", id, host).Update("status", status).Error
}
// redoHost queues action ("start" or "kill") for (id, host) in the
// task_host_doing table: a new row is inserted when none exists, otherwise
// existing rows with a different action get their clock/action refreshed.
func redoHost(ctx *ctx.Context, id int64, host, action string) error {
	count, err := IbexCount(DB(ctx).Model(&TaskHostDoing{}).Where("id=? and host=?", id, host))
	if err != nil {
		return err
	}

	now := time.Now().Unix()
	if count == 0 {
		err = DB(ctx).Table("task_host_doing").Create(map[string]interface{}{
			"id":     id,
			"host":   host,
			"clock":  now,
			"action": action,
		}).Error
	} else {
		// The action filter makes the update a no-op when the same action is
		// already queued.
		err = DB(ctx).Table("task_host_doing").Where("id=? and host=? and action <> ?", id, host, action).Updates(map[string]interface{}{
			"clock":  now,
			"action": action,
		}).Error
	}
	return err
}
// HostStrs returns just the host idents of this task, in insertion order.
func (m *TaskMeta) HostStrs(ctx *ctx.Context) ([]string, error) {
	var idents []string
	err := DB(ctx).Table(tht(m.Id)).Where("id=?", m.Id).Order("ii").Pluck("host", &idents).Error
	return idents, err
}
// Stdouts returns each host's status and captured stdout, in insertion order.
func (m *TaskMeta) Stdouts(ctx *ctx.Context) ([]TaskHost, error) {
	var hosts []TaskHost
	err := DB(ctx).Table(tht(m.Id)).Select("id", "host", "status", "stdout").Where("id=?", m.Id).Order("ii").Find(&hosts).Error
	return hosts, err
}
// Stderrs returns each host's status and captured stderr, in insertion order.
func (m *TaskMeta) Stderrs(ctx *ctx.Context) ([]TaskHost, error) {
	var hosts []TaskHost
	err := DB(ctx).Table(tht(m.Id)).Select("id", "host", "status", "stderr").Where("id=?", m.Id).Order("ii").Find(&hosts).Error
	return hosts, err
}
// TaskMetaTotal counts task_meta rows created after `before`, optionally
// filtered by creator and by a space-separated title query where a leading
// '-' on a token excludes titles containing it (e.g. "q1 q2 -q3").
func TaskMetaTotal(ctx *ctx.Context, creator, query string, before time.Time) (int64, error) {
	session := DB(ctx).Model(&TaskMeta{})

	// Bind the time as a query parameter instead of splicing the formatted
	// string into the SQL text; gorm handles quoting and dialect differences.
	session = session.Where("created > ?", before)

	if creator != "" {
		session = session.Where("creator = ?", creator)
	}

	if query != "" {
		// strings.Fields never yields empty tokens, so no blank check needed.
		for _, token := range strings.Fields(query) {
			if strings.HasPrefix(token, "-") {
				session = session.Where("title not like ?", "%"+token[1:]+"%")
			} else {
				session = session.Where("title like ?", "%"+token+"%")
			}
		}
	}

	return IbexCount(session)
}
// TaskMetaGets pages task_meta rows created after `before`, newest first,
// with the same creator/title filters as TaskMetaTotal.
func TaskMetaGets(ctx *ctx.Context, creator, query string, before time.Time, limit, offset int) ([]TaskMeta, error) {
	session := DB(ctx).Model(&TaskMeta{}).Order("created desc").Limit(limit).Offset(offset)

	// Bind the time as a query parameter instead of splicing the formatted
	// string into the SQL text; gorm handles quoting and dialect differences.
	session = session.Where("created > ?", before)

	if creator != "" {
		session = session.Where("creator = ?", creator)
	}

	if query != "" {
		// Space-separated tokens narrow the match; a leading '-' excludes
		// titles containing the token (e.g. "q1 q2 -q3"). strings.Fields
		// never yields empty tokens.
		for _, token := range strings.Fields(query) {
			if strings.HasPrefix(token, "-") {
				session = session.Where("title not like ?", "%"+token[1:]+"%")
			} else {
				session = session.Where("title like ?", "%"+token+"%")
			}
		}
	}

	var objs []TaskMeta
	err := session.Find(&objs).Error
	return objs, err
}

View File

@@ -0,0 +1,47 @@
package models
import (
"github.com/ccfos/nightingale/v6/pkg/ctx"
"gorm.io/gorm"
)
// TaskScheduler records which scheduler instance currently owns a task;
// an empty Scheduler means the task is orphaned and up for adoption.
type TaskScheduler struct {
	Id        int64  `gorm:"column:id;primaryKey"`
	Scheduler string `gorm:"column:scheduler;size:128;not null;default:''"`
}
// TableName maps TaskScheduler records to the task_scheduler table.
func (TaskScheduler) TableName() string {
	return "task_scheduler"
}
// TasksOfScheduler lists the ids of every task owned by the given scheduler.
func TasksOfScheduler(ctx *ctx.Context, scheduler string) ([]int64, error) {
	var taskIds []int64
	err := DB(ctx).Model(&TaskScheduler{}).Where("scheduler = ?", scheduler).Pluck("id", &taskIds).Error
	return taskIds, err
}
// TakeOverTask atomically reassigns task id from scheduler `pre` to
// `current` (a compare-and-swap on the scheduler column). It returns true
// only when this caller won the row — RowsAffected == 0 means another
// scheduler got there first or the precondition no longer holds.
func TakeOverTask(ctx *ctx.Context, id int64, pre, current string) (bool, error) {
	ret := DB(ctx).Model(&TaskScheduler{}).Where("id = ? and scheduler = ?", id, pre).Update("scheduler", current)
	if ret.Error != nil {
		return false, ret.Error
	}

	return ret.RowsAffected > 0, nil
}
// OrphanTaskIds lists tasks whose scheduler slot is empty, i.e. tasks no
// scheduler currently owns.
func OrphanTaskIds(ctx *ctx.Context) ([]int64, error) {
	var orphans []int64
	err := DB(ctx).Model(&TaskScheduler{}).Where("scheduler = ''").Pluck("id", &orphans).Error
	return orphans, err
}
// CleanDoneTask removes the scheduling bookkeeping (task_scheduler and
// task_action rows) of a finished task in one transaction; the task meta and
// per-host results are intentionally kept.
func CleanDoneTask(ctx *ctx.Context, id int64) error {
	return DB(ctx).Transaction(func(tx *gorm.DB) error {
		if err := tx.Where("id = ?", id).Delete(&TaskScheduler{}).Error; err != nil {
			return err
		}

		return tx.Where("id = ?", id).Delete(&TaskAction{}).Error
	})
}

View File

@@ -0,0 +1,47 @@
package models
import (
"github.com/ccfos/nightingale/v6/pkg/ctx"
"time"
)
// TaskSchedulerHealth is a scheduler instance's liveness record: Clock holds
// the unix timestamp of its most recent heartbeat.
type TaskSchedulerHealth struct {
	Scheduler string `gorm:"column:scheduler;uniqueIndex;size:128;not null"`
	Clock     int64  `gorm:"column:clock;not null;index"`
}
// TableName maps TaskSchedulerHealth records to the task_scheduler_health table.
func (TaskSchedulerHealth) TableName() string {
	return "task_scheduler_health"
}
// TaskSchedulerHeartbeat upserts this scheduler's liveness clock: inserts a
// row on first heartbeat, otherwise bumps clock to now.
// NOTE(review): the count-then-insert is not atomic; two concurrent first
// heartbeats for the same name would race, with the loser rejected by the
// unique index on scheduler — presumably tolerated because each scheduler
// name heartbeats from a single process. Confirm with callers.
func TaskSchedulerHeartbeat(ctx *ctx.Context, scheduler string) error {
	var cnt int64
	err := DB(ctx).Model(&TaskSchedulerHealth{}).Where("scheduler = ?", scheduler).Count(&cnt).Error
	if err != nil {
		return err
	}

	if cnt == 0 {
		ret := DB(ctx).Create(&TaskSchedulerHealth{
			Scheduler: scheduler,
			Clock:     time.Now().Unix(),
		})
		err = ret.Error
	} else {
		err = DB(ctx).Model(&TaskSchedulerHealth{}).Where("scheduler = ?", scheduler).Update("clock", time.Now().Unix()).Error
	}

	return err
}
// DeadTaskSchedulers lists schedulers whose last heartbeat is older than the
// hard-coded 10-second liveness window; their tasks are candidates for takeover.
func DeadTaskSchedulers(ctx *ctx.Context) ([]string, error) {
	clock := time.Now().Unix() - 10
	var arr []string
	err := DB(ctx).Model(&TaskSchedulerHealth{}).Where("clock < ?", clock).Pluck("scheduler", &arr).Error
	return arr, err
}
// DelDeadTaskScheduler removes a dead scheduler's health row once its tasks
// have been taken over.
func DelDeadTaskScheduler(ctx *ctx.Context, scheduler string) error {
	return DB(ctx).Where("scheduler = ?", scheduler).Delete(&TaskSchedulerHealth{}).Error
}

View File

@@ -2,11 +2,9 @@ package migrate
import (
"fmt"
"github.com/ccfos/nightingale/v6/models"
"github.com/ccfos/nightingale/v6/pkg/ormx"
imodels "github.com/flashcatcloud/ibex/src/models"
"github.com/toolkits/pkg/logger"
"gorm.io/driver/mysql"
"gorm.io/gorm"
@@ -28,7 +26,7 @@ func MigrateIbexTables(db *gorm.DB) {
db = db.Set("gorm:table_options", tableOptions)
}
dts := []interface{}{&imodels.TaskMeta{}, &imodels.TaskScheduler{}, &imodels.TaskSchedulerHealth{}, &imodels.TaskHostDoing{}, &imodels.TaskAction{}}
dts := []interface{}{&models.TaskMeta{}, &models.TaskScheduler{}, &models.TaskSchedulerHealth{}, &models.TaskHostDoing{}, &models.TaskAction{}}
for _, dt := range dts {
err := db.AutoMigrate(dt)
if err != nil {
@@ -38,7 +36,7 @@ func MigrateIbexTables(db *gorm.DB) {
for i := 0; i < 100; i++ {
tableName := fmt.Sprintf("task_host_%d", i)
err := db.Table(tableName).AutoMigrate(&imodels.TaskHost{})
err := db.Table(tableName).AutoMigrate(&models.TaskHost{})
if err != nil {
logger.Errorf("failed to migrate table:%s %v", tableName, err)
}
@@ -58,8 +56,7 @@ func MigrateTables(db *gorm.DB) error {
dts := []interface{}{&RecordingRule{}, &AlertRule{}, &AlertSubscribe{}, &AlertMute{},
&TaskRecord{}, &ChartShare{}, &Target{}, &Configs{}, &Datasource{}, &NotifyTpl{},
&Board{}, &BoardBusigroup{}, &Users{}, &SsoConfig{}, &models.BuiltinMetric{},
&models.MetricFilter{}, &models.BuiltinComponent{}, &models.NotificaitonRecord{},
&models.TargetBusiGroup{}}
&models.MetricFilter{}, &models.BuiltinComponent{}, &models.NotificaitonRecord{}}
if !columnHasIndex(db, &AlertHisEvent{}, "original_tags") ||
!columnHasIndex(db, &AlertCurEvent{}, "original_tags") {
@@ -89,7 +86,7 @@ func MigrateTables(db *gorm.DB) error {
for _, dt := range dts {
err := db.AutoMigrate(dt)
if err != nil {
logger.Errorf("failed to migrate table:%v %v", dt, err)
logger.Errorf("failed to migrate table: %v", err)
}
}
@@ -277,6 +274,5 @@ type SsoConfig struct {
}
type BuiltinPayloads struct {
UUID int64 `json:"uuid" gorm:"type:bigint;not null;index:idx_uuid;comment:'uuid of payload'"`
ComponentID int64 `json:"component_id" gorm:"type:bigint;index:idx_component,sort:asc;not null;default:0;comment:'component_id of payload'"`
UUID int64 `json:"uuid" gorm:"type:bigint;not null;index:idx_uuid;comment:'uuid of payload'"`
}

View File

@@ -1,14 +1,12 @@
package models
import (
"log"
"sort"
"strings"
"time"
"github.com/ccfos/nightingale/v6/pkg/ctx"
"github.com/ccfos/nightingale/v6/pkg/poster"
"golang.org/x/exp/slices"
"github.com/pkg/errors"
"github.com/toolkits/pkg/container/set"
@@ -16,12 +14,10 @@ import (
"gorm.io/gorm"
)
type TargetDeleteHookFunc func(ctx *ctx.Context, idents []string) error
type Target struct {
Id int64 `json:"id" gorm:"primaryKey"`
GroupId int64 `json:"group_id"`
GroupObjs []*BusiGroup `json:"group_objs" gorm:"-"`
GroupObj *BusiGroup `json:"group_obj" gorm:"-"`
Ident string `json:"ident"`
Note string `json:"note"`
Tags string `json:"-"` // user tags
@@ -42,7 +38,6 @@ type Target struct {
CpuUtil float64 `json:"cpu_util" gorm:"-"`
Arch string `json:"arch" gorm:"-"`
RemoteAddr string `json:"remote_addr" gorm:"-"`
GroupIds []int64 `json:"group_ids" gorm:"-"`
}
func (t *Target) TableName() string {
@@ -50,49 +45,26 @@ func (t *Target) TableName() string {
}
func (t *Target) FillGroup(ctx *ctx.Context, cache map[int64]*BusiGroup) error {
var err error
if len(t.GroupIds) == 0 {
t.GroupIds, err = TargetGroupIdsGetByIdent(ctx, t.Ident)
if err != nil {
return errors.WithMessage(err, "failed to get target gids")
}
t.GroupObjs = make([]*BusiGroup, 0, len(t.GroupIds))
if t.GroupId <= 0 {
return nil
}
for _, gid := range t.GroupIds {
bg, has := cache[gid]
if has && bg != nil {
t.GroupObjs = append(t.GroupObjs, bg)
continue
}
bg, err := BusiGroupGetById(ctx, gid)
if err != nil {
return errors.WithMessage(err, "failed to get busi group")
}
if bg == nil {
continue
}
t.GroupObjs = append(t.GroupObjs, bg)
cache[gid] = bg
bg, has := cache[t.GroupId]
if has {
t.GroupObj = bg
return nil
}
bg, err := BusiGroupGetById(ctx, t.GroupId)
if err != nil {
return errors.WithMessage(err, "failed to get busi group")
}
t.GroupObj = bg
cache[t.GroupId] = bg
return nil
}
func (t *Target) MatchGroupId(gid ...int64) bool {
for _, tgId := range t.GroupIds {
for _, id := range gid {
if tgId == id {
return true
}
}
}
return false
}
func (t *Target) AfterFind(tx *gorm.DB) (err error) {
delta := time.Now().Unix() - t.UpdateAt
if delta < 60 {
@@ -119,39 +91,19 @@ func TargetStatistics(ctx *ctx.Context) (*Statistics, error) {
return stats[0], nil
}
func TargetDel(ctx *ctx.Context, idents []string, deleteHook TargetDeleteHookFunc) error {
func TargetDel(ctx *ctx.Context, idents []string) error {
if len(idents) == 0 {
panic("idents empty")
}
return DB(ctx).Transaction(func(tx *gorm.DB) error {
txErr := tx.Where("ident in ?", idents).Delete(new(Target)).Error
if txErr != nil {
return txErr
}
txErr = deleteHook(ctx, idents)
if txErr != nil {
return txErr
}
return nil
})
return DB(ctx).Where("ident in ?", idents).Delete(new(Target)).Error
}
type BuildTargetWhereOption func(session *gorm.DB) *gorm.DB
func BuildTargetWhereWithBgids(bgids []int64) BuildTargetWhereOption {
return func(session *gorm.DB) *gorm.DB {
if len(bgids) == 1 && bgids[0] == 0 {
session = session.Joins("left join target_busi_group on target.ident = " +
"target_busi_group.target_ident").Where("target_busi_group.target_ident is null")
} else if len(bgids) > 0 {
if slices.Contains(bgids, 0) {
session = session.Joins("left join target_busi_group on target.ident = target_busi_group.target_ident").
Where("target_busi_group.target_ident is null OR target_busi_group.group_id in (?)", bgids)
} else {
session = session.Joins("join target_busi_group on target.ident = "+
"target_busi_group.target_ident").Where("target_busi_group.group_id in (?)", bgids)
}
if len(bgids) > 0 {
session = session.Where("group_id in (?)", bgids)
}
return session
}
@@ -166,22 +118,13 @@ func BuildTargetWhereWithDsIds(dsIds []int64) BuildTargetWhereOption {
}
}
func BuildTargetWhereWithHosts(hosts []string) BuildTargetWhereOption {
return func(session *gorm.DB) *gorm.DB {
if len(hosts) > 0 {
session = session.Where("ident in (?) or host_ip in (?)", hosts, hosts)
}
return session
}
}
func BuildTargetWhereWithQuery(query string) BuildTargetWhereOption {
return func(session *gorm.DB) *gorm.DB {
if query != "" {
arr := strings.Fields(query)
for i := 0; i < len(arr); i++ {
q := "%" + arr[i] + "%"
session = session.Where("ident like ? or host_ip like ? or note like ? or tags like ? or host_tags like ? or os like ?", q, q, q, q, q, q)
session = session.Where("ident like ? or note like ? or tags like ? or host_tags like ? or os like ?", q, q, q, q, q)
}
}
return session
@@ -191,18 +134,18 @@ func BuildTargetWhereWithQuery(query string) BuildTargetWhereOption {
func BuildTargetWhereWithDowntime(downtime int64) BuildTargetWhereOption {
return func(session *gorm.DB) *gorm.DB {
if downtime > 0 {
session = session.Where("target.update_at < ?", time.Now().Unix()-downtime)
session = session.Where("update_at < ?", time.Now().Unix()-downtime)
}
return session
}
}
func buildTargetWhere(ctx *ctx.Context, options ...BuildTargetWhereOption) *gorm.DB {
sub := DB(ctx).Model(&Target{}).Distinct("target.ident")
session := DB(ctx).Model(&Target{})
for _, opt := range options {
sub = opt(sub)
session = opt(session)
}
return DB(ctx).Model(&Target{}).Where("ident in (?)", sub)
return session
}
func TargetTotal(ctx *ctx.Context, options ...BuildTargetWhereOption) (int64, error) {
@@ -260,18 +203,15 @@ func MissTargetCountByFilter(ctx *ctx.Context, query []map[string]interface{}, t
}
func TargetFilterQueryBuild(ctx *ctx.Context, query []map[string]interface{}, limit, offset int) *gorm.DB {
sub := DB(ctx).Model(&Target{}).Distinct("target.ident").Joins("left join " +
"target_busi_group on target.ident = target_busi_group.target_ident")
session := DB(ctx).Model(&Target{})
for _, q := range query {
tx := DB(ctx).Model(&Target{})
for k, v := range q {
tx = tx.Or(k, v)
}
sub = sub.Where(tx)
session = session.Where(tx)
}
session := DB(ctx).Model(&Target{}).Where("ident in (?)", sub)
if limit > 0 {
session = session.Limit(limit).Offset(offset)
}
@@ -287,20 +227,9 @@ func TargetGetsAll(ctx *ctx.Context) ([]*Target, error) {
var lst []*Target
err := DB(ctx).Model(&Target{}).Find(&lst).Error
if err != nil {
return lst, err
}
tgs, err := TargetBusiGroupsGetAll(ctx)
if err != nil {
return lst, err
}
for i := 0; i < len(lst); i++ {
lst[i].FillTagsMap()
lst[i].GroupIds = tgs[lst[i].Ident]
}
return lst, err
}
@@ -558,73 +487,3 @@ func IdentsFilter(ctx *ctx.Context, idents []string, where string, args ...inter
func (m *Target) UpdateFieldsMap(ctx *ctx.Context, fields map[string]interface{}) error {
return DB(ctx).Model(m).Updates(fields).Error
}
func MigrateBg(ctx *ctx.Context, bgLabelKey string) {
// 1. 判断是否已经完成迁移
var maxGroupId int64
if err := DB(ctx).Model(&Target{}).Select("MAX(group_id)").Scan(&maxGroupId).Error; err != nil {
log.Println("failed to get max group_id from target table, err:", err)
return
}
if maxGroupId == 0 {
log.Println("migration bgid has been completed.")
return
}
err := DoMigrateBg(ctx, bgLabelKey)
if err != nil {
log.Println("failed to migrate bgid, err:", err)
return
}
log.Println("migration bgid has been completed")
}
func DoMigrateBg(ctx *ctx.Context, bgLabelKey string) error {
// 2. 获取全量 target
targets, err := TargetGetsAll(ctx)
if err != nil {
return err
}
// 3. 获取全量 busi_group
bgs, err := BusiGroupGetAll(ctx)
if err != nil {
return err
}
bgById := make(map[int64]*BusiGroup, len(bgs))
for _, bg := range bgs {
bgById[bg.Id] = bg
}
// 4. 如果某 busi_group 有 label将其存至对应的 target tags 中
for _, t := range targets {
if t.GroupId == 0 {
continue
}
err := DB(ctx).Transaction(func(tx *gorm.DB) error {
// 4.1 将 group_id 迁移至关联表
if err := TargetBindBgids(ctx, []string{t.Ident}, []int64{t.GroupId}); err != nil {
return err
}
if err := TargetUpdateBgid(ctx, []string{t.Ident}, 0, false); err != nil {
return err
}
// 4.2 判断该机器是否需要新增 tag
if bg, ok := bgById[t.GroupId]; !ok || bg.LabelEnable == 0 ||
strings.Contains(t.Tags, bgLabelKey+"=") {
return nil
} else {
return t.AddTags(ctx, []string{bgLabelKey + "=" + bg.LabelValue})
}
})
if err != nil {
log.Printf("failed to migrate %v bg, err: %v\n", t.Ident, err)
continue
}
}
return nil
}

View File

@@ -1,158 +0,0 @@
package models
import (
"time"
"github.com/ccfos/nightingale/v6/pkg/ctx"
"gorm.io/gorm"
"gorm.io/gorm/clause"
)
type TargetBusiGroup struct {
Id int64 `json:"id" gorm:"primaryKey;type:bigint;autoIncrement"`
TargetIdent string `json:"target_ident" gorm:"type:varchar(191);not null;index:idx_target_group,unique,priority:1"`
GroupId int64 `json:"group_id" gorm:"type:bigint;not null;index:idx_target_group,unique,priority:2"`
UpdateAt int64 `json:"update_at" gorm:"type:bigint;not null"`
}
func (t *TargetBusiGroup) TableName() string {
return "target_busi_group"
}
func TargetBusiGroupsGetAll(ctx *ctx.Context) (map[string][]int64, error) {
var lst []*TargetBusiGroup
err := DB(ctx).Find(&lst).Error
if err != nil {
return nil, err
}
tgs := make(map[string][]int64)
for _, tg := range lst {
tgs[tg.TargetIdent] = append(tgs[tg.TargetIdent], tg.GroupId)
}
return tgs, nil
}
func TargetGroupIdsGetByIdent(ctx *ctx.Context, ident string) ([]int64, error) {
var lst []*TargetBusiGroup
err := DB(ctx).Where("target_ident = ?", ident).Find(&lst).Error
if err != nil {
return nil, err
}
groupIds := make([]int64, 0, len(lst))
for _, tg := range lst {
groupIds = append(groupIds, tg.GroupId)
}
return groupIds, nil
}
func TargetGroupIdsGetByIdents(ctx *ctx.Context, idents []string) ([]int64, error) {
var groupIds []int64
err := DB(ctx).Model(&TargetBusiGroup{}).
Where("target_ident IN ?", idents).
Distinct().
Pluck("group_id", &groupIds).
Error
if err != nil {
return nil, err
}
return groupIds, nil
}
func TargetBindBgids(ctx *ctx.Context, idents []string, bgids []int64) error {
lst := make([]TargetBusiGroup, 0, len(bgids)*len(idents))
updateAt := time.Now().Unix()
for _, bgid := range bgids {
for _, ident := range idents {
cur := TargetBusiGroup{
TargetIdent: ident,
GroupId: bgid,
UpdateAt: updateAt,
}
lst = append(lst, cur)
}
}
var cl clause.Expression = clause.Insert{Modifier: "ignore"}
switch DB(ctx).Dialector.Name() {
case "sqlite":
cl = clause.Insert{Modifier: "or ignore"}
case "postgres":
cl = clause.OnConflict{DoNothing: true}
}
return DB(ctx).Clauses(cl).CreateInBatches(&lst, 10).Error
}
func TargetUnbindBgids(ctx *ctx.Context, idents []string, bgids []int64) error {
return DB(ctx).Where("target_ident in ? and group_id in ?",
idents, bgids).Delete(&TargetBusiGroup{}).Error
}
func TargetDeleteBgids(ctx *ctx.Context, idents []string) error {
return DB(ctx).Where("target_ident in ?", idents).Delete(&TargetBusiGroup{}).Error
}
func TargetOverrideBgids(ctx *ctx.Context, idents []string, bgids []int64) error {
return DB(ctx).Transaction(func(tx *gorm.DB) error {
// 先删除旧的关联
if err := tx.Where("target_ident IN ?", idents).Delete(&TargetBusiGroup{}).Error; err != nil {
return err
}
// 准备新的关联数据
lst := make([]TargetBusiGroup, 0, len(bgids)*len(idents))
updateAt := time.Now().Unix()
for _, ident := range idents {
for _, bgid := range bgids {
cur := TargetBusiGroup{
TargetIdent: ident,
GroupId: bgid,
UpdateAt: updateAt,
}
lst = append(lst, cur)
}
}
if len(lst) == 0 {
return nil
}
// 添加新的关联
var cl clause.Expression = clause.Insert{Modifier: "ignore"}
switch tx.Dialector.Name() {
case "sqlite":
cl = clause.Insert{Modifier: "or ignore"}
case "postgres":
cl = clause.OnConflict{DoNothing: true}
}
return tx.Clauses(cl).CreateInBatches(&lst, 10).Error
})
}
func SeparateTargetIdents(ctx *ctx.Context, idents []string) (existing, nonExisting []string, err error) {
existingMap := make(map[string]bool)
// 查询已存在的 idents 并直接填充 map
err = DB(ctx).Model(&TargetBusiGroup{}).
Where("target_ident IN ?", idents).
Distinct().
Pluck("target_ident", &existing).
Error
if err != nil {
return nil, nil, err
}
for _, ident := range existing {
existingMap[ident] = true
}
// 分离不存在的 idents
for _, ident := range idents {
if !existingMap[ident] {
nonExisting = append(nonExisting, ident)
}
}
return
}

View File

@@ -296,34 +296,6 @@ func UserGet(ctx *ctx.Context, where string, args ...interface{}) (*User, error)
return lst[0], nil
}
func UsersGet(ctx *ctx.Context, where string, args ...interface{}) ([]*User, error) {
var lst []*User
err := DB(ctx).Where(where, args...).Find(&lst).Error
if err != nil {
return nil, err
}
for _, user := range lst {
user.RolesLst = strings.Fields(user.Roles)
user.Admin = user.IsAdmin()
}
return lst, nil
}
func UserMapGet(ctx *ctx.Context, where string, args ...interface{}) map[string]*User {
lst, err := UsersGet(ctx, where, args...)
if err != nil {
logger.Errorf("UsersGet err: %v", err)
return nil
}
um := make(map[string]*User, len(lst))
for _, user := range lst {
um[user.Username] = user
}
return um
}
func UserGetByUsername(ctx *ctx.Context, username string) (*User, error) {
return UserGet(ctx, "username=?", username)
}
@@ -732,10 +704,7 @@ func (u *User) NopriIdents(ctx *ctx.Context, idents []string) ([]string, error)
}
var allowedIdents []string
sub := DB(ctx).Model(&Target{}).Distinct("target.ident").
Joins("join target_busi_group on target.ident = target_busi_group.target_ident").
Where("target_busi_group.group_id in (?)", bgids)
err = DB(ctx).Model(&Target{}).Where("ident in (?)", sub).Pluck("ident", &allowedIdents).Error
err = DB(ctx).Model(&Target{}).Where("group_id in ?", bgids).Pluck("ident", &allowedIdents).Error
if err != nil {
return []string{}, err
}
@@ -767,11 +736,7 @@ func (u *User) BusiGroups(ctx *ctx.Context, limit int, query string, all ...bool
return lst, nil
}
t.GroupIds, err = TargetGroupIdsGetByIdent(ctx, t.Ident)
if err != nil {
return nil, err
}
err = DB(ctx).Order("name").Limit(limit).Where("id in ?", t.GroupIds).Find(&lst).Error
err = DB(ctx).Order("name").Limit(limit).Where("id=?", t.GroupId).Find(&lst).Error
}
return lst, err
@@ -803,12 +768,8 @@ func (u *User) BusiGroups(ctx *ctx.Context, limit int, query string, all ...bool
return lst, err
}
t.GroupIds, err = TargetGroupIdsGetByIdent(ctx, t.Ident)
if err != nil {
return nil, err
}
if t != nil && t.MatchGroupId(busiGroupIds...) {
err = DB(ctx).Order("name").Limit(limit).Where("id in ?", t.GroupIds).Find(&lst).Error
if t != nil && slice.ContainsInt64(busiGroupIds, t.GroupId) {
err = DB(ctx).Order("name").Limit(limit).Where("id=?", t.GroupId).Find(&lst).Error
}
}

View File

@@ -2,20 +2,21 @@ package ctx
import (
"context"
"github.com/ccfos/nightingale/v6/storage"
"github.com/ccfos/nightingale/v6/conf"
"gorm.io/gorm"
)
type Context struct {
DB *gorm.DB
Redis storage.Redis
CenterApi conf.CenterApi
Ctx context.Context
IsCenter bool
}
func NewContext(ctx context.Context, db *gorm.DB, isCenter bool, centerApis ...conf.CenterApi) *Context {
func NewContext(ctx context.Context, db *gorm.DB, redis storage.Redis, isCenter bool, centerApis ...conf.CenterApi) *Context {
var api conf.CenterApi
if len(centerApis) > 0 {
api = centerApis[0]
@@ -24,6 +25,7 @@ func NewContext(ctx context.Context, db *gorm.DB, isCenter bool, centerApis ...c
return &Context{
Ctx: ctx,
DB: db,
Redis: redis,
CenterApi: api,
IsCenter: isCenter,
}

View File

@@ -120,7 +120,7 @@ func GinEngine(mode string, cfg Config) *gin.Engine {
return r
}
func Init(cfg Config, handler http.Handler) func() {
func Init(cfg Config, ctx context.Context, handler http.Handler) func() {
addr := fmt.Sprintf("%s:%d", cfg.Host, cfg.Port)
srv := &http.Server{
Addr: addr,
@@ -146,7 +146,7 @@ func Init(cfg Config, handler http.Handler) func() {
}()
return func() {
ctx, cancel := context.WithTimeout(context.Background(), time.Second*time.Duration(cfg.ShutdownTimeout))
ctx, cancel := context.WithTimeout(ctx, time.Second*time.Duration(cfg.ShutdownTimeout))
defer cancel()
srv.SetKeepAlivesEnabled(false)

View File

@@ -104,56 +104,6 @@ var I18N = `
"builtin metric already exists":"内置指标已存在",
"AlertRule already exists":"告警规则已存在",
"This functionality has not been enabled. Please contact the system administrator to activate it.":"此功能尚未启用。请联系系统管理员启用"
},
"ja_JP": {
"Username or password invalid": "ユーザー名またはパスワードが無効です",
"incorrect verification code": "認証コードが正しくありません",
"roles empty": "役割を空にすることはできません",
"Username already exists": "このユーザー名は既に存在します。別のユーザー名を使用してください",
"failed to count user-groups": "データの検証に失敗しました。もう一度お試しください",
"UserGroup already exists": "グループ名は既に存在します。別の名前を使用してください",
"members empty": "メンバーを空にすることはできません",
"At least one team have rw permission": "少なくとも1つのチームに読み書き権限が必要です",
"Failed to create BusiGroup(%s)": "[%s]の作成に失敗しました。もう一度お試しください",
"business group id invalid": "ビジネスグループIDが正しくありません",
"idents empty": "監視対象を空にすることはできません",
"invalid tag(%s)": "タグ[%s]が無効です",
"invalid tagkey(%s): cannot contains . ": "タグキー[%s]にドット(.)を含めることはできません",
"invalid tagkey(%s): cannot contains _ ": "タグキー[%s]にアンダースコア(_)を含めることはできません",
"invalid tagkey(%s)": "タグキー[%s]が無効です",
"duplicate tagkey(%s)": "タグキー(%s)が重複しています",
"name is empty": "名前を空にすることはできません",
"Ident duplicate": "ダッシュボードの一意の識別子が既に存在します",
"No such dashboard": "ダッシュボードが存在しません",
"Name has invalid characters": "名前に無効な文字が含まれています",
"Name is blank": "名前を空白にすることはできません",
"forbidden": "権限がありません",
"builtin alerts is empty, file: %s": "ビルトインアラートテンプレートが空です %s",
"input json is empty": "提出内容を空にすることはできません",
"fields empty": "選択フィールドを空にすることはできません",
"No such AlertRule": "そのようなアラートルールはありません",
"GroupId(%d) invalid": "ビジネスグループIDが無効です",
"No such recording rule": "そのような記録ルールはありません",
"tags is blank": "タグを空白にすることはできません",
"oops... etime(%d) <= btime(%d)": "開始時間は終了時間より大きくすることはできません",
"group_id invalid": "ビジネスグループが無効です",
"No such AlertMute": "そのようなアラートミュートルールはありません",
"rule_id and tags are both blank": "アラートルールとタグを同時に空にすることはできません",
"rule is blank": "ルールを空にすることはできません",
"rule invalid": "ルールが無効です。正しいかどうか確認してください",
"unsupported field: %s": "フィールド %s はサポートされていません",
"arg(batch) should be nonnegative": "batchは負の数にできません",
"arg(tolerance) should be nonnegative": "toleranceは負の数にできません",
"arg(timeout) should be nonnegative": "timeoutは負の数にできません",
"arg(timeout) longer than five days": "timeoutは5日を超えることはできません",
"arg(title) is required": "titleは必須項目です",
"created task.id is zero": "作成されたタスクIDがゼロです",
"invalid ibex address: %s": "ibex %s のアドレスが無効です",
"url path invalid": "URLパスが無効です",
"no such server": "そのようなインスタンスはありません",
"admin role can not be modified": "管理者ロールは変更できません",
"builtin payload already exists": "ビルトインテンプレートは既に存在します",
"This functionality has not been enabled. Please contact the system administrator to activate it.": "この機能はまだ有効になっていません。システム管理者に連絡して有効にしてください"
}
}
`

View File

@@ -43,7 +43,6 @@ type SsoClient struct {
Host string
Port int
BaseDn string
BaseDns []string
BindUser string
BindPass string
SyncAdd bool
@@ -132,8 +131,6 @@ func (s *SsoClient) Reload(cf Config) {
if s.SyncInterval > 0 {
s.Ticker.Reset(s.SyncInterval * time.Second)
}
s.BaseDns = strings.Split(s.BaseDn, "|")
}
func (s *SsoClient) Copy() *SsoClient {
@@ -161,40 +158,27 @@ func (s *SsoClient) LoginCheck(user, pass string) (*ldap.SearchResult, error) {
}
defer conn.Close()
srs, err := lc.ldapReq(conn, lc.AuthFilter, user)
sr, err := lc.ldapReq(conn, lc.AuthFilter, user)
if err != nil {
return nil, fmt.Errorf("ldap.error: ldap search fail: %v", err)
}
var sr *ldap.SearchResult
for i := range srs {
if srs[i] == nil || len(srs[i].Entries) == 0 {
continue
}
// 多个 dn 中,账号的唯一性由 LDAP 保证
if len(srs[i].Entries) > 1 {
return nil, fmt.Errorf("ldap.error: search user(%s), multi entries found", user)
}
sr = srs[i]
if err := conn.Bind(srs[i].Entries[0].DN, pass); err != nil {
return nil, fmt.Errorf("username or password invalid")
}
for _, info := range srs[i].Entries[0].Attributes {
logger.Infof("ldap.info: user(%s) info: %+v", user, info)
}
break
if len(sr.Entries) == 0 {
return nil, fmt.Errorf("username or password invalid")
}
if sr == nil {
if len(sr.Entries) > 1 {
return nil, fmt.Errorf("ldap.error: search user(%s), multi entries found", user)
}
if err := conn.Bind(sr.Entries[0].DN, pass); err != nil {
return nil, fmt.Errorf("username or password invalid")
}
for _, info := range sr.Entries[0].Attributes {
logger.Infof("ldap.info: user(%s) info: %+v", user, info)
}
return sr, nil
}
@@ -234,26 +218,21 @@ func (s *SsoClient) newLdapConn() (*ldap.Conn, error) {
return conn, nil
}
func (s *SsoClient) ldapReq(conn *ldap.Conn, filter string, values ...interface{}) ([]*ldap.SearchResult, error) {
srs := make([]*ldap.SearchResult, 0, len(s.BaseDns))
func (s *SsoClient) ldapReq(conn *ldap.Conn, filter string, values ...interface{}) (*ldap.SearchResult, error) {
searchRequest := ldap.NewSearchRequest(
s.BaseDn, // The base dn to search
ldap.ScopeWholeSubtree, ldap.NeverDerefAliases, 0, 0, false,
fmt.Sprintf(filter, values...), // The filter to apply
s.genLdapAttributeSearchList(), // A list attributes to retrieve
nil,
)
for i := range s.BaseDns {
searchRequest := ldap.NewSearchRequest(
strings.TrimSpace(s.BaseDns[i]), // The base dn to search
ldap.ScopeWholeSubtree, ldap.NeverDerefAliases, 0, 0, false,
fmt.Sprintf(filter, values...), // The filter to apply
s.genLdapAttributeSearchList(), // A list attributes to retrieve
nil,
)
sr, err := conn.Search(searchRequest)
if err != nil {
logger.Errorf("ldap.error: ldap search fail: %v", err)
continue
}
srs = append(srs, sr)
sr, err := conn.Search(searchRequest)
if err != nil {
return nil, fmt.Errorf("ldap.error: ldap search fail: %v", err)
}
return srs, nil
return sr, nil
}
// GetUserRolesAndTeams Gets the roles and teams of the user
@@ -323,7 +302,6 @@ func LdapLogin(ctx *ctx.Context, username, pass string, defaultRoles []string, d
if err != nil {
return nil, err
}
// copy attributes from ldap
ldap.RLock()
attrs := ldap.Attributes

View File

@@ -82,35 +82,29 @@ func (s *SsoClient) UserGetAll() (map[string]*models.User, error) {
}
defer conn.Close()
srs, err := lc.ldapReq(conn, lc.UserFilter)
sr, err := lc.ldapReq(conn, lc.UserFilter)
if err != nil {
return nil, fmt.Errorf("ldap.error: ldap search fail: %v", err)
}
res := make(map[string]*models.User)
res := make(map[string]*models.User, len(sr.Entries))
for _, entry := range sr.Entries {
attrs := lc.Attributes
username := entry.GetAttributeValue(attrs.Username)
nickname := entry.GetAttributeValue(attrs.Nickname)
email := entry.GetAttributeValue(attrs.Email)
phone := entry.GetAttributeValue(attrs.Phone)
for i := range srs {
if srs[i] == nil {
continue
// Gets the roles and teams for this entry
roleTeamMapping := lc.GetUserRolesAndTeams(entry)
if len(roleTeamMapping.Roles) == 0 {
// No role mapping is configured, the configured default role is used
roleTeamMapping.Roles = lc.DefaultRoles
}
for _, entry := range srs[i].Entries {
attrs := lc.Attributes
username := entry.GetAttributeValue(attrs.Username)
nickname := entry.GetAttributeValue(attrs.Nickname)
email := entry.GetAttributeValue(attrs.Email)
phone := entry.GetAttributeValue(attrs.Phone)
user := new(models.User)
user.FullSsoFieldsWithTeams("ldap", username, nickname, phone, email, roleTeamMapping.Roles, roleTeamMapping.Teams)
// Gets the roles and teams for this entry
roleTeamMapping := lc.GetUserRolesAndTeams(entry)
if len(roleTeamMapping.Roles) == 0 {
// No role mapping is configured, the configured default role is used
roleTeamMapping.Roles = lc.DefaultRoles
}
user := new(models.User)
user.FullSsoFieldsWithTeams("ldap", username, nickname, phone, email, roleTeamMapping.Roles, roleTeamMapping.Teams)
res[entry.GetAttributeValue(attrs.Username)] = user
}
res[entry.GetAttributeValue(attrs.Username)] = user
}
return res, nil
@@ -178,20 +172,13 @@ func (s *SsoClient) UserExist(username string) (bool, error) {
}
defer conn.Close()
srs, err := lc.ldapReq(conn, "(&(%s=%s))", lc.Attributes.Username, username)
sr, err := lc.ldapReq(conn, "(&(%s=%s))", lc.Attributes.Username, username)
if err != nil {
return false, err
}
for i := range srs {
if srs[i] == nil {
continue
}
if len(srs[i].Entries) > 0 {
return true, nil
}
if len(sr.Entries) > 0 {
return true, nil
}
return false, nil

Some files were not shown because too many files have changed in this diff Show More