Compare commits

...

1 Commits

Author SHA1 Message Date
Xu Bin
0292779711 refactor: embedded ibex (#2136) 2024-09-03 15:32:35 +08:00
51 changed files with 4155 additions and 52 deletions

View File

@@ -16,6 +16,7 @@ import (
"github.com/ccfos/nightingale/v6/alert/sender"
"github.com/ccfos/nightingale/v6/conf"
"github.com/ccfos/nightingale/v6/dumper"
"github.com/ccfos/nightingale/v6/ibex"
"github.com/ccfos/nightingale/v6/memsto"
"github.com/ccfos/nightingale/v6/models"
"github.com/ccfos/nightingale/v6/pkg/ctx"
@@ -26,8 +27,6 @@ import (
"github.com/ccfos/nightingale/v6/pushgw/writer"
"github.com/ccfos/nightingale/v6/storage"
"github.com/ccfos/nightingale/v6/tdengine"
"github.com/flashcatcloud/ibex/src/cmd/ibex"
)
func Initialize(configDir string, cryptoKey string) (func(), error) {
@@ -41,14 +40,14 @@ func Initialize(configDir string, cryptoKey string) (func(), error) {
return nil, err
}
ctx := ctx.NewContext(context.Background(), nil, false, config.CenterApi)
var redis storage.Redis
redis, err = storage.NewRedis(config.Redis)
if err != nil {
return nil, err
}
ctx := ctx.NewContext(context.Background(), nil, redis, false, config.CenterApi)
syncStats := memsto.NewSyncStats()
alertStats := astats.NewSyncStats()
@@ -74,13 +73,13 @@ func Initialize(configDir string, cryptoKey string) (func(), error) {
rt := router.New(config.HTTP, config.Alert, alertMuteCache, targetCache, busiGroupCache, alertStats, ctx, externalProcessors)
if config.Ibex.Enable {
ibex.ServerStart(false, nil, redis, config.HTTP.APIForService.BasicAuth, config.Alert.Heartbeat, &config.CenterApi, r, nil, config.Ibex, config.HTTP.Port)
ibex.ServerStart(ctx, false, nil, redis, config.HTTP.APIForService.BasicAuth, config.Alert.Heartbeat, &config.CenterApi, r, nil, config.Ibex, config.HTTP.Port)
}
rt.Config(r)
dumper.ConfigRouter(r)
httpClean := httpx.Init(config.HTTP, r)
httpClean := httpx.Init(config.HTTP, context.Background(), r)
return func() {
logxClean()

View File

@@ -12,8 +12,7 @@ import (
"github.com/ccfos/nightingale/v6/memsto"
"github.com/ccfos/nightingale/v6/models"
"github.com/ccfos/nightingale/v6/pkg/ctx"
imodels "github.com/flashcatcloud/ibex/src/models"
"github.com/flashcatcloud/ibex/src/storage"
"github.com/ccfos/nightingale/v6/storage"
"github.com/toolkits/pkg/logger"
)
@@ -43,7 +42,7 @@ func (c *IbexCallBacker) CallBack(ctx CallBackContext) {
}
func (c *IbexCallBacker) handleIbex(ctx *ctx.Context, url string, event *models.AlertCurEvent) {
if imodels.DB() == nil && ctx.IsCenter {
if models.DB(ctx) == nil && ctx.IsCenter {
logger.Warning("event_callback_ibex: db is nil")
return
}
@@ -142,7 +141,7 @@ func CallIbex(ctx *ctx.Context, id int64, host string,
AlertTriggered: true,
}
id, err = TaskAdd(in, tpl.UpdateBy, ctx.IsCenter)
id, err = TaskAdd(ctx, in, tpl.UpdateBy, ctx.IsCenter)
if err != nil {
logger.Errorf("event_callback_ibex: call ibex fail: %v", err)
return
@@ -184,13 +183,13 @@ func canDoIbex(username string, tpl *models.TaskTpl, host string, targetCache *m
return target.GroupId == tpl.GroupId, nil
}
func TaskAdd(f models.TaskForm, authUser string, isCenter bool) (int64, error) {
func TaskAdd(ctx *ctx.Context, f models.TaskForm, authUser string, isCenter bool) (int64, error) {
hosts := cleanHosts(f.Hosts)
if len(hosts) == 0 {
return 0, fmt.Errorf("arg(hosts) empty")
}
taskMeta := &imodels.TaskMeta{
taskMeta := &models.TaskMeta{
Title: f.Title,
Account: f.Account,
Batch: f.Batch,
@@ -213,34 +212,34 @@ func TaskAdd(f models.TaskForm, authUser string, isCenter bool) (int64, error) {
// 任务类型分为"告警规则触发"和"n9e center用户下发"两种;
// 边缘机房"告警规则触发"的任务不需要规划并且它可能是失联的无法使用db资源所以放入redis缓存中直接下发给agentd执行
if !isCenter && f.AlertTriggered {
if err := taskMeta.Create(); err != nil {
if err := taskMeta.Create(ctx); err != nil {
// 当网络不连通时生成唯一的id防止边缘机房中不同任务的id相同
// 方法是redis自增id去防止同一个机房的不同n9e edge生成的id相同
// 但没法防止不同边缘机房生成同样的id所以生成id的数据不会上报存入数据库只用于闭环执行。
taskMeta.Id, err = storage.IdGet()
taskMeta.Id, err = storage.IdGet(ctx.Redis)
if err != nil {
return 0, err
}
}
taskHost := imodels.TaskHost{
taskHost := models.TaskHost{
Id: taskMeta.Id,
Host: hosts[0],
Status: "running",
}
if err = taskHost.Create(); err != nil {
if err = taskHost.Create(ctx); err != nil {
logger.Warningf("task_add_fail: authUser=%s title=%s err=%s", authUser, taskMeta.Title, err.Error())
}
// 缓存任务元信息和待下发的任务
err = taskMeta.Cache(hosts[0])
err = taskMeta.Cache(ctx, hosts[0])
if err != nil {
return 0, err
}
} else {
// 如果是中心机房,还是保持之前的逻辑
err = taskMeta.Save(hosts, f.Action)
err = taskMeta.Save(ctx, hosts, f.Action)
if err != nil {
return 0, err
}

View File

@@ -18,6 +18,7 @@ import (
"github.com/ccfos/nightingale/v6/conf"
"github.com/ccfos/nightingale/v6/cron"
"github.com/ccfos/nightingale/v6/dumper"
"github.com/ccfos/nightingale/v6/ibex"
"github.com/ccfos/nightingale/v6/memsto"
"github.com/ccfos/nightingale/v6/models"
"github.com/ccfos/nightingale/v6/models/migrate"
@@ -33,8 +34,6 @@ import (
"github.com/ccfos/nightingale/v6/pushgw/writer"
"github.com/ccfos/nightingale/v6/storage"
"github.com/ccfos/nightingale/v6/tdengine"
"github.com/flashcatcloud/ibex/src/cmd/ibex"
)
func Initialize(configDir string, cryptoKey string) (func(), error) {
@@ -61,7 +60,14 @@ func Initialize(configDir string, cryptoKey string) (func(), error) {
if err != nil {
return nil, err
}
ctx := ctx.NewContext(context.Background(), db, true)
var redis storage.Redis
redis, err = storage.NewRedis(config.Redis)
if err != nil {
return nil, err
}
ctx := ctx.NewContext(context.Background(), db, redis, true)
migrate.Migrate(db)
models.InitRoot(ctx)
@@ -73,11 +79,6 @@ func Initialize(configDir string, cryptoKey string) (func(), error) {
}
integration.Init(ctx, config.Center.BuiltinIntegrationsDir)
var redis storage.Redis
redis, err = storage.NewRedis(config.Redis)
if err != nil {
return nil, err
}
metas := metas.New(redis)
idents := idents.New(ctx, redis)
@@ -123,10 +124,10 @@ func Initialize(configDir string, cryptoKey string) (func(), error) {
if config.Ibex.Enable {
migrate.MigrateIbexTables(db)
ibex.ServerStart(true, db, redis, config.HTTP.APIForService.BasicAuth, config.Alert.Heartbeat, &config.CenterApi, r, centerRouter, config.Ibex, config.HTTP.Port)
ibex.ServerStart(ctx, true, db, redis, config.HTTP.APIForService.BasicAuth, config.Alert.Heartbeat, &config.CenterApi, r, centerRouter, config.Ibex, config.HTTP.Port)
}
httpClean := httpx.Init(config.HTTP, r)
httpClean := httpx.Init(config.HTTP, context.Background(), r)
return func() {
logxClean()

View File

@@ -126,7 +126,7 @@ func (rt *Router) taskAdd(c *gin.Context) {
rt.checkTargetPerm(c, f.Hosts)
// call ibex
taskId, err := sender.TaskAdd(f, user.Username, rt.Ctx.IsCenter)
taskId, err := sender.TaskAdd(rt.Ctx, f, user.Username, rt.Ctx.IsCenter)
ginx.Dangerous(err)
if taskId <= 0 {

View File

@@ -18,7 +18,7 @@ func Upgrade(configFile string) error {
return err
}
ctx := ctx.NewContext(context.Background(), db, true)
ctx := ctx.NewContext(context.Background(), db, nil, true)
for _, cluster := range config.Clusters {
count, err := models.GetDatasourcesCountByName(ctx, cluster.Name)
if err != nil {

View File

@@ -12,6 +12,7 @@ import (
"github.com/ccfos/nightingale/v6/center/metas"
"github.com/ccfos/nightingale/v6/conf"
"github.com/ccfos/nightingale/v6/dumper"
"github.com/ccfos/nightingale/v6/ibex"
"github.com/ccfos/nightingale/v6/memsto"
"github.com/ccfos/nightingale/v6/pkg/ctx"
"github.com/ccfos/nightingale/v6/pkg/httpx"
@@ -22,8 +23,6 @@ import (
"github.com/ccfos/nightingale/v6/pushgw/writer"
"github.com/ccfos/nightingale/v6/storage"
"github.com/ccfos/nightingale/v6/tdengine"
"github.com/flashcatcloud/ibex/src/cmd/ibex"
)
func Initialize(configDir string, cryptoKey string) (func(), error) {
@@ -40,7 +39,6 @@ func Initialize(configDir string, cryptoKey string) (func(), error) {
if len(config.CenterApi.Addrs) < 1 {
return nil, errors.New("failed to init config: the CenterApi configuration is missing")
}
ctx := ctx.NewContext(context.Background(), nil, false, config.CenterApi)
var redis storage.Redis
redis, err = storage.NewRedis(config.Redis)
@@ -48,6 +46,8 @@ func Initialize(configDir string, cryptoKey string) (func(), error) {
return nil, err
}
ctx := ctx.NewContext(context.Background(), nil, redis, false, config.CenterApi)
syncStats := memsto.NewSyncStats()
targetCache := memsto.NewTargetCache(ctx, syncStats, redis)
@@ -82,12 +82,12 @@ func Initialize(configDir string, cryptoKey string) (func(), error) {
alertrtRouter.Config(r)
if config.Ibex.Enable {
ibex.ServerStart(false, nil, redis, config.HTTP.APIForService.BasicAuth, config.Alert.Heartbeat, &config.CenterApi, r, nil, config.Ibex, config.HTTP.Port)
ibex.ServerStart(ctx, false, nil, redis, config.HTTP.APIForService.BasicAuth, config.Alert.Heartbeat, &config.CenterApi, r, nil, config.Ibex, config.HTTP.Port)
}
}
dumper.ConfigRouter(r)
httpClean := httpx.Init(config.HTTP, r)
httpClean := httpx.Init(config.HTTP, context.Background(), r)
return func() {
logxClean()

119
cmd/ibex/main.go Normal file
View File

@@ -0,0 +1,119 @@
package main
import (
"fmt"
"os"
"github.com/ccfos/nightingale/v6/ibex/agentd"
"github.com/ccfos/nightingale/v6/ibex/server"
"github.com/toolkits/pkg/net/tcpx"
"github.com/toolkits/pkg/runner"
"github.com/urfave/cli/v2"
)
// VERSION go build -ldflags "-X main.VERSION=x.x.x"
var VERSION = "not specified"
func main() {
app := cli.NewApp()
app.Name = "ibex"
app.Version = VERSION
app.Usage = "Ibex, running scripts on large scale machines"
app.Commands = []*cli.Command{
newCenterServerCmd(),
newEdgeServerCmd(),
newAgentdCmd(),
}
app.Run(os.Args)
}
func newCenterServerCmd() *cli.Command {
return &cli.Command{
Name: "server",
Usage: "Run server",
Flags: []cli.Flag{
&cli.StringFlag{
Name: "conf",
Aliases: []string{"c"},
Usage: "specify configuration file(.json,.yaml,.toml)",
},
},
Action: func(c *cli.Context) error {
printEnv()
tcpx.WaitHosts()
var opts []server.ServerOption
if c.String("conf") != "" {
opts = append(opts, server.SetConfigFile(c.String("conf")))
}
opts = append(opts, server.SetVersion(VERSION))
server.Run(true, opts...)
return nil
},
}
}
func newEdgeServerCmd() *cli.Command {
return &cli.Command{
Name: "edge server",
Usage: "Run edge server",
Flags: []cli.Flag{
&cli.StringFlag{
Name: "conf",
Aliases: []string{"c"},
Usage: "specify configuration file(.json,.yaml,.toml)",
},
},
Action: func(c *cli.Context) error {
printEnv()
tcpx.WaitHosts()
var opts []server.ServerOption
if c.String("conf") != "" {
opts = append(opts, server.SetConfigFile(c.String("conf")))
}
opts = append(opts, server.SetVersion(VERSION))
server.Run(false, opts...)
return nil
},
}
}
func newAgentdCmd() *cli.Command {
return &cli.Command{
Name: "agentd",
Usage: "Run agentd",
Flags: []cli.Flag{
&cli.StringFlag{
Name: "conf",
Aliases: []string{"c"},
Usage: "specify configuration file(.json,.yaml,.toml)",
},
},
Action: func(c *cli.Context) error {
printEnv()
var opts []agentd.AgentdOption
if c.String("conf") != "" {
opts = append(opts, agentd.SetConfigFile(c.String("conf")))
}
opts = append(opts, agentd.SetVersion(VERSION))
agentd.Run(opts...)
return nil
},
}
}
func printEnv() {
runner.Init()
fmt.Println("runner.cwd:", runner.Cwd)
fmt.Println("runner.hostname:", runner.Hostname)
fmt.Println("runner.fd_limits:", runner.FdLimits())
fmt.Println("runner.vm_limits:", runner.VMLimits())
}

38
etc/ibex/agentd.toml Normal file
View File

@@ -0,0 +1,38 @@
# debug, release
RunMode = "debug"
# task meta storage dir
MetaDir = "./meta"
[HTTP]
Enable = true
# http listening address
Host = "0.0.0.0"
# http listening port
Port = 2090
# https cert file path
CertFile = ""
# https key file path
KeyFile = ""
# whether print access log
PrintAccessLog = true
# whether enable pprof
PProf = false
# http graceful shutdown timeout, unit: s
ShutdownTimeout = 30
# max content length: 64M
MaxContentLength = 67108864
# http server read timeout, unit: s
ReadTimeout = 20
# http server write timeout, unit: s
WriteTimeout = 40
# http server idle timeout, unit: s
IdleTimeout = 120
[Heartbeat]
# unit: ms
Interval = 1000
# rpc servers
Servers = ["127.0.0.1:20090"]
# $ip or $hostname or specified string
Host = "$hostname"

View File

@@ -0,0 +1,20 @@
[Unit]
Description="ibex-agentd"
After=network.target
[Service]
Type=simple
ExecStart=/root/gopath/ibex/ibex agentd
WorkingDirectory=/root/gopath/ibex
Restart=on-failure
SuccessExitStatus=0
LimitNOFILE=65536
StandardOutput=syslog
StandardError=syslog
SyslogIdentifier=ibex-agentd
[Install]
WantedBy=multi-user.target

View File

@@ -0,0 +1,20 @@
[Unit]
Description="ibex-server"
After=network.target
[Service]
Type=simple
ExecStart=/root/gopath/ibex/ibex server
WorkingDirectory=/root/gopath/ibex
Restart=on-failure
SuccessExitStatus=0
LimitNOFILE=65536
StandardOutput=syslog
StandardError=syslog
SyslogIdentifier=ibex-server
[Install]
WantedBy=multi-user.target

86
etc/ibex/server.toml Normal file
View File

@@ -0,0 +1,86 @@
# debug, release
RunMode = "debug"
[Log]
# log write dir
Dir = "logs-server"
# log level: DEBUG INFO WARNING ERROR
Level = "DEBUG"
# stdout, stderr, file
Output = "stdout"
# # rotate by time
# KeepHours: 4
# # rotate by size
# RotateNum = 3
# # unit: MB
# RotateSize = 256
[HTTP]
Enable = true
# http listening address
Host = "0.0.0.0"
# http listening port
Port = 10090
# https cert file path
CertFile = ""
# https key file path
KeyFile = ""
# whether print access log
PrintAccessLog = true
# whether enable pprof
PProf = false
# http graceful shutdown timeout, unit: s
ShutdownTimeout = 30
# max content length: 64M
MaxContentLength = 67108864
# http server read timeout, unit: s
ReadTimeout = 20
# http server write timeout, unit: s
WriteTimeout = 40
# http server idle timeout, unit: s
IdleTimeout = 120
[BasicAuth]
# using when call apis
ibex = "ibex"
[RPC]
Listen = "0.0.0.0:20090"
[Heartbeat]
# auto detect if blank
IP = ""
# unit: ms
Interval = 1000
[Output]
# database | remote
ComeFrom = "database"
AgtdPort = 2090
[DB]
# postgres: host=%s port=%s user=%s dbname=%s password=%s sslmode=%s
# postgres: DSN="host=127.0.0.1 port=5432 user=root dbname=n9e_v6 password=1234 sslmode=disable"
DSN="root:1234@tcp(127.0.0.1:3306)/ibex?charset=utf8mb4&parseTime=True&loc=Local&allowNativePasswords=true"
# enable debug mode or not
Debug = false
# mysql postgres
DBType = "mysql"
# unit: s
MaxLifetime = 7200
# max open connections
MaxOpenConns = 150
# max idle connections
MaxIdleConns = 50
# table prefix
TablePrefix = ""
[Redis]
# address, ip:port or ip1:port,ip2:port for cluster and sentinel(SentinelAddrs)
Address = "127.0.0.1:6379"
# Username = ""
# Password = ""
# DB = 0
# UseTLS = false
# TLSMinVersion = "1.2"
# standalone cluster sentinel

12
go.mod
View File

@@ -3,12 +3,11 @@ module github.com/ccfos/nightingale/v6
go 1.18
require (
github.com/BurntSushi/toml v0.3.1
github.com/BurntSushi/toml v1.3.2
github.com/coreos/go-oidc v2.2.1+incompatible
github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc
github.com/dgrijalva/jwt-go v3.2.0+incompatible
github.com/expr-lang/expr v1.16.1
github.com/flashcatcloud/ibex v1.3.5
github.com/gin-contrib/pprof v1.4.0
github.com/gin-gonic/gin v1.9.1
github.com/go-ldap/ldap/v3 v3.4.4
@@ -34,6 +33,7 @@ require (
github.com/spaolacci/murmur3 v1.1.0
github.com/tidwall/gjson v1.14.0
github.com/toolkits/pkg v1.3.6
github.com/urfave/cli/v2 v2.27.4
golang.org/x/exp v0.0.0-20230713183714-613f0c0eb8a1
golang.org/x/oauth2 v0.10.0
gopkg.in/gomail.v2 v2.0.0-20160411212932-81ebce5c23df
@@ -44,6 +44,12 @@ require (
gorm.io/gorm v1.25.7-0.20240204074919-46816ad31dde
)
require (
github.com/cpuguy83/go-md2man/v2 v2.0.4 // indirect
github.com/russross/blackfriday/v2 v2.1.0 // indirect
github.com/xrash/smetrics v0.0.0-20240521201337-686a1a2994c1 // indirect
)
require (
github.com/Azure/go-ntlmssp v0.0.0-20220621081337-cb9428e4ac1e // indirect
github.com/beorn7/perks v1.0.1 // indirect
@@ -90,7 +96,7 @@ require (
github.com/tidwall/match v1.1.1 // indirect
github.com/tidwall/pretty v1.2.0 // indirect
github.com/twitchyliquid64/golang-asm v0.15.1 // indirect
github.com/ugorji/go/codec v1.2.11 // indirect
github.com/ugorji/go/codec v1.2.11
go.uber.org/atomic v1.11.0 // indirect
go.uber.org/automaxprocs v1.5.2 // indirect
golang.org/x/arch v0.3.0 // indirect

13
go.sum
View File

@@ -5,8 +5,9 @@ github.com/Azure/azure-sdk-for-go/sdk/internal v1.3.0 h1:sXr+ck84g/ZlZUOZiNELInm
github.com/Azure/go-ntlmssp v0.0.0-20220621081337-cb9428e4ac1e h1:NeAW1fUYUEWhft7pkxDf6WoUvEZJ/uOKsvtpjLnn8MU=
github.com/Azure/go-ntlmssp v0.0.0-20220621081337-cb9428e4ac1e/go.mod h1:chxPXzSsl7ZWRAuOIE23GDNzjWuZquvFlgA8xmpunjU=
github.com/AzureAD/microsoft-authentication-library-for-go v1.0.0 h1:OBhqkivkhkMqLPymWEppkm7vgPQY2XsHoEkaMQ0AdZY=
github.com/BurntSushi/toml v0.3.1 h1:WXkYYl6Yr3qBf1K79EBnL4mak0OimBfB0XUf9Vl28OQ=
github.com/BurntSushi/toml v0.3.1/go.mod h1:xHWCNGjB5oqiDr8zfno3MHue2Ht5sIBksp03qcyfWMU=
github.com/BurntSushi/toml v1.3.2 h1:o7IhLm0Msx3BaB+n3Ag7L8EVlByGnpq14C4YWiu/gL8=
github.com/BurntSushi/toml v1.3.2/go.mod h1:CxXYINrC8qIiEnFrOxCa7Jy5BFHlXnUU2pbicEuybxQ=
github.com/Masterminds/semver/v3 v3.1.1 h1:hLg3sBzpNErnxhQtUy/mmLR2I9foDujNK030IGemrRc=
github.com/Masterminds/semver/v3 v3.1.1/go.mod h1:VPu/7SZ7ePZ3QOrcuXROw5FAcLl4a0cBrbBpGY/8hQs=
github.com/alecthomas/units v0.0.0-20211218093645-b94a6e3cc137 h1:s6gZFSlWYmbqAuRjVTiNNhvNRfY2Wxp9nhfyel4rklc=
@@ -29,6 +30,8 @@ github.com/coreos/go-oidc v2.2.1+incompatible h1:mh48q/BqXqgjVHpy2ZY7WnWAbenxRjs
github.com/coreos/go-oidc v2.2.1+incompatible/go.mod h1:CgnwVTmzoESiwO9qyAFEMiHoZ1nMCKZlZ9V6mm3/LKc=
github.com/coreos/go-systemd v0.0.0-20190321100706-95778dfbb74e/go.mod h1:F5haX7vjVVG0kc13fIWeqUViNPyEJxv/OmvnBo0Yme4=
github.com/coreos/go-systemd v0.0.0-20190719114852-fd7a80b32e1f/go.mod h1:F5haX7vjVVG0kc13fIWeqUViNPyEJxv/OmvnBo0Yme4=
github.com/cpuguy83/go-md2man/v2 v2.0.4 h1:wfIWP927BUkWJb2NmU/kNDYIBTh/ziUX91+lVfRxZq4=
github.com/cpuguy83/go-md2man/v2 v2.0.4/go.mod h1:tgQtvFlXSQOSOSIRvRPT7W67SCa46tRHOmNcaadrF8o=
github.com/creack/pty v1.1.7/go.mod h1:lj5s0c3V2DBrqTV7llrYr5NG6My20zk30Fl46Y7DoTY=
github.com/creack/pty v1.1.9/go.mod h1:oKZEueFk5CKHvIhNR5MUki03XCEU+Q6VDXinZuGJ33E=
github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
@@ -47,8 +50,6 @@ github.com/fatih/camelcase v1.0.0 h1:hxNvNX/xYBp0ovncs8WyWZrOrpBNub/JfaMvbURyft8
github.com/fatih/camelcase v1.0.0/go.mod h1:yN2Sb0lFhZJUdVvtELVWefmrXpuZESvPmqwoZc+/fpc=
github.com/fatih/structs v1.1.0 h1:Q7juDM0QtcnhCpeyLGQKyg4TOIghuNXrkL32pHAUMxo=
github.com/fatih/structs v1.1.0/go.mod h1:9NiDSp5zOcgEDl+j00MP/WkGVPOlPRLejGD8Ga6PJ7M=
github.com/flashcatcloud/ibex v1.3.5 h1:8GOOf5+aJT0TP/MC6izz7CO5JKJSdKVFBwL0vQp93Nc=
github.com/flashcatcloud/ibex v1.3.5/go.mod h1:T8hbMUySK2q6cXUaYp0AUVeKkU9Od2LjzwmB5lmTRBM=
github.com/gabriel-vasile/mimetype v1.4.2 h1:w5qFW6JKBz9Y393Y4q372O9A7cUSequkh1Q7OhCmWKU=
github.com/gabriel-vasile/mimetype v1.4.2/go.mod h1:zApsH/mKG4w07erKIaJPFiX0Tsq9BFQgN3qGY5GnNgA=
github.com/garyburd/redigo v1.6.2/go.mod h1:NR3MbYisc3/PwhQ00EMzDiPmrwpPxAn5GI05/YaO1SY=
@@ -262,6 +263,8 @@ github.com/rogpeppe/go-internal v1.10.0 h1:TMyTOH3F/DB16zRVcYyreMH6GnZZrwQVAoYjR
github.com/rs/xid v1.2.1/go.mod h1:+uKXf+4Djp6Md1KODXJxgGQPKngRmWyn10oCKFzNHOQ=
github.com/rs/zerolog v1.13.0/go.mod h1:YbFCdg8HfsridGWAh22vktObvhZbQsZXe4/zB0OKkWU=
github.com/rs/zerolog v1.15.0/go.mod h1:xYTKnLHcpfU2225ny5qZjxnj9NvkumZYjJHlAThCjNc=
github.com/russross/blackfriday/v2 v2.1.0 h1:JIOH55/0cWyOuilr9/qlrm0BSXldqnqwMsf35Ld67mk=
github.com/russross/blackfriday/v2 v2.1.0/go.mod h1:+Rmxgy9KzJVeS9/2gXHxylqXiyQDYRxCVz55jmeOWTM=
github.com/satori/go.uuid v1.2.0/go.mod h1:dA0hQrYB0VpLJoorglMZABFdXlWrHn1NEOzdhQKdks0=
github.com/shopspring/decimal v0.0.0-20180709203117-cd690d0c9e24/go.mod h1:M+9NzErvs504Cn4c5DxATwIqPbtswREoFCre64PpcG4=
github.com/shopspring/decimal v1.2.0 h1:abSATXmQEYyShuxI4/vyW3tV1MrKAJzCZ/0zLUXYbsQ=
@@ -302,6 +305,10 @@ github.com/ugorji/go v1.2.7/go.mod h1:nF9osbDWLy6bDVv/Rtoh6QgnvNDpmCalQV5urGCCS6
github.com/ugorji/go/codec v1.2.7/go.mod h1:WGN1fab3R1fzQlVQTkfxVtIBhWDRqOviHU95kRgeqEY=
github.com/ugorji/go/codec v1.2.11 h1:BMaWp1Bb6fHwEtbplGBGJ498wD+LKlNSl25MjdZY4dU=
github.com/ugorji/go/codec v1.2.11/go.mod h1:UNopzCgEMSXjBc6AOMqYvWC1ktqTAfzJZUZgYf6w6lg=
github.com/urfave/cli/v2 v2.27.4 h1:o1owoI+02Eb+K107p27wEX9Bb8eqIoZCfLXloLUSWJ8=
github.com/urfave/cli/v2 v2.27.4/go.mod h1:m4QzxcD2qpra4z7WhzEGn74WZLViBnMpb1ToCAKdGRQ=
github.com/xrash/smetrics v0.0.0-20240521201337-686a1a2994c1 h1:gEOO8jv9F4OT7lGCjxCBTO/36wtF6j2nSip77qHd4x4=
github.com/xrash/smetrics v0.0.0-20240521201337-686a1a2994c1/go.mod h1:Ohn+xnUBiLI6FVj/9LpzZWtj1/D6lUovWYBkxHVV3aM=
github.com/yuin/goldmark v1.1.27/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74=
github.com/yuin/goldmark v1.2.1/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74=
github.com/yuin/goldmark v1.4.13/go.mod h1:6yULJ656Px+3vBD8DxQVa3kxgyrAnzto9xy5taEt/CY=

117
ibex/agentd/agentd.go Normal file
View File

@@ -0,0 +1,117 @@
package agentd
import (
"context"
"fmt"
"log"
"os"
"os/signal"
"path/filepath"
"syscall"
"github.com/toolkits/pkg/i18n"
"github.com/ccfos/nightingale/v6/ibex/agentd/config"
"github.com/ccfos/nightingale/v6/ibex/agentd/router"
"github.com/ccfos/nightingale/v6/ibex/agentd/timer"
"github.com/ccfos/nightingale/v6/pkg/httpx"
)
type Agentd struct {
ConfigFile string
Version string
}
type AgentdOption func(*Agentd)
func SetConfigFile(f string) AgentdOption {
return func(s *Agentd) {
s.ConfigFile = f
}
}
func SetVersion(v string) AgentdOption {
return func(s *Agentd) {
s.Version = v
}
}
// Run run agentd
func Run(opts ...AgentdOption) {
code := 1
sc := make(chan os.Signal, 1)
signal.Notify(sc, syscall.SIGHUP, syscall.SIGINT, syscall.SIGTERM, syscall.SIGQUIT)
agentd := Agentd{
ConfigFile: filepath.Join("etc", "ibex", "agentd.toml"),
Version: "not specified",
}
for _, opt := range opts {
opt(&agentd)
}
cleanFunc, err := agentd.initialize()
if err != nil {
fmt.Println("agentd init fail:", err)
os.Exit(code)
}
EXIT:
for {
sig := <-sc
fmt.Println("received signal:", sig.String())
switch sig {
case syscall.SIGQUIT, syscall.SIGTERM, syscall.SIGINT:
code = 0
break EXIT
case syscall.SIGHUP:
// reload configuration?
default:
break EXIT
}
}
cleanFunc()
fmt.Println("agentd exited")
os.Exit(code)
}
func (s Agentd) initialize() (func(), error) {
fns := Functions{}
ctx, cancel := context.WithCancel(context.Background())
fns.Add(cancel)
log.SetFlags(log.Ldate | log.Ltime | log.Lshortfile)
// parse config file
config.MustLoad(s.ConfigFile)
// init i18n
i18n.Init()
// init http server
r := router.New(s.Version)
httpClean := httpx.Init(config.C.HTTP, ctx, r)
fns.Add(httpClean)
go timer.Heartbeat(ctx)
return fns.Ret(), nil
}
type Functions struct {
List []func()
}
func (fs *Functions) Add(f func()) {
fs.List = append(fs.List, f)
}
func (fs *Functions) Ret() func() {
return func() {
for i := 0; i < len(fs.List); i++ {
fs.List[i]()
}
}
}

110
ibex/agentd/client/cli.go Normal file
View File

@@ -0,0 +1,110 @@
package client
import (
"bufio"
"io"
"log"
"net"
"net/rpc"
"reflect"
"time"
"github.com/toolkits/pkg/net/gobrpc"
"github.com/ugorji/go/codec"
"github.com/ccfos/nightingale/v6/ibex/agentd/config"
)
var cli *gobrpc.RPCClient
func getCli() *gobrpc.RPCClient {
if cli != nil {
return cli
}
// detect the fastest server
var (
address string
client *rpc.Client
duration int64 = 999999999999
)
// auto close other slow server
acm := make(map[string]*rpc.Client)
l := len(config.C.Heartbeat.Servers)
for i := 0; i < l; i++ {
addr := config.C.Heartbeat.Servers[i]
begin := time.Now()
conn, err := net.DialTimeout("tcp", addr, time.Second*5)
if err != nil {
log.Printf("W: dial %s fail: %s", addr, err)
continue
}
var bufConn = struct {
io.Closer
*bufio.Reader
*bufio.Writer
}{conn, bufio.NewReader(conn), bufio.NewWriter(conn)}
var mh codec.MsgpackHandle
mh.MapType = reflect.TypeOf(map[string]interface{}(nil))
rpcCodec := codec.MsgpackSpecRpc.ClientCodec(bufConn, &mh)
c := rpc.NewClientWithCodec(rpcCodec)
acm[addr] = c
var out string
err = c.Call("Server.Ping", "", &out)
if err != nil {
log.Printf("W: ping %s fail: %s", addr, err)
continue
}
use := time.Since(begin).Nanoseconds()
if use < duration {
address = addr
client = c
duration = use
}
}
if address == "" {
log.Println("E: no job server found")
return nil
}
log.Printf("I: choose server: %s, duration: %dms", address, duration/1000000)
for addr, c := range acm {
if addr == address {
continue
}
c.Close()
}
cli = gobrpc.NewRPCClient(address, client, 5*time.Second)
return cli
}
// GetCli 探测所有server端的延迟自动选择最快的
func GetCli() *gobrpc.RPCClient {
for {
c := getCli()
if c != nil {
return c
}
time.Sleep(time.Second * 10)
}
}
// CloseCli 关闭客户端连接
func CloseCli() {
if cli != nil {
cli.Close()
cli = nil
}
}

View File

@@ -0,0 +1,31 @@
package client
import (
"fmt"
"log"
"github.com/ccfos/nightingale/v6/ibex/types"
)
// Meta 从Server端获取任务元信息
func Meta(id int64) (script string, args string, account string, stdin string, err error) {
var resp types.TaskMetaResponse
err = GetCli().Call("Server.GetTaskMeta", id, &resp)
if err != nil {
log.Println("E: rpc call Server.GetTaskMeta:", err)
CloseCli()
return
}
if resp.Message != "" {
log.Println("E: rpc call Server.GetTaskMeta:", resp.Message)
err = fmt.Errorf(resp.Message)
return
}
script = resp.Script
args = resp.Args
account = resp.Account
stdin = resp.Stdin
return
}

View File

@@ -0,0 +1,140 @@
package config
import (
"fmt"
"log"
"net"
"os"
"strings"
"sync"
"github.com/koding/multiconfig"
"github.com/toolkits/pkg/file"
"github.com/ccfos/nightingale/v6/pkg/httpx"
)
var (
C = new(Config)
once sync.Once
)
func MustLoad(fpaths ...string) {
once.Do(func() {
loaders := []multiconfig.Loader{
&multiconfig.TagLoader{},
&multiconfig.EnvironmentLoader{},
}
for _, fpath := range fpaths {
handled := false
if strings.HasSuffix(fpath, "toml") {
loaders = append(loaders, &multiconfig.TOMLLoader{Path: fpath})
handled = true
}
if strings.HasSuffix(fpath, "conf") {
loaders = append(loaders, &multiconfig.TOMLLoader{Path: fpath})
handled = true
}
if strings.HasSuffix(fpath, "json") {
loaders = append(loaders, &multiconfig.JSONLoader{Path: fpath})
handled = true
}
if strings.HasSuffix(fpath, "yaml") {
loaders = append(loaders, &multiconfig.YAMLLoader{Path: fpath})
handled = true
}
if !handled {
fmt.Println("config file invalid, valid file exts: .conf,.yaml,.toml,.json")
os.Exit(1)
}
}
m := multiconfig.DefaultLoader{
Loader: multiconfig.MultiLoader(loaders...),
Validator: multiconfig.MultiValidator(&multiconfig.RequiredValidator{}),
}
m.MustLoad(C)
if C.Heartbeat.Host == "" {
fmt.Println("heartbeat.host is blank")
os.Exit(1)
}
if C.Heartbeat.Host == "$ip" {
C.Heartbeat.Endpoint = fmt.Sprint(GetOutboundIP())
if C.Heartbeat.Endpoint == "" {
fmt.Println("ip auto got is blank")
os.Exit(1)
}
fmt.Println("host.ip:", C.Heartbeat.Endpoint)
}
host, err := C.GetHost()
if err != nil {
log.Println("E: failed to GetHost:", err)
os.Exit(1)
}
fmt.Println("host:", host)
if C.MetaDir == "" {
C.MetaDir = "./meta"
}
C.MetaDir, err = file.RealPath(C.MetaDir)
if err != nil {
log.Println("E: failed to get real path of MetaDir:", err)
os.Exit(1)
}
file.EnsureDir(C.MetaDir)
file.EnsureDirRW(C.MetaDir)
})
}
type Config struct {
RunMode string
MetaDir string
Heartbeat Heartbeat
HTTP httpx.Config
}
type Heartbeat struct {
Interval int64
Servers []string
Host string
Endpoint string
}
func (c *Config) IsDebugMode() bool {
return c.RunMode == "debug"
}
func (c *Config) GetHost() (string, error) {
if c.Heartbeat.Host == "$ip" {
return c.Heartbeat.Endpoint, nil
}
if c.Heartbeat.Host == "$hostname" {
return os.Hostname()
}
return c.Heartbeat.Host, nil
}
// Get preferred outbound ip of this machine
func GetOutboundIP() net.IP {
conn, err := net.Dial("udp", "8.8.8.8:80")
if err != nil {
fmt.Println("auto get outbound ip fail:", err)
os.Exit(1)
}
defer conn.Close()
localAddr := conn.LocalAddr().(*net.UDPAddr)
return localAddr.IP
}

View File

@@ -0,0 +1,60 @@
package router
import (
"fmt"
"os"
"strings"
"github.com/gin-contrib/pprof"
"github.com/gin-gonic/gin"
"github.com/ccfos/nightingale/v6/ibex/agentd/config"
"github.com/ccfos/nightingale/v6/pkg/aop"
)
func New(version string) *gin.Engine {
gin.SetMode(config.C.RunMode)
loggerMid := aop.Logger()
recoveryMid := aop.Recovery()
if strings.ToLower(config.C.RunMode) == "release" {
aop.DisableConsoleColor()
}
r := gin.New()
r.Use(recoveryMid)
// whether print access log
if config.C.HTTP.PrintAccessLog {
r.Use(loggerMid)
}
configRoute(r, version)
return r
}
func configRoute(r *gin.Engine, version string) {
if config.C.HTTP.PProf {
pprof.Register(r, "/debug/pprof")
}
r.GET("/ping", func(c *gin.Context) {
c.String(200, "pong")
})
r.GET("/pid", func(c *gin.Context) {
c.String(200, fmt.Sprintf("%d", os.Getpid()))
})
r.GET("/addr", func(c *gin.Context) {
c.String(200, c.Request.RemoteAddr)
})
r.GET("/version", func(c *gin.Context) {
c.String(200, version)
})
}

View File

@@ -0,0 +1,18 @@
//go:build !windows
// +build !windows
package timer
import (
"os/exec"
"syscall"
)
func CmdStart(cmd *exec.Cmd) error {
cmd.SysProcAttr = &syscall.SysProcAttr{Setpgid: true}
return cmd.Start()
}
func CmdKill(cmd *exec.Cmd) error {
return syscall.Kill(-cmd.Process.Pid, syscall.SIGKILL)
}

View File

@@ -0,0 +1,16 @@
//go:build windows
// +build windows
package timer
import (
"os/exec"
)
func CmdStart(cmd *exec.Cmd) error {
return cmd.Start()
}
func CmdKill(cmd *exec.Cmd) error {
return cmd.Process.Kill()
}

View File

@@ -0,0 +1,74 @@
package timer
import (
"context"
"log"
"time"
"github.com/ccfos/nightingale/v6/ibex/agentd/client"
"github.com/ccfos/nightingale/v6/ibex/agentd/config"
"github.com/ccfos/nightingale/v6/ibex/types"
)
func Heartbeat(ctx context.Context) {
interval := time.Duration(config.C.Heartbeat.Interval) * time.Millisecond
for {
select {
case <-ctx.Done():
return
case <-time.After(interval):
heartbeat()
}
}
}
func heartbeat() {
ident, err := config.C.GetHost()
if err != nil {
log.Println("E: GetHost fail:", err)
return
}
req := types.ReportRequest{
Ident: ident,
ReportTasks: Locals.ReportTasks(),
}
var resp types.ReportResponse
err = client.GetCli().Call("Server.Report", req, &resp)
if err != nil {
log.Println("E: rpc call Server.Report fail:", err)
client.CloseCli()
return
}
if resp.Message != "" {
log.Println("E: error from server:", resp.Message)
return
}
assigned := make(map[int64]struct{})
if resp.AssignTasks != nil {
count := len(resp.AssignTasks)
for i := 0; i < count; i++ {
at := resp.AssignTasks[i]
assigned[at.Id] = struct{}{}
Locals.AssignTask(at)
}
}
if len(assigned) > 0 {
log.Println("D: assigned tasks:", mapKeys(assigned))
}
Locals.Clean(assigned)
}
func mapKeys(m map[int64]struct{}) []int64 {
lst := make([]int64, 0, len(m))
for k := range m {
lst = append(lst, k)
}
return lst
}

333
ibex/agentd/timer/task.go Normal file
View File

@@ -0,0 +1,333 @@
package timer
import (
"bytes"
"fmt"
"log"
"os/exec"
"os/user"
"path"
"strings"
"sync"
"github.com/toolkits/pkg/file"
"github.com/toolkits/pkg/runner"
"github.com/toolkits/pkg/sys"
"github.com/ccfos/nightingale/v6/ibex/agentd/client"
"github.com/ccfos/nightingale/v6/ibex/agentd/config"
)
type Task struct {
sync.Mutex
Id int64
Clock int64
Action string
Status string
alive bool
Cmd *exec.Cmd
Stdout bytes.Buffer
Stderr bytes.Buffer
Stdin *bytes.Reader
Args string
Account string
StdinStr string
}
func (t *Task) SetStatus(status string) {
t.Lock()
t.Status = status
t.Unlock()
}
func (t *Task) GetStatus() string {
t.Lock()
s := t.Status
t.Unlock()
return s
}
func (t *Task) GetAlive() bool {
t.Lock()
pa := t.alive
t.Unlock()
return pa
}
func (t *Task) SetAlive(pa bool) {
t.Lock()
t.alive = pa
t.Unlock()
}
func (t *Task) GetStdout() string {
t.Lock()
out := t.Stdout.String()
t.Unlock()
return out
}
func (t *Task) GetStderr() string {
t.Lock()
out := t.Stderr.String()
t.Unlock()
return out
}
func (t *Task) ResetBuff() {
t.Lock()
t.Stdout.Reset()
t.Stderr.Reset()
t.Unlock()
}
func (t *Task) doneBefore() bool {
doneFlag := path.Join(config.C.MetaDir, fmt.Sprint(t.Id), fmt.Sprintf("%d.done", t.Clock))
return file.IsExist(doneFlag)
}
func (t *Task) loadResult() {
metadir := config.C.MetaDir
doneFlag := path.Join(metadir, fmt.Sprint(t.Id), fmt.Sprintf("%d.done", t.Clock))
stdoutFile := path.Join(metadir, fmt.Sprint(t.Id), "stdout")
stderrFile := path.Join(metadir, fmt.Sprint(t.Id), "stderr")
var err error
t.Status, err = file.ReadStringTrim(doneFlag)
if err != nil {
log.Printf("E: read file %s fail %v", doneFlag, err)
}
stdout, err := file.ReadString(stdoutFile)
if err != nil {
log.Printf("E: read file %s fail %v", stdoutFile, err)
}
stderr, err := file.ReadString(stderrFile)
if err != nil {
log.Printf("E: read file %s fail %v", stderrFile, err)
}
t.Stdout = *bytes.NewBufferString(stdout)
t.Stderr = *bytes.NewBufferString(stderr)
}
func (t *Task) prepare() error {
if t.Account != "" {
// already prepared
return nil
}
IdDir := path.Join(config.C.MetaDir, fmt.Sprint(t.Id))
err := file.EnsureDir(IdDir)
if err != nil {
log.Printf("E: mkdir -p %s fail: %v", IdDir, err)
return err
}
writeFlag := path.Join(IdDir, ".write")
if file.IsExist(writeFlag) {
// 从磁盘读取
argsFile := path.Join(IdDir, "args")
args, err := file.ReadStringTrim(argsFile)
if err != nil {
log.Printf("E: read %s fail %v", argsFile, err)
return err
}
accountFile := path.Join(IdDir, "account")
account, err := file.ReadStringTrim(accountFile)
if err != nil {
log.Printf("E: read %s fail %v", accountFile, err)
return err
}
stdinFile := path.Join(IdDir, "stdin")
stdin, err := file.ReadStringTrim(stdinFile)
if err != nil {
log.Printf("E: read %s fail %v", stdinFile, err)
return err
}
t.Args = args
t.Account = account
t.StdinStr = stdin
} else {
// 从远端读取,再写入磁盘
script, args, account, stdin, err := client.Meta(t.Id)
if err != nil {
log.Println("E: query task meta fail:", err)
return err
}
scriptFile := path.Join(IdDir, "script")
_, err = file.WriteString(scriptFile, script)
if err != nil {
log.Printf("E: write script to %s fail: %v", scriptFile, err)
return err
}
out, err := sys.CmdOutTrim("chmod", "+x", scriptFile)
if err != nil {
log.Printf("E: chmod +x %s fail %v. output: %s", scriptFile, err, out)
return err
}
argsFile := path.Join(IdDir, "args")
_, err = file.WriteString(argsFile, args)
if err != nil {
log.Printf("E: write args to %s fail: %v", argsFile, err)
return err
}
accountFile := path.Join(IdDir, "account")
_, err = file.WriteString(accountFile, account)
if err != nil {
log.Printf("E: write account to %s fail: %v", accountFile, err)
return err
}
stdinFile := path.Join(IdDir, "stdin")
_, err = file.WriteString(stdinFile, stdin)
if err != nil {
log.Printf("E: write tags to %s fail: %v", stdinFile, err)
return err
}
_, err = file.WriteString(writeFlag, "")
if err != nil {
log.Printf("E: create %s flag file fail: %v", writeFlag, err)
return err
}
t.Args = args
t.Account = account
t.StdinStr = stdin
}
t.Stdin = bytes.NewReader([]byte(t.StdinStr))
return nil
}
func (t *Task) start() {
if t.GetAlive() {
return
}
err := t.prepare()
if err != nil {
return
}
args := t.Args
if args != "" {
args = strings.Replace(args, ",,", "' '", -1)
args = "'" + args + "'"
}
scriptFile := path.Join(config.C.MetaDir, fmt.Sprint(t.Id), "script")
if !path.IsAbs(scriptFile) {
scriptFile = path.Join(runner.Cwd, scriptFile)
}
sh := fmt.Sprintf("%s %s", scriptFile, args)
var cmd *exec.Cmd
loginUser, err := user.Current()
if err != nil {
log.Println("E: cannot get current login user:", err)
return
}
if loginUser.Username == "root" {
// current login user is root
if t.Account == "root" {
cmd = exec.Command("sh", "-c", sh)
cmd.Dir = loginUser.HomeDir
} else {
cmd = exec.Command("su", "-c", sh, "-", t.Account)
}
} else {
// current login user not root
cmd = exec.Command("sh", "-c", sh)
cmd.Dir = loginUser.HomeDir
}
cmd.Stdout = &t.Stdout
cmd.Stderr = &t.Stderr
cmd.Stdin = t.Stdin
t.Cmd = cmd
err = CmdStart(cmd)
if err != nil {
log.Printf("E: cannot start cmd of task[%d]: %v", t.Id, err)
return
}
go runProcess(t)
}
func (t *Task) kill() {
go killProcess(t)
}
func runProcess(t *Task) {
t.SetAlive(true)
defer t.SetAlive(false)
err := t.Cmd.Wait()
if err != nil {
if strings.Contains(err.Error(), "signal: killed") {
t.SetStatus("killed")
log.Printf("D: process of task[%d] killed", t.Id)
} else if strings.Contains(err.Error(), "signal: terminated") {
// kill children process manually
t.SetStatus("killed")
log.Printf("D: process of task[%d] terminated", t.Id)
} else {
t.SetStatus("failed")
log.Printf("D: process of task[%d] return error: %v", t.Id, err)
}
} else {
t.SetStatus("success")
log.Printf("D: process of task[%d] done", t.Id)
}
persistResult(t)
}
func persistResult(t *Task) {
metadir := config.C.MetaDir
stdout := path.Join(metadir, fmt.Sprint(t.Id), "stdout")
stderr := path.Join(metadir, fmt.Sprint(t.Id), "stderr")
doneFlag := path.Join(metadir, fmt.Sprint(t.Id), fmt.Sprintf("%d.done", t.Clock))
file.WriteString(stdout, t.GetStdout())
file.WriteString(stderr, t.GetStderr())
file.WriteString(doneFlag, t.GetStatus())
}
func killProcess(t *Task) {
t.SetAlive(true)
defer t.SetAlive(false)
log.Printf("D: begin kill process of task[%d]", t.Id)
err := CmdKill(t.Cmd)
if err != nil {
t.SetStatus("killfailed")
log.Printf("D: kill process of task[%d] fail: %v", t.Id, err)
} else {
t.SetStatus("killed")
log.Printf("D: process of task[%d] killed", t.Id)
}
persistResult(t)
}

120
ibex/agentd/timer/tasks.go Normal file
View File

@@ -0,0 +1,120 @@
package timer
import (
"log"
"github.com/ccfos/nightingale/v6/ibex/types"
)
type LocalTasksT struct {
M map[int64]*Task
}
var Locals = &LocalTasksT{M: make(map[int64]*Task)}
func (lt *LocalTasksT) ReportTasks() []types.ReportTask {
ret := make([]types.ReportTask, 0, len(lt.M))
for id, t := range lt.M {
rt := types.ReportTask{Id: id, Clock: t.Clock}
rt.Status = t.GetStatus()
if rt.Status == "running" || rt.Status == "killing" {
// intermediate state
continue
}
rt.Stdout = t.GetStdout()
rt.Stderr = t.GetStderr()
stdoutLen := len(rt.Stdout)
stderrLen := len(rt.Stderr)
// 输出太长的话,截断,要不然把数据库撑爆了
if stdoutLen > 65535 {
start := stdoutLen - 65535
rt.Stdout = rt.Stdout[start:]
}
if stderrLen > 65535 {
start := stderrLen - 65535
rt.Stderr = rt.Stderr[start:]
}
ret = append(ret, rt)
}
return ret
}
func (lt *LocalTasksT) GetTask(id int64) (*Task, bool) {
t, found := lt.M[id]
return t, found
}
func (lt *LocalTasksT) SetTask(t *Task) {
lt.M[t.Id] = t
}
func (lt *LocalTasksT) AssignTask(at types.AssignTask) {
local, found := lt.GetTask(at.Id)
if found {
if local.Clock == at.Clock && local.Action == at.Action {
// ignore repeat task
return
}
local.Clock = at.Clock
local.Action = at.Action
} else {
if at.Action == "kill" {
// no process in local, no need kill
return
}
local = &Task{
Id: at.Id,
Clock: at.Clock,
Action: at.Action,
}
lt.SetTask(local)
if local.doneBefore() {
local.loadResult()
return
}
}
if local.Action == "kill" {
local.SetStatus("killing")
local.kill()
} else if local.Action == "start" {
local.SetStatus("running")
local.start()
} else {
log.Printf("W: unknown action: %s of task %d", at.Action, at.Id)
}
}
func (lt *LocalTasksT) Clean(assigned map[int64]struct{}) {
del := make(map[int64]struct{})
for id := range lt.M {
if _, found := assigned[id]; !found {
del[id] = struct{}{}
}
}
for id := range del {
// 远端已经不关注这个任务了但是本地来看任务还是running的
// 可能是远端认为超时了,此时本地不能删除,仍然要继续上报
if lt.M[id].GetStatus() == "running" {
continue
}
lt.M[id].ResetBuff()
cmd := lt.M[id].Cmd
delete(lt.M, id)
if cmd != nil && cmd.Process != nil {
cmd.Process.Release()
}
}
}

82
ibex/ibex.go Normal file
View File

@@ -0,0 +1,82 @@
package ibex
import (
"fmt"
"github.com/ccfos/nightingale/v6/pkg/ctx"
"os"
"strings"
"github.com/ccfos/nightingale/v6/ibex/server/config"
"github.com/ccfos/nightingale/v6/ibex/server/router"
"github.com/ccfos/nightingale/v6/ibex/server/rpc"
"github.com/ccfos/nightingale/v6/ibex/server/timer"
"github.com/ccfos/nightingale/v6/storage"
"github.com/ccfos/nightingale/v6/alert/aconf"
n9eRouter "github.com/ccfos/nightingale/v6/center/router"
"github.com/ccfos/nightingale/v6/conf"
n9eConf "github.com/ccfos/nightingale/v6/conf"
"github.com/gin-gonic/gin"
"github.com/redis/go-redis/v9"
"gorm.io/gorm"
)
var (
HttpPort int
)
func ServerStart(ctx *ctx.Context, isCenter bool, db *gorm.DB, rc redis.Cmdable, basicAuth gin.Accounts, heartbeat aconf.HeartbeatConfig,
api *n9eConf.CenterApi, r *gin.Engine, centerRouter *n9eRouter.Router, ibex conf.Ibex, httpPort int) {
config.C.IsCenter = isCenter
config.C.BasicAuth = make(gin.Accounts)
if len(basicAuth) > 0 {
config.C.BasicAuth = basicAuth
}
config.C.Heartbeat.IP = heartbeat.IP
config.C.Heartbeat.Interval = heartbeat.Interval
config.C.Heartbeat.LocalAddr = schedulerAddrGet(ibex.RPCListen)
HttpPort = httpPort
config.C.Output.ComeFrom = ibex.Output.ComeFrom
config.C.Output.AgtdPort = ibex.Output.AgtdPort
rou := router.NewRouter(ctx)
if centerRouter != nil {
rou.ConfigRouter(r, centerRouter)
} else {
rou.ConfigRouter(r)
}
ctx.Redis = rc
if err := storage.IdInit(ctx.Redis); err != nil {
fmt.Println("cannot init id generator: ", err)
os.Exit(1)
}
rpc.Start(ibex.RPCListen, ctx)
if isCenter {
go timer.Heartbeat(ctx)
go timer.Schedule(ctx)
go timer.CleanLong(ctx)
} else {
config.C.CenterApi = *api
}
timer.CacheHostDoing(ctx)
timer.ReportResult(ctx)
}
func schedulerAddrGet(rpcListen string) string {
ip := fmt.Sprint(config.GetOutboundIP())
if ip == "" {
fmt.Println("heartbeat ip auto got is blank")
os.Exit(1)
}
port := strings.Split(rpcListen, ":")[1]
localAddr := ip + ":" + port
return localAddr
}

View File

@@ -0,0 +1,135 @@
package config
import (
"fmt"
"net"
"os"
"strings"
"sync"
"github.com/ccfos/nightingale/v6/pkg/httpx"
"github.com/ccfos/nightingale/v6/pkg/logx"
"github.com/ccfos/nightingale/v6/conf"
"github.com/ccfos/nightingale/v6/pkg/ormx"
"github.com/ccfos/nightingale/v6/storage"
"github.com/gin-gonic/gin"
"github.com/koding/multiconfig"
)
var (
C = new(Config)
once sync.Once
)
func MustLoad(fpaths ...string) {
once.Do(func() {
loaders := []multiconfig.Loader{
&multiconfig.TagLoader{},
&multiconfig.EnvironmentLoader{},
}
for _, fpath := range fpaths {
handled := false
if strings.HasSuffix(fpath, "toml") {
loaders = append(loaders, &multiconfig.TOMLLoader{Path: fpath})
handled = true
}
if strings.HasSuffix(fpath, "conf") {
loaders = append(loaders, &multiconfig.TOMLLoader{Path: fpath})
handled = true
}
if strings.HasSuffix(fpath, "json") {
loaders = append(loaders, &multiconfig.JSONLoader{Path: fpath})
handled = true
}
if strings.HasSuffix(fpath, "yaml") {
loaders = append(loaders, &multiconfig.YAMLLoader{Path: fpath})
handled = true
}
if !handled {
fmt.Println("config file invalid, valid file exts: .conf,.yaml,.toml,.json")
os.Exit(1)
}
}
m := multiconfig.DefaultLoader{
Loader: multiconfig.MultiLoader(loaders...),
Validator: multiconfig.MultiValidator(&multiconfig.RequiredValidator{}),
}
m.MustLoad(C)
if C.Heartbeat.IP == "" {
// auto detect
C.Heartbeat.IP = fmt.Sprint(GetOutboundIP())
if C.Heartbeat.IP == "" {
fmt.Println("heartbeat ip auto got is blank")
os.Exit(1)
}
}
port := strings.Split(C.RPC.Listen, ":")[1]
endpoint := C.Heartbeat.IP + ":" + port
C.Heartbeat.LocalAddr = endpoint
// 正常情况肯定不是127.0.0.1,但是,如果就是单机部署,并且这个机器没有网络,比如本地调试并且本机没网的时候
// if C.Heartbeat.IP == "127.0.0.1" {
// fmt.Println("heartbeat ip is 127.0.0.1 and it is useless, so, exit")
// os.Exit(1)
// }
fmt.Println("heartbeat.ip:", C.Heartbeat.IP)
fmt.Printf("heartbeat.interval: %dms\n", C.Heartbeat.Interval)
})
}
type Config struct {
RunMode string
RPC RPC
Heartbeat Heartbeat
Output Output
IsCenter bool
CenterApi conf.CenterApi
Log logx.Config
HTTP httpx.Config
BasicAuth gin.Accounts
DB ormx.DBConfig
Redis storage.RedisConfig
}
type RPC struct {
Listen string
}
type Heartbeat struct {
IP string
Interval int64
LocalAddr string
}
type Output struct {
ComeFrom string
AgtdPort int
}
func (c *Config) IsDebugMode() bool {
return c.RunMode == "debug"
}
// Get preferred outbound ip of this machine
func GetOutboundIP() net.IP {
conn, err := net.Dial("udp", "8.8.8.8:80")
if err != nil {
fmt.Println("auto get outbound ip fail:", err)
os.Exit(1)
}
defer conn.Close()
localAddr := conn.LocalAddr().(*net.UDPAddr)
return localAddr.IP
}

View File

@@ -0,0 +1,144 @@
package logic
import (
"github.com/ccfos/nightingale/v6/models"
"github.com/ccfos/nightingale/v6/pkg/ctx"
"github.com/toolkits/pkg/logger"
"github.com/toolkits/pkg/slice"
"github.com/toolkits/pkg/str"
)
func ScheduleTask(ctx *ctx.Context, id int64) {
logger.Debugf("task[%d] scheduling...", id)
count, err := models.WaitingHostCount(ctx, id)
if err != nil {
logger.Errorf("cannot get task[%d] waiting host count: %v", id, err)
return
}
if count == 0 {
cleanDoneTask(ctx, id)
return
}
action, err := models.TaskActionGet(ctx, "id=?", id)
if err != nil {
logger.Errorf("cannot get task[%d] action: %v", id, err)
return
}
if action == nil {
logger.Errorf("[W] no action found of task[%d]", id)
return
}
switch action.Action {
case "start":
startTask(ctx, id, action)
case "pause":
return
case "cancel":
return
case "kill":
return
default:
logger.Errorf("unknown action: %s of task[%d]", action.Action, id)
}
}
func cleanDoneTask(ctx *ctx.Context, id int64) {
ingCount, err := models.IngStatusHostCount(ctx, id)
if err != nil {
logger.Errorf("cannot get task[%d] ing status host count: %v", id, err)
return
}
if ingCount > 0 {
return
}
err = models.CleanDoneTask(ctx, id)
if err != nil {
logger.Errorf("cannot clean done task[%d]: %v", id, err)
}
logger.Debugf("task[%d] done", id)
}
func startTask(ctx *ctx.Context, id int64, action *models.TaskAction) {
meta, err := models.TaskMetaGetByID(ctx, id)
if err != nil {
logger.Errorf("cannot get task[%d] meta: %v", id, err)
return
}
if meta == nil {
logger.Errorf("task[%d] meta lost", id)
return
}
count, err := models.UnexpectedHostCount(ctx, id)
if err != nil {
logger.Errorf("cannot get task[%d] unexpected host count: %v", id, err)
return
}
if count > int64(meta.Tolerance) {
err = action.Update(ctx, "pause")
if err != nil {
logger.Errorf("cannot update task[%d] action to 'pause': %v", id, err)
}
return
}
waitings, err := models.WaitingHostList(ctx, id)
if err != nil {
logger.Errorf("cannot get task[%d] waiting host: %v", id, err)
return
}
waitingsCount := len(waitings)
if waitingsCount == 0 {
return
}
doingsCount, err := models.TableRecordCount(ctx, models.TaskHostDoing{}.TableName(), "id=?", id)
if err != nil {
logger.Errorf("cannot get task[%d] doing host count: %v", id, err)
return
}
need := meta.Batch - int(doingsCount)
if meta.Batch == 0 {
need = waitingsCount
}
if need <= 0 {
return
}
if need > waitingsCount {
need = waitingsCount
}
arr := str.ParseCommaTrim(meta.Pause)
end := need
for i := 0; i < need; i++ {
if slice.ContainsString(arr, waitings[i].Host) {
end = i + 1
err = action.Update(ctx, "pause")
if err != nil {
logger.Errorf("cannot update task[%d] action to 'pause': %v", id, err)
return
}
break
}
}
err = models.RunWaitingHosts(ctx, waitings[:end])
if err != nil {
logger.Errorf("cannot run waiting hosts: %v", err)
}
}

View File

@@ -0,0 +1,45 @@
package logic
import (
"github.com/ccfos/nightingale/v6/models"
"github.com/ccfos/nightingale/v6/pkg/ctx"
"time"
"github.com/toolkits/pkg/logger"
)
func CheckTimeout(ctx *ctx.Context, id int64) {
meta, err := models.TaskMetaGetByID(ctx, id)
if err != nil {
logger.Errorf("cannot get task[%d] meta: %v", id, err)
return
}
if meta == nil {
logger.Errorf("task[%d] meta lost", id)
return
}
hosts, err := models.TableRecordGets[[]models.TaskHostDoing](ctx, models.TaskHostDoing{}.TableName(), "id=?", id)
if err != nil {
logger.Errorf("cannot get task[%d] doing host list: %v", id, err)
return
}
count := len(hosts)
if count == 0 {
return
}
// 3s: task dispatch duration: web -> db -> scheduler -> executor
timeout := int64(meta.Timeout + 3)
now := time.Now().Unix()
for i := 0; i < count; i++ {
if now-hosts[i].Clock > timeout {
err = models.MarkDoneStatus(ctx, hosts[i].Id, hosts[i].Clock, hosts[i].Host, "timeout", "", "")
if err != nil {
logger.Errorf("cannot mark task[%d] done status: %v", id, err)
}
}
}
}

View File

@@ -0,0 +1,40 @@
package router
import (
"github.com/ccfos/nightingale/v6/models"
"github.com/ccfos/nightingale/v6/pkg/ctx"
"net/http"
"strings"
"github.com/toolkits/pkg/errorx"
)
func TaskMeta(ctx *ctx.Context, id int64) *models.TaskMeta {
obj, err := models.TaskMetaGet(ctx, "id = ?", id)
errorx.Dangerous(err)
if obj == nil {
errorx.Bomb(http.StatusNotFound, "no such task meta")
}
return obj
}
func cleanHosts(formHosts []string) []string {
cnt := len(formHosts)
arr := make([]string, 0, cnt)
for i := 0; i < cnt; i++ {
item := strings.TrimSpace(formHosts[i])
if item == "" {
continue
}
if strings.HasPrefix(item, "#") {
continue
}
arr = append(arr, item)
}
return arr
}

View File

@@ -0,0 +1,612 @@
package router
import (
"fmt"
"github.com/ccfos/nightingale/v6/models"
"github.com/ccfos/nightingale/v6/pkg/ctx"
"strconv"
"io/ioutil"
"net/http"
"time"
"github.com/ccfos/nightingale/v6/ibex/server/config"
"github.com/ccfos/nightingale/v6/storage"
"github.com/gin-gonic/gin"
"github.com/toolkits/pkg/errorx"
"github.com/toolkits/pkg/ginx"
"github.com/toolkits/pkg/logger"
"github.com/toolkits/pkg/slice"
"github.com/toolkits/pkg/str"
)
func (rou *Router) taskStdout(c *gin.Context) {
meta := TaskMeta(rou.ctx, UrlParamsInt64(c, "id"))
stdouts, err := meta.Stdouts(rou.ctx)
ginx.NewRender(c).Data(stdouts, err)
}
func (rou *Router) taskStderr(c *gin.Context) {
meta := TaskMeta(rou.ctx, UrlParamsInt64(c, "id"))
stderrs, err := meta.Stderrs(rou.ctx)
ginx.NewRender(c).Data(stderrs, err)
}
// TODO: 不能只判断task_action还应该看所有的host执行情况
func (rou *Router) taskState(c *gin.Context) {
action, err := models.TaskActionGet(rou.ctx, "id=?", UrlParamsInt64(c, "id"))
if err != nil {
ginx.NewRender(c).Data("", err)
return
}
state := "done"
if action != nil {
state = action.Action
}
ginx.NewRender(c).Data(state, err)
}
func (rou *Router) taskResult(c *gin.Context) {
id := UrlParamsInt64(c, "id")
hosts, err := models.TaskHostStatus(rou.ctx, id)
if err != nil {
errorx.Bomb(500, "load task hosts of %d occur error %v", id, err)
}
ss := make(map[string][]string)
total := len(hosts)
for i := 0; i < total; i++ {
s := hosts[i].Status
ss[s] = append(ss[s], hosts[i].Host)
}
ginx.NewRender(c).Data(ss, nil)
}
func (rou *Router) taskHostOutput(c *gin.Context) {
obj, err := models.TaskHostGet(rou.ctx, UrlParamsInt64(c, "id"), ginx.UrlParamStr(c, "host"))
ginx.NewRender(c).Data(obj, err)
}
func (rou *Router) taskHostStdout(c *gin.Context) {
id := UrlParamsInt64(c, "id")
host := ginx.UrlParamStr(c, "host")
if config.C.Output.ComeFrom == "database" || config.C.Output.ComeFrom == "" {
obj, err := models.TaskHostGet(rou.ctx, id, host)
ginx.NewRender(c).Data(obj.Stdout, err)
return
}
if config.C.Output.AgtdPort <= 0 || config.C.Output.AgtdPort > 65535 {
ginx.NewRender(c).Message(fmt.Errorf("remotePort(%d) invalid", config.C.Output.AgtdPort))
return
}
url := fmt.Sprintf("http://%s:%d/output/%d/stdout.json", host, config.C.Output.AgtdPort, id)
client := &http.Client{
CheckRedirect: func(req *http.Request, via []*http.Request) error {
return http.ErrUseLastResponse
},
}
resp, err := client.Get(url)
errorx.Dangerous(err)
defer resp.Body.Close()
bs, err := ioutil.ReadAll(resp.Body)
errorx.Dangerous(err)
c.Writer.Header().Set("Content-Type", "application/json; charset=UTF-8")
c.Writer.Write(bs)
}
func (rou *Router) taskHostStderr(c *gin.Context) {
id := UrlParamsInt64(c, "id")
host := ginx.UrlParamStr(c, "host")
if config.C.Output.ComeFrom == "database" || config.C.Output.ComeFrom == "" {
obj, err := models.TaskHostGet(rou.ctx, id, host)
ginx.NewRender(c).Data(obj.Stderr, err)
return
}
if config.C.Output.AgtdPort <= 0 || config.C.Output.AgtdPort > 65535 {
ginx.NewRender(c).Message(fmt.Errorf("remotePort(%d) invalid", config.C.Output.AgtdPort))
return
}
url := fmt.Sprintf("http://%s:%d/output/%d/stderr.json", host, config.C.Output.AgtdPort, id)
client := &http.Client{
CheckRedirect: func(req *http.Request, via []*http.Request) error {
return http.ErrUseLastResponse
},
}
resp, err := client.Get(url)
errorx.Dangerous(err)
defer resp.Body.Close()
bs, err := ioutil.ReadAll(resp.Body)
errorx.Dangerous(err)
c.Writer.Header().Set("Content-Type", "application/json; charset=UTF-8")
c.Writer.Write(bs)
}
func (rou *Router) taskStdoutTxt(c *gin.Context) {
id := UrlParamsInt64(c, "id")
meta, err := models.TaskMetaGet(rou.ctx, "id = ?", id)
if err != nil {
c.String(500, err.Error())
return
}
if meta == nil {
c.String(404, "no such task")
return
}
stdouts, err := meta.Stdouts(rou.ctx)
if err != nil {
c.String(500, err.Error())
return
}
w := c.Writer
w.Header().Set("Content-Type", "text/plain; charset=utf-8")
count := len(stdouts)
for i := 0; i < count; i++ {
if i != 0 {
w.Write([]byte("\n\n"))
}
w.Write([]byte(stdouts[i].Host + ":\n"))
w.Write([]byte(stdouts[i].Stdout))
}
}
func (rou *Router) taskStderrTxt(c *gin.Context) {
id := UrlParamsInt64(c, "id")
meta, err := models.TaskMetaGet(rou.ctx, "id = ?", id)
if err != nil {
c.String(500, err.Error())
return
}
if meta == nil {
c.String(404, "no such task")
return
}
stderrs, err := meta.Stderrs(rou.ctx)
if err != nil {
c.String(500, err.Error())
return
}
w := c.Writer
w.Header().Set("Content-Type", "text/plain; charset=utf-8")
count := len(stderrs)
for i := 0; i < count; i++ {
if i != 0 {
w.Write([]byte("\n\n"))
}
w.Write([]byte(stderrs[i].Host + ":\n"))
w.Write([]byte(stderrs[i].Stderr))
}
}
type TaskStdoutData struct {
Host string `json:"host"`
Stdout string `json:"stdout"`
}
type TaskStderrData struct {
Host string `json:"host"`
Stderr string `json:"stderr"`
}
func (rou *Router) taskStdoutJSON(c *gin.Context) {
task := TaskMeta(rou.ctx, UrlParamsInt64(c, "id"))
host := ginx.QueryStr(c, "host", "")
var hostsLen int
var ret []TaskStdoutData
if host != "" {
obj, err := models.TaskHostGet(rou.ctx, task.Id, host)
if err != nil {
ginx.NewRender(c).Data("", err)
return
} else if obj == nil {
ginx.NewRender(c).Data("", fmt.Errorf("task: %d, host(%s) not eixsts", task.Id, host))
return
} else {
ret = append(ret, TaskStdoutData{
Host: host,
Stdout: obj.Stdout,
})
}
} else {
hosts, err := models.TaskHostGets(rou.ctx, task.Id)
if err != nil {
ginx.NewRender(c).Data("", err)
return
}
hostsLen = len(hosts)
ret = make([]TaskStdoutData, 0, hostsLen)
for i := 0; i < hostsLen; i++ {
ret = append(ret, TaskStdoutData{
Host: hosts[i].Host,
Stdout: hosts[i].Stdout,
})
}
}
ginx.NewRender(c).Data(ret, nil)
}
func (rou *Router) taskStderrJSON(c *gin.Context) {
task := TaskMeta(rou.ctx, UrlParamsInt64(c, "id"))
host := ginx.QueryStr(c, "host", "")
var hostsLen int
var ret []TaskStderrData
if host != "" {
obj, err := models.TaskHostGet(rou.ctx, task.Id, host)
if err != nil {
ginx.NewRender(c).Data("", err)
return
} else if obj == nil {
ginx.NewRender(c).Data("", fmt.Errorf("task: %d, host(%s) not eixsts", task.Id, host))
return
} else {
ret = append(ret, TaskStderrData{
Host: host,
Stderr: obj.Stderr,
})
}
} else {
hosts, err := models.TaskHostGets(rou.ctx, task.Id)
if err != nil {
ginx.NewRender(c).Data("", err)
return
}
hostsLen = len(hosts)
ret = make([]TaskStderrData, 0, hostsLen)
for i := 0; i < hostsLen; i++ {
ret = append(ret, TaskStderrData{
Host: hosts[i].Host,
Stderr: hosts[i].Stderr,
})
}
}
ginx.NewRender(c).Data(ret, nil)
}
type taskForm struct {
Title string `json:"title" binding:"required"`
Account string `json:"account" binding:"required"`
Batch int `json:"batch"`
Tolerance int `json:"tolerance"`
Timeout int `json:"timeout"`
Pause string `json:"pause"`
Script string `json:"script" binding:"required"`
Args string `json:"args"`
Stdin string `json:"stdin"`
Action string `json:"action" binding:"required"`
Creator string `json:"creator" binding:"required"`
Hosts []string `json:"hosts" binding:"required"`
AlertTriggered bool `json:"alert_triggered"`
}
func (rou *Router) taskAdd(c *gin.Context) {
var f taskForm
ginx.BindJSON(c, &f)
hosts := cleanHosts(f.Hosts)
if len(hosts) == 0 {
errorx.Bomb(http.StatusBadRequest, "arg(hosts) empty")
}
taskMeta := &models.TaskMeta{
Title: f.Title,
Account: f.Account,
Batch: f.Batch,
Tolerance: f.Tolerance,
Timeout: f.Timeout,
Pause: f.Pause,
Script: f.Script,
Args: f.Args,
Stdin: f.Stdin,
Creator: f.Creator,
}
err := taskMeta.CleanFields()
ginx.Dangerous(err)
taskMeta.HandleFH(hosts[0])
authUser := c.MustGet(gin.AuthUserKey).(string)
// 任务类型分为"告警规则触发"和"n9e center用户下发"两种;
// 边缘机房"告警规则触发"的任务不需要规划并且它可能是失联的无法使用db资源所以放入redis缓存中直接下发给agentd执行
if !config.C.IsCenter && f.AlertTriggered {
if err := taskMeta.Create(rou.ctx); err != nil {
// 当网络不连通时生成唯一的id防止边缘机房中不同任务的id相同
// 方法是redis自增id去防止同一个机房的不同n9e edge生成的id相同
// 但没法防止不同边缘机房生成同样的id所以生成id的数据不会上报存入数据库只用于闭环执行。
taskMeta.Id, err = storage.IdGet(rou.ctx.Redis)
ginx.Dangerous(err)
}
if err == nil {
taskHost := models.TaskHost{
Id: taskMeta.Id,
Host: hosts[0],
Status: "running",
}
if err = taskHost.Create(rou.ctx); err != nil {
logger.Warningf("task_add_fail: authUser=%s title=%s err=%s", authUser, taskMeta.Title, err.Error())
}
}
// 缓存任务元信息和待下发的任务
err = taskMeta.Cache(rou.ctx, hosts[0])
ginx.Dangerous(err)
} else {
// 如果是中心机房,还是保持之前的逻辑
err = taskMeta.Save(rou.ctx, hosts, f.Action)
ginx.Dangerous(err)
}
logger.Infof("task_add_succ: authUser=%s title=%s", authUser, taskMeta.Title)
ginx.NewRender(c).Data(taskMeta.Id, err)
}
func (rou *Router) taskGet(c *gin.Context) {
meta := TaskMeta(rou.ctx, UrlParamsInt64(c, "id"))
hosts, err := meta.Hosts(rou.ctx)
errorx.Dangerous(err)
action, err := meta.Action(rou.ctx)
errorx.Dangerous(err)
actionStr := ""
if action != nil {
actionStr = action.Action
} else {
meta.Done = true
}
ginx.NewRender(c).Data(gin.H{
"meta": meta,
"hosts": hosts,
"action": actionStr,
}, nil)
}
// 传进来一堆ids返回已经done的任务的ids
func (rou *Router) doneIds(c *gin.Context) {
ids := ginx.QueryStr(c, "ids", "")
if ids == "" {
errorx.Dangerous("arg(ids) empty")
}
idsint64 := str.IdsInt64(ids, ",")
if len(idsint64) == 0 {
errorx.Dangerous("arg(ids) empty")
}
exists, err := models.TaskActionExistsIds(rou.ctx, idsint64)
errorx.Dangerous(err)
dones := slice.SubInt64(idsint64, exists)
ginx.NewRender(c).Data(gin.H{
"list": dones,
}, nil)
}
func (rou *Router) taskGets(c *gin.Context) {
query := ginx.QueryStr(c, "query", "")
limit := ginx.QueryInt(c, "limit", 20)
creator := ginx.QueryStr(c, "creator", "")
days := ginx.QueryInt64(c, "days", 7)
before := time.Unix(time.Now().Unix()-days*24*3600, 0)
total, err := models.TaskMetaTotal(rou.ctx, creator, query, before)
errorx.Dangerous(err)
list, err := models.TaskMetaGets(rou.ctx, creator, query, before, limit, ginx.Offset(c, limit))
errorx.Dangerous(err)
cnt := len(list)
ids := make([]int64, cnt)
for i := 0; i < cnt; i++ {
ids[i] = list[i].Id
}
exists, err := models.TaskActionExistsIds(rou.ctx, ids)
errorx.Dangerous(err)
for i := 0; i < cnt; i++ {
if slice.ContainsInt64(exists, list[i].Id) {
list[i].Done = false
} else {
list[i].Done = true
}
}
ginx.NewRender(c).Data(gin.H{
"total": total,
"list": list,
}, nil)
}
type actionForm struct {
Action string `json:"action"`
}
func (rou *Router) taskAction(c *gin.Context) {
meta := TaskMeta(rou.ctx, UrlParamsInt64(c, "id"))
var f actionForm
ginx.BindJSON(c, &f)
action, err := models.TaskActionGet(rou.ctx, "id=?", meta.Id)
errorx.Dangerous(err)
if action == nil {
errorx.Bomb(200, "task already finished, no more action can do")
}
ginx.NewRender(c).Message(action.Update(rou.ctx, f.Action))
}
func (rou *Router) taskHostAction(c *gin.Context) {
host := ginx.UrlParamStr(c, "host")
meta := TaskMeta(rou.ctx, UrlParamsInt64(c, "id"))
noopWhenDone(rou.ctx, meta.Id)
var f actionForm
ginx.BindJSON(c, &f)
if f.Action == "ignore" {
errorx.Dangerous(meta.IgnoreHost(rou.ctx, host))
action, err := models.TaskActionGet(rou.ctx, "id=?", meta.Id)
errorx.Dangerous(err)
if action != nil && action.Action == "pause" {
ginx.NewRender(c).Data("you can click start to run the task", nil)
return
}
}
if f.Action == "kill" {
errorx.Dangerous(meta.KillHost(rou.ctx, host))
}
if f.Action == "redo" {
errorx.Dangerous(meta.RedoHost(rou.ctx, host))
}
ginx.NewRender(c).Message(nil)
}
func noopWhenDone(ctx *ctx.Context, id int64) {
action, err := models.TaskActionGet(ctx, "id=?", id)
errorx.Dangerous(err)
if action == nil {
errorx.Bomb(200, "task already finished, no more taskAction can do")
}
}
type sqlCondForm struct {
Table string
Where string
Args []interface{}
}
func (rou *Router) tableRecordListGet(c *gin.Context) {
var f sqlCondForm
ginx.BindJSON(c, &f)
switch f.Table {
case models.TaskHostDoing{}.TableName():
lst, err := models.TableRecordGets[[]models.TaskHostDoing](rou.ctx, f.Table, f.Where, f.Args)
ginx.NewRender(c).Data(lst, err)
case models.TaskMeta{}.TableName():
lst, err := models.TableRecordGets[[]models.TaskMeta](rou.ctx, f.Table, f.Where, f.Args)
ginx.NewRender(c).Data(lst, err)
default:
ginx.Bomb(http.StatusBadRequest, "table[%v] not support", f.Table)
}
}
func (rou *Router) tableRecordCount(c *gin.Context) {
var f sqlCondForm
ginx.BindJSON(c, &f)
ginx.NewRender(c).Data(models.TableRecordCount(rou.ctx, f.Table, f.Where, f.Args))
}
type markDoneForm struct {
Id int64
Clock int64
Host string
Status string
Stdout string
Stderr string
}
func (rou *Router) markDone(c *gin.Context) {
var f markDoneForm
ginx.BindJSON(c, &f)
ginx.NewRender(c).Message(models.MarkDoneStatus(rou.ctx, f.Id, f.Clock, f.Host, f.Status, f.Stdout, f.Stderr))
}
func (rou *Router) taskMetaAdd(c *gin.Context) {
var f models.TaskMeta
ginx.BindJSON(c, &f)
err := f.Create(rou.ctx)
ginx.NewRender(c).Data(f.Id, err)
}
func (rou *Router) taskHostAdd(c *gin.Context) {
var f models.TaskHost
ginx.BindJSON(c, &f)
ginx.NewRender(c).Message(f.Upsert(rou.ctx))
}
func (rou *Router) taskHostUpsert(c *gin.Context) {
var f []models.TaskHost
ginx.BindJSON(c, &f)
ginx.NewRender(c).Data(models.TaskHostUpserts(rou.ctx, f))
}
func UrlParamsInt64(c *gin.Context, field string) int64 {
var params []gin.Param
for _, p := range c.Params {
if p.Key == "id" {
params = append(params, p)
}
}
var strval string
if len(params) == 1 {
strval = ginx.UrlParamStr(c, field)
} else if len(params) == 2 {
strval = params[1].Value
} else {
logger.Warningf("url param[%+v] not ok", params)
errorx.Bomb(http.StatusBadRequest, "url param[%s] is blank", field)
}
intval, err := strconv.ParseInt(strval, 10, 64)
if err != nil {
errorx.Bomb(http.StatusBadRequest, "cannot convert %s to int64", strval)
}
return intval
}

View File

@@ -0,0 +1,132 @@
package router
import (
"fmt"
"github.com/ccfos/nightingale/v6/pkg/ctx"
"os"
"strings"
"github.com/ccfos/nightingale/v6/ibex/server/config"
"github.com/ccfos/nightingale/v6/pkg/aop"
"github.com/ccfos/nightingale/v6/center/router"
"github.com/gin-contrib/pprof"
"github.com/gin-gonic/gin"
)
func New(ctx *ctx.Context, version string) *gin.Engine {
gin.SetMode(config.C.RunMode)
loggerMid := aop.Logger()
recoveryMid := aop.Recovery()
if strings.ToLower(config.C.RunMode) == "release" {
aop.DisableConsoleColor()
}
r := gin.New()
r.Use(recoveryMid)
// whether print access log
if config.C.HTTP.PrintAccessLog {
r.Use(loggerMid)
}
rou := NewRouter(ctx)
rou.configBaseRouter(r, version)
rou.ConfigRouter(r)
return r
}
type Router struct {
ctx *ctx.Context
}
func NewRouter(ctx *ctx.Context) *Router {
return &Router{
ctx: ctx,
}
}
func (rou *Router) configBaseRouter(r *gin.Engine, version string) {
if config.C.HTTP.PProf {
pprof.Register(r, "/debug/pprof")
}
r.GET("/ping", func(c *gin.Context) {
c.String(200, "pong")
})
r.GET("/pid", func(c *gin.Context) {
c.String(200, fmt.Sprintf("%d", os.Getpid()))
})
r.GET("/addr", func(c *gin.Context) {
c.String(200, c.Request.RemoteAddr)
})
r.GET("/version", func(c *gin.Context) {
c.String(200, version)
})
}
func (rou *Router) ConfigRouter(r *gin.Engine, rts ...*router.Router) {
if len(rts) > 0 {
rt := rts[0]
pagesPrefix := "/api/n9e/busi-group/:id"
pages := r.Group(pagesPrefix)
{
pages.GET("/task/:id", rt.Auth(), rt.User(), rt.Perm("/job-tasks"), rou.taskGet)
pages.PUT("/task/:id/action", rt.Auth(), rt.User(), rt.Perm("/job-tasks/put"), rt.Bgrw(), rou.taskAction)
pages.GET("/task/:id/stdout", rt.Auth(), rt.User(), rt.Perm("/job-tasks"), rou.taskStdout)
pages.GET("/task/:id/stderr", rt.Auth(), rt.User(), rt.Perm("/job-tasks"), rou.taskStderr)
pages.GET("/task/:id/state", rt.Auth(), rt.User(), rt.Perm("/job-tasks"), rou.taskState)
pages.GET("/task/:id/result", rt.Auth(), rt.User(), rt.Perm("/job-tasks"), rou.taskResult)
pages.PUT("/task/:id/host/:host/action", rt.Auth(), rt.User(), rt.Perm("/job-tasks/put"), rt.Bgrw(), rou.taskHostAction)
pages.GET("/task/:id/host/:host/output", rt.Auth(), rt.User(), rt.Perm("/job-tasks"), rou.taskHostOutput)
pages.GET("/task/:id/host/:host/stdout", rt.Auth(), rt.User(), rt.Perm("/job-tasks"), rou.taskHostStdout)
pages.GET("/task/:id/host/:host/stderr", rt.Auth(), rt.User(), rt.Perm("/job-tasks"), rou.taskHostStderr)
pages.GET("/task/:id/stdout.txt", rt.Auth(), rt.User(), rt.Perm("/job-tasks"), rou.taskStdoutTxt)
pages.GET("/task/:id/stderr.txt", rt.Auth(), rt.User(), rt.Perm("/job-tasks"), rou.taskStderrTxt)
pages.GET("/task/:id/stdout.json", rt.Auth(), rt.User(), rt.Perm("/job-tasks"), rou.taskStdoutJSON)
pages.GET("/task/:id/stderr.json", rt.Auth(), rt.User(), rt.Perm("/job-tasks"), rou.taskStderrJSON)
}
}
api := r.Group("/ibex/v1")
if len(config.C.BasicAuth) > 0 {
api = r.Group("/ibex/v1", gin.BasicAuth(config.C.BasicAuth))
}
{
api.POST("/tasks", rou.taskAdd)
api.GET("/tasks", rou.taskGets)
api.GET("/tasks/done-ids", rou.doneIds)
api.GET("/task/:id", rou.taskGet)
api.PUT("/task/:id/action", rou.taskAction)
api.GET("/task/:id/stdout", rou.taskStdout)
api.GET("/task/:id/stderr", rou.taskStderr)
api.GET("/task/:id/state", rou.taskState)
api.GET("/task/:id/result", rou.taskResult)
api.PUT("/task/:id/host/:host/action", rou.taskHostAction)
api.GET("/task/:id/host/:host/output", rou.taskHostOutput)
api.GET("/task/:id/host/:host/stdout", rou.taskHostStdout)
api.GET("/task/:id/host/:host/stderr", rou.taskHostStderr)
api.GET("/task/:id/stdout.txt", rou.taskStdoutTxt)
api.GET("/task/:id/stderr.txt", rou.taskStderrTxt)
api.GET("/task/:id/stdout.json", rou.taskStdoutJSON)
api.GET("/task/:id/stderr.json", rou.taskStderrJSON)
// api for edge server
api.POST("/table/record/list", rou.tableRecordListGet)
api.POST("/table/record/count", rou.tableRecordCount)
api.POST("/mark/done", rou.markDone)
api.POST("/task/meta", rou.taskMetaAdd)
api.POST("/task/host/", rou.taskHostAdd)
api.POST("/task/hosts/upsert", rou.taskHostUpsert)
}
}

93
ibex/server/rpc/method.go Normal file
View File

@@ -0,0 +1,93 @@
package rpc
import (
"fmt"
"github.com/ccfos/nightingale/v6/models"
"os"
"github.com/toolkits/pkg/logger"
"github.com/ccfos/nightingale/v6/ibex/types"
)
// Ping return string 'pong', just for test
func (*Server) Ping(input string, output *string) error {
*output = "pong"
return nil
}
func (*Server) GetTaskMeta(id int64, resp *types.TaskMetaResponse) error {
meta, err := models.TaskMetaGetByID(ctxC, id)
if err != nil {
resp.Message = err.Error()
return nil
}
if meta == nil {
resp.Message = fmt.Sprintf("task %d not found", id)
return nil
}
resp.Script = meta.Script
resp.Args = meta.Args
resp.Account = meta.Account
resp.Stdin = meta.Stdin
return nil
}
func (*Server) Report(req types.ReportRequest, resp *types.ReportResponse) error {
if req.ReportTasks != nil && len(req.ReportTasks) > 0 {
err := handleDoneTask(req)
if err != nil {
resp.Message = err.Error()
return nil
}
}
doings := models.GetDoingCache(req.Ident)
tasks := make([]types.AssignTask, 0, len(doings))
for _, doing := range doings {
tasks = append(tasks, types.AssignTask{
Id: doing.Id,
Clock: doing.Clock,
Action: doing.Action,
})
}
resp.AssignTasks = tasks
return nil
}
func handleDoneTask(req types.ReportRequest) error {
count := len(req.ReportTasks)
val, ok := os.LookupEnv("CONTINUOUS_OUTPUT")
for i := 0; i < count; i++ {
t := req.ReportTasks[i]
if ok && val == "1" && t.Status == "running" {
err := models.RealTimeUpdateOutput(ctxC, t.Id, req.Ident, t.Stdout, t.Stderr)
if err != nil {
logger.Errorf("cannot update output, id:%d, hostname:%s, clock:%d, status:%s, err: %v", t.Id, req.Ident, t.Clock, t.Status, err)
return err
}
} else {
if t.Status == "success" || t.Status == "failed" {
exist, isEdgeAlertTriggered := models.CheckExistAndEdgeAlertTriggered(req.Ident, t.Id)
// ibex agent可能会重复上报结果如果任务已经不在task_host_doing缓存中了说明该任务已经MarkDone了不需要再处理
if !exist {
continue
}
err := models.MarkDoneStatus(ctxC, t.Id, t.Clock, req.Ident, t.Status, t.Stdout, t.Stderr, isEdgeAlertTriggered)
if err != nil {
logger.Errorf("cannot mark task done, id:%d, hostname:%s, clock:%d, status:%s, err: %v", t.Id, req.Ident, t.Clock, t.Status, err)
return err
}
}
}
}
return nil
}

61
ibex/server/rpc/rpc.go Normal file
View File

@@ -0,0 +1,61 @@
package rpc
import (
"bufio"
"fmt"
"io"
"net"
"net/rpc"
"os"
"reflect"
"time"
"github.com/ccfos/nightingale/v6/pkg/ctx"
"github.com/toolkits/pkg/logger"
"github.com/ugorji/go/codec"
)
type Server int
var ctxC *ctx.Context
func Start(listen string, ctx *ctx.Context) {
ctxC = ctx
go serve(listen)
}
func serve(listen string) {
server := rpc.NewServer()
server.Register(new(Server))
l, err := net.Listen("tcp", listen)
if err != nil {
fmt.Printf("fail to listen on: %s, error: %v\n", listen, err)
os.Exit(1)
}
fmt.Println("rpc.listening:", listen)
var mh codec.MsgpackHandle
mh.MapType = reflect.TypeOf(map[string]interface{}(nil))
duration := time.Duration(100) * time.Millisecond
for {
conn, err := l.Accept()
if err != nil {
logger.Warningf("listener accept error: %v", err)
time.Sleep(duration)
continue
}
var bufconn = struct {
io.Closer
*bufio.Reader
*bufio.Writer
}{conn, bufio.NewReader(conn), bufio.NewWriter(conn)}
go server.ServeCodec(codec.MsgpackSpecRpc.ServerCodec(bufconn, &mh))
}
}

159
ibex/server/server.go Normal file
View File

@@ -0,0 +1,159 @@
package server
import (
"context"
"fmt"
"github.com/ccfos/nightingale/v6/pkg/ctx"
"os"
"os/signal"
"path/filepath"
"syscall"
"github.com/ccfos/nightingale/v6/ibex/server/config"
"github.com/ccfos/nightingale/v6/ibex/server/router"
"github.com/ccfos/nightingale/v6/ibex/server/rpc"
"github.com/ccfos/nightingale/v6/ibex/server/timer"
"github.com/ccfos/nightingale/v6/pkg/httpx"
"github.com/ccfos/nightingale/v6/pkg/logx"
"github.com/ccfos/nightingale/v6/storage"
"github.com/toolkits/pkg/i18n"
)
type Server struct {
ConfigFile string
Version string
}
type ServerOption func(*Server)
func SetConfigFile(f string) ServerOption {
return func(s *Server) {
s.ConfigFile = f
}
}
func SetVersion(v string) ServerOption {
return func(s *Server) {
s.Version = v
}
}
// Run run server
func Run(isCenter bool, opts ...ServerOption) {
code := 1
sc := make(chan os.Signal, 1)
signal.Notify(sc, syscall.SIGHUP, syscall.SIGINT, syscall.SIGTERM, syscall.SIGQUIT)
server := Server{
ConfigFile: filepath.Join("etc", "ibex", "server.toml"),
Version: "not specified",
}
for _, opt := range opts {
opt(&server)
}
// parse config file
config.MustLoad(server.ConfigFile)
config.C.IsCenter = isCenter
cleanFunc, err := server.initialize()
if err != nil {
fmt.Println("server init fail:", err)
os.Exit(code)
}
EXIT:
for {
sig := <-sc
fmt.Println("received signal:", sig.String())
switch sig {
case syscall.SIGQUIT, syscall.SIGTERM, syscall.SIGINT:
code = 0
break EXIT
case syscall.SIGHUP:
// reload configuration?
default:
break EXIT
}
}
cleanFunc()
fmt.Println("server exited")
os.Exit(code)
}
func (s Server) initialize() (func(), error) {
fns := Functions{}
bgCtx, cancel := context.WithCancel(context.Background())
fns.Add(cancel)
// init i18n
i18n.Init()
// init logger
loggerClean, err := logx.Init(config.C.Log)
if err != nil {
return fns.Ret(), err
} else {
fns.Add(loggerClean)
}
var ctxC *ctx.Context
var redis storage.Redis
if redis, err = storage.NewRedis(config.C.Redis); err != nil {
return fns.Ret(), err
}
// init database
if config.C.IsCenter {
db, err := storage.New(config.C.DB)
if err != nil {
return fns.Ret(), err
}
ctxC = ctx.NewContext(context.Background(), db, redis, true, config.C.CenterApi)
} else {
ctxC = ctx.NewContext(context.Background(), nil, redis, false, config.C.CenterApi)
}
if err := storage.IdInit(ctxC.Redis); err != nil {
fmt.Println("cannot init id generator: ", err)
os.Exit(1)
}
timer.CacheHostDoing(ctxC)
timer.ReportResult(ctxC)
if config.C.IsCenter {
go timer.Heartbeat(ctxC)
go timer.Schedule(ctxC)
go timer.CleanLong(ctxC)
}
// init http server
r := router.New(ctxC, s.Version)
httpClean := httpx.Init(config.C.HTTP, bgCtx, r)
fns.Add(httpClean)
// start rpc server
rpc.Start(config.C.RPC.Listen, ctxC)
// release all the resources
return fns.Ret(), nil
}
type Functions struct {
List []func()
}
func (fs *Functions) Add(f func()) {
fs.List = append(fs.List, f)
}
func (fs *Functions) Ret() func() {
return func() {
for i := 0; i < len(fs.List); i++ {
fs.List[i]()
}
}
}

View File

@@ -0,0 +1,76 @@
package timer
import (
"github.com/ccfos/nightingale/v6/models"
"github.com/ccfos/nightingale/v6/pkg/ctx"
"time"
"github.com/ccfos/nightingale/v6/ibex/server/config"
"github.com/toolkits/pkg/logger"
)
func Heartbeat(ctx *ctx.Context) {
if config.C.Heartbeat.Interval == 0 {
config.C.Heartbeat.Interval = 1000
}
for {
heartbeat(ctx)
time.Sleep(time.Duration(config.C.Heartbeat.Interval) * time.Millisecond)
}
}
func heartbeat(ctx *ctx.Context) {
ident := config.C.Heartbeat.LocalAddr
err := models.TaskSchedulerHeartbeat(ctx, ident)
if err != nil {
logger.Errorf("task scheduler(%s) cannot heartbeat: %v", ident, err)
return
}
dss, err := models.DeadTaskSchedulers(ctx)
if err != nil {
logger.Errorf("cannot get dead task schedulers: %v", err)
return
}
cnt := len(dss)
if cnt == 0 {
return
}
for i := 0; i < cnt; i++ {
ids, err := models.TasksOfScheduler(ctx, dss[i])
if err != nil {
logger.Errorf("cannot get tasks of scheduler(%s): %v", dss[i], err)
return
}
if len(ids) == 0 {
err = models.DelDeadTaskScheduler(ctx, dss[i])
if err != nil {
logger.Errorf("cannot del dead task scheduler(%s): %v", dss[i], err)
return
}
}
takeOverTasks(ctx, ident, dss[i], ids)
}
}
func takeOverTasks(ctx *ctx.Context, alive, dead string, ids []int64) {
count := len(ids)
for i := 0; i < count; i++ {
success, err := models.TakeOverTask(ctx, ids[i], dead, alive)
if err != nil {
logger.Errorf("cannot take over task: %v", err)
return
}
if success {
logger.Infof("%s take over task[%d] of %s", alive, ids[i], dead)
}
}
}

View File

@@ -0,0 +1,53 @@
package timer
import (
"fmt"
"github.com/ccfos/nightingale/v6/models"
"github.com/ccfos/nightingale/v6/pkg/ctx"
"time"
"github.com/toolkits/pkg/logger"
)
// CacheHostDoing 缓存task_host_doing表全部内容减轻DB压力
func CacheHostDoing(ctx *ctx.Context) {
if err := cacheHostDoing(ctx); err != nil {
fmt.Println("cannot cache task_host_doing data: ", err)
}
go loopCacheHostDoing(ctx)
}
func loopCacheHostDoing(ctx *ctx.Context) {
for {
time.Sleep(time.Millisecond * 400)
if err := cacheHostDoing(ctx); err != nil {
logger.Warning("cannot cache task_host_doing data: ", err)
}
}
}
func cacheHostDoing(ctx *ctx.Context) error {
doingsFromDb, err := models.TableRecordGets[[]models.TaskHostDoing](ctx, models.TaskHostDoing{}.TableName(), "")
if err != nil {
logger.Errorf("models.TableRecordGets fail: %v", err)
}
doingsFromRedis, err := models.CacheRecordGets[models.TaskHostDoing](ctx)
if err != nil {
logger.Errorf("models.CacheRecordGets fail: %v", err)
}
set := make(map[string][]models.TaskHostDoing)
for _, doing := range doingsFromDb {
doing.AlertTriggered = false
set[doing.Host] = append(set[doing.Host], doing)
}
for _, doing := range doingsFromRedis {
doing.AlertTriggered = true
set[doing.Host] = append(set[doing.Host], doing)
}
models.SetDoingCache(set)
return err
}

View File

@@ -0,0 +1,27 @@
package timer
import (
"fmt"
"github.com/ccfos/nightingale/v6/models"
"github.com/ccfos/nightingale/v6/pkg/ctx"
"time"
"github.com/toolkits/pkg/logger"
)
func ReportResult(ctx *ctx.Context) {
if err := models.ReportCacheResult(ctx); err != nil {
fmt.Println("cannot report task_host result from alter trigger: ", err)
}
go loopReport(ctx)
}
func loopReport(ctx *ctx.Context) {
d := time.Duration(2) * time.Second
for {
time.Sleep(d)
if err := models.ReportCacheResult(ctx); err != nil {
logger.Warning("cannot report task_host result from alter trigger: ", err)
}
}
}

View File

@@ -0,0 +1,79 @@
package timer
import (
"github.com/ccfos/nightingale/v6/models"
"github.com/ccfos/nightingale/v6/pkg/ctx"
"time"
"github.com/ccfos/nightingale/v6/ibex/server/config"
"github.com/ccfos/nightingale/v6/ibex/server/logic"
"github.com/toolkits/pkg/logger"
)
func Schedule(ctx *ctx.Context) {
for {
scheduleOrphan(ctx)
scheduleMine(ctx)
time.Sleep(time.Second)
}
}
func scheduleMine(ctx *ctx.Context) {
ids, err := models.TasksOfScheduler(ctx, config.C.Heartbeat.LocalAddr)
if err != nil {
logger.Errorf("cannot get tasks of scheduler(%s): %v", config.C.Heartbeat.LocalAddr, err)
return
}
count := len(ids)
for i := 0; i < count; i++ {
logic.CheckTimeout(ctx, ids[i])
logic.ScheduleTask(ctx, ids[i])
}
}
func scheduleOrphan(ctx *ctx.Context) {
ids, err := models.OrphanTaskIds(ctx)
if err != nil {
logger.Errorf("cannot get orphan task ids: %v", err)
return
}
count := len(ids)
if count == 0 {
return
}
logger.Debug("orphan task ids:", ids)
for i := 0; i < count; i++ {
action, err := models.TaskActionGet(ctx, "id=?", ids[i])
if err != nil {
logger.Errorf("cannot get task[%d] action: %v", ids[i], err)
continue
}
if action == nil {
continue
}
if action.Action == "pause" {
continue
}
mine, err := models.TakeOverTask(ctx, ids[i], "", config.C.Heartbeat.LocalAddr)
if err != nil {
logger.Errorf("cannot take over task[%d]: %v", ids[i], err)
continue
}
if !mine {
continue
}
logger.Debugf("task[%d] is mine", ids[i])
logic.ScheduleTask(ctx, ids[i])
}
}

View File

@@ -0,0 +1,38 @@
package timer
import (
"github.com/ccfos/nightingale/v6/models"
"github.com/ccfos/nightingale/v6/pkg/ctx"
"time"
"github.com/toolkits/pkg/logger"
)
func CleanLong(ctx *ctx.Context) {
d := time.Duration(24) * time.Hour
for {
cleanLongTask(ctx)
time.Sleep(d)
}
}
func cleanLongTask(ctx *ctx.Context) {
ids, err := models.LongTaskIds(ctx)
if err != nil {
logger.Error("LongTaskIds:", err)
return
}
if ids == nil {
return
}
count := len(ids)
for i := 0; i < count; i++ {
action := models.TaskAction{Id: ids[i]}
err = action.Update(ctx, "cancel")
if err != nil {
logger.Errorf("cannot cancel long task[%d]: %v", ids[i], err)
}
}
}

33
ibex/types/types.go Normal file
View File

@@ -0,0 +1,33 @@
package types
type TaskMetaResponse struct {
Message string
Script string
Args string
Account string
Stdin string
}
type ReportTask struct {
Id int64
Clock int64
Status string
Stdout string
Stderr string
}
type ReportRequest struct {
Ident string
ReportTasks []ReportTask
}
type AssignTask struct {
Id int64
Clock int64
Action string
}
type ReportResponse struct {
Message string
AssignTasks []AssignTask
}

69
models/ibex_models.go Normal file
View File

@@ -0,0 +1,69 @@
package models
import (
"encoding/json"
"fmt"
"github.com/ccfos/nightingale/v6/ibex/server/config"
"github.com/ccfos/nightingale/v6/pkg/ctx"
"github.com/ccfos/nightingale/v6/pkg/poster"
"gorm.io/gorm"
)
func IbexCount(tx *gorm.DB) (int64, error) {
var cnt int64
err := tx.Count(&cnt).Error
return cnt, err
}
func tht(id int64) string {
return fmt.Sprintf("task_host_%d", id%100)
}
func TableRecordGets[T any](ctx *ctx.Context, table, where string, args ...interface{}) (lst T, err error) {
if config.C.IsCenter {
if where == "" || len(args) == 0 {
err = DB(ctx).Table(table).Find(&lst).Error
} else {
err = DB(ctx).Table(table).Where(where, args...).Find(&lst).Error
}
return
}
return poster.PostByUrlsWithResp[T](ctx, "/ibex/v1/table/record/list", map[string]interface{}{
"table": table,
"where": where,
"args": args,
})
}
func TableRecordCount(ctx *ctx.Context, table, where string, args ...interface{}) (int64, error) {
if config.C.IsCenter {
if where == "" || len(args) == 0 {
return IbexCount(DB(ctx).Table(table))
}
return IbexCount(DB(ctx).Table(table).Where(where, args...))
}
return poster.PostByUrlsWithResp[int64](ctx, "/ibex/v1/table/record/count", map[string]interface{}{
"table": table,
"where": where,
"args": args,
})
}
var IBEX_HOST_DOING = "ibex-host-doing"
func CacheRecordGets[T any](ctx *ctx.Context) ([]T, error) {
lst := make([]T, 0)
values, _ := ctx.Redis.HVals(ctx.Ctx, IBEX_HOST_DOING).Result()
for _, val := range values {
t := new(T)
if err := json.Unmarshal([]byte(val), t); err != nil {
return nil, err
}
lst = append(lst, *t)
}
return lst, nil
}

112
models/ibex_task_action.go Normal file
View File

@@ -0,0 +1,112 @@
package models
import (
"fmt"
"time"
"github.com/ccfos/nightingale/v6/pkg/ctx"
"gorm.io/gorm"
)
type TaskAction struct {
Id int64 `gorm:"column:id;primaryKey"`
Action string `gorm:"column:action;size:32;not null"`
Clock int64 `gorm:"column:clock;not null;default:0"`
}
func (TaskAction) TableName() string {
return "task_action"
}
func TaskActionGet(ctx *ctx.Context, where string, args ...interface{}) (*TaskAction, error) {
var obj TaskAction
ret := DB(ctx).Where(where, args...).Find(&obj)
if ret.Error != nil {
return nil, ret.Error
}
if ret.RowsAffected == 0 {
return nil, nil
}
return &obj, nil
}
func TaskActionExistsIds(ctx *ctx.Context, ids []int64) ([]int64, error) {
if len(ids) == 0 {
return ids, nil
}
var ret []int64
err := DB(ctx).Model(&TaskAction{}).Where("id in ?", ids).Pluck("id", &ret).Error
return ret, err
}
func CancelWaitingHosts(ctx *ctx.Context, id int64) error {
return DB(ctx).Table(tht(id)).Where("id = ? and status = ?", id, "waiting").Update("status", "cancelled").Error
}
func StartTask(ctx *ctx.Context, id int64) error {
return DB(ctx).Model(&TaskScheduler{}).Where("id = ?", id).Update("scheduler", "").Error
}
func CancelTask(ctx *ctx.Context, id int64) error {
return CancelWaitingHosts(ctx, id)
}
func KillTask(ctx *ctx.Context, id int64) error {
if err := CancelWaitingHosts(ctx, id); err != nil {
return err
}
now := time.Now().Unix()
return DB(ctx).Transaction(func(tx *gorm.DB) error {
err := tx.Model(&TaskHostDoing{}).Where("id = ? and action <> ?", id, "kill").Updates(map[string]interface{}{
"clock": now,
"action": "kill",
}).Error
if err != nil {
return err
}
return tx.Table(tht(id)).Where("id = ? and status = ?", id, "running").Update("status", "killing").Error
})
}
func (a *TaskAction) Update(ctx *ctx.Context, action string) error {
if !(action == "start" || action == "cancel" || action == "kill" || action == "pause") {
return fmt.Errorf("action invalid")
}
err := DB(ctx).Model(a).Updates(map[string]interface{}{
"action": action,
"clock": time.Now().Unix(),
}).Error
if err != nil {
return err
}
if action == "start" {
return StartTask(ctx, a.Id)
}
if action == "cancel" {
return CancelTask(ctx, a.Id)
}
if action == "kill" {
return KillTask(ctx, a.Id)
}
return nil
}
// LongTaskIds two weeks ago
func LongTaskIds(ctx *ctx.Context) ([]int64, error) {
clock := time.Now().Unix() - 604800*2
var ids []int64
err := DB(ctx).Model(&TaskAction{}).Where("clock < ?", clock).Pluck("id", &ids).Error
return ids, err
}

262
models/ibex_task_host.go Normal file
View File

@@ -0,0 +1,262 @@
package models
import (
"fmt"
"sync"
"time"
"github.com/ccfos/nightingale/v6/ibex/server/config"
"github.com/ccfos/nightingale/v6/pkg/ctx"
"github.com/ccfos/nightingale/v6/pkg/poster"
"github.com/ccfos/nightingale/v6/storage"
"github.com/toolkits/pkg/logger"
"gorm.io/gorm"
"gorm.io/gorm/clause"
)
type TaskHost struct {
II int64 `gorm:"column:ii;primaryKey;autoIncrement" json:"-"`
Id int64 `gorm:"column:id;uniqueIndex:idx_id_host;not null" json:"id"`
Host string `gorm:"column:host;uniqueIndex:idx_id_host;size:128;not null" json:"host"`
Status string `gorm:"column:status;size:32;not null" json:"status"`
Stdout string `gorm:"column:stdout;type:text" json:"stdout"`
Stderr string `gorm:"column:stderr;type:text" json:"stderr"`
}
func (taskHost *TaskHost) Upsert(ctx *ctx.Context) error {
return DB(ctx).Table(tht(taskHost.Id)).Clauses(clause.OnConflict{
Columns: []clause.Column{{Name: "id"}, {Name: "host"}},
DoUpdates: clause.AssignmentColumns([]string{"status", "stdout", "stderr"}),
}).Create(taskHost).Error
}
func (taskHost *TaskHost) Create(ctx *ctx.Context) error {
if config.C.IsCenter {
return DB(ctx).Table(tht(taskHost.Id)).Create(taskHost).Error
}
return poster.PostByUrls(ctx, "/ibex/v1/task/host", taskHost)
}
func TaskHostUpserts(ctx *ctx.Context, lst []TaskHost) (map[string]error, error) {
if len(lst) == 0 {
return nil, fmt.Errorf("empty list")
}
if !config.C.IsCenter {
return poster.PostByUrlsWithResp[map[string]error](ctx, "/ibex/v1/task/hosts/upsert", lst)
}
errs := make(map[string]error, 0)
for _, taskHost := range lst {
if err := taskHost.Upsert(ctx); err != nil {
errs[fmt.Sprintf("%d:%s", taskHost.Id, taskHost.Host)] = err
}
}
return errs, nil
}
func TaskHostGet(ctx *ctx.Context, id int64, host string) (*TaskHost, error) {
var ret []*TaskHost
err := DB(ctx).Table(tht(id)).Where("id=? and host=?", id, host).Find(&ret).Error
if err != nil {
return nil, err
}
if len(ret) == 0 {
return nil, nil
}
return ret[0], nil
}
func MarkDoneStatus(ctx *ctx.Context, id, clock int64, host, status, stdout, stderr string, edgeAlertTriggered ...bool) error {
if len(edgeAlertTriggered) > 0 && edgeAlertTriggered[0] {
return CacheMarkDone(ctx, TaskHost{
Id: id,
Host: host,
Status: status,
Stdout: stdout,
Stderr: stderr,
})
}
if !config.C.IsCenter {
return poster.PostByUrls(ctx, "/ibex/v1/mark/done", map[string]interface{}{
"id": id,
"clock": clock,
"host": host,
"status": status,
"stdout": stdout,
"stderr": stderr,
})
}
count, err := TableRecordCount(ctx, TaskHostDoing{}.TableName(), "id=? and host=? and clock=?", id, host, clock)
if err != nil {
return err
}
if count == 0 {
// 如果是timeout了后来任务执行完成之后结果又上来了stdout和stderr最好还是存库让用户看到
count, err = TableRecordCount(ctx, tht(id), "id=? and host=? and status=?", id, host, "timeout")
if err != nil {
return err
}
if count == 1 {
return DB(ctx).Table(tht(id)).Where("id=? and host=?", id, host).Updates(map[string]interface{}{
"status": status,
"stdout": stdout,
"stderr": stderr,
}).Error
}
return nil
}
return DB(ctx).Transaction(func(tx *gorm.DB) error {
err = tx.Table(tht(id)).Where("id=? and host=?", id, host).Updates(map[string]interface{}{
"status": status,
"stdout": stdout,
"stderr": stderr,
}).Error
if err != nil {
return err
}
if err = tx.Where("id=? and host=?", id, host).Delete(&TaskHostDoing{}).Error; err != nil {
return err
}
return nil
})
}
func RealTimeUpdateOutput(ctx *ctx.Context, id int64, host, stdout, stderr string) error {
return DB(ctx).Transaction(func(tx *gorm.DB) error {
err := tx.Table(tht(id)).Where("id=? and host=?", id, host).Updates(map[string]interface{}{
"stdout": stdout,
"stderr": stderr,
}).Error
if err != nil {
return err
}
return nil
})
}
func CacheMarkDone(ctx *ctx.Context, taskHost TaskHost) error {
if err := ctx.Redis.HDel(ctx.Ctx, IBEX_HOST_DOING, hostDoingCacheKey(taskHost.Id, taskHost.Host)).Err(); err != nil {
return err
}
TaskHostCachePush(taskHost)
return nil
}
func WaitingHostList(ctx *ctx.Context, id int64, limit ...int) ([]TaskHost, error) {
var hosts []TaskHost
session := DB(ctx).Table(tht(id)).Where("id = ? and status = 'waiting'", id).Order("ii")
if len(limit) > 0 {
session = session.Limit(limit[0])
}
err := session.Find(&hosts).Error
return hosts, err
}
func WaitingHostCount(ctx *ctx.Context, id int64) (int64, error) {
return TableRecordCount(ctx, tht(id), "id=? and status='waiting'", id)
}
func UnexpectedHostCount(ctx *ctx.Context, id int64) (int64, error) {
return TableRecordCount(ctx, tht(id), "id=? and status in ('failed', 'timeout', 'killfailed')", id)
}
func IngStatusHostCount(ctx *ctx.Context, id int64) (int64, error) {
return TableRecordCount(ctx, tht(id), "id=? and status in ('waiting', 'running', 'killing')", id)
}
func RunWaitingHosts(ctx *ctx.Context, taskHosts []TaskHost) error {
count := len(taskHosts)
if count == 0 {
return nil
}
now := time.Now().Unix()
return DB(ctx).Transaction(func(tx *gorm.DB) error {
for i := 0; i < count; i++ {
if err := tx.Table(tht(taskHosts[i].Id)).Where("id=? and host=?", taskHosts[i].Id, taskHosts[i].Host).Update("status", "running").Error; err != nil {
return err
}
err := tx.Create(&TaskHostDoing{Id: taskHosts[i].Id, Host: taskHosts[i].Host, Clock: now, Action: "start"}).Error
if err != nil {
return err
}
}
return nil
})
}
func TaskHostStatus(ctx *ctx.Context, id int64) ([]TaskHost, error) {
var ret []TaskHost
err := DB(ctx).Table(tht(id)).Select("id", "host", "status").Where("id=?", id).Order("ii").Find(&ret).Error
return ret, err
}
func TaskHostGets(ctx *ctx.Context, id int64) ([]TaskHost, error) {
var ret []TaskHost
err := DB(ctx).Table(tht(id)).Where("id=?", id).Order("ii").Find(&ret).Error
return ret, err
}
var (
taskHostCache = make([]TaskHost, 0, 128)
taskHostLock sync.RWMutex
)
func TaskHostCachePush(taskHost TaskHost) {
taskHostLock.Lock()
defer taskHostLock.Unlock()
taskHostCache = append(taskHostCache, taskHost)
}
func TaskHostCachePopAll() []TaskHost {
taskHostLock.Lock()
defer taskHostLock.Unlock()
all := taskHostCache
taskHostCache = make([]TaskHost, 0, 128)
return all
}
func ReportCacheResult(ctx *ctx.Context) error {
result := TaskHostCachePopAll()
reports := make([]TaskHost, 0)
for _, th := range result {
// id大于redis初始id说明是edge与center失联时本地告警规则触发的自愈脚本生成的id
// 为了防止不同边缘机房生成的脚本任务id相同不上报结果至数据库
if th.Id >= storage.IDINITIAL {
logger.Infof("task[%d] host[%s] done, result:[%v]", th.Id, th.Host, th)
} else {
reports = append(reports, th)
}
}
if len(reports) == 0 {
return nil
}
errs, err := TaskHostUpserts(ctx, reports)
if err != nil {
return err
}
for key, err := range errs {
logger.Warningf("report task_host_cache[%s] result error: %v", key, err)
}
return nil
}

View File

@@ -0,0 +1,65 @@
package models
import (
"encoding/json"
"fmt"
"sync"
)
type TaskHostDoing struct {
Id int64 `gorm:"column:id;index"`
Host string `gorm:"column:host;size:128;not null;index"`
Clock int64 `gorm:"column:clock;not null;default:0"`
Action string `gorm:"column:action;size:16;not null"`
AlertTriggered bool `gorm:"-"`
}
func (TaskHostDoing) TableName() string {
return "task_host_doing"
}
func (doing *TaskHostDoing) MarshalBinary() ([]byte, error) {
return json.Marshal(doing)
}
func (doing *TaskHostDoing) UnmarshalBinary(data []byte) error {
return json.Unmarshal(data, doing)
}
func hostDoingCacheKey(id int64, host string) string {
return fmt.Sprintf("%s:%d", host, id)
}
var (
doingLock sync.RWMutex
doingMaps map[string][]TaskHostDoing
)
func SetDoingCache(v map[string][]TaskHostDoing) {
doingLock.Lock()
doingMaps = v
doingLock.Unlock()
}
func GetDoingCache(host string) []TaskHostDoing {
doingLock.RLock()
defer doingLock.RUnlock()
return doingMaps[host]
}
func CheckExistAndEdgeAlertTriggered(host string, id int64) (exist, isAlertTriggered bool) {
doingLock.RLock()
defer doingLock.RUnlock()
doings := doingMaps[host]
for _, doing := range doings {
if doing.Id == id {
exist = true
isAlertTriggered = doing.AlertTriggered
return
}
}
return false, false
}

364
models/ibex_task_meta.go Normal file
View File

@@ -0,0 +1,364 @@
package models
import (
"context"
"encoding/json"
"fmt"
"strings"
"time"
"github.com/ccfos/nightingale/v6/ibex/server/config"
"github.com/ccfos/nightingale/v6/pkg/ctx"
"github.com/ccfos/nightingale/v6/pkg/poster"
"github.com/ccfos/nightingale/v6/storage"
"github.com/toolkits/pkg/str"
"gorm.io/gorm"
)
type TaskMeta struct {
Id int64 `gorm:"column:id;primaryKey;autoIncrement" json:"id"`
Title string `gorm:"column:title;size:255;not null;default:''" json:"title"`
Account string `gorm:"column:account;size:64;not null" json:"account"`
Batch int `gorm:"column:batch;not null;default:0" json:"batch"`
Tolerance int `gorm:"column:tolerance;not null;default:0" json:"tolerance"`
Timeout int `gorm:"column:timeout;not null;default:0" json:"timeout"`
Pause string `gorm:"column:pause;size:255;not null;default:''" json:"pause"`
Script string `gorm:"column:script;type:text;not null" json:"script"`
Args string `gorm:"column:args;size:512;not null;default:''" json:"args"`
Stdin string `gorm:"column:stdin;size:1024;not null;default:''" json:"stdin"`
Creator string `gorm:"column:creator;size:64;not null;default:'';index" json:"creator"`
Created time.Time `gorm:"column:created;not null;default:CURRENT_TIMESTAMP;type:timestamp;index" json:"created"`
Done bool `json:"done" gorm:"-"`
}
func (TaskMeta) TableName() string {
return "task_meta"
}
func (taskMeta *TaskMeta) MarshalBinary() ([]byte, error) {
return json.Marshal(taskMeta)
}
func (taskMeta *TaskMeta) UnmarshalBinary(data []byte) error {
return json.Unmarshal(data, taskMeta)
}
func (taskMeta *TaskMeta) Create(ctx *ctx.Context) error {
if config.C.IsCenter {
return DB(ctx).Create(taskMeta).Error
}
id, err := poster.PostByUrlsWithResp[int64](ctx, "/ibex/v1/task/meta", taskMeta)
if err == nil {
taskMeta.Id = id
}
return err
}
func taskMetaCacheKey(id int64) string {
return fmt.Sprintf("task:meta:%d", id)
}
func TaskMetaGet(ctx *ctx.Context, where string, args ...interface{}) (*TaskMeta, error) {
lst, err := TableRecordGets[[]*TaskMeta](ctx, TaskMeta{}.TableName(), where, args...)
if err != nil {
return nil, err
}
if len(lst) == 0 {
return nil, nil
}
return lst[0], nil
}
// TaskMetaGet 根据ID获取任务元信息会用到缓存
func TaskMetaGetByID(ctx *ctx.Context, id int64) (*TaskMeta, error) {
meta, err := TaskMetaCacheGet(ctx, id)
if err == nil {
return meta, nil
}
meta, err = TaskMetaGet(ctx, "id=?", id)
if err != nil {
return nil, err
}
if meta == nil {
return nil, nil
}
_, err = ctx.Redis.Set(context.Background(), taskMetaCacheKey(id), meta, storage.DEFAULT).Result()
return meta, err
}
func TaskMetaCacheGet(ctx *ctx.Context, id int64) (*TaskMeta, error) {
res := ctx.Redis.Get(context.Background(), taskMetaCacheKey(id))
meta := new(TaskMeta)
err := res.Scan(meta)
return meta, err
}
func (m *TaskMeta) CleanFields() error {
if m.Batch < 0 {
return fmt.Errorf("arg(batch) should be nonnegative")
}
if m.Tolerance < 0 {
return fmt.Errorf("arg(tolerance) should be nonnegative")
}
if m.Timeout < 0 {
return fmt.Errorf("arg(timeout) should be nonnegative")
}
if m.Timeout > 3600*24*5 {
return fmt.Errorf("arg(timeout) longer than five days")
}
if m.Timeout == 0 {
m.Timeout = 30
}
m.Pause = strings.Replace(m.Pause, "", ",", -1)
m.Pause = strings.Replace(m.Pause, " ", "", -1)
m.Args = strings.Replace(m.Args, "", ",", -1)
if m.Title == "" {
return fmt.Errorf("arg(title) is required")
}
if str.Dangerous(m.Title) {
return fmt.Errorf("arg(title) is dangerous")
}
if m.Script == "" {
return fmt.Errorf("arg(script) is required")
}
if str.Dangerous(m.Args) {
return fmt.Errorf("arg(args) is dangerous")
}
if str.Dangerous(m.Pause) {
return fmt.Errorf("arg(pause) is dangerous")
}
return nil
}
func (m *TaskMeta) HandleFH(fh string) {
i := strings.Index(m.Title, " FH: ")
if i > 0 {
m.Title = m.Title[:i]
}
m.Title = m.Title + " FH: " + fh
}
func (taskMeta *TaskMeta) Cache(ctx *ctx.Context, host string) error {
tx := ctx.Redis.TxPipeline()
tx.Set(ctx.Ctx, taskMetaCacheKey(taskMeta.Id), taskMeta, storage.DEFAULT)
tx.HSet(ctx.Ctx, IBEX_HOST_DOING, hostDoingCacheKey(taskMeta.Id, host), &TaskHostDoing{
Id: taskMeta.Id,
Host: host,
Clock: time.Now().Unix(),
Action: "start",
})
_, err := tx.Exec(ctx.Ctx)
return err
}
func (taskMeta *TaskMeta) Save(ctx *ctx.Context, hosts []string, action string) error {
return DB(ctx).Transaction(func(tx *gorm.DB) error {
if err := tx.Create(taskMeta).Error; err != nil {
return err
}
id := taskMeta.Id
if err := tx.Create(&TaskScheduler{Id: id}).Error; err != nil {
return err
}
if err := tx.Create(&TaskAction{Id: id, Action: action, Clock: time.Now().Unix()}).Error; err != nil {
return err
}
for i := 0; i < len(hosts); i++ {
host := strings.TrimSpace(hosts[i])
if host == "" {
continue
}
err := tx.Exec("INSERT INTO "+tht(id)+" (id, host, status) VALUES (?, ?, ?)", id, host, "waiting").Error
if err != nil {
return err
}
}
return nil
})
}
func (m *TaskMeta) Action(ctx *ctx.Context) (*TaskAction, error) {
return TaskActionGet(ctx, "id=?", m.Id)
}
func (m *TaskMeta) Hosts(ctx *ctx.Context) ([]TaskHost, error) {
var ret []TaskHost
err := DB(ctx).Table(tht(m.Id)).Where("id=?", m.Id).Select("id", "host", "status").Order("ii").Find(&ret).Error
return ret, err
}
func (m *TaskMeta) KillHost(ctx *ctx.Context, host string) error {
bean, err := TaskHostGet(ctx, m.Id, host)
if err != nil {
return err
}
if bean == nil {
return fmt.Errorf("no such host")
}
if !(bean.Status == "running" || bean.Status == "timeout") {
return fmt.Errorf("current status cannot kill")
}
if err := redoHost(ctx, m.Id, host, "kill"); err != nil {
return err
}
return statusSet(ctx, m.Id, host, "killing")
}
func (m *TaskMeta) IgnoreHost(ctx *ctx.Context, host string) error {
return statusSet(ctx, m.Id, host, "ignored")
}
func (m *TaskMeta) RedoHost(ctx *ctx.Context, host string) error {
bean, err := TaskHostGet(ctx, m.Id, host)
if err != nil {
return err
}
if bean == nil {
return fmt.Errorf("no such host")
}
if err := redoHost(ctx, m.Id, host, "start"); err != nil {
return err
}
return statusSet(ctx, m.Id, host, "running")
}
func statusSet(ctx *ctx.Context, id int64, host, status string) error {
return DB(ctx).Table(tht(id)).Where("id=? and host=?", id, host).Update("status", status).Error
}
func redoHost(ctx *ctx.Context, id int64, host, action string) error {
count, err := IbexCount(DB(ctx).Model(&TaskHostDoing{}).Where("id=? and host=?", id, host))
if err != nil {
return err
}
now := time.Now().Unix()
if count == 0 {
err = DB(ctx).Table("task_host_doing").Create(map[string]interface{}{
"id": id,
"host": host,
"clock": now,
"action": action,
}).Error
} else {
err = DB(ctx).Table("task_host_doing").Where("id=? and host=? and action <> ?", id, host, action).Updates(map[string]interface{}{
"clock": now,
"action": action,
}).Error
}
return err
}
func (m *TaskMeta) HostStrs(ctx *ctx.Context) ([]string, error) {
var ret []string
err := DB(ctx).Table(tht(m.Id)).Where("id=?", m.Id).Order("ii").Pluck("host", &ret).Error
return ret, err
}
func (m *TaskMeta) Stdouts(ctx *ctx.Context) ([]TaskHost, error) {
var ret []TaskHost
err := DB(ctx).Table(tht(m.Id)).Where("id=?", m.Id).Select("id", "host", "status", "stdout").Order("ii").Find(&ret).Error
return ret, err
}
func (m *TaskMeta) Stderrs(ctx *ctx.Context) ([]TaskHost, error) {
var ret []TaskHost
err := DB(ctx).Table(tht(m.Id)).Where("id=?", m.Id).Select("id", "host", "status", "stderr").Order("ii").Find(&ret).Error
return ret, err
}
func TaskMetaTotal(ctx *ctx.Context, creator, query string, before time.Time) (int64, error) {
session := DB(ctx).Model(&TaskMeta{})
session = session.Where("created > '" + before.Format("2006-01-02 15:04:05") + "'")
if creator != "" {
session = session.Where("creator = ?", creator)
}
if query != "" {
// q1 q2 -q3
arr := strings.Fields(query)
for i := 0; i < len(arr); i++ {
if arr[i] == "" {
continue
}
if strings.HasPrefix(arr[i], "-") {
q := "%" + arr[i][1:] + "%"
session = session.Where("title not like ?", q)
} else {
q := "%" + arr[i] + "%"
session = session.Where("title like ?", q)
}
}
}
return IbexCount(session)
}
func TaskMetaGets(ctx *ctx.Context, creator, query string, before time.Time, limit, offset int) ([]TaskMeta, error) {
session := DB(ctx).Model(&TaskMeta{}).Order("created desc").Limit(limit).Offset(offset)
session = session.Where("created > '" + before.Format("2006-01-02 15:04:05") + "'")
if creator != "" {
session = session.Where("creator = ?", creator)
}
if query != "" {
// q1 q2 -q3
arr := strings.Fields(query)
for i := 0; i < len(arr); i++ {
if arr[i] == "" {
continue
}
if strings.HasPrefix(arr[i], "-") {
q := "%" + arr[i][1:] + "%"
session = session.Where("title not like ?", q)
} else {
q := "%" + arr[i] + "%"
session = session.Where("title like ?", q)
}
}
}
var objs []TaskMeta
err := session.Find(&objs).Error
return objs, err
}

View File

@@ -0,0 +1,47 @@
package models
import (
"github.com/ccfos/nightingale/v6/pkg/ctx"
"gorm.io/gorm"
)
type TaskScheduler struct {
Id int64 `gorm:"column:id;primaryKey"`
Scheduler string `gorm:"column:scheduler;size:128;not null;default:''"`
}
func (TaskScheduler) TableName() string {
return "task_scheduler"
}
func TasksOfScheduler(ctx *ctx.Context, scheduler string) ([]int64, error) {
var ids []int64
err := DB(ctx).Model(&TaskScheduler{}).Where("scheduler = ?", scheduler).Pluck("id", &ids).Error
return ids, err
}
func TakeOverTask(ctx *ctx.Context, id int64, pre, current string) (bool, error) {
ret := DB(ctx).Model(&TaskScheduler{}).Where("id = ? and scheduler = ?", id, pre).Update("scheduler", current)
if ret.Error != nil {
return false, ret.Error
}
return ret.RowsAffected > 0, nil
}
func OrphanTaskIds(ctx *ctx.Context) ([]int64, error) {
var ids []int64
err := DB(ctx).Model(&TaskScheduler{}).Where("scheduler = ''").Pluck("id", &ids).Error
return ids, err
}
func CleanDoneTask(ctx *ctx.Context, id int64) error {
return DB(ctx).Transaction(func(tx *gorm.DB) error {
if err := tx.Where("id = ?", id).Delete(&TaskScheduler{}).Error; err != nil {
return err
}
return tx.Where("id = ?", id).Delete(&TaskAction{}).Error
})
}

View File

@@ -0,0 +1,47 @@
package models
import (
"github.com/ccfos/nightingale/v6/pkg/ctx"
"time"
)
type TaskSchedulerHealth struct {
Scheduler string `gorm:"column:scheduler;uniqueIndex;size:128;not null"`
Clock int64 `gorm:"column:clock;not null;index"`
}
func (TaskSchedulerHealth) TableName() string {
return "task_scheduler_health"
}
func TaskSchedulerHeartbeat(ctx *ctx.Context, scheduler string) error {
var cnt int64
err := DB(ctx).Model(&TaskSchedulerHealth{}).Where("scheduler = ?", scheduler).Count(&cnt).Error
if err != nil {
return err
}
if cnt == 0 {
ret := DB(ctx).Create(&TaskSchedulerHealth{
Scheduler: scheduler,
Clock: time.Now().Unix(),
})
err = ret.Error
} else {
err = DB(ctx).Model(&TaskSchedulerHealth{}).Where("scheduler = ?", scheduler).Update("clock", time.Now().Unix()).Error
}
return err
}
func DeadTaskSchedulers(ctx *ctx.Context) ([]string, error) {
clock := time.Now().Unix() - 10
var arr []string
err := DB(ctx).Model(&TaskSchedulerHealth{}).Where("clock < ?", clock).Pluck("scheduler", &arr).Error
return arr, err
}
func DelDeadTaskScheduler(ctx *ctx.Context, scheduler string) error {
return DB(ctx).Where("scheduler = ?", scheduler).Delete(&TaskSchedulerHealth{}).Error
}

View File

@@ -2,11 +2,9 @@ package migrate
import (
"fmt"
"github.com/ccfos/nightingale/v6/models"
"github.com/ccfos/nightingale/v6/pkg/ormx"
imodels "github.com/flashcatcloud/ibex/src/models"
"github.com/toolkits/pkg/logger"
"gorm.io/driver/mysql"
"gorm.io/gorm"
@@ -28,7 +26,7 @@ func MigrateIbexTables(db *gorm.DB) {
db = db.Set("gorm:table_options", tableOptions)
}
dts := []interface{}{&imodels.TaskMeta{}, &imodels.TaskScheduler{}, &imodels.TaskSchedulerHealth{}, &imodels.TaskHostDoing{}, &imodels.TaskAction{}}
dts := []interface{}{&models.TaskMeta{}, &models.TaskScheduler{}, &models.TaskSchedulerHealth{}, &models.TaskHostDoing{}, &models.TaskAction{}}
for _, dt := range dts {
err := db.AutoMigrate(dt)
if err != nil {
@@ -38,7 +36,7 @@ func MigrateIbexTables(db *gorm.DB) {
for i := 0; i < 100; i++ {
tableName := fmt.Sprintf("task_host_%d", i)
err := db.Table(tableName).AutoMigrate(&imodels.TaskHost{})
err := db.Table(tableName).AutoMigrate(&models.TaskHost{})
if err != nil {
logger.Errorf("failed to migrate table:%s %v", tableName, err)
}

View File

@@ -2,20 +2,21 @@ package ctx
import (
"context"
"github.com/ccfos/nightingale/v6/storage"
"github.com/ccfos/nightingale/v6/conf"
"gorm.io/gorm"
)
type Context struct {
DB *gorm.DB
Redis storage.Redis
CenterApi conf.CenterApi
Ctx context.Context
IsCenter bool
}
func NewContext(ctx context.Context, db *gorm.DB, isCenter bool, centerApis ...conf.CenterApi) *Context {
func NewContext(ctx context.Context, db *gorm.DB, redis storage.Redis, isCenter bool, centerApis ...conf.CenterApi) *Context {
var api conf.CenterApi
if len(centerApis) > 0 {
api = centerApis[0]
@@ -24,6 +25,7 @@ func NewContext(ctx context.Context, db *gorm.DB, isCenter bool, centerApis ...c
return &Context{
Ctx: ctx,
DB: db,
Redis: redis,
CenterApi: api,
IsCenter: isCenter,
}

View File

@@ -120,7 +120,7 @@ func GinEngine(mode string, cfg Config) *gin.Engine {
return r
}
func Init(cfg Config, handler http.Handler) func() {
func Init(cfg Config, ctx context.Context, handler http.Handler) func() {
addr := fmt.Sprintf("%s:%d", cfg.Host, cfg.Port)
srv := &http.Server{
Addr: addr,
@@ -146,7 +146,7 @@ func Init(cfg Config, handler http.Handler) func() {
}()
return func() {
ctx, cancel := context.WithTimeout(context.Background(), time.Second*time.Duration(cfg.ShutdownTimeout))
ctx, cancel := context.WithTimeout(ctx, time.Second*time.Duration(cfg.ShutdownTimeout))
defer cancel()
srv.SetKeepAlivesEnabled(false)

View File

@@ -32,8 +32,6 @@ func Initialize(configDir string, cryptoKey string) (func(), error) {
return nil, err
}
ctx := ctx.NewContext(context.Background(), nil, false, config.CenterApi)
var redis storage.Redis
if config.Redis.Address != "" {
redis, err = storage.NewRedis(config.Redis)
@@ -41,6 +39,9 @@ func Initialize(configDir string, cryptoKey string) (func(), error) {
return nil, err
}
}
ctx := ctx.NewContext(context.Background(), nil, redis, false, config.CenterApi)
idents := idents.New(ctx, redis)
metas := metas.New(redis)
@@ -55,7 +56,7 @@ func Initialize(configDir string, cryptoKey string) (func(), error) {
rt := router.New(config.HTTP, config.Pushgw, config.Alert, targetCache, busiGroupCache, idents, metas, writers, ctx)
rt.Config(r)
httpClean := httpx.Init(config.HTTP, r)
httpClean := httpx.Init(config.HTTP, context.Background(), r)
return func() {
logxClean()

View File

@@ -6,6 +6,7 @@ import (
"fmt"
"os"
"strings"
"time"
"github.com/ccfos/nightingale/v6/pkg/tlsx"
"github.com/redis/go-redis/v9"
@@ -135,3 +136,15 @@ func MSet(ctx context.Context, r Redis, m map[string]interface{}) error {
_, err := pipe.Exec(ctx)
return err
}
const DEFAULT = time.Hour
const IDINITIAL = 1 << 32
func IdInit(redis Redis) error {
return redis.Set(context.Background(), "id", IDINITIAL, 0).Err()
}
func IdGet(redis Redis) (int64, error) {
return redis.Incr(context.Background(), "id").Result()
}