Compare commits

...

2 Commits

Author SHA1 Message Date
Ulric Qin
93ff325f72 fix execution of notify script 2025-07-06 08:38:45 +08:00
ulricqin
84ee14d21e add img (#2767) 2025-07-03 19:48:39 +08:00
6 changed files with 97 additions and 16 deletions

View File

@@ -31,7 +31,9 @@
Nightingale is an open-source monitoring project that focuses on alerting. Similar to Grafana, Nightingale also connects with various existing data sources. However, while Grafana emphasizes visualization, Nightingale places greater emphasis on the alerting engine, as well as the processing and distribution of alarms.
The Nightingale project was initially developed and open-sourced by DiDi.inc. On May 11, 2022, it was donated to the Open Source Development Committee of the China Computer Federation (CCF ODC).
> The Nightingale project was initially developed and open-sourced by DiDi.inc. On May 11, 2022, it was donated to the Open Source Development Committee of the China Computer Federation (CCF ODC).
![](https://n9e.github.io/img/global/arch-bg.png)
## 💡 How Nightingale Works
@@ -104,7 +106,7 @@ Then Nightingale is not suitable. It is recommended that you choose on-call prod
## 🤝 Community Co-Building
- ❇️ Please read the [Nightingale Open Source Project and Community Governance Draft](./doc/community-governance.md). We sincerely welcome every user, developer, company, and organization to use Nightingale, actively report bugs, submit feature requests, share best practices, and help build a professional and active open-source community.
- ❤️ Nightingale Contributors
- ❤️ Nightingale Contributors
<a href="https://github.com/ccfos/nightingale/graphs/contributors">
<img src="https://contrib.rocks/image?repo=ccfos/nightingale" />
</a>

View File

@@ -31,7 +31,9 @@
夜莺监控(Nightingale)是一款侧重告警的监控类开源项目。类似 Grafana 的数据源集成方式,夜莺也是对接多种既有的数据源,不过 Grafana 侧重在可视化,夜莺是侧重在告警引擎、告警事件的处理和分发。
夜莺监控项目,最初由滴滴开发和开源,并于 2022 年 5 月 11 日捐赠予中国计算机学会开源发展委员会(CCF ODC),为 CCF ODC 成立后接受捐赠的第一个开源项目。
> 夜莺监控项目,最初由滴滴开发和开源,并于 2022 年 5 月 11 日捐赠予中国计算机学会开源发展委员会(CCF ODC),为 CCF ODC 成立后接受捐赠的第一个开源项目。
![](https://n9e.github.io/img/global/arch-bg.png)
## 夜莺的工作逻辑

View File

@@ -21,10 +21,10 @@ import (
"sort"
"strconv"
"strings"
"syscall"
"time"
"unicode/utf8"
"github.com/ccfos/nightingale/v6/pkg/cmdx"
"github.com/ccfos/nightingale/v6/pkg/ctx"
"github.com/ccfos/nightingale/v6/pkg/poster"
"github.com/ccfos/nightingale/v6/pkg/tplx"
@@ -33,7 +33,6 @@ import (
"github.com/pkg/errors"
"github.com/toolkits/pkg/file"
"github.com/toolkits/pkg/logger"
"github.com/toolkits/pkg/sys"
"gopkg.in/gomail.v2"
)
@@ -196,10 +195,8 @@ func (ncc *NotifyChannelConfig) SendScript(events []*AlertCurEvent, tpl map[stri
cmd.Stdout = &buf
cmd.Stderr = &buf
err := startCmd(cmd)
if err != nil {
return "", "", fmt.Errorf("failed to start script: %v", err)
}
err, isTimeout := cmdx.RunTimeout(cmd, time.Duration(config.Timeout)*time.Millisecond)
logger.Infof("event_script_notify_result: exec %s output: %s isTimeout: %v err: %v", fpath, buf.String(), isTimeout, err)
res := buf.String()
@@ -218,8 +215,6 @@ func (ncc *NotifyChannelConfig) SendScript(events []*AlertCurEvent, tpl map[stri
res = res[:validLen] + "..."
}
err, isTimeout := sys.WrapTimeout(cmd, time.Duration(config.Timeout)*time.Second)
logger.Infof("event_script_notify_result: exec %s output: %s isTimeout: %v err: %v", fpath, buf.String(), isTimeout, err)
if isTimeout {
if err == nil {
return cmd.String(), res, errors.New("timeout and killed process")
@@ -257,11 +252,6 @@ func getStdinBytes(events []*AlertCurEvent, tpl map[string]interface{}, params m
return jsonBytes
}
// startCmd starts c with Setpgid enabled and returns the error from c.Start.
//
// Any SysProcAttr the caller already configured is preserved: the attributes
// are copied and only Setpgid is forced to true, instead of replacing the
// whole struct and silently dropping the caller's other settings.
func startCmd(c *exec.Cmd) error {
	attr := syscall.SysProcAttr{}
	if c.SysProcAttr != nil {
		// Copy rather than mutate in place so a struct shared between
		// several commands is not modified behind the caller's back.
		attr = *c.SysProcAttr
	}
	attr.Setpgid = true
	c.SysProcAttr = &attr
	return c.Start()
}
func NotifyChannelStatistics(ctx *ctx.Context) (*Statistics, error) {
if !ctx.IsCenter {
s, err := poster.GetByUrls[*Statistics](ctx, "/v1/n9e/statistic?name=notify_channel")

View File

@@ -0,0 +1,37 @@
//go:build !windows
// +build !windows
package cmdx
import (
"os/exec"
"syscall"
"time"
)
// CmdWait waits for an already-started command to exit, for at most timeout.
//
// If the command exits first, CmdWait returns the result of cmd.Wait and
// false. If the timeout elapses first, it sends SIGKILL to the command's
// process group and returns the result of that kill call and true. In the
// timeout case it does not wait for the command to be reaped: cmd.Wait is
// still completing in a background goroutine when CmdWait returns.
//
// The command must have been started with SysProcAttr.Setpgid set to true,
// otherwise the process-group kill does not target the intended processes.
func CmdWait(cmd *exec.Cmd, timeout time.Duration) (error, bool) {
	// A command that was never started has no process to signal. Let
	// cmd.Wait report that, rather than risk dereferencing a nil Process in
	// the timeout branch below.
	if cmd.Process == nil {
		return cmd.Wait(), false
	}

	// Buffered so the waiting goroutine can always deliver its result and
	// exit, even when nobody receives it after a timeout.
	done := make(chan error, 1)
	go func() {
		done <- cmd.Wait()
	}()

	// An explicit timer is stopped on return, so it does not stay pending
	// for the full timeout when the command finishes early.
	timer := time.NewTimer(timeout)
	defer timer.Stop()

	select {
	case <-timer.C:
		// IMPORTANT: cmd.SysProcAttr = &syscall.SysProcAttr{Setpgid: true} is necessary before cmd.Start()
		// The negative pid addresses the whole process group.
		return syscall.Kill(-cmd.Process.Pid, syscall.SIGKILL), true
	case err := <-done:
		return err, false
	}
}
// CmdStart starts cmd with Setpgid enabled and returns the error from
// cmd.Start.
//
// Any SysProcAttr the caller already configured is preserved: the attributes
// are copied and only Setpgid is forced to true, instead of replacing the
// whole struct and silently dropping the caller's other settings.
func CmdStart(cmd *exec.Cmd) error {
	attr := syscall.SysProcAttr{}
	if cmd.SysProcAttr != nil {
		// Copy rather than mutate in place so a struct shared between
		// several commands is not modified behind the caller's back.
		attr = *cmd.SysProcAttr
	}
	attr.Setpgid = true
	cmd.SysProcAttr = &attr
	return cmd.Start()
}

35
pkg/cmdx/cmd_windows.go Normal file
View File

@@ -0,0 +1,35 @@
//go:build windows
// +build windows
package cmdx
import (
"os/exec"
"syscall"
"time"
)
// CmdWait waits for an already-started command to exit, for at most timeout.
//
// If the command exits first, CmdWait returns the result of cmd.Wait and
// false. If the timeout elapses first, it sends a kill signal to the
// command's own process and returns the result of that call and true. In the
// timeout case it does not wait for the command to be reaped: cmd.Wait is
// still completing in a background goroutine when CmdWait returns.
func CmdWait(cmd *exec.Cmd, timeout time.Duration) (error, bool) {
	// A command that was never started has no process to signal. Let
	// cmd.Wait report that, rather than risk dereferencing a nil Process in
	// the timeout branch below.
	if cmd.Process == nil {
		return cmd.Wait(), false
	}

	// Buffered so the waiting goroutine can always deliver its result and
	// exit, even when nobody receives it after a timeout.
	done := make(chan error, 1)
	go func() {
		done <- cmd.Wait()
	}()

	// An explicit timer is stopped on return, so it does not stay pending
	// for the full timeout when the command finishes early.
	timer := time.NewTimer(timeout)
	defer timer.Stop()

	select {
	case <-timer.C:
		// Only the direct process is signalled here; no process group is
		// involved in this variant.
		return cmd.Process.Signal(syscall.SIGKILL), true
	case err := <-done:
		return err, false
	}
}
// CmdStart starts cmd and returns whatever cmd.Start returns. This variant
// leaves the command's attributes untouched.
func CmdStart(cmd *exec.Cmd) error {
	startErr := cmd.Start()
	return startErr
}

15
pkg/cmdx/cmdx.go Normal file
View File

@@ -0,0 +1,15 @@
package cmdx
import (
"os/exec"
"time"
)
// RunTimeout starts cmd with CmdStart and then waits for it with CmdWait,
// allowing at most timeout for the wait.
//
// If starting fails, it returns the start error and false. Otherwise it
// returns exactly what CmdWait returns: the resulting error and whether the
// timeout was hit.
func RunTimeout(cmd *exec.Cmd, timeout time.Duration) (error, bool) {
	if startErr := CmdStart(cmd); startErr != nil {
		return startErr, false
	}

	waitErr, timedOut := CmdWait(cmd, timeout)
	return waitErr, timedOut
}