Compare commits
22 Commits
v8.2.0
...
integratio
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
6743b125a3 | ||
|
|
928388f637 | ||
|
|
664c98e6a6 | ||
|
|
cd1be178e7 | ||
|
|
a7034917b5 | ||
|
|
b872329508 | ||
|
|
b5ede2f2d8 | ||
|
|
209e82787f | ||
|
|
dd94719952 | ||
|
|
0991967c61 | ||
|
|
ee649d0b4b | ||
|
|
ca1094b263 | ||
|
|
e2b6b1f9ba | ||
|
|
be1b20685e | ||
|
|
335d551bd9 | ||
|
|
5b3e2e9764 | ||
|
|
cc67908ac3 | ||
|
|
8b297d7df1 | ||
|
|
0d1114b988 | ||
|
|
b800fcd47f | ||
|
|
3f865e2d19 | ||
|
|
e3ea65d098 |
@@ -345,7 +345,7 @@ func (arw *AlertRuleWorker) GetHostAnomalyPoint(ruleConfig string) []common.Anom
|
||||
missTargets = append(missTargets, ident)
|
||||
}
|
||||
}
|
||||
|
||||
logger.Debugf("rule_eval:%s missTargets:%v", arw.Key(), missTargets)
|
||||
targets := arw.processor.TargetCache.Gets(missTargets)
|
||||
for _, target := range targets {
|
||||
m := make(map[string]string)
|
||||
@@ -383,7 +383,6 @@ func (arw *AlertRuleWorker) GetHostAnomalyPoint(ruleConfig string) []common.Anom
|
||||
// this target is not collected by categraf, so do not check its offset
|
||||
continue
|
||||
}
|
||||
|
||||
if target, exists := targetMap[ident]; exists {
|
||||
if now-target.UpdateAt > 120 {
|
||||
// this target is not an active host, so do not check its offset
|
||||
@@ -397,6 +396,7 @@ func (arw *AlertRuleWorker) GetHostAnomalyPoint(ruleConfig string) []common.Anom
|
||||
}
|
||||
}
|
||||
|
||||
logger.Debugf("rule_eval:%s offsetIdents:%v", arw.Key(), offsetIdents)
|
||||
for host, offset := range offsetIdents {
|
||||
m := make(map[string]string)
|
||||
target, exists := arw.processor.TargetCache.Get(host)
|
||||
@@ -431,7 +431,7 @@ func (arw *AlertRuleWorker) GetHostAnomalyPoint(ruleConfig string) []common.Anom
|
||||
missTargets = append(missTargets, ident)
|
||||
}
|
||||
}
|
||||
|
||||
logger.Debugf("rule_eval:%s missTargets:%v", arw.Key(), missTargets)
|
||||
pct := float64(len(missTargets)) / float64(len(idents)) * 100
|
||||
if pct >= float64(trigger.Percent) {
|
||||
lst = append(lst, common.NewAnomalyPoint(trigger.Type, nil, now, pct, trigger.Severity))
|
||||
|
||||
@@ -76,7 +76,6 @@ ops:
|
||||
- "/dashboards/add"
|
||||
- "/dashboards/put"
|
||||
- "/dashboards/del"
|
||||
- "/dashboards-built-in"
|
||||
|
||||
- name: alert
|
||||
cname: 告警规则
|
||||
@@ -85,7 +84,7 @@ ops:
|
||||
- "/alert-rules/add"
|
||||
- "/alert-rules/put"
|
||||
- "/alert-rules/del"
|
||||
- "/alert-rules-built-in"
|
||||
|
||||
- name: alert-mutes
|
||||
cname: 告警静默管理
|
||||
ops:
|
||||
@@ -180,6 +179,14 @@ ops:
|
||||
- "/builtin-metrics/put"
|
||||
- "/builtin-metrics/del"
|
||||
|
||||
- name: built-in-components
|
||||
cname: 模版中心
|
||||
ops:
|
||||
- "/built-in-components"
|
||||
- "/built-in-components/add"
|
||||
- "/built-in-components/put"
|
||||
- "/built-in-components/del"
|
||||
|
||||
- name: system
|
||||
cname: 系统信息
|
||||
ops:
|
||||
|
||||
@@ -3,6 +3,7 @@ package integration
|
||||
import (
|
||||
"encoding/json"
|
||||
"path"
|
||||
"strings"
|
||||
|
||||
"github.com/ccfos/nightingale/v6/models"
|
||||
"github.com/ccfos/nightingale/v6/pkg/ctx"
|
||||
@@ -31,145 +32,148 @@ func Init(ctx *ctx.Context, builtinIntegrationsDir string) {
|
||||
|
||||
// get logo name
|
||||
// /api/n9e/integrations/icon/AliYun/aliyun.png
|
||||
// files, err := file.FilesUnder(componentDir + "/icon")
|
||||
// if err == nil && len(files) > 0 {
|
||||
// component.Logo = "/api/n9e/integrations/icon/" + component.Ident + "/" + files[0]
|
||||
// } else if err != nil {
|
||||
// logger.Warningf("read builtin component icon dir fail %s %v", component.Ident, err)
|
||||
// }
|
||||
files, err := file.FilesUnder(componentDir + "/icon")
|
||||
if err == nil && len(files) > 0 {
|
||||
component.Logo = "/api/n9e/integrations/icon/" + component.Ident + "/" + files[0]
|
||||
} else if err != nil {
|
||||
logger.Warningf("read builtin component icon dir fail %s %v", component.Ident, err)
|
||||
}
|
||||
|
||||
// // get description
|
||||
// files, err = file.FilesUnder(componentDir + "/markdown")
|
||||
// if err == nil && len(files) > 0 {
|
||||
// var readmeFile string
|
||||
// for _, file := range files {
|
||||
// if strings.HasSuffix(strings.ToLower(file), "md") {
|
||||
// readmeFile = file
|
||||
// }
|
||||
// }
|
||||
// if readmeFile != "" {
|
||||
// component.Readme, _ = file.ReadString(readmeFile)
|
||||
// }
|
||||
// } else if err != nil {
|
||||
// logger.Warningf("read builtin component markdown dir fail %s %v", component.Ident, err)
|
||||
// }
|
||||
// get description
|
||||
files, err = file.FilesUnder(componentDir + "/markdown")
|
||||
if err == nil && len(files) > 0 {
|
||||
var readmeFile string
|
||||
for _, file := range files {
|
||||
if strings.HasSuffix(strings.ToLower(file), "md") {
|
||||
readmeFile = componentDir + "/markdown/" + file
|
||||
break
|
||||
}
|
||||
}
|
||||
if readmeFile != "" {
|
||||
component.Readme, _ = file.ReadString(readmeFile)
|
||||
}
|
||||
} else if err != nil {
|
||||
logger.Warningf("read builtin component markdown dir fail %s %v", component.Ident, err)
|
||||
}
|
||||
|
||||
// exists, _ := models.BuiltinComponentExists(ctx, &component)
|
||||
// if !exists {
|
||||
// err = component.Add(ctx, "system")
|
||||
// if err != nil {
|
||||
// logger.Warning("add builtin component fail ", component, err)
|
||||
// continue
|
||||
// }
|
||||
// }
|
||||
exists, _ := models.BuiltinComponentExists(ctx, &component)
|
||||
if !exists {
|
||||
err = component.Add(ctx, "system")
|
||||
if err != nil {
|
||||
logger.Warning("add builtin component fail ", component, err)
|
||||
continue
|
||||
}
|
||||
}
|
||||
|
||||
// // alerts
|
||||
// files, err = file.FilesUnder(componentDir + "/alerts")
|
||||
// if err == nil && len(files) > 0 {
|
||||
// for _, f := range files {
|
||||
// fp := componentDir + "/alerts/" + f
|
||||
// bs, err := file.ReadBytes(fp)
|
||||
// if err != nil {
|
||||
// logger.Warning("read builtin component alerts file fail ", f, err)
|
||||
// continue
|
||||
// }
|
||||
// alerts
|
||||
files, err = file.FilesUnder(componentDir + "/alerts")
|
||||
if err == nil && len(files) > 0 {
|
||||
for _, f := range files {
|
||||
fp := componentDir + "/alerts/" + f
|
||||
bs, err := file.ReadBytes(fp)
|
||||
if err != nil {
|
||||
logger.Warning("read builtin component alerts file fail ", f, err)
|
||||
continue
|
||||
}
|
||||
|
||||
// alerts := []models.AlertRule{}
|
||||
// err = json.Unmarshal(bs, &alerts)
|
||||
// if err != nil {
|
||||
// logger.Warning("parse builtin component alerts file fail ", f, err)
|
||||
// continue
|
||||
// }
|
||||
alerts := []models.AlertRule{}
|
||||
err = json.Unmarshal(bs, &alerts)
|
||||
if err != nil {
|
||||
logger.Warning("parse builtin component alerts file fail ", f, err)
|
||||
continue
|
||||
}
|
||||
|
||||
// for _, alert := range alerts {
|
||||
// content, err := json.Marshal(alert)
|
||||
// if err != nil {
|
||||
// logger.Warning("marshal builtin alert fail ", alert, err)
|
||||
// continue
|
||||
// }
|
||||
for _, alert := range alerts {
|
||||
content, err := json.Marshal(alert)
|
||||
if err != nil {
|
||||
logger.Warning("marshal builtin alert fail ", alert, err)
|
||||
continue
|
||||
}
|
||||
|
||||
// cate := strings.Replace(f, ".json", "", -1)
|
||||
// builtinAlert := models.BuiltinPayload{
|
||||
// Component: component.Ident,
|
||||
// Type: "alert",
|
||||
// Cate: cate,
|
||||
// Name: alert.Name,
|
||||
// Content: string(content),
|
||||
// }
|
||||
cate := strings.Replace(f, ".json", "", -1)
|
||||
builtinAlert := models.BuiltinPayload{
|
||||
Component: component.Ident,
|
||||
Type: "alert",
|
||||
Cate: cate,
|
||||
Name: alert.Name,
|
||||
Tags: alert.AppendTags,
|
||||
Content: string(content),
|
||||
}
|
||||
|
||||
// exists, err := models.BuiltinPayloadExists(ctx, &builtinAlert)
|
||||
// if err != nil {
|
||||
// logger.Warning("check builtin alert exists fail ", builtinAlert, err)
|
||||
// continue
|
||||
// }
|
||||
exists, err := models.BuiltinPayloadExists(ctx, &builtinAlert)
|
||||
if err != nil {
|
||||
logger.Warning("check builtin alert exists fail ", builtinAlert, err)
|
||||
continue
|
||||
}
|
||||
|
||||
// if exists {
|
||||
// continue
|
||||
// }
|
||||
if exists {
|
||||
continue
|
||||
}
|
||||
|
||||
// err = builtinAlert.Add(ctx, "system")
|
||||
// if err != nil {
|
||||
// logger.Warningf("add builtin alert:%+v fail %v", builtinAlert, err)
|
||||
// continue
|
||||
// }
|
||||
// }
|
||||
// }
|
||||
// }
|
||||
err = builtinAlert.Add(ctx, "system")
|
||||
if err != nil {
|
||||
logger.Warningf("add builtin alert:%+v fail %v", builtinAlert, err)
|
||||
continue
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// // dashboards
|
||||
// files, err = file.FilesUnder(componentDir + "/dashboards")
|
||||
// if err == nil && len(files) > 0 {
|
||||
// for _, f := range files {
|
||||
// fp := componentDir + "/dashboards/" + f
|
||||
// bs, err := file.ReadBytes(fp)
|
||||
// if err != nil {
|
||||
// logger.Warning("read builtin component dashboards file fail ", f, err)
|
||||
// continue
|
||||
// }
|
||||
// dashboards
|
||||
files, err = file.FilesUnder(componentDir + "/dashboards")
|
||||
if err == nil && len(files) > 0 {
|
||||
for _, f := range files {
|
||||
fp := componentDir + "/dashboards/" + f
|
||||
bs, err := file.ReadBytes(fp)
|
||||
if err != nil {
|
||||
logger.Warning("read builtin component dashboards file fail ", f, err)
|
||||
continue
|
||||
}
|
||||
|
||||
// dashboard := BuiltinBoard{}
|
||||
// err = json.Unmarshal(bs, &dashboard)
|
||||
// if err != nil {
|
||||
// logger.Warning("parse builtin component dashboards file fail ", f, err)
|
||||
// continue
|
||||
// }
|
||||
dashboard := BuiltinBoard{}
|
||||
err = json.Unmarshal(bs, &dashboard)
|
||||
if err != nil {
|
||||
logger.Warning("parse builtin component dashboards file fail ", f, err)
|
||||
continue
|
||||
}
|
||||
|
||||
// content, err := json.Marshal(dashboard)
|
||||
// if err != nil {
|
||||
// logger.Warning("marshal builtin dashboard fail ", dashboard, err)
|
||||
// continue
|
||||
// }
|
||||
content, err := json.Marshal(dashboard)
|
||||
if err != nil {
|
||||
logger.Warning("marshal builtin dashboard fail ", dashboard, err)
|
||||
continue
|
||||
}
|
||||
|
||||
// builtinDashboard := models.BuiltinPayload{
|
||||
// Component: component.Ident,
|
||||
// Type: "dashboard",
|
||||
// Cate: "",
|
||||
// Name: dashboard.Name,
|
||||
// Content: string(content),
|
||||
// }
|
||||
builtinDashboard := models.BuiltinPayload{
|
||||
Component: component.Ident,
|
||||
Type: "dashboard",
|
||||
Cate: "",
|
||||
Name: dashboard.Name,
|
||||
Tags: dashboard.Tags,
|
||||
Content: string(content),
|
||||
}
|
||||
|
||||
// exists, err := models.BuiltinPayloadExists(ctx, &builtinDashboard)
|
||||
// if err != nil {
|
||||
// logger.Warning("check builtin dashboard exists fail ", builtinDashboard, err)
|
||||
// continue
|
||||
// }
|
||||
exists, err := models.BuiltinPayloadExists(ctx, &builtinDashboard)
|
||||
if err != nil {
|
||||
logger.Warning("check builtin dashboard exists fail ", builtinDashboard, err)
|
||||
continue
|
||||
}
|
||||
|
||||
// if exists {
|
||||
// continue
|
||||
// }
|
||||
if exists {
|
||||
continue
|
||||
}
|
||||
|
||||
// err = builtinDashboard.Add(ctx, "system")
|
||||
// if err != nil {
|
||||
// logger.Warning("add builtin dashboard fail ", builtinDashboard, err)
|
||||
// continue
|
||||
// }
|
||||
// }
|
||||
// } else if err != nil {
|
||||
// logger.Warningf("read builtin component dash dir fail %s %v", component.Ident, err)
|
||||
// }
|
||||
err = builtinDashboard.Add(ctx, "system")
|
||||
if err != nil {
|
||||
logger.Warning("add builtin dashboard fail ", builtinDashboard, err)
|
||||
continue
|
||||
}
|
||||
}
|
||||
} else if err != nil {
|
||||
logger.Warningf("read builtin component dash dir fail %s %v", component.Ident, err)
|
||||
}
|
||||
|
||||
// metrics
|
||||
files, err := file.FilesUnder(componentDir + "/metrics")
|
||||
files, err = file.FilesUnder(componentDir + "/metrics")
|
||||
if err == nil && len(files) > 0 {
|
||||
for _, f := range files {
|
||||
fp := componentDir + "/metrics/" + f
|
||||
|
||||
@@ -274,13 +274,14 @@ func (rt *Router) Config(r *gin.Engine) {
|
||||
pages.POST("/builtin-cate-favorite", rt.auth(), rt.user(), rt.builtinCateFavoriteAdd)
|
||||
pages.DELETE("/builtin-cate-favorite/:name", rt.auth(), rt.user(), rt.builtinCateFavoriteDel)
|
||||
|
||||
pages.GET("/builtin-boards", rt.builtinBoardGets)
|
||||
pages.GET("/builtin-board/:name", rt.builtinBoardGet)
|
||||
pages.GET("/dashboards/builtin/list", rt.builtinBoardGets)
|
||||
pages.GET("/builtin-boards-cates", rt.auth(), rt.user(), rt.builtinBoardCateGets)
|
||||
pages.POST("/builtin-boards-detail", rt.auth(), rt.user(), rt.builtinBoardDetailGets)
|
||||
pages.GET("/integrations/icon/:cate/:name", rt.builtinIcon)
|
||||
pages.GET("/integrations/makedown/:cate", rt.builtinMarkdown)
|
||||
|
||||
// pages.GET("/builtin-boards", rt.builtinBoardGets)
|
||||
// pages.GET("/builtin-board/:name", rt.builtinBoardGet)
|
||||
// pages.GET("/dashboards/builtin/list", rt.builtinBoardGets)
|
||||
// pages.GET("/builtin-boards-cates", rt.auth(), rt.user(), rt.builtinBoardCateGets)
|
||||
// pages.POST("/builtin-boards-detail", rt.auth(), rt.user(), rt.builtinBoardDetailGets)
|
||||
// pages.GET("/integrations/makedown/:cate", rt.builtinMarkdown)
|
||||
|
||||
pages.GET("/busi-groups/public-boards", rt.auth(), rt.user(), rt.perm("/dashboards"), rt.publicBoardGets)
|
||||
pages.GET("/busi-groups/boards", rt.auth(), rt.user(), rt.perm("/dashboards"), rt.boardGetsByGids)
|
||||
@@ -299,8 +300,8 @@ func (rt *Router) Config(r *gin.Engine) {
|
||||
pages.GET("/share-charts", rt.chartShareGets)
|
||||
pages.POST("/share-charts", rt.auth(), rt.chartShareAdd)
|
||||
|
||||
pages.GET("/alert-rules/builtin/alerts-cates", rt.auth(), rt.user(), rt.builtinAlertCateGets)
|
||||
pages.GET("/alert-rules/builtin/list", rt.auth(), rt.user(), rt.builtinAlertRules)
|
||||
// pages.GET("/alert-rules/builtin/alerts-cates", rt.auth(), rt.user(), rt.builtinAlertCateGets)
|
||||
// pages.GET("/alert-rules/builtin/list", rt.auth(), rt.user(), rt.builtinAlertRules)
|
||||
pages.GET("/alert-rules/callbacks", rt.auth(), rt.user(), rt.alertRuleCallbacks)
|
||||
|
||||
pages.GET("/busi-groups/alert-rules", rt.auth(), rt.user(), rt.perm("/alert-rules"), rt.alertRuleGetsByGids)
|
||||
@@ -434,16 +435,17 @@ func (rt *Router) Config(r *gin.Engine) {
|
||||
// for admin api
|
||||
pages.GET("/user/busi-groups", rt.auth(), rt.admin(), rt.userBusiGroupsGets)
|
||||
|
||||
pages.POST("/builtin-components", rt.auth(), rt.user(), rt.perm("/builtin-components/add"), rt.builtinComponentsAdd)
|
||||
pages.GET("/builtin-components", rt.auth(), rt.user(), rt.perm("/builtin-components"), rt.builtinComponentsGets)
|
||||
pages.PUT("/builtin-components", rt.auth(), rt.user(), rt.perm("/builtin-components/put"), rt.builtinComponentsPut)
|
||||
pages.DELETE("/builtin-components", rt.auth(), rt.user(), rt.perm("/builtin-components/del"), rt.builtinComponentsDel)
|
||||
pages.GET("/builtin-components", rt.auth(), rt.user(), rt.builtinComponentsGets)
|
||||
pages.POST("/builtin-components", rt.auth(), rt.user(), rt.perm("/built-in-components/add"), rt.builtinComponentsAdd)
|
||||
pages.PUT("/builtin-components", rt.auth(), rt.user(), rt.perm("/built-in-components/put"), rt.builtinComponentsPut)
|
||||
pages.DELETE("/builtin-components", rt.auth(), rt.user(), rt.perm("/built-in-components/del"), rt.builtinComponentsDel)
|
||||
|
||||
pages.POST("/builtin-payloads", rt.auth(), rt.user(), rt.perm("/builtin-payloads/add"), rt.builtinPayloadsAdd)
|
||||
pages.GET("/builtin-payloads", rt.auth(), rt.user(), rt.perm("/builtin-payloads"), rt.builtinPayloadsGets)
|
||||
pages.GET("/builtin-payload/:id", rt.auth(), rt.user(), rt.perm("/builtin-payloads"), rt.builtinPayloadGet)
|
||||
pages.PUT("/builtin-payloads", rt.auth(), rt.user(), rt.perm("/builtin-payloads/put"), rt.builtinPayloadsPut)
|
||||
pages.DELETE("/builtin-payloads", rt.auth(), rt.user(), rt.perm("/builtin-payloads/del"), rt.builtinPayloadsDel)
|
||||
pages.GET("/builtin-payloads", rt.auth(), rt.user(), rt.builtinPayloadsGets)
|
||||
pages.GET("/builtin-payloads/cates", rt.auth(), rt.user(), rt.builtinPayloadcatesGet)
|
||||
pages.POST("/builtin-payloads", rt.auth(), rt.user(), rt.perm("/built-in-components/add"), rt.builtinPayloadsAdd)
|
||||
pages.GET("/builtin-payload/:id", rt.auth(), rt.user(), rt.perm("/built-in-components"), rt.builtinPayloadGet)
|
||||
pages.PUT("/builtin-payloads", rt.auth(), rt.user(), rt.perm("/built-in-components/put"), rt.builtinPayloadsPut)
|
||||
pages.DELETE("/builtin-payloads", rt.auth(), rt.user(), rt.perm("/built-in-components/del"), rt.builtinPayloadsDel)
|
||||
}
|
||||
|
||||
r.GET("/api/n9e/versions", func(c *gin.Context) {
|
||||
|
||||
@@ -31,18 +31,11 @@ func (rt *Router) builtinComponentsAdd(c *gin.Context) {
|
||||
|
||||
func (rt *Router) builtinComponentsGets(c *gin.Context) {
|
||||
query := ginx.QueryStr(c, "query", "")
|
||||
limit := ginx.QueryInt(c, "limit", 20)
|
||||
|
||||
bc, err := models.BuiltinComponentGets(rt.Ctx, query, limit, ginx.Offset(c, limit))
|
||||
bc, err := models.BuiltinComponentGets(rt.Ctx, query)
|
||||
ginx.Dangerous(err)
|
||||
|
||||
total, err := models.BuiltinComponentCount(rt.Ctx, query)
|
||||
ginx.Dangerous(err)
|
||||
|
||||
ginx.NewRender(c).Data(gin.H{
|
||||
"list": bc,
|
||||
"total": total,
|
||||
}, nil)
|
||||
ginx.NewRender(c).Data(bc, nil)
|
||||
}
|
||||
|
||||
func (rt *Router) builtinComponentsPut(c *gin.Context) {
|
||||
|
||||
@@ -1,13 +1,21 @@
|
||||
package router
|
||||
|
||||
import (
|
||||
"encoding/json"
|
||||
"net/http"
|
||||
"strings"
|
||||
|
||||
"github.com/ccfos/nightingale/v6/models"
|
||||
"github.com/gin-gonic/gin"
|
||||
"github.com/toolkits/pkg/ginx"
|
||||
)
|
||||
|
||||
// Board is the shape that the builtin-payload handlers in this file decode
// dashboard JSON into. They read Name and Tags from it to fill the
// corresponding BuiltinPayload fields, and builtinPayloadsAdd re-encodes each
// Board when a payload's content is a JSON array of dashboards.
type Board struct {
	Name    string      `json:"name"`    // decoded from the "name" key
	Tags    string      `json:"tags"`    // decoded from the "tags" key
	Configs interface{} `json:"configs"` // decoded from the "configs" key; left untyped
}
|
||||
|
||||
func (rt *Router) builtinPayloadsAdd(c *gin.Context) {
|
||||
var lst []models.BuiltinPayload
|
||||
ginx.BindJSON(c, &lst)
|
||||
@@ -21,9 +29,118 @@ func (rt *Router) builtinPayloadsAdd(c *gin.Context) {
|
||||
|
||||
reterr := make(map[string]string)
|
||||
for i := 0; i < count; i++ {
|
||||
if err := lst[i].Add(rt.Ctx, username); err != nil {
|
||||
reterr[lst[i].Name] = err.Error()
|
||||
if lst[i].Type == "alert" {
|
||||
if strings.HasPrefix(strings.TrimSpace(lst[i].Content), "[") {
|
||||
// 处理多个告警规则模板的情况
|
||||
alertRules := []models.AlertRule{}
|
||||
if err := json.Unmarshal([]byte(lst[i].Content), &alertRules); err != nil {
|
||||
reterr[lst[i].Name] = err.Error()
|
||||
}
|
||||
|
||||
for _, rule := range alertRules {
|
||||
contentBytes, err := json.Marshal(rule)
|
||||
if err != nil {
|
||||
reterr[rule.Name] = err.Error()
|
||||
continue
|
||||
}
|
||||
|
||||
bp := models.BuiltinPayload{
|
||||
Type: lst[i].Type,
|
||||
Component: lst[i].Component,
|
||||
Cate: lst[i].Cate,
|
||||
Name: rule.Name,
|
||||
Tags: rule.AppendTags,
|
||||
Content: string(contentBytes),
|
||||
CreatedBy: username,
|
||||
UpdatedBy: username,
|
||||
}
|
||||
|
||||
if err := bp.Add(rt.Ctx, username); err != nil {
|
||||
reterr[bp.Name] = err.Error()
|
||||
}
|
||||
}
|
||||
continue
|
||||
}
|
||||
|
||||
alertRule := models.AlertRule{}
|
||||
if err := json.Unmarshal([]byte(lst[i].Content), &alertRule); err != nil {
|
||||
reterr[lst[i].Name] = err.Error()
|
||||
continue
|
||||
}
|
||||
|
||||
bp := models.BuiltinPayload{
|
||||
Type: lst[i].Type,
|
||||
Component: lst[i].Component,
|
||||
Cate: lst[i].Cate,
|
||||
Name: alertRule.Name,
|
||||
Tags: alertRule.AppendTags,
|
||||
Content: lst[i].Content,
|
||||
CreatedBy: username,
|
||||
UpdatedBy: username,
|
||||
}
|
||||
|
||||
if err := bp.Add(rt.Ctx, username); err != nil {
|
||||
reterr[bp.Name] = err.Error()
|
||||
}
|
||||
} else if lst[i].Type == "dashboard" {
|
||||
if strings.HasPrefix(strings.TrimSpace(lst[i].Content), "[") {
|
||||
// 处理多个仪表盘模板的情况
|
||||
dashboards := []Board{}
|
||||
if err := json.Unmarshal([]byte(lst[i].Content), &dashboards); err != nil {
|
||||
reterr[lst[i].Name] = err.Error()
|
||||
}
|
||||
|
||||
for _, dashboard := range dashboards {
|
||||
contentBytes, err := json.Marshal(dashboard)
|
||||
if err != nil {
|
||||
reterr[dashboard.Name] = err.Error()
|
||||
continue
|
||||
}
|
||||
|
||||
bp := models.BuiltinPayload{
|
||||
Type: lst[i].Type,
|
||||
Component: lst[i].Component,
|
||||
Cate: lst[i].Cate,
|
||||
Name: dashboard.Name,
|
||||
Tags: dashboard.Tags,
|
||||
Content: string(contentBytes),
|
||||
CreatedBy: username,
|
||||
UpdatedBy: username,
|
||||
}
|
||||
|
||||
if err := bp.Add(rt.Ctx, username); err != nil {
|
||||
reterr[bp.Name] = err.Error()
|
||||
}
|
||||
}
|
||||
continue
|
||||
}
|
||||
|
||||
dashboard := Board{}
|
||||
if err := json.Unmarshal([]byte(lst[i].Content), &dashboard); err != nil {
|
||||
reterr[lst[i].Name] = err.Error()
|
||||
continue
|
||||
}
|
||||
|
||||
bp := models.BuiltinPayload{
|
||||
Type: lst[i].Type,
|
||||
Component: lst[i].Component,
|
||||
Cate: lst[i].Cate,
|
||||
Name: dashboard.Name,
|
||||
Tags: dashboard.Tags,
|
||||
Content: lst[i].Content,
|
||||
CreatedBy: username,
|
||||
UpdatedBy: username,
|
||||
}
|
||||
|
||||
if err := bp.Add(rt.Ctx, username); err != nil {
|
||||
reterr[bp.Name] = err.Error()
|
||||
}
|
||||
} else {
|
||||
if err := lst[i].Add(rt.Ctx, username); err != nil {
|
||||
reterr[lst[i].Name] = err.Error()
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
ginx.NewRender(c).Data(reterr, nil)
|
||||
@@ -33,20 +150,18 @@ func (rt *Router) builtinPayloadsGets(c *gin.Context) {
|
||||
typ := ginx.QueryStr(c, "type", "")
|
||||
component := ginx.QueryStr(c, "component", "")
|
||||
cate := ginx.QueryStr(c, "cate", "")
|
||||
name := ginx.QueryStr(c, "name", "")
|
||||
limit := ginx.QueryInt(c, "limit", 20)
|
||||
query := ginx.QueryStr(c, "query", "")
|
||||
|
||||
lst, err := models.BuiltinPayloadGets(rt.Ctx, typ, component, cate, name, limit, ginx.Offset(c, limit))
|
||||
ginx.Dangerous(err)
|
||||
m := make(map[string][]*models.BuiltinPayload)
|
||||
for _, v := range lst {
|
||||
if _, ok := m[v.Cate]; !ok {
|
||||
m[v.Cate] = make([]*models.BuiltinPayload, 0)
|
||||
}
|
||||
m[v.Cate] = append(m[v.Cate], v)
|
||||
}
|
||||
lst, err := models.BuiltinPayloadGets(rt.Ctx, typ, component, cate, query)
|
||||
ginx.NewRender(c).Data(lst, err)
|
||||
}
|
||||
|
||||
ginx.NewRender(c).Data(m, nil)
|
||||
func (rt *Router) builtinPayloadcatesGet(c *gin.Context) {
|
||||
typ := ginx.QueryStr(c, "type", "")
|
||||
component := ginx.QueryStr(c, "component", "")
|
||||
|
||||
cates, err := models.BuiltinPayloadCates(rt.Ctx, typ, component)
|
||||
ginx.NewRender(c).Data(cates, err)
|
||||
}
|
||||
|
||||
func (rt *Router) builtinPayloadGet(c *gin.Context) {
|
||||
@@ -75,6 +190,24 @@ func (rt *Router) builtinPayloadsPut(c *gin.Context) {
|
||||
return
|
||||
}
|
||||
|
||||
if req.Type == "alert" {
|
||||
alertRule := models.AlertRule{}
|
||||
if err := json.Unmarshal([]byte(req.Content), &alertRule); err != nil {
|
||||
ginx.Bomb(http.StatusBadRequest, err.Error())
|
||||
}
|
||||
|
||||
req.Name = alertRule.Name
|
||||
req.Tags = alertRule.AppendTags
|
||||
} else if req.Type == "dashboard" {
|
||||
dashboard := Board{}
|
||||
if err := json.Unmarshal([]byte(req.Content), &dashboard); err != nil {
|
||||
ginx.Bomb(http.StatusBadRequest, err.Error())
|
||||
}
|
||||
|
||||
req.Name = dashboard.Name
|
||||
req.Tags = dashboard.Tags
|
||||
}
|
||||
|
||||
username := Username(c)
|
||||
req.UpdatedBy = username
|
||||
|
||||
|
||||
@@ -508,6 +508,38 @@ CREATE TABLE `board_busigroup` (
|
||||
PRIMARY KEY (`busi_group_id`, `board_id`)
|
||||
) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4;
|
||||
|
||||
-- builtin_components: one row per built-in component (ident, logo, readme)
-- plus created/updated audit columns.
-- Fixes: removed the "Create Table:" label pasted from SHOW CREATE TABLE
-- output, un-escaped the column comments, and added the terminating semicolon.
CREATE TABLE `builtin_components` (
    `id` bigint(20) NOT NULL AUTO_INCREMENT COMMENT 'unique identifier',
    `ident` varchar(191) NOT NULL COMMENT 'identifier of component',
    `logo` varchar(191) NOT NULL COMMENT 'logo of component',
    `readme` text NOT NULL COMMENT 'readme of component',
    `created_at` bigint(20) NOT NULL DEFAULT 0 COMMENT 'create time',
    `created_by` varchar(191) NOT NULL DEFAULT '' COMMENT 'creator',
    `updated_at` bigint(20) NOT NULL DEFAULT 0 COMMENT 'update time',
    `updated_by` varchar(191) NOT NULL DEFAULT '' COMMENT 'updater',
    PRIMARY KEY (`id`),
    KEY `idx_ident` (`ident`)
) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4;
|
||||
|
||||
-- builtin_payloads: one row per built-in payload, keyed by type, component,
-- category and name, with its tags and content plus created/updated audit
-- columns.
-- Fixes: removed the "Create Table:" label pasted from SHOW CREATE TABLE
-- output, un-escaped the column comments, and added the terminating semicolon.
CREATE TABLE `builtin_payloads` (
    `id` bigint(20) NOT NULL AUTO_INCREMENT COMMENT 'unique identifier',
    `type` varchar(191) NOT NULL COMMENT 'type of payload',
    `component` varchar(191) NOT NULL COMMENT 'component of payload',
    `cate` varchar(191) NOT NULL COMMENT 'category of payload',
    `name` varchar(191) NOT NULL COMMENT 'name of payload',
    `tags` varchar(191) NOT NULL DEFAULT '' COMMENT 'tags of payload',
    `content` longtext NOT NULL COMMENT 'content of payload',
    `created_at` bigint(20) NOT NULL DEFAULT 0 COMMENT 'create time',
    `created_by` varchar(191) NOT NULL DEFAULT '' COMMENT 'creator',
    `updated_at` bigint(20) NOT NULL DEFAULT 0 COMMENT 'update time',
    `updated_by` varchar(191) NOT NULL DEFAULT '' COMMENT 'updater',
    PRIMARY KEY (`id`),
    KEY `idx_component` (`component`),
    KEY `idx_name` (`name`),
    KEY `idx_cate` (`cate`),
    KEY `idx_type` (`type`)
) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4;
|
||||
|
||||
CREATE TABLE `task_tpl`
|
||||
(
|
||||
`id` int unsigned NOT NULL AUTO_INCREMENT,
|
||||
|
||||
@@ -38,4 +38,38 @@ CREATE TABLE `board_busigroup` (
|
||||
`busi_group_id` bigint(20) NOT NULL DEFAULT '0' COMMENT 'busi group id',
|
||||
`board_id` bigint(20) NOT NULL DEFAULT '0' COMMENT 'board id',
|
||||
PRIMARY KEY (`busi_group_id`, `board_id`)
|
||||
) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4;
|
||||
) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4;
|
||||
|
||||
/* beta.5 */
|
||||
|
||||
-- builtin_components: one row per built-in component (ident, logo, readme)
-- plus created/updated audit columns.
-- Fixes: removed the "Create Table:" label pasted from SHOW CREATE TABLE
-- output, un-escaped the column comments, and added the terminating semicolon.
CREATE TABLE `builtin_components` (
    `id` bigint(20) NOT NULL AUTO_INCREMENT COMMENT 'unique identifier',
    `ident` varchar(191) NOT NULL COMMENT 'identifier of component',
    `logo` varchar(191) NOT NULL COMMENT 'logo of component',
    `readme` text NOT NULL COMMENT 'readme of component',
    `created_at` bigint(20) NOT NULL DEFAULT 0 COMMENT 'create time',
    `created_by` varchar(191) NOT NULL DEFAULT '' COMMENT 'creator',
    `updated_at` bigint(20) NOT NULL DEFAULT 0 COMMENT 'update time',
    `updated_by` varchar(191) NOT NULL DEFAULT '' COMMENT 'updater',
    PRIMARY KEY (`id`),
    KEY `idx_ident` (`ident`)
) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4;
|
||||
|
||||
-- builtin_payloads: one row per built-in payload, keyed by type, component,
-- category and name, with its tags and content plus created/updated audit
-- columns.
-- Fixes: removed the "Create Table:" label pasted from SHOW CREATE TABLE
-- output, un-escaped the column comments, and added the terminating semicolon.
CREATE TABLE `builtin_payloads` (
    `id` bigint(20) NOT NULL AUTO_INCREMENT COMMENT 'unique identifier',
    `type` varchar(191) NOT NULL COMMENT 'type of payload',
    `component` varchar(191) NOT NULL COMMENT 'component of payload',
    `cate` varchar(191) NOT NULL COMMENT 'category of payload',
    `name` varchar(191) NOT NULL COMMENT 'name of payload',
    `tags` varchar(191) NOT NULL DEFAULT '' COMMENT 'tags of payload',
    `content` longtext NOT NULL COMMENT 'content of payload',
    `created_at` bigint(20) NOT NULL DEFAULT 0 COMMENT 'create time',
    `created_by` varchar(191) NOT NULL DEFAULT '' COMMENT 'creator',
    `updated_at` bigint(20) NOT NULL DEFAULT 0 COMMENT 'update time',
    `updated_by` varchar(191) NOT NULL DEFAULT '' COMMENT 'updater',
    PRIMARY KEY (`id`),
    KEY `idx_component` (`component`),
    KEY `idx_name` (`name`),
    KEY `idx_cate` (`cate`),
    KEY `idx_type` (`type`)
) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4;
|
||||
|
Before Width: | Height: | Size: 2.0 KiB |
@@ -1,30 +0,0 @@
|
||||
# 调整间隔时间
|
||||
如有诉求对此插件本身的采集间隔时间调整的话就启用,单位为秒
|
||||
interval = 15
|
||||
|
||||
# 获取被监控端设备的网卡名称
|
||||
可用以下命令获取网卡名称列表
|
||||
```
|
||||
ip addr | grep '^[0-9]' |awk -F':' '{print $2}'
|
||||
|
||||
lo
|
||||
eth0
|
||||
br-153e7f4f0c83
|
||||
br-2f302c2a8faa
|
||||
br-5ae0cdb82efc
|
||||
br-68cba8773a8c
|
||||
br-c50ca3122079
|
||||
docker0
|
||||
br-fd769e4347bd
|
||||
veth944ac75@if52
|
||||
```
|
||||
# 在数组instances中启用eth_device
|
||||
将以上获取的网卡列表,根据自己的诉求填入,如eth0
|
||||
```
|
||||
eth_device="eth0"
|
||||
```
|
||||
# 测试是否能获取到值
|
||||
```
|
||||
./categraf --test --inputs arp_packet
|
||||
|
||||
```
|
||||
|
Before Width: | Height: | Size: 1.9 KiB |
@@ -1,126 +0,0 @@
|
||||
# kernel_vmstat
|
||||
|
||||
该监控插件采集的是 `/proc/vmstat` 的指标数据,需要较高版本的 kernel,`/proc/vmstat`内容较多,配置文件中给了一个白名单的配置,大家按需启用,只有启用了才会采集。
|
||||
|
||||
```ini
|
||||
[white_list]
|
||||
oom_kill = 1
|
||||
nr_free_pages = 0
|
||||
nr_alloc_batch = 0
|
||||
nr_inactive_anon = 0
|
||||
nr_active_anon = 0
|
||||
nr_inactive_file = 0
|
||||
nr_active_file = 0
|
||||
nr_unevictable = 0
|
||||
nr_mlock = 0
|
||||
nr_anon_pages = 0
|
||||
nr_mapped = 0
|
||||
nr_file_pages = 0
|
||||
nr_dirty = 0
|
||||
nr_writeback = 0
|
||||
nr_slab_reclaimable = 0
|
||||
nr_slab_unreclaimable = 0
|
||||
nr_page_table_pages = 0
|
||||
nr_kernel_stack = 0
|
||||
nr_unstable = 0
|
||||
nr_bounce = 0
|
||||
nr_vmscan_write = 0
|
||||
nr_vmscan_immediate_reclaim = 0
|
||||
nr_writeback_temp = 0
|
||||
nr_isolated_anon = 0
|
||||
nr_isolated_file = 0
|
||||
nr_shmem = 0
|
||||
nr_dirtied = 0
|
||||
nr_written = 0
|
||||
numa_hit = 0
|
||||
numa_miss = 0
|
||||
numa_foreign = 0
|
||||
numa_interleave = 0
|
||||
numa_local = 0
|
||||
numa_other = 0
|
||||
workingset_refault = 0
|
||||
workingset_activate = 0
|
||||
workingset_nodereclaim = 0
|
||||
nr_anon_transparent_hugepages = 0
|
||||
nr_free_cma = 0
|
||||
nr_dirty_threshold = 0
|
||||
nr_dirty_background_threshold = 0
|
||||
pgpgin = 0
|
||||
pgpgout = 0
|
||||
pswpin = 0
|
||||
pswpout = 0
|
||||
pgalloc_dma = 0
|
||||
pgalloc_dma32 = 0
|
||||
pgalloc_normal = 0
|
||||
pgalloc_movable = 0
|
||||
pgfree = 0
|
||||
pgactivate = 0
|
||||
pgdeactivate = 0
|
||||
pgfault = 0
|
||||
pgmajfault = 0
|
||||
pglazyfreed = 0
|
||||
pgrefill_dma = 0
|
||||
pgrefill_dma32 = 0
|
||||
pgrefill_normal = 0
|
||||
pgrefill_movable = 0
|
||||
pgsteal_kswapd_dma = 0
|
||||
pgsteal_kswapd_dma32 = 0
|
||||
pgsteal_kswapd_normal = 0
|
||||
pgsteal_kswapd_movable = 0
|
||||
pgsteal_direct_dma = 0
|
||||
pgsteal_direct_dma32 = 0
|
||||
pgsteal_direct_normal = 0
|
||||
pgsteal_direct_movable = 0
|
||||
pgscan_kswapd_dma = 0
|
||||
pgscan_kswapd_dma32 = 0
|
||||
pgscan_kswapd_normal = 0
|
||||
pgscan_kswapd_movable = 0
|
||||
pgscan_direct_dma = 0
|
||||
pgscan_direct_dma32 = 0
|
||||
pgscan_direct_normal = 0
|
||||
pgscan_direct_movable = 0
|
||||
pgscan_direct_throttle = 0
|
||||
zone_reclaim_failed = 0
|
||||
pginodesteal = 0
|
||||
slabs_scanned = 0
|
||||
kswapd_inodesteal = 0
|
||||
kswapd_low_wmark_hit_quickly = 0
|
||||
kswapd_high_wmark_hit_quickly = 0
|
||||
pageoutrun = 0
|
||||
allocstall = 0
|
||||
pgrotated = 0
|
||||
drop_pagecache = 0
|
||||
drop_slab = 0
|
||||
numa_pte_updates = 0
|
||||
numa_huge_pte_updates = 0
|
||||
numa_hint_faults = 0
|
||||
numa_hint_faults_local = 0
|
||||
numa_pages_migrated = 0
|
||||
pgmigrate_success = 0
|
||||
pgmigrate_fail = 0
|
||||
compact_migrate_scanned = 0
|
||||
compact_free_scanned = 0
|
||||
compact_isolated = 0
|
||||
compact_stall = 0
|
||||
compact_fail = 0
|
||||
compact_success = 0
|
||||
htlb_buddy_alloc_success = 0
|
||||
htlb_buddy_alloc_fail = 0
|
||||
unevictable_pgs_culled = 0
|
||||
unevictable_pgs_scanned = 0
|
||||
unevictable_pgs_rescued = 0
|
||||
unevictable_pgs_mlocked = 0
|
||||
unevictable_pgs_munlocked = 0
|
||||
unevictable_pgs_cleared = 0
|
||||
unevictable_pgs_stranded = 0
|
||||
thp_fault_alloc = 0
|
||||
thp_fault_fallback = 0
|
||||
thp_collapse_alloc = 0
|
||||
thp_collapse_alloc_failed = 0
|
||||
thp_split = 0
|
||||
thp_zero_page_alloc = 0
|
||||
thp_zero_page_alloc_failed = 0
|
||||
balloon_inflate = 0
|
||||
balloon_deflate = 0
|
||||
balloon_migrate = 0
|
||||
```
|
||||
@@ -9,3 +9,193 @@ categraf 部署之后,就会自动采集 CPU、内存、磁盘、IO、网路
|
||||
## 内置告警规则
|
||||
|
||||
夜莺内置了告警规则,文件名是 `_categraf` 的表示是使用 categraf 作为采集器。文件名是 `_exporter` 的表示是使用 node-exporter 作为采集器。
|
||||
|
||||
下面是一个可自己配置开启的插件
|
||||
## arp packet
|
||||
### 调整间隔时间
|
||||
如有诉求对此插件本身的采集间隔时间调整的话就启用,单位为秒
|
||||
interval = 15
|
||||
|
||||
### 获取被监控端设备的网卡名称
|
||||
可用以下命令获取网卡名称列表
|
||||
```
|
||||
ip addr | grep '^[0-9]' |awk -F':' '{print $2}'
|
||||
|
||||
lo
|
||||
eth0
|
||||
br-153e7f4f0c83
|
||||
br-2f302c2a8faa
|
||||
br-5ae0cdb82efc
|
||||
br-68cba8773a8c
|
||||
br-c50ca3122079
|
||||
docker0
|
||||
br-fd769e4347bd
|
||||
veth944ac75@if52
|
||||
```
|
||||
### 在数组instances中启用eth_device
|
||||
将以上获取的网卡列表,根据自己的诉求填入,如eth0
|
||||
```
|
||||
eth_device="eth0"
|
||||
```
|
||||
### 测试是否能获取到值
|
||||
```
|
||||
./categraf --test --inputs arp_packet
|
||||
|
||||
```
|
||||
|
||||
## netstat
|
||||
该插件采集网络连接情况,比如有多少 time_wait 连接,多少 established 连接
|
||||
|
||||
## kernel_vmstat
|
||||
该监控插件采集的是 `/proc/vmstat` 的指标数据,需要较高版本的 kernel,`/proc/vmstat`内容较多,配置文件中给了一个白名单的配置,大家按需启用,只有启用了才会采集。
|
||||
|
||||
```ini
|
||||
[white_list]
|
||||
oom_kill = 1
|
||||
nr_free_pages = 0
|
||||
nr_alloc_batch = 0
|
||||
nr_inactive_anon = 0
|
||||
nr_active_anon = 0
|
||||
nr_inactive_file = 0
|
||||
nr_active_file = 0
|
||||
nr_unevictable = 0
|
||||
nr_mlock = 0
|
||||
nr_anon_pages = 0
|
||||
nr_mapped = 0
|
||||
nr_file_pages = 0
|
||||
nr_dirty = 0
|
||||
nr_writeback = 0
|
||||
nr_slab_reclaimable = 0
|
||||
nr_slab_unreclaimable = 0
|
||||
nr_page_table_pages = 0
|
||||
nr_kernel_stack = 0
|
||||
nr_unstable = 0
|
||||
nr_bounce = 0
|
||||
nr_vmscan_write = 0
|
||||
nr_vmscan_immediate_reclaim = 0
|
||||
nr_writeback_temp = 0
|
||||
nr_isolated_anon = 0
|
||||
nr_isolated_file = 0
|
||||
nr_shmem = 0
|
||||
nr_dirtied = 0
|
||||
nr_written = 0
|
||||
numa_hit = 0
|
||||
numa_miss = 0
|
||||
numa_foreign = 0
|
||||
numa_interleave = 0
|
||||
numa_local = 0
|
||||
numa_other = 0
|
||||
workingset_refault = 0
|
||||
workingset_activate = 0
|
||||
workingset_nodereclaim = 0
|
||||
nr_anon_transparent_hugepages = 0
|
||||
nr_free_cma = 0
|
||||
nr_dirty_threshold = 0
|
||||
nr_dirty_background_threshold = 0
|
||||
pgpgin = 0
|
||||
pgpgout = 0
|
||||
pswpin = 0
|
||||
pswpout = 0
|
||||
pgalloc_dma = 0
|
||||
pgalloc_dma32 = 0
|
||||
pgalloc_normal = 0
|
||||
pgalloc_movable = 0
|
||||
pgfree = 0
|
||||
pgactivate = 0
|
||||
pgdeactivate = 0
|
||||
pgfault = 0
|
||||
pgmajfault = 0
|
||||
pglazyfreed = 0
|
||||
pgrefill_dma = 0
|
||||
pgrefill_dma32 = 0
|
||||
pgrefill_normal = 0
|
||||
pgrefill_movable = 0
|
||||
pgsteal_kswapd_dma = 0
|
||||
pgsteal_kswapd_dma32 = 0
|
||||
pgsteal_kswapd_normal = 0
|
||||
pgsteal_kswapd_movable = 0
|
||||
pgsteal_direct_dma = 0
|
||||
pgsteal_direct_dma32 = 0
|
||||
pgsteal_direct_normal = 0
|
||||
pgsteal_direct_movable = 0
|
||||
pgscan_kswapd_dma = 0
|
||||
pgscan_kswapd_dma32 = 0
|
||||
pgscan_kswapd_normal = 0
|
||||
pgscan_kswapd_movable = 0
|
||||
pgscan_direct_dma = 0
|
||||
pgscan_direct_dma32 = 0
|
||||
pgscan_direct_normal = 0
|
||||
pgscan_direct_movable = 0
|
||||
pgscan_direct_throttle = 0
|
||||
zone_reclaim_failed = 0
|
||||
pginodesteal = 0
|
||||
slabs_scanned = 0
|
||||
kswapd_inodesteal = 0
|
||||
kswapd_low_wmark_hit_quickly = 0
|
||||
kswapd_high_wmark_hit_quickly = 0
|
||||
pageoutrun = 0
|
||||
allocstall = 0
|
||||
pgrotated = 0
|
||||
drop_pagecache = 0
|
||||
drop_slab = 0
|
||||
numa_pte_updates = 0
|
||||
numa_huge_pte_updates = 0
|
||||
numa_hint_faults = 0
|
||||
numa_hint_faults_local = 0
|
||||
numa_pages_migrated = 0
|
||||
pgmigrate_success = 0
|
||||
pgmigrate_fail = 0
|
||||
compact_migrate_scanned = 0
|
||||
compact_free_scanned = 0
|
||||
compact_isolated = 0
|
||||
compact_stall = 0
|
||||
compact_fail = 0
|
||||
compact_success = 0
|
||||
htlb_buddy_alloc_success = 0
|
||||
htlb_buddy_alloc_fail = 0
|
||||
unevictable_pgs_culled = 0
|
||||
unevictable_pgs_scanned = 0
|
||||
unevictable_pgs_rescued = 0
|
||||
unevictable_pgs_mlocked = 0
|
||||
unevictable_pgs_munlocked = 0
|
||||
unevictable_pgs_cleared = 0
|
||||
unevictable_pgs_stranded = 0
|
||||
thp_fault_alloc = 0
|
||||
thp_fault_fallback = 0
|
||||
thp_collapse_alloc = 0
|
||||
thp_collapse_alloc_failed = 0
|
||||
thp_split = 0
|
||||
thp_zero_page_alloc = 0
|
||||
thp_zero_page_alloc_failed = 0
|
||||
balloon_inflate = 0
|
||||
balloon_deflate = 0
|
||||
balloon_migrate = 0
|
||||
```
|
||||
|
||||
|
||||
# processes
|
||||
|
||||
如果进程总量太多,比如超过了 CPU core 的 3 倍,就需要关注了。
|
||||
|
||||
## 配置说明
|
||||
|
||||
configuration file: `conf/input.processes/processes.toml`
|
||||
|
||||
默认配置如下(一般维持默认不用动):
|
||||
|
||||
```toml
|
||||
# # collect interval
|
||||
# interval = 15
|
||||
|
||||
# # force use ps command to gather
|
||||
# force_ps = false
|
||||
|
||||
# # force use /proc to gather
|
||||
# force_proc = false
|
||||
```
|
||||
|
||||
有两种采集方式,使用 ps 命令,或者直接读取 `/proc` 目录,默认是后者。如果想强制使用 ps 命令来采集,开启 force_ps 即可:
|
||||
|
||||
```toml
|
||||
force_ps = true
|
||||
```
|
||||
|
Before Width: | Height: | Size: 3.6 KiB After Width: | Height: | Size: 3.6 KiB |
|
Before Width: | Height: | Size: 2.8 KiB |
@@ -1,7 +0,0 @@
|
||||
# netstat
|
||||
|
||||
该插件采集网络连接情况,比如有多少 time_wait 连接,多少 established 连接
|
||||
|
||||
# 监控大盘
|
||||
|
||||
该插件没有单独的监控大盘,OS 的监控大盘统一放到 system 下面了
|
||||
@@ -102,6 +102,227 @@ urls = [
|
||||
labels = {job="nginx-vts"}
|
||||
```
|
||||
|
||||
## 仪表盘
|
||||
# nginx_upstream_check 插件
|
||||
### 应用场景
|
||||
一般用于业务系统做对内或对外路由映射时使用代理服务,是运维最常见且最重要的代理工具。
|
||||
|
||||
夜莺内置了相关仪表盘,克隆到自己的业务组即可使用。
|
||||
### 部署场景
|
||||
需要在装有nginx服务的虚拟机启用此插件。
|
||||
|
||||
### 采集原理
|
||||
|
||||
- 该采集插件是读取 [nginx_upstream_check](https://github.com/yaoweibin/nginx_upstream_check_module) 的状态输出。[nginx_upstream_check](https://github.com/yaoweibin/nginx_upstream_check_module) 可以周期性检查 upstream 中的各个 server 是否存活,如果检查失败,就会标记为 `down`,如果检查成功,就标记为 `up`。
|
||||
|
||||
### 注意事项
|
||||
- 由于 TSDB 通常无法处理字符串,所以 Categraf 会做转换,将 `down` 转换为 2, `up` 转换为 1,其他状态转换为 0,使用 `nginx_upstream_check_status_code` 这个指标来表示,所以,我们可能需要这样的告警规则:
|
||||
|
||||
### 前置条件
|
||||
#### 条件1:nginx服务需要启用nginx_upstream_check_module模块
|
||||
```
|
||||
推荐源码编译方式安装模块,如不清楚要安装哪些模块,可参考:
|
||||
cd /opt/nginx-1.20.1 && ./configure \
|
||||
--prefix=/usr/share/nginx \
|
||||
--sbin-path=/usr/sbin/nginx \
|
||||
--modules-path=/usr/lib64/nginx/modules \
|
||||
--conf-path=/etc/nginx/nginx.conf \
|
||||
--error-log-path=/var/log/nginx/error.log \
|
||||
--http-log-path=/var/log/nginx/access.log \
|
||||
--http-client-body-temp-path=/var/lib/nginx/tmp/client_body \
|
||||
--http-proxy-temp-path=/var/lib/nginx/tmp/proxy \
|
||||
--http-fastcgi-temp-path=/var/lib/nginx/tmp/fastcgi \
|
||||
--http-uwsgi-temp-path=/var/lib/nginx/tmp/uwsgi \
|
||||
--http-scgi-temp-path=/var/lib/nginx/tmp/scgi \
|
||||
--pid-path=/var/run/nginx.pid \
|
||||
--lock-path=/run/lock/subsys/nginx \
|
||||
--user=nginx \
|
||||
--group=nginx \
|
||||
--with-compat \
|
||||
--with-threads \
|
||||
--with-http_addition_module \
|
||||
--with-http_auth_request_module \
|
||||
--with-http_dav_module \
|
||||
--with-http_flv_module \
|
||||
--with-http_gunzip_module \
|
||||
--with-http_gzip_static_module \
|
||||
--with-http_mp4_module \
|
||||
--with-http_random_index_module \
|
||||
--with-http_realip_module \
|
||||
--with-http_secure_link_module \
|
||||
--with-http_slice_module \
|
||||
--with-http_ssl_module \
|
||||
--with-http_stub_status_module \
|
||||
--with-http_sub_module \
|
||||
--with-http_v2_module \
|
||||
--with-mail \
|
||||
--with-mail_ssl_module \
|
||||
--with-stream \
|
||||
--with-stream_realip_module \
|
||||
--with-stream_ssl_module \
|
||||
--with-stream_ssl_preread_module \
|
||||
--with-select_module \
|
||||
--with-poll_module \
|
||||
--with-file-aio \
|
||||
--with-http_xslt_module=dynamic \
|
||||
--with-http_image_filter_module=dynamic \
|
||||
--with-http_perl_module=dynamic \
|
||||
--with-stream=dynamic \
|
||||
--with-mail=dynamic \
|
||||
--with-http_xslt_module=dynamic \
|
||||
--add-module=/etc/nginx/third-modules/nginx_upstream_check_module \
|
||||
--add-module=/etc/nginx/third-modules/ngx_devel_kit-0.3.0 \
|
||||
--add-module=/etc/nginx/third-modules/lua-nginx-module-0.10.13 \
|
||||
--add-module=/etc/nginx/third-modules/nginx-module-vts \
|
||||
--add-module=/etc/nginx/third-modules/ngx-fancyindex-0.5.2
|
||||
|
||||
# 根据cpu核数
|
||||
make -j2
|
||||
make install
|
||||
|
||||
注意:第三方模块nginx_upstream_check_module lua-nginx-module nginx-module-vts 都是相关插件所必备的依赖。
|
||||
```
|
||||
|
||||
#### 条件2:nginx启用check_status配置
|
||||
```
|
||||
[root@aliyun categraf]# cat /etc/nginx/conf.d/nginx-upstream.domains.com.conf
|
||||
server {
|
||||
listen 80;
|
||||
listen 443 ssl;
|
||||
server_name nginx-upstream.domains.com;
|
||||
include /etc/nginx/ssl_conf/domains.com.conf;
|
||||
|
||||
location / {
|
||||
check_status;
|
||||
include /etc/nginx/ip_whitelist.conf;
|
||||
}
|
||||
|
||||
access_log /var/log/nginx/nginx-upstream.domains.com.access.log main;
|
||||
error_log /var/log/nginx/nginx-upstream.domains.com.error.log warn;
|
||||
}
|
||||
```
|
||||
浏览器访问https://nginx-upstream.domains.com?format=json出现:
|
||||

|
||||
|
||||
浏览器访问https://nginx-upstream.domains.com出现:
|
||||

|
||||
|
||||
#### 条件3:在需要启用upstream监控的域名配置下进行配置
|
||||
例如:
|
||||
```
|
||||
[root@aliyun upstream_conf]# cat upstream_n9e.conf
|
||||
upstream n9e {
|
||||
server 127.0.0.1:18000 weight=10 max_fails=2 fail_timeout=5s;
|
||||
|
||||
check interval=3000 rise=2 fall=5 timeout=1000 type=tcp default_down=false port=18000;
|
||||
check_http_send "HEAD / HTTP/1.0\r\n\r\n";
|
||||
check_http_expect_alive http_2xx http_3xx;
|
||||
}
|
||||
|
||||
[root@aliyun upstream_conf]# cat upstream_n9e_server_api.conf
|
||||
upstream n9e-server-api {
|
||||
server 127.0.0.1:19000 weight=10 max_fails=2 fail_timeout=5s;
|
||||
|
||||
check interval=3000 rise=2 fall=5 timeout=1000 type=tcp default_down=false port=19000;
|
||||
check_http_send "HEAD / HTTP/1.0\r\n\r\n";
|
||||
check_http_expect_alive http_2xx http_3xx;
|
||||
}
|
||||
|
||||
[root@aliyun upstream_conf]# cat upstream_vm.conf
|
||||
upstream vm {
|
||||
server 127.0.0.1:8428 weight=10 max_fails=2 fail_timeout=5s;
|
||||
keepalive 20;
|
||||
|
||||
check interval=3000 rise=2 fall=5 timeout=1000 type=tcp default_down=false port=8428;
|
||||
check_http_send "HEAD / HTTP/1.0\r\n\r\n";
|
||||
check_http_expect_alive http_2xx http_3xx;
|
||||
}
|
||||
|
||||
```
|
||||
|
||||
### 配置场景
|
||||
```
|
||||
本配置启用或数据定义如下功能:
|
||||
增加自定义标签,可通过自定义标签筛选数据及更加精确的告警推送。
|
||||
响应超时时间为5秒。
|
||||
targets字段填写条件2所定义好的域名。
|
||||
```
|
||||
|
||||
### 修改nginx_upstream_check.toml文件配置
|
||||
```
|
||||
[root@aliyun conf]# cat input.nginx_upstream_check/nginx_upstream_check.toml
|
||||
|
||||
# # collect interval
|
||||
# interval = 15
|
||||
|
||||
[[instances]]
|
||||
# 这个配置最关键,是要给出获取 status 信息的接口地址
|
||||
targets = [
|
||||
"https://nginx-upstream.domains.com/?format=json"
|
||||
]
|
||||
|
||||
# 标签这个配置请注意
|
||||
# 如果 Categraf 和 Nginx 是在一台机器上,target 可能配置的是 127.0.0.1
|
||||
# 如果 Nginx 有多台机器,每台机器都有 Categraf 来采集本机的 Nginx 的 Status 信息
|
||||
# 可能会导致时序数据标签相同,不易区分,当然,Categraf 会自带 ident 标签,该标签标识本机机器名
|
||||
# 如果大家觉得 ident 标签不够用,可以用下面 labels 配置,附加 instance、region 之类的标签
|
||||
|
||||
# # append some labels for series
|
||||
labels = { cloud="my-cloud", region="my-region",azone="az1", product="my-product" }
|
||||
|
||||
# # interval = global.interval * interval_times
|
||||
# interval_times = 1
|
||||
|
||||
### Set http_proxy (categraf uses the system-wide proxy settings if it is not set)
|
||||
# http_proxy = "http://localhost:8888"
|
||||
|
||||
### Interface to use when dialing an address
|
||||
# interface = "eth0"
|
||||
|
||||
### HTTP Request Method
|
||||
# method = "GET"
|
||||
|
||||
### Set timeout (default 5 seconds)
|
||||
# timeout = "5s"
|
||||
|
||||
### Whether to follow redirects from the server (defaults to false)
|
||||
# follow_redirects = false
|
||||
|
||||
### Optional HTTP Basic Auth Credentials
|
||||
# username = "username"
|
||||
# password = "pa$$word"
|
||||
|
||||
### Optional headers
|
||||
# headers = ["X-From", "categraf", "X-Xyz", "abc"]
|
||||
|
||||
### Optional TLS Config
|
||||
# use_tls = false
|
||||
# tls_ca = "/etc/categraf/ca.pem"
|
||||
# tls_cert = "/etc/categraf/cert.pem"
|
||||
# tls_key = "/etc/categraf/key.pem"
|
||||
### Use TLS but skip chain & host verification
|
||||
# insecure_skip_verify = false
|
||||
```
|
||||
|
||||
### 测试配置
|
||||
```
|
||||
./categraf --test --inputs nginx_upstream_check
|
||||
|
||||
```
|
||||
### 重启服务
|
||||
```
|
||||
重启categraf服务生效
|
||||
systemctl daemon-reload && systemctl restart categraf && systemctl status categraf
|
||||
|
||||
查看启动日志是否有错误
|
||||
journalctl -f -n 500 -u categraf | grep -E 'E!|W!'
|
||||
```
|
||||
|
||||
### 检查数据呈现
|
||||
等待1-2分钟后数据就会在图表中展示出来,如图:
|
||||

|
||||
|
||||
|
||||
### 监控告警规则配置
|
||||
```
|
||||
一般查看后端是否异常为关键检查对象,nginx_upstream_check_status_code返回1代表正常,返回2代表异常(实际测试可从上图看出)。
|
||||
nginx_upstream_check_status_code!=1则视为异常需立即告警,级别为一级告警,执行频率为60秒,持续时长为60秒,留观时长2分钟,重复发送频率5分钟,最大发送次数0次,使用企业微信应用及电话语音通道将告警内容发送给系统运维组,此规则运用到周一到周日全天。
|
||||
```
|
||||
|
||||
|
Before Width: | Height: | Size: 1.7 KiB |
@@ -1,231 +0,0 @@
|
||||
# 应用场景
|
||||
一般用于业务系统做对内或对外路由映射时使用代理服务,是运维最常见且最重要的代理工具。
|
||||
|
||||
# 部署场景
|
||||
需要在装有nginx服务的虚拟机启用此插件。
|
||||
|
||||
# 采集原理
|
||||
|
||||
- 该采集插件是读取 [nginx_upstream_check](https://github.com/yaoweibin/nginx_upstream_check_module) 的状态输出。[nginx_upstream_check](https://github.com/yaoweibin/nginx_upstream_check_module) 可以周期性检查 upstream 中的各个 server 是否存活,如果检查失败,就会标记为 `down`,如果检查成功,就标记为 `up`。
|
||||
|
||||
# 注意事项
|
||||
- 由于 TSDB 通常无法处理字符串,所以 Categraf 会做转换,将 `down` 转换为 2, `up` 转换为 1,其他状态转换为 0,使用 `nginx_upstream_check_status_code` 这个指标来表示,所以,我们可能需要这样的告警规则:
|
||||
|
||||
# 前置条件
|
||||
## 条件1:nginx服务需要启用nginx_upstream_check_module模块
|
||||
```
|
||||
推荐源码编译方式安装模块,如不清楚要安装哪些模块,可参考:
|
||||
cd /opt/nginx-1.20.1 && ./configure \
|
||||
--prefix=/usr/share/nginx \
|
||||
--sbin-path=/usr/sbin/nginx \
|
||||
--modules-path=/usr/lib64/nginx/modules \
|
||||
--conf-path=/etc/nginx/nginx.conf \
|
||||
--error-log-path=/var/log/nginx/error.log \
|
||||
--http-log-path=/var/log/nginx/access.log \
|
||||
--http-client-body-temp-path=/var/lib/nginx/tmp/client_body \
|
||||
--http-proxy-temp-path=/var/lib/nginx/tmp/proxy \
|
||||
--http-fastcgi-temp-path=/var/lib/nginx/tmp/fastcgi \
|
||||
--http-uwsgi-temp-path=/var/lib/nginx/tmp/uwsgi \
|
||||
--http-scgi-temp-path=/var/lib/nginx/tmp/scgi \
|
||||
--pid-path=/var/run/nginx.pid \
|
||||
--lock-path=/run/lock/subsys/nginx \
|
||||
--user=nginx \
|
||||
--group=nginx \
|
||||
--with-compat \
|
||||
--with-threads \
|
||||
--with-http_addition_module \
|
||||
--with-http_auth_request_module \
|
||||
--with-http_dav_module \
|
||||
--with-http_flv_module \
|
||||
--with-http_gunzip_module \
|
||||
--with-http_gzip_static_module \
|
||||
--with-http_mp4_module \
|
||||
--with-http_random_index_module \
|
||||
--with-http_realip_module \
|
||||
--with-http_secure_link_module \
|
||||
--with-http_slice_module \
|
||||
--with-http_ssl_module \
|
||||
--with-http_stub_status_module \
|
||||
--with-http_sub_module \
|
||||
--with-http_v2_module \
|
||||
--with-mail \
|
||||
--with-mail_ssl_module \
|
||||
--with-stream \
|
||||
--with-stream_realip_module \
|
||||
--with-stream_ssl_module \
|
||||
--with-stream_ssl_preread_module \
|
||||
--with-select_module \
|
||||
--with-poll_module \
|
||||
--with-file-aio \
|
||||
--with-http_xslt_module=dynamic \
|
||||
--with-http_image_filter_module=dynamic \
|
||||
--with-http_perl_module=dynamic \
|
||||
--with-stream=dynamic \
|
||||
--with-mail=dynamic \
|
||||
--with-http_xslt_module=dynamic \
|
||||
--add-module=/etc/nginx/third-modules/nginx_upstream_check_module \
|
||||
--add-module=/etc/nginx/third-modules/ngx_devel_kit-0.3.0 \
|
||||
--add-module=/etc/nginx/third-modules/lua-nginx-module-0.10.13 \
|
||||
--add-module=/etc/nginx/third-modules/nginx-module-vts \
|
||||
--add-module=/etc/nginx/third-modules/ngx-fancyindex-0.5.2
|
||||
|
||||
# 根据cpu核数
|
||||
make -j2
|
||||
make install
|
||||
|
||||
注意:第三方模块nginx_upstream_check_module lua-nginx-module nginx-module-vts 都是相关插件所必备的依赖。
|
||||
```
|
||||
|
||||
## 条件2:nginx启用check_status配置
|
||||
```
|
||||
[root@aliyun categraf]# cat /etc/nginx/conf.d/nginx-upstream.domains.com.conf
|
||||
server {
|
||||
listen 80;
|
||||
listen 443 ssl;
|
||||
server_name nginx-upstream.domains.com;
|
||||
include /etc/nginx/ssl_conf/domains.com.conf;
|
||||
|
||||
location / {
|
||||
check_status;
|
||||
include /etc/nginx/ip_whitelist.conf;
|
||||
}
|
||||
|
||||
access_log /var/log/nginx/nginx-upstream.domains.com.access.log main;
|
||||
error_log /var/log/nginx/nginx-upstream.domains.com.error.log warn;
|
||||
}
|
||||
```
|
||||
浏览器访问https://nginx-upstream.domains.com?format=json出现:
|
||||

|
||||
|
||||
浏览器访问https://nginx-upstream.domains.com出现:
|
||||

|
||||
|
||||
## 条件3:在需要启用upstream监控的域名配置下进行配置
|
||||
例如:
|
||||
```
|
||||
[root@aliyun upstream_conf]# cat upstream_n9e.conf
|
||||
upstream n9e {
|
||||
server 127.0.0.1:18000 weight=10 max_fails=2 fail_timeout=5s;
|
||||
|
||||
check interval=3000 rise=2 fall=5 timeout=1000 type=tcp default_down=false port=18000;
|
||||
check_http_send "HEAD / HTTP/1.0\r\n\r\n";
|
||||
check_http_expect_alive http_2xx http_3xx;
|
||||
}
|
||||
|
||||
[root@aliyun upstream_conf]# cat upstream_n9e_server_api.conf
|
||||
upstream n9e-server-api {
|
||||
server 127.0.0.1:19000 weight=10 max_fails=2 fail_timeout=5s;
|
||||
|
||||
check interval=3000 rise=2 fall=5 timeout=1000 type=tcp default_down=false port=19000;
|
||||
check_http_send "HEAD / HTTP/1.0\r\n\r\n";
|
||||
check_http_expect_alive http_2xx http_3xx;
|
||||
}
|
||||
|
||||
[root@aliyun upstream_conf]# cat upstream_vm.conf
|
||||
upstream vm {
|
||||
server 127.0.0.1:8428 weight=10 max_fails=2 fail_timeout=5s;
|
||||
keepalive 20;
|
||||
|
||||
check interval=3000 rise=2 fall=5 timeout=1000 type=tcp default_down=false port=8428;
|
||||
check_http_send "HEAD / HTTP/1.0\r\n\r\n";
|
||||
check_http_expect_alive http_2xx http_3xx;
|
||||
}
|
||||
|
||||
```
|
||||
|
||||
# 配置场景
|
||||
```
|
||||
本配置启用或数据定义如下功能:
|
||||
增加自定义标签,可通过自定义标签筛选数据及更加精确的告警推送。
|
||||
响应超时时间为5秒。
|
||||
urls字段填写条件2所定义好的域名。
|
||||
```
|
||||
|
||||
# 修改nginx.toml文件配置
|
||||
```
|
||||
[root@aliyun conf]# cat input.nginx_upstream_check/nginx_upstream_check.toml
|
||||
|
||||
# # collect interval
|
||||
# interval = 15
|
||||
|
||||
[[instances]]
|
||||
# 这个配置最关键,是要给出获取 status 信息的接口地址
|
||||
targets = [
|
||||
"https://nginx-upstream.domains.com/?format=json"
|
||||
]
|
||||
|
||||
# 标签这个配置请注意
|
||||
# 如果 Categraf 和 Nginx 是在一台机器上,target 可能配置的是 127.0.0.1
|
||||
# 如果 Nginx 有多台机器,每台机器都有 Categraf 来采集本机的 Nginx 的 Status 信息
|
||||
# 可能会导致时序数据标签相同,不易区分,当然,Categraf 会自带 ident 标签,该标签标识本机机器名
|
||||
# 如果大家觉得 ident 标签不够用,可以用下面 labels 配置,附加 instance、region 之类的标签
|
||||
|
||||
# # append some labels for series
|
||||
labels = { cloud="my-cloud", region="my-region",azone="az1", product="my-product" }
|
||||
|
||||
# # interval = global.interval * interval_times
|
||||
# interval_times = 1
|
||||
|
||||
## Set http_proxy (categraf uses the system-wide proxy settings if it is not set)
|
||||
# http_proxy = "http://localhost:8888"
|
||||
|
||||
## Interface to use when dialing an address
|
||||
# interface = "eth0"
|
||||
|
||||
## HTTP Request Method
|
||||
# method = "GET"
|
||||
|
||||
## Set timeout (default 5 seconds)
|
||||
# timeout = "5s"
|
||||
|
||||
## Whether to follow redirects from the server (defaults to false)
|
||||
# follow_redirects = false
|
||||
|
||||
## Optional HTTP Basic Auth Credentials
|
||||
# username = "username"
|
||||
# password = "pa$$word"
|
||||
|
||||
## Optional headers
|
||||
# headers = ["X-From", "categraf", "X-Xyz", "abc"]
|
||||
|
||||
## Optional TLS Config
|
||||
# use_tls = false
|
||||
# tls_ca = "/etc/categraf/ca.pem"
|
||||
# tls_cert = "/etc/categraf/cert.pem"
|
||||
# tls_key = "/etc/categraf/key.pem"
|
||||
## Use TLS but skip chain & host verification
|
||||
# insecure_skip_verify = false
|
||||
```
|
||||
|
||||
# 测试配置
|
||||
```
|
||||
./categraf --test --inputs nginx_upstream_check
|
||||
|
||||
```
|
||||
# 重启服务
|
||||
```
|
||||
重启categraf服务生效
|
||||
systemctl daemon-reload && systemctl restart categraf && systemctl status categraf
|
||||
|
||||
查看启动日志是否有错误
|
||||
journalctl -f -n 500 -u categraf | grep "E\!" | grep "W\!"
|
||||
```
|
||||
|
||||
# 检查数据呈现
|
||||
等待1-2分钟后数据就会在图表中展示出来,如图:
|
||||

|
||||
|
||||
|
||||
# 监控告警规则配置
|
||||
```
|
||||
一般查看后端是否异常为关键检查对象,nginx_upstream_check_status_code返回1代表正常,返回2代表异常(实际测试可从上图看出)。
|
||||
nginx_upstream_check_status_code!=1则视为异常需立即告警,级别为一级告警,执行频率为60秒,持续时长为60秒,留观时长2分钟,重复发送频率5分钟,最大发送次数0次,使用企业微信应用及电话语音通道将告警内容发送给系统运维组,此规则运用到周一到周日全天。
|
||||
```
|
||||
|
||||
# 监控图表配置
|
||||
https://github.com/flashcatcloud/categraf/blob/main/inputs/nginx_upstream_check/dashboards.json
|
||||
|
||||
# 故障自愈配置
|
||||
```
|
||||
先略过
|
||||
```
|
||||
@@ -1,29 +0,0 @@
|
||||
[
|
||||
{
|
||||
"collector": "Categraf",
|
||||
"typ": "Nginx_Upstream_Check",
|
||||
"name": "Nginx upstream_check 最近一分钟检测失败次数",
|
||||
"unit": "none",
|
||||
"note": "[文档](https://github.com/flashcatcloud/categraf/blob/main/inputs/nginx_upstream_check/README.md)",
|
||||
"lang": "zh_CN",
|
||||
"expression": "increase(nginx_upstream_check_fall[1m])"
|
||||
},
|
||||
{
|
||||
"collector": "Categraf",
|
||||
"typ": "Nginx_Upstream_Check",
|
||||
"name": "Nginx upstream_check 最近一分钟检测次数",
|
||||
"unit": "none",
|
||||
"note": "[文档](https://github.com/flashcatcloud/categraf/blob/main/inputs/nginx_upstream_check/README.md)",
|
||||
"lang": "zh_CN",
|
||||
"expression": "increase(nginx_upstream_check_rise[1m])"
|
||||
},
|
||||
{
|
||||
"collector": "Categraf",
|
||||
"typ": "Nginx_Upstream_Check",
|
||||
"name": "Nginx upstream_check 检测结果状态码",
|
||||
"unit": "none",
|
||||
"note": "[文档](https://github.com/flashcatcloud/categraf/blob/main/inputs/nginx_upstream_check/README.md)\n\n- 0:unknown\n- 1:up\n- 2:down\n",
|
||||
"lang": "zh_CN",
|
||||
"expression": "nginx_upstream_check_status_code"
|
||||
}
|
||||
]
|
||||
|
Before Width: | Height: | Size: 3.6 KiB After Width: | Height: | Size: 3.6 KiB |
|
Before Width: | Height: | Size: 2.2 KiB |
@@ -1,35 +0,0 @@
|
||||
# 进程总量统计
|
||||
|
||||
如果进程总量太多,比如超过了 CPU core 的 3 倍,就需要关注了。
|
||||
|
||||
## 配置说明
|
||||
|
||||
configuration file: `conf/input.processes/processes.toml`
|
||||
|
||||
默认配置如下(一般维持默认不用动):
|
||||
|
||||
```toml
|
||||
# # collect interval
|
||||
# interval = 15
|
||||
|
||||
# # force use ps command to gather
|
||||
# force_ps = false
|
||||
|
||||
# # force use /proc to gather
|
||||
# force_proc = false
|
||||
```
|
||||
|
||||
有两种采集方式,使用 ps 命令,或者直接读取 `/proc` 目录,默认是后者。如果想强制使用 ps 命令来采集,开启 force_ps 即可:
|
||||
|
||||
```toml
|
||||
force_ps = true
|
||||
```
|
||||
|
||||
## 仪表盘
|
||||
|
||||
夜莺内置了进程总量的仪表盘,克隆到自己的业务组下即可使用。
|
||||
|
||||
## 告警规则
|
||||
|
||||
夜莺内置了进程总量的告警规则,克隆到自己的业务组下即可使用。
|
||||
|
||||
@@ -43,3 +43,7 @@ labels = { instance="n9e-10.23.25.3:6379" }
|
||||
如果一个 redis 集群有 3 个实例,对于业务应用来讲,发起一个请求,可能随机请求到某一个实例上去了,这个是没问题的,但是对于监控 client 而言,显然是希望到所有实例上获取数据的。
|
||||
|
||||
当然,如果多个 redis 实例组成了集群,我们希望有个标识来标识这个集群,这个时候,可以通过 labels 来实现,比如给每个实例增加一个 redis_clus 的标签,值为集群名字即可。
|
||||
|
||||
|
||||
# redis_sentinel
|
||||
forked from [telegraf/redis_sentinel](https://github.com/influxdata/telegraf/tree/master/plugins/inputs/redis_sentinel)
|
||||
|
||||
|
Before Width: | Height: | Size: 3.8 KiB |
@@ -1,3 +0,0 @@
|
||||
# redis_sentinel
|
||||
|
||||
forked from [telegraf/redis_sentinel](https://github.com/influxdata/telegraf/tree/master/plugins/inputs/redis_sentinel)
|
||||
@@ -86,7 +86,7 @@ func BuiltinComponentDels(ctx *ctx.Context, ids []int64) error {
|
||||
return DB(ctx).Where("id in ?", ids).Delete(new(BuiltinComponent)).Error
|
||||
}
|
||||
|
||||
func BuiltinComponentGets(ctx *ctx.Context, query string, limit, offset int) ([]*BuiltinComponent, error) {
|
||||
func BuiltinComponentGets(ctx *ctx.Context, query string) ([]*BuiltinComponent, error) {
|
||||
session := DB(ctx)
|
||||
if query != "" {
|
||||
queryPattern := "%" + query + "%"
|
||||
@@ -95,24 +95,11 @@ func BuiltinComponentGets(ctx *ctx.Context, query string, limit, offset int) ([]
|
||||
|
||||
var lst []*BuiltinComponent
|
||||
|
||||
err := session.Limit(limit).Offset(offset).Find(&lst).Error
|
||||
err := session.Order("ident ASC").Find(&lst).Error
|
||||
|
||||
return lst, err
|
||||
}
|
||||
|
||||
// BuiltinComponentCount returns the number of BuiltinComponent rows.
// When query is non-empty, only rows whose ident contains query are counted
// (the value is wrapped in % wildcards and matched with SQL LIKE).
// The count is returned together with any error reported by the query.
func BuiltinComponentCount(ctx *ctx.Context, query string) (int64, error) {
|
||||
session := DB(ctx).Model(&BuiltinComponent{})
|
||||
if query != "" {
|
||||
queryPattern := "%" + query + "%"
|
||||
session = session.Where("ident LIKE ?", queryPattern)
|
||||
}
|
||||
|
||||
var cnt int64
|
||||
err := session.Count(&cnt).Error
|
||||
|
||||
return cnt, err
|
||||
}
|
||||
|
||||
func BuiltinComponentGet(ctx *ctx.Context, where string, args ...interface{}) (*BuiltinComponent, error) {
|
||||
var lst []*BuiltinComponent
|
||||
err := DB(ctx).Where(where, args...).Find(&lst).Error
|
||||
|
||||
@@ -14,6 +14,7 @@ type BuiltinPayload struct {
|
||||
Component string `json:"component" gorm:"type:varchar(191);not null;index:idx_component,sort:asc;comment:'component of payload'"` // Host MySQL Redis
|
||||
Cate string `json:"cate" gorm:"type:varchar(191);not null;comment:'category of payload'"` // categraf_v1 telegraf_v1
|
||||
Name string `json:"name" gorm:"type:varchar(191);not null;index:idx_name,sort:asc;comment:'name of payload'"` //
|
||||
Tags string `json:"tags" gorm:"type:varchar(191);not null;default:'';comment:'tags of payload'"` // {"host":"
|
||||
Content string `json:"content" gorm:"type:longtext;not null;comment:'content of payload'"`
|
||||
CreatedAt int64 `json:"created_at" gorm:"type:bigint;not null;default:0;comment:'create time'"`
|
||||
CreatedBy string `json:"created_by" gorm:"type:varchar(191);not null;default:'';comment:'creator'"`
|
||||
@@ -105,7 +106,7 @@ func BuiltinPayloadGet(ctx *ctx.Context, where string, args ...interface{}) (*Bu
|
||||
return &bp, nil
|
||||
}
|
||||
|
||||
func BuiltinPayloadGets(ctx *ctx.Context, typ, component, cate, name string, limit, offset int) ([]*BuiltinPayload, error) {
|
||||
func BuiltinPayloadGets(ctx *ctx.Context, typ, component, cate, query string) ([]*BuiltinPayload, error) {
|
||||
session := DB(ctx)
|
||||
if typ != "" {
|
||||
session = session.Where("type = ?", typ)
|
||||
@@ -118,11 +119,36 @@ func BuiltinPayloadGets(ctx *ctx.Context, typ, component, cate, name string, lim
|
||||
session = session.Where("cate = ?", cate)
|
||||
}
|
||||
|
||||
if name != "" {
|
||||
session = session.Where("name like ?", "%"+name+"%")
|
||||
if query != "" {
|
||||
arr := strings.Fields(query)
|
||||
for i := 0; i < len(arr); i++ {
|
||||
qarg := "%" + arr[i] + "%"
|
||||
session = session.Where("name like ? or tags like ?", qarg, qarg)
|
||||
}
|
||||
}
|
||||
|
||||
var lst []*BuiltinPayload
|
||||
err := session.Limit(limit).Offset(offset).Find(&lst).Error
|
||||
err := session.Find(&lst).Error
|
||||
return lst, err
|
||||
}
|
||||
|
||||
// BuiltinPayloadCates returns the distinct cate values of the BuiltinPayload
// rows whose type equals typ and whose component equals component.
// The slice is returned together with any error reported by the query; no
// explicit ordering is requested here.
|
||||
func BuiltinPayloadCates(ctx *ctx.Context, typ, component string) ([]string, error) {
|
||||
var cates []string
|
||||
err := DB(ctx).Model(new(BuiltinPayload)).Where("type = ? and component = ?", typ, component).Distinct("cate").Pluck("cate", &cates).Error
|
||||
return cates, err
|
||||
}
|
||||
|
||||
// BuiltinPayloadComponents looks up the distinct component values of the
// BuiltinPayload rows whose type equals typ and whose cate equals cate, and
// returns only the first one.
// It returns "" with a nil error when no row matches, and "" with the query
// error when the lookup fails. No explicit ordering is requested here, so
// which component comes first is left to the database.
|
||||
func BuiltinPayloadComponents(ctx *ctx.Context, typ, cate string) (string, error) {
|
||||
var components []string
|
||||
err := DB(ctx).Model(new(BuiltinPayload)).Where("type = ? and cate = ?", typ, cate).Distinct("component").Pluck("component", &components).Error
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
|
||||
if len(components) == 0 {
|
||||
return "", nil
|
||||
}
|
||||
return components[0], nil
|
||||
}
|
||||
|
||||