Compare commits

...

11 Commits

Author SHA1 Message Date
kongfei
ab83ded038 build arm64 artifacts 2022-07-07 14:15:30 +08:00
kongfei
51f82dc476 auto release with github action 2022-07-07 13:52:20 +08:00
Yening Qin
315e0ef903 fix: get clusters by api (#1030) 2022-07-07 12:29:35 +08:00
Ulric Qin
98d5dfff8e add namespace and subsystem prefix for metrics 2022-07-07 12:23:06 +08:00
Ulric Qin
6b4705608b add forward stat 2022-07-07 12:13:45 +08:00
Ulric Qin
5907817cba n9e-server: add http request stat 2022-07-07 10:52:04 +08:00
Ulric Qin
aa97ac54d1 register GaugeSampleQueueSize 2022-07-07 10:17:15 +08:00
Ulric Qin
8fe548aba9 rename mapkey alertname to rulename 2022-07-07 10:06:34 +08:00
Tripitakav
18a9288b75 fix mute bug (#1025)
Co-authored-by: tripitakav <chengzhi.shang@longbridge.sg>
2022-07-07 10:05:39 +08:00
ulricqin
fe82886f09 report sample queue size (#1027)
* report sample queue size

* report sample channel size
2022-07-07 10:00:08 +08:00
xtan
32e6993eea fix: fix event api for service (#1026)
Co-authored-by: tanxiao <tanxiao@asiainfo.com>
2022-07-07 09:58:05 +08:00
10 changed files with 217 additions and 30 deletions

View File

@@ -1,26 +1,32 @@
name: Go
name: Release
on:
push:
branches: [ main ]
pull_request:
branches: [ main ]
tags:
- 'v*'
env:
GO_VERSION: 1.18
jobs:
build:
name: Build
goreleaser:
runs-on: ubuntu-latest
steps:
- name: Set up Go 1.17
uses: actions/setup-go@v1
with:
go-version: 1.17
id: go
- name: Check out code into the Go module directory
uses: actions/checkout@v2
- name: Build
run: make
- name: Checkout Source Code
uses: actions/checkout@v3
with:
fetch-depth: 0
- name: Setup Go Environment
uses: actions/setup-go@v3
with:
go-version: ${{ env.GO_VERSION }}
- uses: docker/login-action@v2
with:
username: ${{ secrets.DOCKERHUB_USERNAME }}
password: ${{ secrets.DOCKERHUB_TOKEN }}
- name: Run GoReleaser
uses: goreleaser/goreleaser-action@v3
with:
version: latest
args: release --rm-dist
env:
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}

88
.goreleaser.yaml Normal file
View File

@@ -0,0 +1,88 @@
# GoReleaser configuration: builds the n9e binary for linux/amd64 and
# linux/arm64, packages it into archives, publishes a GitHub release and
# pushes per-architecture Docker images plus combined manifests.

# Commands run once before any build starts.
before:
hooks:
# You may remove this if you don't use go modules.
- go mod tidy
# Snapshot builds are named after the tag.
snapshot:
name_template: '{{ .Tag }}'
# All artifact checksums are written to a single file with this name.
checksum:
name_template: 'checksums.txt'
# No changelog is generated for the release.
changelog:
skip: true
# A single build definition, referenced by id from the archive and docker
# sections below.
builds:
- id: build
hooks:
# Runs ./fe.sh before the build.
pre:
- ./fe.sh
main: ./src/
binary: n9e
env:
- CGO_ENABLED=0
goos:
- linux
goarch:
- amd64
- arm64
# -s -w strips symbol and debug information; -X sets the version variable to
# "<tag>-<commit>" at link time.
ldflags:
- -s -w
- -X github.com/didi/nightingale/v5/src/pkg/version.VERSION={{ .Tag }}-{{.Commit}}
# Release archives: one tar.gz per OS/architecture, containing the binary and
# the docker/, etc/ and pub/ files at the top level of the archive.
archives:
- id: n9e
builds:
- build
format: tar.gz
# NOTE(review): the build above only targets linux, so this windows override
# looks unused - confirm before relying on it.
format_overrides:
- goos: windows
format: zip
name_template: "n9e-v{{ .Version }}-{{ .Os }}-{{ .Arch }}"
wrap_in_directory: false
files:
- docker/*
- etc/*
- pub/*
# The GitHub repository that receives the release.
release:
github:
owner: ccfos
name: nightingale
name_template: "v{{ .Version }}"
# One Docker image per architecture, both built with buildx from the same
# Dockerfile, with the pub directory added to the build context.
dockers:
- image_templates:
- flashcatcloud/nightingale:{{ .Version }}-amd64
goos: linux
goarch: amd64
ids:
- build
dockerfile: docker/Dockerfile.goreleaser
extra_files:
- pub
use: buildx
build_flag_templates:
- "--platform=linux/amd64"
- image_templates:
- flashcatcloud/nightingale:{{ .Version }}-arm64v8
goos: linux
goarch: arm64
ids:
- build
dockerfile: docker/Dockerfile.goreleaser
extra_files:
- pub
use: buildx
build_flag_templates:
- "--platform=linux/arm64/v8"
# Manifests that group the two per-architecture images under one name: one
# for the release version and one for "latest".
docker_manifests:
- name_template: flashcatcloud/nightingale:{{ .Version }}
image_templates:
- flashcatcloud/nightingale:{{ .Version }}-amd64
- flashcatcloud/nightingale:{{ .Version }}-arm64v8
- name_template: flashcatcloud/nightingale:latest
image_templates:
- flashcatcloud/nightingale:{{ .Version }}-amd64
- flashcatcloud/nightingale:{{ .Version }}-arm64v8

View File

@@ -0,0 +1,14 @@
# Image that packages a prebuilt n9e binary together with the pub directory.
# Both are expected in the build context; nothing is compiled here.
#
# NOTE(review): --platform=$BUILDPLATFORM selects the base image for the
# platform of the machine running the build, not for the target platform.
# Confirm this is intended when the image is built for a different
# architecture than the build host.
FROM --platform=$BUILDPLATFORM python:2
WORKDIR /app
# Copy the n9e binary from the build context into /app.
ADD n9e /app
# Download the "wait" helper to /wait at build time.
# NOTE(review): fetched over plain HTTP with no checksum - confirm acceptable.
ADD http://download.flashcat.cloud/wait /wait
RUN mkdir -p /app/pub && chmod +x /wait
ADD pub /app/pub/
# Relative to WORKDIR, so this makes /app/n9e executable.
RUN chmod +x n9e
EXPOSE 19000
EXPOSE 18000
# The default command only passes -h; presumably the real arguments are
# supplied when the container is started - TODO confirm.
CMD ["/app/n9e", "-h"]

8
fe.sh Executable file
View File

@@ -0,0 +1,8 @@
#!/bin/bash
# Fetch the newest n9e front-end (n9e/fe-v5) release tarball from GitHub and
# unpack it into the current directory.

# Stop at the first failing command or unset variable so that a broken
# download aborts the caller instead of silently continuing.
# pipefail is deliberately not enabled: awk exits after the first match, which
# can close the pipe while curl is still writing.
set -eu

# The latest-release JSON contains a "tag_name" field; splitting that line on
# double quotes leaves the tag itself in field 4. -f makes curl produce no
# output on an HTTP error, which the emptiness check below then catches.
TAG=$(curl -fsX GET https://api.github.com/repos/n9e/fe-v5/releases/latest | awk '/tag_name/{print $4;exit}' FS='[""]')
if [ -z "${TAG}" ]; then
    echo "fe.sh: unable to determine the latest n9e/fe-v5 release tag" >&2
    exit 1
fi

# Drop only the leading "v" (v5.9.1 -> 5.9.1). The previous sed 's/v//g'
# removed every "v" in the tag, which mangles tags such as "v5.9.1-dev".
VERSION=${TAG#v}
ARCHIVE="n9e-fe-${VERSION}.tar.gz"

# -f makes curl fail on an HTTP error instead of saving the error page as the
# archive; -L follows redirects.
curl -fL -o "${ARCHIVE}" "https://github.com/n9e/fe-v5/releases/download/${TAG}/${ARCHIVE}"
tar zxvf "${ARCHIVE}"

View File

@@ -30,8 +30,9 @@ func matchMute(event *models.AlertCurEvent, mute *models.AlertMute, clock ...int
if ts < mute.Btime || ts > mute.Etime {
return false
}
return matchTags(event.TagsMap, mute.ITags)
tg := event.TagsMap
tg["rulename"] = event.RuleName
return matchTags(tg, mute.ITags)
}
func matchTags(eventTagsMap map[string]string, itags []models.TagFilter) bool {

View File

@@ -314,6 +314,7 @@ func (r RuleEval) judge(vectors []conv.Vector) {
TriggerTime: vectors[i].Timestamp,
TagsMap: tagsMap,
GroupId: r.rule.GroupId,
RuleName: r.rule.Name,
}
bg := memsto.BusiGroupCache.GetByBusiGroupId(r.rule.GroupId)
@@ -321,7 +322,7 @@ func (r RuleEval) judge(vectors []conv.Vector) {
event.GroupName = bg.Name
}
// isMuted only need TriggerTime and TagsMap
// isMuted only needs TriggerTime, RuleName and TagsMap
if isMuted(event) {
logger.Infof("event_muted: rule_id=%d %s", r.rule.Id, vectors[i].Key)
continue

View File

@@ -4,6 +4,7 @@ import (
"fmt"
"os"
"strings"
"time"
"github.com/gin-contrib/pprof"
"github.com/gin-gonic/gin"
@@ -13,6 +14,8 @@ import (
"github.com/didi/nightingale/v5/src/pkg/aop"
"github.com/didi/nightingale/v5/src/server/config"
"github.com/didi/nightingale/v5/src/server/naming"
promstat "github.com/didi/nightingale/v5/src/server/stat"
)
func New(version string) *gin.Engine {
@@ -66,7 +69,7 @@ func configRoute(r *gin.Engine, version string) {
})
// use apiKey not basic auth
r.POST("/datadog/api/v1/series", datadogSeries)
r.POST("/datadog/api/v1/series", stat(), datadogSeries)
r.POST("/datadog/api/v1/check_run", datadogCheckRun)
r.GET("/datadog/api/v1/validate", datadogValidate)
r.POST("/datadog/api/v1/metadata", datadogMetadata)
@@ -77,10 +80,10 @@ func configRoute(r *gin.Engine, version string) {
r.Use(auth)
}
r.POST("/opentsdb/put", handleOpenTSDB)
r.POST("/openfalcon/push", falconPush)
r.POST("/prometheus/v1/write", remoteWrite)
r.POST("/prometheus/v1/query", queryPromql)
r.POST("/opentsdb/put", stat(), handleOpenTSDB)
r.POST("/openfalcon/push", stat(), falconPush)
r.POST("/prometheus/v1/write", stat(), remoteWrite)
r.POST("/prometheus/v1/query", stat(), queryPromql)
r.GET("/memory/alert-rule", alertRuleGet)
r.GET("/memory/idents", identsGets)
@@ -95,3 +98,16 @@ func configRoute(r *gin.Engine, version string) {
service := r.Group("/v1/n9e")
service.POST("/event", pushEventToQueue)
}
// stat returns a gin middleware that measures how long the rest of the
// handler chain takes and records it, in seconds, in promstat.RequestDuration.
// The observation is labelled with the response status code, the matched
// route path and the HTTP method, in that order.
func stat() gin.HandlerFunc {
	return func(c *gin.Context) {
		begin := time.Now()

		// Run the remaining handlers first so the final status code is known.
		c.Next()

		status := fmt.Sprintf("%d", c.Writer.Status())
		promstat.RequestDuration.
			WithLabelValues(status, c.FullPath(), c.Request.Method).
			Observe(time.Since(begin).Seconds())
	}
}

View File

@@ -49,6 +49,36 @@ var (
Name: "alert_queue_size",
Help: "The size of alert queue.",
}, []string{"cluster"})
// 数据转发队列,各个队列的长度
GaugeSampleQueueSize = prometheus.NewGaugeVec(prometheus.GaugeOpts{
Namespace: namespace,
Subsystem: subsystem,
Name: "sample_queue_size",
Help: "The size of sample queue.",
}, []string{"cluster", "channel_number"})
// 一些重要的请求,比如接收数据的请求,应该统计一下延迟情况
RequestDuration = prometheus.NewHistogramVec(
prometheus.HistogramOpts{
Namespace: namespace,
Subsystem: subsystem,
Buckets: []float64{.01, .1, 1},
Name: "http_request_duration_seconds",
Help: "HTTP request latencies in seconds.",
}, []string{"code", "path", "method"},
)
// 发往后端TSDB延迟如何
ForwardDuration = prometheus.NewHistogramVec(
prometheus.HistogramOpts{
Namespace: namespace,
Subsystem: subsystem,
Buckets: []float64{.1, 1, 10},
Name: "forward_duration_seconds",
Help: "Forward samples to TSDB. latencies in seconds.",
}, []string{"cluster", "channel_number"},
)
)
func Init() {
@@ -59,5 +89,8 @@ func Init() {
CounterSampleTotal,
CounterAlertsTotal,
GaugeAlertQueueSize,
GaugeSampleQueueSize,
RequestDuration,
ForwardDuration,
)
}

View File

@@ -16,6 +16,8 @@ import (
"github.com/prometheus/client_golang/api"
"github.com/prometheus/prometheus/prompb"
"github.com/toolkits/pkg/logger"
promstat "github.com/didi/nightingale/v5/src/server/stat"
)
type WriterType struct {
@@ -153,6 +155,11 @@ func (ws *WritersType) StartConsumer(index int, ch chan *prompb.TimeSeries) {
// post post series to TSDB
// @Author: quzhihao
func (ws *WritersType) post(index int, series []*prompb.TimeSeries) {
start := time.Now()
defer func() {
promstat.ForwardDuration.WithLabelValues(config.C.ClusterName, fmt.Sprint(index)).Observe(time.Since(start).Seconds())
}()
header := map[string]string{"hash": fmt.Sprintf("%s-%d", config.C.Heartbeat.Endpoint, index)}
if len(ws.backends) == 1 {
for key := range ws.backends {
@@ -192,6 +199,8 @@ func Init(opts []config.WriterOptions, globalOpt config.WriterGlobalOpt) error {
go Writers.StartConsumer(i, Writers.chans[i])
}
go reportChanSize()
for i := 0; i < len(opts); i++ {
cli, err := api.NewClient(api.Config{
Address: opts[i].Url,
@@ -226,3 +235,13 @@ func Init(opts []config.WriterOptions, globalOpt config.WriterGlobalOpt) error {
return nil
}
// reportChanSize never returns. After each three-second sleep it walks
// Writers.chans and sets promstat.GaugeSampleQueueSize to the current length
// of every channel, labelled with the configured cluster name and the
// channel's index.
func reportChanSize() {
	const interval = 3 * time.Second

	for {
		time.Sleep(interval)

		for idx, ch := range Writers.chans {
			gauge := promstat.GaugeSampleQueueSize.WithLabelValues(config.C.ClusterName, fmt.Sprint(idx))
			gauge.Set(float64(len(ch)))
		}
	}
}

View File

@@ -87,9 +87,9 @@ type DSReply struct {
Settings struct {
PrometheusAddr string `json:"prometheus.addr"`
PrometheusBasic struct {
PrometheusUser string `json:"promethues.user"`
PrometheusPass string `json:"promethues.password"`
} `json:"promethues.basic"`
PrometheusUser string `json:"prometheus.user"`
PrometheusPass string `json:"prometheus.password"`
} `json:"prometheus.basic"`
PrometheusTimeout int64 `json:"prometheus.timeout"`
} `json:"settings,omitempty"`
} `json:"items"`
@@ -137,6 +137,7 @@ func loadClustersFromAPI() {
logger.Errorf("read response body of %s fail: %v", url, err)
continue
}
logger.Debugf("curl %s success, response: %s", url, string(jsonBytes))
err = json.Unmarshal(jsonBytes, &reply)
if err != nil {