mirror of
https://github.com/ccfos/nightingale.git
synced 2026-03-04 15:08:52 +00:00
Compare commits
17 Commits
docker_rel
...
v5
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
11152b447d | ||
|
|
9ffaefead2 | ||
|
|
307306ca8a | ||
|
|
5b5ef8346b | ||
|
|
b7cc7d6065 | ||
|
|
c77c9de5a7 | ||
|
|
52e0ba2eae | ||
|
|
9957c4e42a | ||
|
|
68ea466f59 | ||
|
|
41adc6f586 | ||
|
|
aa6daffe7b | ||
|
|
f0c28bb271 | ||
|
|
707a35bc06 | ||
|
|
88f1645d3a | ||
|
|
d8255d0cd3 | ||
|
|
fcc26f6410 | ||
|
|
6ff112f5da |
3
.gitignore
vendored
3
.gitignore
vendored
@@ -41,7 +41,6 @@ _test
|
||||
/docker/pub
|
||||
/docker/n9e
|
||||
/docker/mysqldata
|
||||
/etc.local
|
||||
|
||||
.alerts
|
||||
.idea
|
||||
@@ -53,4 +52,4 @@ _test
|
||||
queries.active
|
||||
|
||||
/n9e-*
|
||||
n9e.sql
|
||||
|
||||
|
||||
@@ -15,7 +15,7 @@ builds:
|
||||
hooks:
|
||||
pre:
|
||||
- ./fe.sh
|
||||
main: ./cmd/center/
|
||||
main: ./src/
|
||||
binary: n9e
|
||||
env:
|
||||
- CGO_ENABLED=0
|
||||
@@ -26,54 +26,12 @@ builds:
|
||||
- arm64
|
||||
ldflags:
|
||||
- -s -w
|
||||
- -X github.com/ccfos/nightingale/v6/pkg/version.Version={{ .Tag }}-{{.Commit}}
|
||||
- id: build-cli
|
||||
main: ./cmd/cli/
|
||||
binary: n9e-cli
|
||||
env:
|
||||
- CGO_ENABLED=0
|
||||
goos:
|
||||
- linux
|
||||
goarch:
|
||||
- amd64
|
||||
- arm64
|
||||
ldflags:
|
||||
- -s -w
|
||||
- -X github.com/ccfos/nightingale/v6/pkg/version.Version={{ .Tag }}-{{.Commit}}
|
||||
- id: build-alert
|
||||
main: ./cmd/alert/
|
||||
binary: n9e-alert
|
||||
env:
|
||||
- CGO_ENABLED=0
|
||||
goos:
|
||||
- linux
|
||||
goarch:
|
||||
- amd64
|
||||
- arm64
|
||||
ldflags:
|
||||
- -s -w
|
||||
- -X github.com/ccfos/nightingale/v6/pkg/version.Version={{ .Tag }}-{{.Commit}}
|
||||
- id: build-pushgw
|
||||
main: ./cmd/pushgw/
|
||||
binary: n9e-pushgw
|
||||
env:
|
||||
- CGO_ENABLED=0
|
||||
goos:
|
||||
- linux
|
||||
goarch:
|
||||
- amd64
|
||||
- arm64
|
||||
ldflags:
|
||||
- -s -w
|
||||
- -X github.com/ccfos/nightingale/v6/pkg/version.Version={{ .Tag }}-{{.Commit}}
|
||||
- -X github.com/didi/nightingale/v5/src/pkg/version.VERSION={{ .Tag }}-{{.Commit}}
|
||||
|
||||
archives:
|
||||
- id: n9e
|
||||
builds:
|
||||
- build
|
||||
- build-cli
|
||||
- build-alert
|
||||
- build-pushgw
|
||||
format: tar.gz
|
||||
format_overrides:
|
||||
- goos: windows
|
||||
@@ -84,9 +42,6 @@ archives:
|
||||
- docker/*
|
||||
- etc/*
|
||||
- pub/*
|
||||
- integrations/*
|
||||
- cli/*
|
||||
- n9e.sql
|
||||
|
||||
release:
|
||||
github:
|
||||
@@ -104,8 +59,6 @@ dockers:
|
||||
dockerfile: docker/Dockerfile.goreleaser
|
||||
extra_files:
|
||||
- pub
|
||||
- etc
|
||||
- integrations
|
||||
use: buildx
|
||||
build_flag_templates:
|
||||
- "--platform=linux/amd64"
|
||||
@@ -115,11 +68,9 @@ dockers:
|
||||
goarch: arm64
|
||||
ids:
|
||||
- build
|
||||
dockerfile: docker/Dockerfile.goreleaser.arm64
|
||||
dockerfile: docker/Dockerfile.goreleaser
|
||||
extra_files:
|
||||
- pub
|
||||
- etc
|
||||
- integrations
|
||||
use: buildx
|
||||
build_flag_templates:
|
||||
- "--platform=linux/arm64/v8"
|
||||
|
||||
2
LICENSE
2
LICENSE
@@ -430,4 +430,4 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
|
||||
See the License for the specific language governing permissions and
|
||||
|
||||
limitations under the License.
|
||||
limitations under the License.
|
||||
|
||||
47
Makefile
47
Makefile
@@ -1,33 +1,50 @@
|
||||
.PHONY: start build
|
||||
|
||||
NOW = $(shell date -u '+%Y%m%d%I%M%S')
|
||||
|
||||
|
||||
APP = n9e
|
||||
SERVER_BIN = $(APP)
|
||||
ROOT:=$(shell pwd -P)
|
||||
GIT_COMMIT:=$(shell git --work-tree ${ROOT} rev-parse 'HEAD^{commit}')
|
||||
_GIT_VERSION:=$(shell git --work-tree ${ROOT} describe --tags --abbrev=14 "${GIT_COMMIT}^{commit}" 2>/dev/null)
|
||||
TAG=$(shell echo "${_GIT_VERSION}" | awk -F"-" '{print $$1}')
|
||||
RELEASE_VERSION:="$(TAG)-$(GIT_COMMIT)"
|
||||
|
||||
# RELEASE_ROOT = release
|
||||
# RELEASE_SERVER = release/${APP}
|
||||
# GIT_COUNT = $(shell git rev-list --all --count)
|
||||
# GIT_HASH = $(shell git rev-parse --short HEAD)
|
||||
# RELEASE_TAG = $(RELEASE_VERSION).$(GIT_COUNT).$(GIT_HASH)
|
||||
|
||||
all: build
|
||||
|
||||
build:
|
||||
go build -ldflags "-w -s -X github.com/ccfos/nightingale/v6/pkg/version.Version=$(RELEASE_VERSION)" -o n9e ./cmd/center/main.go
|
||||
go build -ldflags "-w -s -X github.com/didi/nightingale/v5/src/pkg/version.VERSION=$(RELEASE_VERSION)" -o $(SERVER_BIN) ./src
|
||||
|
||||
build-alert:
|
||||
go build -ldflags "-w -s -X github.com/ccfos/nightingale/v6/pkg/version.Version=$(RELEASE_VERSION)" -o n9e-alert ./cmd/alert/main.go
|
||||
build-linux:
|
||||
GOOS=linux GOARCH=amd64 go build -ldflags "-w -s -X github.com/didi/nightingale/v5/src/pkg/version.VERSION=$(RELEASE_VERSION)" -o $(SERVER_BIN) ./src
|
||||
|
||||
build-pushgw:
|
||||
go build -ldflags "-w -s -X github.com/ccfos/nightingale/v6/pkg/version.Version=$(RELEASE_VERSION)" -o n9e-pushgw ./cmd/pushgw/main.go
|
||||
# start:
|
||||
# @go run -ldflags "-X main.VERSION=$(RELEASE_TAG)" ./cmd/${APP}/main.go web -c ./configs/config.toml -m ./configs/model.conf --menu ./configs/menu.yaml
|
||||
run_webapi:
|
||||
nohup ./n9e webapi > webapi.log 2>&1 &
|
||||
|
||||
build-cli:
|
||||
go build -ldflags "-w -s -X github.com/ccfos/nightingale/v6/pkg/version.Version=$(RELEASE_VERSION)" -o n9e-cli ./cmd/cli/main.go
|
||||
run_server:
|
||||
nohup ./n9e server > server.log 2>&1 &
|
||||
|
||||
run:
|
||||
nohup ./n9e > n9e.log 2>&1 &
|
||||
# swagger:
|
||||
# @swag init --parseDependency --generalInfo ./cmd/${APP}/main.go --output ./internal/app/swagger
|
||||
|
||||
run_alert:
|
||||
nohup ./n9e-alert > n9e-alert.log 2>&1 &
|
||||
# wire:
|
||||
# @wire gen ./internal/app
|
||||
|
||||
run_pushgw:
|
||||
nohup ./n9e-pushgw > n9e-pushgw.log 2>&1 &
|
||||
# test:
|
||||
# cd ./internal/app/test && go test -v
|
||||
|
||||
release:
|
||||
goreleaser --skip-validate --skip-publish --snapshot
|
||||
# clean:
|
||||
# rm -rf data release $(SERVER_BIN) internal/app/test/data cmd/${APP}/data
|
||||
|
||||
pack: build
|
||||
rm -rf $(APP)-$(RELEASE_VERSION).tar.gz
|
||||
tar -zcvf $(APP)-$(RELEASE_VERSION).tar.gz docker etc $(SERVER_BIN) pub/font pub/index.html pub/assets pub/image
|
||||
|
||||
106
README.md
106
README.md
@@ -15,29 +15,26 @@
|
||||
<img alt="GitHub forks" src="https://img.shields.io/github/forks/ccfos/nightingale">
|
||||
<a href="https://github.com/ccfos/nightingale/graphs/contributors">
|
||||
<img alt="GitHub contributors" src="https://img.shields.io/github/contributors-anon/ccfos/nightingale"/></a>
|
||||
<a href="https://n9e-talk.slack.com/">
|
||||
<img alt="GitHub contributors" src="https://img.shields.io/badge/join%20slack-%23n9e-brightgreen.svg"/></a>
|
||||
<img alt="License" src="https://img.shields.io/badge/license-Apache--2.0-blue"/>
|
||||
</p>
|
||||
<p align="center">
|
||||
<b>All-in-one</b> 的开源观测平台 <br/>
|
||||
<b>All-in-one</b> 的开源云原生监控系统 <br/>
|
||||
<b>开箱即用</b>,集数据采集、可视化、监控告警于一体 <br/>
|
||||
推荐升级您的 <b>Prometheus + AlertManager + Grafana + ELK + Jaeger</b> 组合方案到夜莺!
|
||||
推荐升级您的 <b>Prometheus + AlertManager + Grafana</b> 组合方案到夜莺!
|
||||
</p>
|
||||
|
||||
[English](./README_en.md) | [中文](./README.md)
|
||||
[English](./README_EN.md) | [中文](./README.md)
|
||||
|
||||
|
||||
|
||||
## 功能和特点
|
||||
## Highlighted Features
|
||||
|
||||
- **开箱即用**
|
||||
- 支持 Docker、Helm Chart、云服务等多种部署方式,集数据采集、监控告警、可视化为一体,内置多种监控仪表盘、快捷视图、告警规则模板,导入即可快速使用,**大幅降低云原生监控系统的建设成本、学习成本、使用成本**;
|
||||
- **专业告警**
|
||||
- 可视化的告警配置和管理,支持丰富的告警规则,提供屏蔽规则、订阅规则的配置能力,支持告警多种送达渠道,支持告警自愈、告警事件管理等;
|
||||
- **推荐您使用夜莺的同时,无缝搭配[FlashDuty](https://flashcat.cloud/product/flashcat-duty/),实现告警聚合收敛、认领、升级、排班、协同,让告警的触达既高效,又确保告警处理不遗漏、做到件件有回响**。
|
||||
- **云原生**
|
||||
- 以交钥匙的方式快速构建企业级的云原生监控体系,支持 [Categraf](https://github.com/flashcatcloud/categraf)、Telegraf、Grafana-agent 等多种采集器,支持 Prometheus、VictoriaMetrics、M3DB、ElasticSearch、Jaeger 等多种数据源,兼容支持导入 Grafana 仪表盘,**与云原生生态无缝集成**;
|
||||
- 以交钥匙的方式快速构建企业级的云原生监控体系,支持 [Categraf](https://github.com/flashcatcloud/categraf)、Telegraf、Grafana-agent 等多种采集器,支持 Prometheus、VictoriaMetrics、M3DB、ElasticSearch 等多种数据库,兼容支持导入 Grafana 仪表盘,**与云原生生态无缝集成**;
|
||||
- **高性能 高可用**
|
||||
- 得益于夜莺的多数据源管理引擎,和夜莺引擎侧优秀的架构设计,借助于高性能时序库,可以满足数亿时间线的采集、存储、告警分析场景,节省大量成本;
|
||||
- 夜莺监控组件均可水平扩展,无单点,已在上千家企业部署落地,经受了严苛的生产实践检验。众多互联网头部公司,夜莺集群机器达百台,处理数亿级时间线,重度使用夜莺监控;
|
||||
@@ -46,85 +43,67 @@
|
||||
- **开放社区**
|
||||
- 托管于[中国计算机学会开源发展委员会](https://www.ccf.org.cn/kyfzwyh/),有[快猫星云](https://flashcat.cloud)和众多公司的持续投入,和数千名社区用户的积极参与,以及夜莺监控项目清晰明确的定位,都保证了夜莺开源社区健康、长久的发展。活跃、专业的社区用户也在持续迭代和沉淀更多的最佳实践于产品中;
|
||||
|
||||
## 使用场景
|
||||
1. **如果您希望在一个平台中,统一管理和查看 Metrics、Logging、Tracing 数据,推荐你使用夜莺**:
|
||||
- 请参考阅读:[不止于监控,夜莺 V6 全新升级为开源观测平台](http://flashcat.cloud/blog/nightingale-v6-release/)
|
||||
2. **如果您在使用 Prometheus 过程中,有以下的一个或者多个需求场景,推荐您无缝升级到夜莺**:
|
||||
- Prometheus、Alertmanager、Grafana 等多个系统较为割裂,缺乏统一视图,无法开箱即用;
|
||||
- 通过修改配置文件来管理 Prometheus、Alertmanager 的方式,学习曲线大,协同有难度;
|
||||
- 数据量过大而无法扩展您的 Prometheus 集群;
|
||||
- 生产环境运行多套 Prometheus 集群,面临管理和使用成本高的问题;
|
||||
3. **如果您在使用 Zabbix,有以下的场景,推荐您升级到夜莺**:
|
||||
- 监控的数据量太大,希望有更好的扩展解决方案;
|
||||
- 学习曲线高,多人多团队模式下,希望有更好的协同使用效率;
|
||||
- 微服务和云原生架构下,监控数据的生命周期多变、监控数据维度基数高,Zabbix 数据模型不易适配;
|
||||
- 了解更多Zabbix和夜莺监控的对比,推荐您进一步阅读[Zabbix 和夜莺监控选型对比](https://flashcat.cloud/blog/zabbx-vs-nightingale/)
|
||||
4. **如果您在使用 [Open-Falcon](https://github.com/open-falcon/falcon-plus),我们推荐您升级到夜莺:**
|
||||
- 关于 Open-Falcon 和夜莺的详细介绍,请参考阅读:[云原生监控的十个特点和趋势](http://flashcat.cloud/blog/10-trends-of-cloudnative-monitoring/)
|
||||
- 监控系统和可观测平台的区别,请参考阅读:[从监控系统到可观测平台,Gap有多大
|
||||
](https://flashcat.cloud/blog/gap-of-monitoring-to-o11y/)
|
||||
5. **我们推荐您使用 [Categraf](https://github.com/flashcatcloud/categraf) 作为首选的监控数据采集器**:
|
||||
- [Categraf](https://github.com/flashcatcloud/categraf) 是夜莺监控的默认采集器,采用开放插件机制和 All-in-one 的设计理念,同时支持 metric、log、trace、event 的采集。Categraf 不仅可以采集 CPU、内存、网络等系统层面的指标,也集成了众多开源组件的采集能力,支持K8s生态。Categraf 内置了对应的仪表盘和告警规则,开箱即用。
|
||||
**如果您在使用 Prometheus 过程中,有以下的一个或者多个需求场景,推荐您无缝升级到夜莺**:
|
||||
|
||||
## 文档
|
||||
- Prometheus、Alertmanager、Grafana 等多个系统较为割裂,缺乏统一视图,无法开箱即用;
|
||||
- 通过修改配置文件来管理 Prometheus、Alertmanager 的方式,学习曲线大,协同有难度;
|
||||
- 数据量过大而无法扩展您的 Prometheus 集群;
|
||||
- 生产环境运行多套 Prometheus 集群,面临管理和使用成本高的问题;
|
||||
|
||||
[English Doc](https://n9e.github.io/) | [中文文档](https://flashcat.cloud/docs/)
|
||||
**如果您在使用 Zabbix,有以下的场景,推荐您升级到夜莺**:
|
||||
|
||||
## 产品示意图
|
||||
- 监控的数据量太大,希望有更好的扩展解决方案;
|
||||
- 学习曲线高,多人多团队模式下,希望有更好的协同使用效率;
|
||||
- 微服务和云原生架构下,监控数据的生命周期多变、监控数据维度基数高,Zabbix 数据模型不易适配;
|
||||
|
||||
> 了解更多Zabbix和夜莺监控的对比,推荐您进一步阅读[《Zabbix 和夜莺监控选型对比》](https://flashcat.cloud/blog/zabbx-vs-nightingale/)
|
||||
|
||||
**如果您在使用 [Open-Falcon](https://github.com/open-falcon/falcon-plus),我们推荐您升级到夜莺:**
|
||||
|
||||
- 关于 Open-Falcon 和夜莺的详细介绍,请参考阅读:[《云原生监控的十个特点和趋势》](http://flashcat.cloud/blog/10-trends-of-cloudnative-monitoring/)
|
||||
|
||||
**我们推荐您使用 [Categraf](https://github.com/flashcatcloud/categraf) 作为首选的监控数据采集器**:
|
||||
|
||||
- [Categraf](https://github.com/flashcatcloud/categraf) 是夜莺监控的默认采集器,采用开放插件机制和 All-in-one 的设计理念,同时支持 metric、log、trace、event 的采集。Categraf 不仅可以采集 CPU、内存、网络等系统层面的指标,也集成了众多开源组件的采集能力,支持K8s生态。Categraf 内置了对应的仪表盘和告警规则,开箱即用。
|
||||
|
||||
|
||||
## Getting Started
|
||||
|
||||
[国外文档](https://n9e.github.io/) | [国内文档](http://n9e.flashcat.cloud/)
|
||||
|
||||
## Screenshots
|
||||
|
||||
https://user-images.githubusercontent.com/792850/216888712-2565fcea-9df5-47bd-a49e-d60af9bd76e8.mp4
|
||||
|
||||
## 夜莺架构
|
||||
## Architecture
|
||||
|
||||
<img src="doc/img/arch-product.png" width="600">
|
||||
|
||||
夜莺监控可以接收各种采集器上报的监控数据(比如 [Categraf](https://github.com/flashcatcloud/categraf)、telegraf、grafana-agent、Prometheus),并写入多种流行的时序数据库中(可以支持Prometheus、M3DB、VictoriaMetrics、Thanos、TDEngine等),提供告警规则、屏蔽规则、订阅规则的配置能力,提供监控数据的查看能力,提供告警自愈机制(告警触发之后自动回调某个webhook地址或者执行某个脚本),提供历史告警事件的存储管理、分组查看的能力。
|
||||
|
||||
### 中心汇聚式部署方案
|
||||
<img src="doc/img/arch-system.png" width="600">
|
||||
|
||||

|
||||
夜莺 v5 版本的设计非常简单,核心是 server 和 webapi 两个模块,webapi 无状态,放到中心端,承接前端请求,将用户配置写入数据库;server 是告警引擎和数据转发模块,一般随着时序库走,一个时序库就对应一套 server,每套 server 可以只用一个实例,也可以多个实例组成集群,server 可以接收 Categraf、Telegraf、Grafana-Agent、Datadog-Agent、Falcon-Plugins 上报的数据,写入后端时序库,周期性从数据库同步告警规则,然后查询时序库做告警判断。每套 server 依赖一个 redis。
|
||||
|
||||
夜莺只有一个模块,就是 n9e,可以部署多个 n9e 实例组成集群,n9e 依赖 2 个存储,数据库、Redis,数据库可以使用 MySQL 或 Postgres,自己按需选用。
|
||||
|
||||
n9e 提供的是 HTTP 接口,前面负载均衡可以是 4 层的,也可以是 7 层的。一般就选用 Nginx 就可以了。
|
||||
|
||||
n9e 这个模块接收到数据之后,需要转发给后端的时序库,相关配置是:
|
||||
|
||||
```toml
|
||||
[Pushgw]
|
||||
LabelRewrite = true
|
||||
[[Pushgw.Writers]]
|
||||
Url = "http://127.0.0.1:9090/api/v1/write"
|
||||
```
|
||||
|
||||
> 注意:虽然数据源可以在页面配置了,但是上报转发链路,还是需要在配置文件指定。
|
||||
|
||||
所有机房的 agent( 比如 Categraf、Telegraf、 Grafana-agent、Datadog-agent ),都直接推数据给 n9e,这个架构最为简单,维护成本最低。当然,前提是要求机房之间网络链路比较好,一般有专线。如果网络链路不好,则要使用下面的部署方式了。
|
||||
|
||||
### 边缘下沉式混杂部署方案
|
||||
|
||||

|
||||
|
||||
这个图尝试解释 3 种不同的情形,比如 A 机房和中心网络链路很好,Categraf 可以直接汇报数据给中心 n9e 模块,另一个机房网络链路不好,就需要把时序库下沉部署,时序库下沉了,对应的告警引擎和转发网关也都要跟随下沉,这样数据不会跨机房传输,比较稳定。但是心跳还是需要往中心心跳,要不然在对象列表里看不到机器的 CPU、内存使用率。还有的时候,可能是接入的一个已有的 Prometheus,数据采集没有走 Categraf,那此时只需要把 Prometheus 作为数据源接入夜莺即可,可以在夜莺里看图、配告警规则,但是就是在对象列表里看不到,也不能使用告警自愈的功能,问题也不大,核心功能都不受影响。
|
||||
|
||||
边缘机房,下沉部署时序库、告警引擎、转发网关的时候,要注意,告警引擎需要依赖数据库,因为要同步告警规则,转发网关也要依赖数据库,因为要注册对象到数据库里去,需要打通相关网络,告警引擎和转发网关都不用Redis,所以无需为 Redis 打通网络。
|
||||
|
||||
### VictoriaMetrics 集群架构
|
||||
<img src="doc/img/install-vm.png" width="600">
|
||||
|
||||
如果单机版本的时序数据库(比如 Prometheus) 性能有瓶颈或容灾较差,我们推荐使用 [VictoriaMetrics](https://github.com/VictoriaMetrics/VictoriaMetrics),VictoriaMetrics 架构较为简单,性能优异,易于部署和运维,架构图如上。VictoriaMetrics 更详尽的文档,还请参考其[官网](https://victoriametrics.com/)。
|
||||
|
||||
## 夜莺社区
|
||||
|
||||
## Community
|
||||
|
||||
开源项目要更有生命力,离不开开放的治理架构和源源不断的开发者和用户共同参与,我们致力于建立开放、中立的开源治理架构,吸纳更多来自企业、高校等各方面对云原生监控感兴趣、有热情的开发者,一起打造有活力的夜莺开源社区。关于《夜莺开源项目和社区治理架构(草案)》,请查阅 [COMMUNITY GOVERNANCE](./doc/community-governance.md).
|
||||
|
||||
**我们欢迎您以各种方式参与到夜莺开源项目和开源社区中来,工作包括不限于**:
|
||||
- 补充和完善文档 => [n9e.github.io](https://n9e.github.io/)
|
||||
- 分享您在使用夜莺监控过程中的最佳实践和经验心得 => [文章分享](https://flashcat.cloud/docs/content/flashcat-monitor/nightingale/share/)
|
||||
- 分享您在使用夜莺监控过程中的最佳实践和经验心得 => [文章分享](https://n9e.github.io/docs/prologue/share/)
|
||||
- 提交产品建议 =》 [github issue](https://github.com/ccfos/nightingale/issues/new?assignees=&labels=kind%2Ffeature&template=enhancement.md)
|
||||
- 提交代码,让夜莺监控更快、更稳、更好用 => [github pull request](https://github.com/didi/nightingale/pulls)
|
||||
|
||||
**尊重、认可和记录每一位贡献者的工作**是夜莺开源社区的第一指导原则,我们提倡**高效的提问**,这既是对开发者时间的尊重,也是对整个社区知识沉淀的贡献:
|
||||
- 提问之前请先查阅 [FAQ](https://www.gitlink.org.cn/ccfos/nightingale/wiki/faq)
|
||||
- 我们使用[论坛](https://answer.flashcat.cloud/)进行交流,有问题可以到这里搜索、提问
|
||||
- 我们使用[GitHub Discussions](https://github.com/ccfos/nightingale/discussions)作为交流论坛,有问题可以到这里搜索、提问
|
||||
- 我们也推荐你加入微信群,和其他夜莺用户交流经验 (请先加好友:[picobyte](https://www.gitlink.org.cn/UlricQin/gist/tree/master/self.jpeg) 备注:夜莺加群+姓名+公司)
|
||||
|
||||
|
||||
@@ -143,6 +122,7 @@ Url = "http://127.0.0.1:9090/api/v1/write"
|
||||
## License
|
||||
[Apache License V2.0](https://github.com/didi/nightingale/blob/main/LICENSE)
|
||||
|
||||
## 加入交流群
|
||||
## Contact Us
|
||||
推荐您关注夜莺监控公众号,及时获取相关产品和社区动态:
|
||||
|
||||
<img src="doc/img/wecom.png" width="120">
|
||||
<img src="doc/img/n9e-vx-new.png" width="120">
|
||||
|
||||
68
README_EN.md
Normal file
68
README_EN.md
Normal file
@@ -0,0 +1,68 @@
|
||||
<img src="doc/img/ccf-n9e.png" width="240">
|
||||
|
||||
Nightingale is an enterprise-level cloud-native monitoring system, which can be used as drop-in replacement of Prometheus for alerting and management.
|
||||
|
||||
[English](./README_EN.md) | [中文](./README.md)
|
||||
|
||||
## Introduction
|
||||
Nightingale is an cloud-native monitoring system by All-In-On design, support enterprise-class functional features with an out-of-the-box experience. We recommend upgrading your `Prometheus` + `AlertManager` + `Grafana` combo solution to Nightingale.
|
||||
|
||||
- **Multiple prometheus data sources management**: manage all alerts and dashboards in one centralized visually view;
|
||||
- **Out-of-the-box alert rule**: built-in multiple alert rules, reuse alert rules template by one-click import with detailed explanation of metrics;
|
||||
- **Multiple modes for visualizing data**: out-of-the-box dashboards, instance customize views, expression browser and Grafana integration;
|
||||
- **Multiple collection clients**: support using Promethues Exporter、Telegraf、Datadog Agent to collecting metrics;
|
||||
- **Integration of multiple storage**: support Prometheus, M3DB, VictoriaMetrics, Influxdb, TDEngine as storage solutions, and original support for PromQL;
|
||||
- **Fault self-healing**: support the ability to self-heal from failures by configuring webhook;
|
||||
|
||||
#### If you are using Prometheus and have one or more of the following requirement scenarios, it is recommended that you upgrade to Nightingale:
|
||||
|
||||
- Multiple systems such as Prometheus, Alertmanager, Grafana, etc. are fragmented and lack a unified view and cannot be used out of the box;
|
||||
- The way to manage Prometheus and Alertmanager by modifying configuration files has a big learning curve and is difficult to collaborate;
|
||||
- Too much data to scale-up your Prometheus cluster;
|
||||
- Multiple Prometheus clusters running in production environments, which faced high management and usage costs;
|
||||
|
||||
#### If you are using Zabbix and have the following scenarios, it is recommended that you upgrade to Nightingale:
|
||||
|
||||
- Monitoring too much data and wanting a better scalable solution;
|
||||
- A high learning curve and a desire for better efficiency of collaborative use in a multi-person, multi-team model;
|
||||
- Microservice and cloud-native architectures with variable monitoring data lifecycles and high monitoring data dimension bases, which are not easily adaptable to the Zabbix data model;
|
||||
|
||||
|
||||
#### If you are using [open-falcon](https://github.com/open-falcon/falcon-plus), we recommend you to upgrade to Nightingale:
|
||||
- For more information about open-falcon and Nightingale, please refer to read [Ten features and trends of cloud-native monitoring](https://mp.weixin.qq.com/s?__biz=MzkzNjI5OTM5Nw==&mid=2247483738&idx=1&sn=e8bdbb974a2cd003c1abcc2b5405dd18&chksm=c2a19fb0f5d616a63185cd79277a79a6b80118ef2185890d0683d2bb20451bd9303c78d083c5#rd)。
|
||||
|
||||
## Quickstart
|
||||
- [n9e.github.io/quickstart](https://n9e.github.io/docs/install/compose/)
|
||||
|
||||
## Documentation
|
||||
- [n9e.github.io](https://n9e.github.io/)
|
||||
|
||||
## Example of use
|
||||
|
||||
<img src="doc/img/intro.gif" width="680">
|
||||
|
||||
## System Architecture
|
||||
#### A typical Nightingale deployment architecture:
|
||||
<img src="doc/img/arch-system.png" width="680">
|
||||
|
||||
#### Typical deployment architecture using VictoriaMetrics as storage:
|
||||
<img src="doc/img/install-vm.png" width="680">
|
||||
|
||||
## Contact us and feedback questions
|
||||
- We recommend that you use [github issue](https://github.com/didi/nightingale/issues) as the preferred channel for issue feedback and requirement submission;
|
||||
- You can join our WeChat group
|
||||
|
||||
<img src="doc/img/n9e-vx-new.png" width="180">
|
||||
|
||||
|
||||
## Contributing
|
||||
We welcome your participation in the Nightingale open source project and open source community in a variety of ways:
|
||||
- Feedback on problems and bugs => [github issue](https://github.com/didi/nightingale/issues)
|
||||
- Additional and improved documentation => [n9e.github.io](https://n9e.github.io/)
|
||||
- Share your best practices and insights on using Nightingale => [User Story](https://github.com/didi/nightingale/issues/897)
|
||||
- Join our community events => [Nightingale wechat group](https://s3-gz01.didistatic.com/n9e-pub/image/n9e-wx.png)
|
||||
- Submit code to make Nightingale better =>[github PR](https://github.com/didi/nightingale/pulls)
|
||||
|
||||
|
||||
## License
|
||||
Nightingale with [Apache License V2.0](https://github.com/didi/nightingale/blob/main/LICENSE) open source license.
|
||||
104
README_en.md
104
README_en.md
@@ -1,104 +0,0 @@
|
||||
<p align="center">
|
||||
<a href="https://github.com/ccfos/nightingale">
|
||||
<img src="doc/img/nightingale_logo_h.png" alt="nightingale - cloud native monitoring" width="240" /></a>
|
||||
</p>
|
||||
|
||||
<p align="center">
|
||||
<img alt="GitHub latest release" src="https://img.shields.io/github/v/release/ccfos/nightingale"/>
|
||||
<a href="https://n9e.github.io">
|
||||
<img alt="Docs" src="https://img.shields.io/badge/docs-get%20started-brightgreen"/></a>
|
||||
<a href="https://hub.docker.com/u/flashcatcloud">
|
||||
<img alt="Docker pulls" src="https://img.shields.io/docker/pulls/flashcatcloud/nightingale"/></a>
|
||||
<img alt="GitHub Repo stars" src="https://img.shields.io/github/stars/ccfos/nightingale">
|
||||
<img alt="GitHub Repo issues" src="https://img.shields.io/github/issues/ccfos/nightingale">
|
||||
<img alt="GitHub Repo issues closed" src="https://img.shields.io/github/issues-closed/ccfos/nightingale">
|
||||
<img alt="GitHub forks" src="https://img.shields.io/github/forks/ccfos/nightingale">
|
||||
<a href="https://github.com/ccfos/nightingale/graphs/contributors">
|
||||
<img alt="GitHub contributors" src="https://img.shields.io/github/contributors-anon/ccfos/nightingale"/></a>
|
||||
<a href="https://n9e-talk.slack.com/">
|
||||
<img alt="GitHub contributors" src="https://img.shields.io/badge/join%20slack-%23n9e-brightgreen.svg"/></a>
|
||||
<img alt="License" src="https://img.shields.io/badge/license-Apache--2.0-blue"/>
|
||||
</p>
|
||||
<p align="center">
|
||||
An open-source cloud-native monitoring system that is <b>all-in-one</b> <br/>
|
||||
<b>Out-of-the-box</b>, it integrates data collection, visualization, and monitoring alert <br/>
|
||||
We recommend upgrading your <b>Prometheus + AlertManager + Grafana</b> combination to Nightingale!
|
||||
</p>
|
||||
|
||||
[English](./README.md) | [中文](./README_ZH.md)
|
||||
|
||||
|
||||
## Highlighted Features
|
||||
|
||||
- **Out-of-the-box**
|
||||
- Supports multiple deployment methods such as **Docker, Helm Chart, and cloud services**, integrates data collection, monitoring, and alerting into one system, and comes with various monitoring dashboards, quick views, and alert rule templates. **It greatly reduces the construction cost, learning cost, and usage cost of cloud-native monitoring systems**.
|
||||
- **Professional Alerting**
|
||||
- Provides visual alert configuration and management, supports various alert rules, offers the ability to configure silence and subscription rules, supports multiple alert delivery channels, and has features such as alert self-healing and event management.
|
||||
- **Cloud-Native**
|
||||
- Quickly builds an enterprise-level cloud-native monitoring system through a turnkey approach, supports multiple collectors such as [Categraf](https://github.com/flashcatcloud/categraf), Telegraf, and Grafana-agent, supports multiple data sources such as Prometheus, VictoriaMetrics, M3DB, ElasticSearch, and Jaeger, and is compatible with importing Grafana dashboards. **It seamlessly integrates with the cloud-native ecosystem**.
|
||||
- **High Performance and High Availability**
|
||||
- Due to the multi-data-source management engine of Nightingale and its excellent architecture design, and utilizing a high-performance time-series database, it can handle data collection, storage, and alert analysis scenarios with billions of time-series data, saving a lot of costs.
|
||||
- Nightingale components can be horizontally scaled with no single point of failure. It has been deployed in thousands of enterprises and tested in harsh production practices. Many leading Internet companies have used Nightingale for cluster machines with hundreds of nodes, processing billions of time-series data.
|
||||
- **Flexible Extension and Centralized Management**
|
||||
- Nightingale can be deployed on a 1-core 1G cloud host, deployed in a cluster of hundreds of machines, or run in Kubernetes. Time-series databases, alert engines, and other components can also be decentralized to various data centers and regions, balancing edge deployment with centralized management. **It solves the problem of data fragmentation and lack of unified views**.
|
||||
|
||||
|
||||
#### If you are using Prometheus and have one or more of the following requirement scenarios, it is recommended that you upgrade to Nightingale:
|
||||
|
||||
- Multiple systems such as Prometheus, Alertmanager, Grafana, etc. are fragmented and lack a unified view and cannot be used out of the box;
|
||||
- The way to manage Prometheus and Alertmanager by modifying configuration files has a big learning curve and is difficult to collaborate;
|
||||
- Too much data to scale-up your Prometheus cluster;
|
||||
- Multiple Prometheus clusters running in production environments, which faced high management and usage costs;
|
||||
|
||||
#### If you are using Zabbix and have the following scenarios, it is recommended that you upgrade to Nightingale:
|
||||
|
||||
- Monitoring too much data and wanting a better scalable solution;
|
||||
- A high learning curve and a desire for better efficiency of collaborative use in a multi-person, multi-team model;
|
||||
- Microservice and cloud-native architectures with variable monitoring data lifecycles and high monitoring data dimension bases, which are not easily adaptable to the Zabbix data model;
|
||||
|
||||
|
||||
#### If you are using [open-falcon](https://github.com/open-falcon/falcon-plus), we recommend you to upgrade to Nightingale:
|
||||
- For more information about open-falcon and Nightingale, please refer to read [Ten features and trends of cloud-native monitoring](https://mp.weixin.qq.com/s?__biz=MzkzNjI5OTM5Nw==&mid=2247483738&idx=1&sn=e8bdbb974a2cd003c1abcc2b5405dd18&chksm=c2a19fb0f5d616a63185cd79277a79a6b80118ef2185890d0683d2bb20451bd9303c78d083c5#rd)。
|
||||
|
||||
## Getting Started
|
||||
|
||||
[English Doc](https://n9e.github.io/) | [中文文档](http://n9e.flashcat.cloud/)
|
||||
|
||||
## Screenshots
|
||||
|
||||
https://user-images.githubusercontent.com/792850/216888712-2565fcea-9df5-47bd-a49e-d60af9bd76e8.mp4
|
||||
|
||||
## Architecture
|
||||
|
||||
<img src="doc/img/arch-product.png" width="600">
|
||||
|
||||
Nightingale monitoring can receive monitoring data reported by various collectors (such as [Categraf](https://github.com/flashcatcloud/categraf) , telegraf, grafana-agent, Prometheus, etc.) and write them to various popular time-series databases (such as Prometheus, M3DB, VictoriaMetrics, Thanos, TDEngine, etc.). It provides configuration capabilities for alert rules, silence rules, and subscription rules, as well as the ability to view monitoring data. It also provides automatic alarm self-healing mechanisms (such as automatically calling back to a webhook address or executing a script after an alarm is triggered), and the ability to store and manage historical alarm events and view them in groups.
|
||||
|
||||
If the performance of a standalone time-series database (such as Prometheus) has bottlenecks or poor disaster recovery, we recommend using [VictoriaMetrics](https://github.com/VictoriaMetrics/VictoriaMetrics). The VictoriaMetrics architecture is relatively simple, has excellent performance, and is easy to deploy and maintain. The architecture diagram is as shown above. For more detailed documentation on VictoriaMetrics, please refer to its [official website](https://victoriametrics.com/).
|
||||
|
||||
**We welcome you to participate in the Nightingale open-source project and community in various ways, including but not limited to**:
|
||||
- Adding and improving documentation => [n9e.github.io](https://n9e.github.io/)
|
||||
- Sharing your best practices and experience in using Nightingale monitoring => [Article sharing]((https://n9e.github.io/docs/prologue/share/))
|
||||
- Submitting product suggestions => [github issue](https://github.com/ccfos/nightingale/issues/new?assignees=&labels=kind%2Ffeature&template=enhancement.md)
|
||||
- Submitting code to make Nightingale monitoring faster, more stable, and easier to use => [github pull request](https://github.com/didi/nightingale/pulls)
|
||||
|
||||
|
||||
**Respecting, recognizing, and recording the work of every contributor** is the first guiding principle of the Nightingale open-source community. We advocate effective questioning, which not only respects the developer's time but also contributes to the accumulation of knowledge in the entire community
|
||||
- Before asking a question, please first refer to the [FAQ](https://www.gitlink.org.cn/ccfos/nightingale/wiki/faq)
|
||||
- We use [GitHub Discussions](https://github.com/ccfos/nightingale/discussions) as the communication forum. You can search and ask questions here.
|
||||
- We also recommend that you join ours [Slack channel](https://n9e-talk.slack.com/) to exchange experiences with other Nightingale users.
|
||||
|
||||
|
||||
## Who is using Nightingale
|
||||
You can register your usage and share your experience by posting on **[Who is Using Nightingale](https://github.com/ccfos/nightingale/issues/897)**.
|
||||
|
||||
## Stargazers over time
|
||||
[](https://starchart.cc/ccfos/nightingale)
|
||||
|
||||
## Contributors
|
||||
<a href="https://github.com/ccfos/nightingale/graphs/contributors">
|
||||
<img src="https://contrib.rocks/image?repo=ccfos/nightingale" />
|
||||
</a>
|
||||
|
||||
## License
|
||||
[Apache License V2.0](https://github.com/didi/nightingale/blob/main/LICENSE)
|
||||
@@ -1,73 +0,0 @@
|
||||
package aconf
|
||||
|
||||
import (
|
||||
"path"
|
||||
|
||||
"github.com/toolkits/pkg/runner"
|
||||
)
|
||||
|
||||
type Alert struct {
|
||||
EngineDelay int64
|
||||
Heartbeat HeartbeatConfig
|
||||
Alerting Alerting
|
||||
}
|
||||
|
||||
type SMTPConfig struct {
|
||||
Host string
|
||||
Port int
|
||||
User string
|
||||
Pass string
|
||||
From string
|
||||
InsecureSkipVerify bool
|
||||
Batch int
|
||||
}
|
||||
|
||||
type HeartbeatConfig struct {
|
||||
IP string
|
||||
Interval int64
|
||||
Endpoint string
|
||||
EngineName string
|
||||
}
|
||||
|
||||
type Alerting struct {
|
||||
Timeout int64
|
||||
TemplatesDir string
|
||||
NotifyConcurrency int
|
||||
}
|
||||
|
||||
type CallPlugin struct {
|
||||
Enable bool
|
||||
PluginPath string
|
||||
Caller string
|
||||
}
|
||||
|
||||
type RedisPub struct {
|
||||
Enable bool
|
||||
ChannelPrefix string
|
||||
ChannelKey string
|
||||
}
|
||||
|
||||
type Ibex struct {
|
||||
Address string
|
||||
BasicAuthUser string
|
||||
BasicAuthPass string
|
||||
Timeout int64
|
||||
}
|
||||
|
||||
func (a *Alert) PreCheck() {
|
||||
if a.Alerting.TemplatesDir == "" {
|
||||
a.Alerting.TemplatesDir = path.Join(runner.Cwd, "etc", "template")
|
||||
}
|
||||
|
||||
if a.Alerting.NotifyConcurrency == 0 {
|
||||
a.Alerting.NotifyConcurrency = 10
|
||||
}
|
||||
|
||||
if a.Heartbeat.Interval == 0 {
|
||||
a.Heartbeat.Interval = 1000
|
||||
}
|
||||
|
||||
if a.Heartbeat.EngineName == "" {
|
||||
a.Heartbeat.EngineName = "default"
|
||||
}
|
||||
}
|
||||
103
alert/alert.go
103
alert/alert.go
@@ -1,103 +0,0 @@
|
||||
package alert
|
||||
|
||||
import (
|
||||
"context"
|
||||
"fmt"
|
||||
|
||||
"github.com/ccfos/nightingale/v6/alert/aconf"
|
||||
"github.com/ccfos/nightingale/v6/alert/astats"
|
||||
"github.com/ccfos/nightingale/v6/alert/dispatch"
|
||||
"github.com/ccfos/nightingale/v6/alert/eval"
|
||||
"github.com/ccfos/nightingale/v6/alert/naming"
|
||||
"github.com/ccfos/nightingale/v6/alert/process"
|
||||
"github.com/ccfos/nightingale/v6/alert/queue"
|
||||
"github.com/ccfos/nightingale/v6/alert/record"
|
||||
"github.com/ccfos/nightingale/v6/alert/router"
|
||||
"github.com/ccfos/nightingale/v6/alert/sender"
|
||||
"github.com/ccfos/nightingale/v6/conf"
|
||||
"github.com/ccfos/nightingale/v6/memsto"
|
||||
"github.com/ccfos/nightingale/v6/models"
|
||||
"github.com/ccfos/nightingale/v6/pkg/ctx"
|
||||
"github.com/ccfos/nightingale/v6/pkg/httpx"
|
||||
"github.com/ccfos/nightingale/v6/pkg/logx"
|
||||
"github.com/ccfos/nightingale/v6/prom"
|
||||
"github.com/ccfos/nightingale/v6/pushgw/pconf"
|
||||
"github.com/ccfos/nightingale/v6/pushgw/writer"
|
||||
"github.com/ccfos/nightingale/v6/storage"
|
||||
)
|
||||
|
||||
func Initialize(configDir string, cryptoKey string) (func(), error) {
|
||||
config, err := conf.InitConfig(configDir, cryptoKey)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to init config: %v", err)
|
||||
}
|
||||
|
||||
logxClean, err := logx.Init(config.Log)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
db, err := storage.New(config.DB)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
ctx := ctx.NewContext(context.Background(), db)
|
||||
|
||||
redis, err := storage.NewRedis(config.Redis)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
syncStats := memsto.NewSyncStats()
|
||||
alertStats := astats.NewSyncStats()
|
||||
|
||||
targetCache := memsto.NewTargetCache(ctx, syncStats, redis)
|
||||
busiGroupCache := memsto.NewBusiGroupCache(ctx, syncStats)
|
||||
alertMuteCache := memsto.NewAlertMuteCache(ctx, syncStats)
|
||||
alertRuleCache := memsto.NewAlertRuleCache(ctx, syncStats)
|
||||
notifyConfigCache := memsto.NewNotifyConfigCache(ctx)
|
||||
dsCache := memsto.NewDatasourceCache(ctx, syncStats)
|
||||
|
||||
promClients := prom.NewPromClient(ctx, config.Alert.Heartbeat)
|
||||
|
||||
externalProcessors := process.NewExternalProcessors()
|
||||
|
||||
Start(config.Alert, config.Pushgw, syncStats, alertStats, externalProcessors, targetCache, busiGroupCache, alertMuteCache, alertRuleCache, notifyConfigCache, dsCache, ctx, promClients, false)
|
||||
|
||||
r := httpx.GinEngine(config.Global.RunMode, config.HTTP)
|
||||
rt := router.New(config.HTTP, config.Alert, alertMuteCache, targetCache, busiGroupCache, alertStats, ctx, externalProcessors)
|
||||
rt.Config(r)
|
||||
|
||||
httpClean := httpx.Init(config.HTTP, r)
|
||||
|
||||
return func() {
|
||||
logxClean()
|
||||
httpClean()
|
||||
}, nil
|
||||
}
|
||||
|
||||
func Start(alertc aconf.Alert, pushgwc pconf.Pushgw, syncStats *memsto.Stats, alertStats *astats.Stats, externalProcessors *process.ExternalProcessorsType, targetCache *memsto.TargetCacheType, busiGroupCache *memsto.BusiGroupCacheType,
|
||||
alertMuteCache *memsto.AlertMuteCacheType, alertRuleCache *memsto.AlertRuleCacheType, notifyConfigCache *memsto.NotifyConfigCacheType, datasourceCache *memsto.DatasourceCacheType, ctx *ctx.Context, promClients *prom.PromClientMap, isCenter bool) {
|
||||
userCache := memsto.NewUserCache(ctx, syncStats)
|
||||
userGroupCache := memsto.NewUserGroupCache(ctx, syncStats)
|
||||
alertSubscribeCache := memsto.NewAlertSubscribeCache(ctx, syncStats)
|
||||
recordingRuleCache := memsto.NewRecordingRuleCache(ctx, syncStats)
|
||||
|
||||
go models.InitNotifyConfig(ctx, alertc.Alerting.TemplatesDir)
|
||||
|
||||
naming := naming.NewNaming(ctx, alertc.Heartbeat, isCenter)
|
||||
|
||||
writers := writer.NewWriters(pushgwc)
|
||||
record.NewScheduler(alertc, recordingRuleCache, promClients, writers, alertStats)
|
||||
|
||||
eval.NewScheduler(isCenter, alertc, externalProcessors, alertRuleCache, targetCache, busiGroupCache, alertMuteCache, datasourceCache, promClients, naming, ctx, alertStats)
|
||||
|
||||
dp := dispatch.NewDispatch(alertRuleCache, userCache, userGroupCache, alertSubscribeCache, targetCache, notifyConfigCache, alertc.Alerting, ctx)
|
||||
consumer := dispatch.NewConsumer(alertc.Alerting, ctx, dp)
|
||||
|
||||
go dp.ReloadTpls()
|
||||
go consumer.LoopConsume()
|
||||
|
||||
go queue.ReportQueueSize(alertStats)
|
||||
go sender.StartEmailSender(notifyConfigCache.GetSMTP()) // todo
|
||||
}
|
||||
@@ -1,45 +0,0 @@
|
||||
package common
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
|
||||
"github.com/ccfos/nightingale/v6/models"
|
||||
)
|
||||
|
||||
func RuleKey(datasourceId, id int64) string {
|
||||
return fmt.Sprintf("alert-%d-%d", datasourceId, id)
|
||||
}
|
||||
|
||||
func MatchTags(eventTagsMap map[string]string, itags []models.TagFilter) bool {
|
||||
for _, filter := range itags {
|
||||
value, has := eventTagsMap[filter.Key]
|
||||
if !has {
|
||||
return false
|
||||
}
|
||||
if !matchTag(value, filter) {
|
||||
return false
|
||||
}
|
||||
}
|
||||
return true
|
||||
}
|
||||
|
||||
func matchTag(value string, filter models.TagFilter) bool {
|
||||
switch filter.Func {
|
||||
case "==":
|
||||
return filter.Value == value
|
||||
case "!=":
|
||||
return filter.Value != value
|
||||
case "in":
|
||||
_, has := filter.Vset[value]
|
||||
return has
|
||||
case "not in":
|
||||
_, has := filter.Vset[value]
|
||||
return !has
|
||||
case "=~":
|
||||
return filter.Regexp.MatchString(value)
|
||||
case "!~":
|
||||
return !filter.Regexp.MatchString(value)
|
||||
}
|
||||
// unexpect func
|
||||
return false
|
||||
}
|
||||
@@ -1,263 +0,0 @@
|
||||
package dispatch
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"encoding/json"
|
||||
"html/template"
|
||||
"strconv"
|
||||
"sync"
|
||||
"time"
|
||||
|
||||
"github.com/ccfos/nightingale/v6/alert/aconf"
|
||||
"github.com/ccfos/nightingale/v6/alert/common"
|
||||
"github.com/ccfos/nightingale/v6/alert/sender"
|
||||
"github.com/ccfos/nightingale/v6/memsto"
|
||||
"github.com/ccfos/nightingale/v6/models"
|
||||
"github.com/ccfos/nightingale/v6/pkg/ctx"
|
||||
|
||||
"github.com/toolkits/pkg/logger"
|
||||
)
|
||||
|
||||
type Dispatch struct {
|
||||
alertRuleCache *memsto.AlertRuleCacheType
|
||||
userCache *memsto.UserCacheType
|
||||
userGroupCache *memsto.UserGroupCacheType
|
||||
alertSubscribeCache *memsto.AlertSubscribeCacheType
|
||||
targetCache *memsto.TargetCacheType
|
||||
notifyConfigCache *memsto.NotifyConfigCacheType
|
||||
|
||||
alerting aconf.Alerting
|
||||
|
||||
senders map[string]sender.Sender
|
||||
tpls map[string]*template.Template
|
||||
|
||||
ctx *ctx.Context
|
||||
|
||||
RwLock sync.RWMutex
|
||||
}
|
||||
|
||||
// 创建一个 Notify 实例
|
||||
func NewDispatch(alertRuleCache *memsto.AlertRuleCacheType, userCache *memsto.UserCacheType, userGroupCache *memsto.UserGroupCacheType,
|
||||
alertSubscribeCache *memsto.AlertSubscribeCacheType, targetCache *memsto.TargetCacheType, notifyConfigCache *memsto.NotifyConfigCacheType,
|
||||
alerting aconf.Alerting, ctx *ctx.Context) *Dispatch {
|
||||
notify := &Dispatch{
|
||||
alertRuleCache: alertRuleCache,
|
||||
userCache: userCache,
|
||||
userGroupCache: userGroupCache,
|
||||
alertSubscribeCache: alertSubscribeCache,
|
||||
targetCache: targetCache,
|
||||
notifyConfigCache: notifyConfigCache,
|
||||
|
||||
alerting: alerting,
|
||||
|
||||
senders: make(map[string]sender.Sender),
|
||||
tpls: make(map[string]*template.Template),
|
||||
|
||||
ctx: ctx,
|
||||
}
|
||||
return notify
|
||||
}
|
||||
|
||||
func (e *Dispatch) ReloadTpls() error {
|
||||
err := e.relaodTpls()
|
||||
if err != nil {
|
||||
logger.Error("failed to reload tpls: %v", err)
|
||||
}
|
||||
|
||||
duration := time.Duration(9000) * time.Millisecond
|
||||
for {
|
||||
time.Sleep(duration)
|
||||
if err := e.relaodTpls(); err != nil {
|
||||
logger.Warning("failed to reload tpls:", err)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func (e *Dispatch) relaodTpls() error {
|
||||
tmpTpls, err := models.ListTpls(e.ctx)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
smtp := e.notifyConfigCache.GetSMTP()
|
||||
|
||||
senders := map[string]sender.Sender{
|
||||
models.Email: sender.NewSender(models.Email, tmpTpls, smtp),
|
||||
models.Dingtalk: sender.NewSender(models.Dingtalk, tmpTpls, smtp),
|
||||
models.Wecom: sender.NewSender(models.Wecom, tmpTpls, smtp),
|
||||
models.Feishu: sender.NewSender(models.Feishu, tmpTpls, smtp),
|
||||
models.Mm: sender.NewSender(models.Mm, tmpTpls, smtp),
|
||||
models.Telegram: sender.NewSender(models.Telegram, tmpTpls, smtp),
|
||||
}
|
||||
|
||||
e.RwLock.Lock()
|
||||
e.tpls = tmpTpls
|
||||
e.senders = senders
|
||||
e.RwLock.Unlock()
|
||||
return nil
|
||||
}
|
||||
|
||||
// HandleEventNotify 处理event事件的主逻辑
|
||||
// event: 告警/恢复事件
|
||||
// isSubscribe: 告警事件是否由subscribe的配置产生
|
||||
func (e *Dispatch) HandleEventNotify(event *models.AlertCurEvent, isSubscribe bool) {
|
||||
rule := e.alertRuleCache.Get(event.RuleId)
|
||||
if rule == nil {
|
||||
return
|
||||
}
|
||||
fillUsers(event, e.userCache, e.userGroupCache)
|
||||
|
||||
var (
|
||||
// 处理事件到 notifyTarget 关系,处理的notifyTarget用OrMerge进行合并
|
||||
handlers []NotifyTargetDispatch
|
||||
|
||||
// 额外去掉一些订阅,处理的notifyTarget用AndMerge进行合并, 如设置 channel=false,合并后不通过这个channel发送
|
||||
// 如果实现了相关 Dispatch,可以添加到interceptors中
|
||||
interceptorHandlers []NotifyTargetDispatch
|
||||
)
|
||||
if isSubscribe {
|
||||
handlers = []NotifyTargetDispatch{NotifyGroupDispatch, EventCallbacksDispatch}
|
||||
} else {
|
||||
handlers = []NotifyTargetDispatch{NotifyGroupDispatch, GlobalWebhookDispatch, EventCallbacksDispatch}
|
||||
}
|
||||
|
||||
notifyTarget := NewNotifyTarget()
|
||||
// 处理订阅关系使用OrMerge
|
||||
for _, handler := range handlers {
|
||||
notifyTarget.OrMerge(handler(rule, event, notifyTarget, e))
|
||||
}
|
||||
|
||||
// 处理移除订阅关系的逻辑,比如员工离职,临时静默某个通道的策略等
|
||||
for _, handler := range interceptorHandlers {
|
||||
notifyTarget.AndMerge(handler(rule, event, notifyTarget, e))
|
||||
}
|
||||
|
||||
// 处理事件发送,这里用一个goroutine处理一个event的所有发送事件
|
||||
go e.Send(rule, event, notifyTarget, isSubscribe)
|
||||
|
||||
// 如果是不是订阅规则出现的event, 则需要处理订阅规则的event
|
||||
if !isSubscribe {
|
||||
e.handleSubs(event)
|
||||
}
|
||||
}
|
||||
|
||||
func (e *Dispatch) handleSubs(event *models.AlertCurEvent) {
|
||||
// handle alert subscribes
|
||||
subscribes := make([]*models.AlertSubscribe, 0)
|
||||
// rule specific subscribes
|
||||
if subs, has := e.alertSubscribeCache.Get(event.RuleId); has {
|
||||
subscribes = append(subscribes, subs...)
|
||||
}
|
||||
// global subscribes
|
||||
if subs, has := e.alertSubscribeCache.Get(0); has {
|
||||
subscribes = append(subscribes, subs...)
|
||||
}
|
||||
|
||||
for _, sub := range subscribes {
|
||||
e.handleSub(sub, *event)
|
||||
}
|
||||
}
|
||||
|
||||
// handleSub 处理订阅规则的event,注意这里event要使用值传递,因为后面会修改event的状态
|
||||
func (e *Dispatch) handleSub(sub *models.AlertSubscribe, event models.AlertCurEvent) {
|
||||
if sub.IsDisabled() || !sub.MatchCluster(event.DatasourceId) {
|
||||
return
|
||||
}
|
||||
if !common.MatchTags(event.TagsMap, sub.ITags) {
|
||||
return
|
||||
}
|
||||
if sub.ForDuration > (event.TriggerTime - event.FirstTriggerTime) {
|
||||
return
|
||||
}
|
||||
sub.ModifyEvent(&event)
|
||||
LogEvent(&event, "subscribe")
|
||||
e.HandleEventNotify(&event, true)
|
||||
}
|
||||
|
||||
func (e *Dispatch) Send(rule *models.AlertRule, event *models.AlertCurEvent, notifyTarget *NotifyTarget, isSubscribe bool) {
|
||||
for channel, uids := range notifyTarget.ToChannelUserMap() {
|
||||
ctx := sender.BuildMessageContext(rule, event, uids, e.userCache)
|
||||
e.RwLock.RLock()
|
||||
s := e.senders[channel]
|
||||
e.RwLock.RUnlock()
|
||||
if s == nil {
|
||||
logger.Warningf("no sender for channel: %s", channel)
|
||||
continue
|
||||
}
|
||||
logger.Debugf("send event: %s, channel: %s", event.Hash, channel)
|
||||
for i := 0; i < len(ctx.Users); i++ {
|
||||
logger.Debug("send event to user: ", ctx.Users[i])
|
||||
}
|
||||
s.Send(ctx)
|
||||
}
|
||||
|
||||
// handle event callbacks
|
||||
sender.SendCallbacks(e.ctx, notifyTarget.ToCallbackList(), event, e.targetCache, e.notifyConfigCache.GetIbex())
|
||||
|
||||
// handle global webhooks
|
||||
sender.SendWebhooks(notifyTarget.ToWebhookList(), event)
|
||||
|
||||
// handle plugin call
|
||||
go sender.MayPluginNotify(e.genNoticeBytes(event), e.notifyConfigCache.GetNotifyScript())
|
||||
}
|
||||
|
||||
type Notice struct {
|
||||
Event *models.AlertCurEvent `json:"event"`
|
||||
Tpls map[string]string `json:"tpls"`
|
||||
}
|
||||
|
||||
func (e *Dispatch) genNoticeBytes(event *models.AlertCurEvent) []byte {
|
||||
// build notice body with templates
|
||||
ntpls := make(map[string]string)
|
||||
|
||||
e.RwLock.RLock()
|
||||
defer e.RwLock.RUnlock()
|
||||
for filename, tpl := range e.tpls {
|
||||
var body bytes.Buffer
|
||||
if err := tpl.Execute(&body, event); err != nil {
|
||||
ntpls[filename] = err.Error()
|
||||
} else {
|
||||
ntpls[filename] = body.String()
|
||||
}
|
||||
}
|
||||
|
||||
notice := Notice{Event: event, Tpls: ntpls}
|
||||
stdinBytes, err := json.Marshal(notice)
|
||||
if err != nil {
|
||||
logger.Errorf("event_notify: failed to marshal notice: %v", err)
|
||||
return nil
|
||||
}
|
||||
|
||||
return stdinBytes
|
||||
}
|
||||
|
||||
// for alerting
|
||||
func fillUsers(ce *models.AlertCurEvent, uc *memsto.UserCacheType, ugc *memsto.UserGroupCacheType) {
|
||||
gids := make([]int64, 0, len(ce.NotifyGroupsJSON))
|
||||
for i := 0; i < len(ce.NotifyGroupsJSON); i++ {
|
||||
gid, err := strconv.ParseInt(ce.NotifyGroupsJSON[i], 10, 64)
|
||||
if err != nil {
|
||||
continue
|
||||
}
|
||||
gids = append(gids, gid)
|
||||
}
|
||||
|
||||
ce.NotifyGroupsObj = ugc.GetByUserGroupIds(gids)
|
||||
|
||||
uids := make(map[int64]struct{})
|
||||
for i := 0; i < len(ce.NotifyGroupsObj); i++ {
|
||||
ug := ce.NotifyGroupsObj[i]
|
||||
for j := 0; j < len(ug.UserIds); j++ {
|
||||
uids[ug.UserIds[j]] = struct{}{}
|
||||
}
|
||||
}
|
||||
|
||||
ce.NotifyUsersObj = uc.GetByUserIds(mapKeys(uids))
|
||||
}
|
||||
|
||||
func mapKeys(m map[int64]struct{}) []int64 {
|
||||
lst := make([]int64, 0, len(m))
|
||||
for k := range m {
|
||||
lst = append(lst, k)
|
||||
}
|
||||
return lst
|
||||
}
|
||||
@@ -1,33 +0,0 @@
|
||||
package dispatch
|
||||
|
||||
// NotifyChannels channelKey -> bool
|
||||
type NotifyChannels map[string]bool
|
||||
|
||||
func NewNotifyChannels(channels []string) NotifyChannels {
|
||||
nc := make(NotifyChannels)
|
||||
for _, ch := range channels {
|
||||
nc[ch] = true
|
||||
}
|
||||
return nc
|
||||
}
|
||||
|
||||
func (nc NotifyChannels) OrMerge(other NotifyChannels) {
|
||||
nc.merge(other, func(a, b bool) bool { return a || b })
|
||||
}
|
||||
|
||||
func (nc NotifyChannels) AndMerge(other NotifyChannels) {
|
||||
nc.merge(other, func(a, b bool) bool { return a && b })
|
||||
}
|
||||
|
||||
func (nc NotifyChannels) merge(other NotifyChannels, f func(bool, bool) bool) {
|
||||
if other == nil {
|
||||
return
|
||||
}
|
||||
for k, v := range other {
|
||||
if curV, has := nc[k]; has {
|
||||
nc[k] = f(curV, v)
|
||||
} else {
|
||||
nc[k] = v
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -1,134 +0,0 @@
|
||||
package dispatch
|
||||
|
||||
import (
|
||||
"strconv"
|
||||
|
||||
"github.com/ccfos/nightingale/v6/models"
|
||||
)
|
||||
|
||||
// NotifyTarget 维护所有需要发送的目标 用户-通道/回调/钩子信息,用map维护的数据结构具有去重功能
|
||||
type NotifyTarget struct {
|
||||
userMap map[int64]NotifyChannels
|
||||
webhooks map[string]*models.Webhook
|
||||
callbacks map[string]struct{}
|
||||
}
|
||||
|
||||
func NewNotifyTarget() *NotifyTarget {
|
||||
return &NotifyTarget{
|
||||
userMap: make(map[int64]NotifyChannels),
|
||||
webhooks: make(map[string]*models.Webhook),
|
||||
callbacks: make(map[string]struct{}),
|
||||
}
|
||||
}
|
||||
|
||||
// OrMerge 将 channelMap 按照 or 的方式合并,方便实现多种组合的策略,比如根据某个 tag 进行路由等
|
||||
func (s *NotifyTarget) OrMerge(other *NotifyTarget) {
|
||||
s.merge(other, NotifyChannels.OrMerge)
|
||||
}
|
||||
|
||||
// AndMerge 将 channelMap 中的 bool 值按照 and 的逻辑进行合并,可以单独将人/通道维度的通知移除
|
||||
// 常用的场景有:
|
||||
// 1. 人员离职了不需要发送告警了
|
||||
// 2. 某个告警通道进行维护,暂时不需要发送告警了
|
||||
// 3. 业务值班的重定向逻辑,将高等级的告警额外发送给应急人员等
|
||||
// 可以结合业务需求自己实现router
|
||||
func (s *NotifyTarget) AndMerge(other *NotifyTarget) {
|
||||
s.merge(other, NotifyChannels.AndMerge)
|
||||
}
|
||||
|
||||
func (s *NotifyTarget) merge(other *NotifyTarget, f func(NotifyChannels, NotifyChannels)) {
|
||||
if other == nil {
|
||||
return
|
||||
}
|
||||
for k, v := range other.userMap {
|
||||
if curV, has := s.userMap[k]; has {
|
||||
f(curV, v)
|
||||
} else {
|
||||
s.userMap[k] = v
|
||||
}
|
||||
}
|
||||
for k, v := range other.webhooks {
|
||||
s.webhooks[k] = v
|
||||
}
|
||||
for k, v := range other.callbacks {
|
||||
s.callbacks[k] = v
|
||||
}
|
||||
}
|
||||
|
||||
// ToChannelUserMap userMap(map[uid][channel]bool) 转换为 map[channel][]uid 的结构
|
||||
func (s *NotifyTarget) ToChannelUserMap() map[string][]int64 {
|
||||
m := make(map[string][]int64)
|
||||
for uid, nc := range s.userMap {
|
||||
for ch, send := range nc {
|
||||
if send {
|
||||
m[ch] = append(m[ch], uid)
|
||||
}
|
||||
}
|
||||
}
|
||||
return m
|
||||
}
|
||||
|
||||
func (s *NotifyTarget) ToCallbackList() []string {
|
||||
callbacks := make([]string, 0, len(s.callbacks))
|
||||
for cb := range s.callbacks {
|
||||
callbacks = append(callbacks, cb)
|
||||
}
|
||||
return callbacks
|
||||
}
|
||||
|
||||
func (s *NotifyTarget) ToWebhookList() []*models.Webhook {
|
||||
webhooks := make([]*models.Webhook, 0, len(s.webhooks))
|
||||
for _, wh := range s.webhooks {
|
||||
webhooks = append(webhooks, wh)
|
||||
}
|
||||
return webhooks
|
||||
}
|
||||
|
||||
// Dispatch 抽象由告警事件到信息接收者的路由策略
|
||||
// rule: 告警规则
|
||||
// event: 告警事件
|
||||
// prev: 前一次路由结果, Dispatch 的实现可以直接修改 prev, 也可以返回一个新的 NotifyTarget 用于 AndMerge/OrMerge
|
||||
type NotifyTargetDispatch func(rule *models.AlertRule, event *models.AlertCurEvent, prev *NotifyTarget, dispatch *Dispatch) *NotifyTarget
|
||||
|
||||
// GroupDispatch 处理告警规则的组订阅关系
|
||||
func NotifyGroupDispatch(rule *models.AlertRule, event *models.AlertCurEvent, prev *NotifyTarget, dispatch *Dispatch) *NotifyTarget {
|
||||
groupIds := make([]int64, 0, len(event.NotifyGroupsJSON))
|
||||
for _, groupId := range event.NotifyGroupsJSON {
|
||||
gid, err := strconv.ParseInt(groupId, 10, 64)
|
||||
if err != nil {
|
||||
continue
|
||||
}
|
||||
groupIds = append(groupIds, gid)
|
||||
}
|
||||
|
||||
groups := dispatch.userGroupCache.GetByUserGroupIds(groupIds)
|
||||
NotifyTarget := NewNotifyTarget()
|
||||
for _, group := range groups {
|
||||
for _, userId := range group.UserIds {
|
||||
NotifyTarget.userMap[userId] = NewNotifyChannels(event.NotifyChannelsJSON)
|
||||
}
|
||||
}
|
||||
return NotifyTarget
|
||||
}
|
||||
|
||||
func GlobalWebhookDispatch(rule *models.AlertRule, event *models.AlertCurEvent, prev *NotifyTarget, dispatch *Dispatch) *NotifyTarget {
|
||||
webhooks := dispatch.notifyConfigCache.GetWebhooks()
|
||||
NotifyTarget := NewNotifyTarget()
|
||||
for _, webhook := range webhooks {
|
||||
if !webhook.Enable {
|
||||
continue
|
||||
}
|
||||
NotifyTarget.webhooks[webhook.Url] = webhook
|
||||
}
|
||||
return NotifyTarget
|
||||
}
|
||||
|
||||
func EventCallbacksDispatch(rule *models.AlertRule, event *models.AlertCurEvent, prev *NotifyTarget, dispatch *Dispatch) *NotifyTarget {
|
||||
for _, c := range event.CallbacksJSON {
|
||||
if c == "" {
|
||||
continue
|
||||
}
|
||||
prev.callbacks[c] = struct{}{}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
@@ -1,174 +0,0 @@
|
||||
package eval
|
||||
|
||||
import (
|
||||
"context"
|
||||
"fmt"
|
||||
"time"
|
||||
|
||||
"github.com/ccfos/nightingale/v6/alert/aconf"
|
||||
"github.com/ccfos/nightingale/v6/alert/astats"
|
||||
"github.com/ccfos/nightingale/v6/alert/naming"
|
||||
"github.com/ccfos/nightingale/v6/alert/process"
|
||||
"github.com/ccfos/nightingale/v6/memsto"
|
||||
"github.com/ccfos/nightingale/v6/pkg/ctx"
|
||||
"github.com/ccfos/nightingale/v6/prom"
|
||||
"github.com/toolkits/pkg/logger"
|
||||
)
|
||||
|
||||
type Scheduler struct {
|
||||
isCenter bool
|
||||
// key: hash
|
||||
alertRules map[string]*AlertRuleWorker
|
||||
|
||||
ExternalProcessors *process.ExternalProcessorsType
|
||||
|
||||
aconf aconf.Alert
|
||||
|
||||
alertRuleCache *memsto.AlertRuleCacheType
|
||||
targetCache *memsto.TargetCacheType
|
||||
busiGroupCache *memsto.BusiGroupCacheType
|
||||
alertMuteCache *memsto.AlertMuteCacheType
|
||||
datasourceCache *memsto.DatasourceCacheType
|
||||
|
||||
promClients *prom.PromClientMap
|
||||
|
||||
naming *naming.Naming
|
||||
|
||||
ctx *ctx.Context
|
||||
stats *astats.Stats
|
||||
}
|
||||
|
||||
func NewScheduler(isCenter bool, aconf aconf.Alert, externalProcessors *process.ExternalProcessorsType, arc *memsto.AlertRuleCacheType, targetCache *memsto.TargetCacheType,
|
||||
busiGroupCache *memsto.BusiGroupCacheType, alertMuteCache *memsto.AlertMuteCacheType, datasourceCache *memsto.DatasourceCacheType, promClients *prom.PromClientMap, naming *naming.Naming,
|
||||
ctx *ctx.Context, stats *astats.Stats) *Scheduler {
|
||||
scheduler := &Scheduler{
|
||||
isCenter: isCenter,
|
||||
aconf: aconf,
|
||||
alertRules: make(map[string]*AlertRuleWorker),
|
||||
|
||||
ExternalProcessors: externalProcessors,
|
||||
|
||||
alertRuleCache: arc,
|
||||
targetCache: targetCache,
|
||||
busiGroupCache: busiGroupCache,
|
||||
alertMuteCache: alertMuteCache,
|
||||
datasourceCache: datasourceCache,
|
||||
|
||||
promClients: promClients,
|
||||
naming: naming,
|
||||
|
||||
ctx: ctx,
|
||||
stats: stats,
|
||||
}
|
||||
|
||||
go scheduler.LoopSyncRules(context.Background())
|
||||
return scheduler
|
||||
}
|
||||
|
||||
func (s *Scheduler) LoopSyncRules(ctx context.Context) {
|
||||
time.Sleep(time.Duration(s.aconf.EngineDelay) * time.Second)
|
||||
duration := 9000 * time.Millisecond
|
||||
for {
|
||||
select {
|
||||
case <-ctx.Done():
|
||||
return
|
||||
case <-time.After(duration):
|
||||
s.syncAlertRules()
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func (s *Scheduler) syncAlertRules() {
|
||||
ids := s.alertRuleCache.GetRuleIds()
|
||||
alertRuleWorkers := make(map[string]*AlertRuleWorker)
|
||||
externalRuleWorkers := make(map[string]*process.Processor)
|
||||
for _, id := range ids {
|
||||
rule := s.alertRuleCache.Get(id)
|
||||
if rule == nil {
|
||||
continue
|
||||
}
|
||||
if rule.IsPrometheusRule() {
|
||||
datasourceIds := s.promClients.Hit(rule.DatasourceIdsJson)
|
||||
for _, dsId := range datasourceIds {
|
||||
if !naming.DatasourceHashRing.IsHit(dsId, fmt.Sprintf("%d", rule.Id), s.aconf.Heartbeat.Endpoint) {
|
||||
continue
|
||||
}
|
||||
ds := s.datasourceCache.GetById(dsId)
|
||||
if ds == nil {
|
||||
logger.Debugf("datasource %d not found", dsId)
|
||||
continue
|
||||
}
|
||||
|
||||
if ds.Status != "enabled" {
|
||||
logger.Debugf("datasource %d status is %s", dsId, ds.Status)
|
||||
continue
|
||||
}
|
||||
processor := process.NewProcessor(rule, dsId, s.alertRuleCache, s.targetCache, s.busiGroupCache, s.alertMuteCache, s.datasourceCache, s.promClients, s.ctx, s.stats)
|
||||
|
||||
alertRule := NewAlertRuleWorker(rule, dsId, processor, s.promClients, s.ctx)
|
||||
alertRuleWorkers[alertRule.Hash()] = alertRule
|
||||
}
|
||||
} else if rule.IsHostRule() && s.isCenter {
|
||||
// all host rule will be processed by center instance
|
||||
if !naming.DatasourceHashRing.IsHit(naming.HostDatasource, fmt.Sprintf("%d", rule.Id), s.aconf.Heartbeat.Endpoint) {
|
||||
continue
|
||||
}
|
||||
processor := process.NewProcessor(rule, 0, s.alertRuleCache, s.targetCache, s.busiGroupCache, s.alertMuteCache, s.datasourceCache, s.promClients, s.ctx, s.stats)
|
||||
alertRule := NewAlertRuleWorker(rule, 0, processor, s.promClients, s.ctx)
|
||||
alertRuleWorkers[alertRule.Hash()] = alertRule
|
||||
} else {
|
||||
// 如果 rule 不是通过 prometheus engine 来告警的,则创建为 externalRule
|
||||
// if rule is not processed by prometheus engine, create it as externalRule
|
||||
for _, dsId := range rule.DatasourceIdsJson {
|
||||
ds := s.datasourceCache.GetById(dsId)
|
||||
if ds == nil {
|
||||
logger.Debugf("datasource %d not found", dsId)
|
||||
continue
|
||||
}
|
||||
|
||||
if ds.Status != "enabled" {
|
||||
logger.Debugf("datasource %d status is %s", dsId, ds.Status)
|
||||
continue
|
||||
}
|
||||
processor := process.NewProcessor(rule, dsId, s.alertRuleCache, s.targetCache, s.busiGroupCache, s.alertMuteCache, s.datasourceCache, s.promClients, s.ctx, s.stats)
|
||||
externalRuleWorkers[processor.Key()] = processor
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
for hash, rule := range alertRuleWorkers {
|
||||
if _, has := s.alertRules[hash]; !has {
|
||||
rule.Prepare()
|
||||
rule.Start()
|
||||
s.alertRules[hash] = rule
|
||||
}
|
||||
}
|
||||
|
||||
for hash, rule := range s.alertRules {
|
||||
if _, has := alertRuleWorkers[hash]; !has {
|
||||
rule.Stop()
|
||||
delete(s.alertRules, hash)
|
||||
}
|
||||
}
|
||||
|
||||
s.ExternalProcessors.ExternalLock.Lock()
|
||||
for key, processor := range externalRuleWorkers {
|
||||
if curProcessor, has := s.ExternalProcessors.Processors[key]; has {
|
||||
// rule存在,且hash一致,认为没有变更,这里可以根据需求单独实现一个关联数据更多的hash函数
|
||||
if processor.Hash() == curProcessor.Hash() {
|
||||
continue
|
||||
}
|
||||
}
|
||||
|
||||
// 现有规则中没有rule以及有rule但hash不一致的场景,需要触发rule的update
|
||||
processor.RecoverAlertCurEventFromDb()
|
||||
s.ExternalProcessors.Processors[key] = processor
|
||||
}
|
||||
|
||||
for key := range s.ExternalProcessors.Processors {
|
||||
if _, has := externalRuleWorkers[key]; !has {
|
||||
delete(s.ExternalProcessors.Processors, key)
|
||||
}
|
||||
}
|
||||
s.ExternalProcessors.ExternalLock.Unlock()
|
||||
}
|
||||
@@ -1,268 +0,0 @@
|
||||
package eval
|
||||
|
||||
import (
|
||||
"context"
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
"github.com/ccfos/nightingale/v6/alert/common"
|
||||
"github.com/ccfos/nightingale/v6/alert/process"
|
||||
"github.com/ccfos/nightingale/v6/models"
|
||||
"github.com/ccfos/nightingale/v6/pkg/ctx"
|
||||
promsdk "github.com/ccfos/nightingale/v6/pkg/prom"
|
||||
"github.com/ccfos/nightingale/v6/prom"
|
||||
|
||||
"github.com/toolkits/pkg/logger"
|
||||
"github.com/toolkits/pkg/str"
|
||||
)
|
||||
|
||||
type AlertRuleWorker struct {
|
||||
datasourceId int64
|
||||
quit chan struct{}
|
||||
inhibit bool
|
||||
severity int
|
||||
|
||||
rule *models.AlertRule
|
||||
|
||||
processor *process.Processor
|
||||
|
||||
promClients *prom.PromClientMap
|
||||
ctx *ctx.Context
|
||||
}
|
||||
|
||||
func NewAlertRuleWorker(rule *models.AlertRule, datasourceId int64, processor *process.Processor, promClients *prom.PromClientMap, ctx *ctx.Context) *AlertRuleWorker {
|
||||
arw := &AlertRuleWorker{
|
||||
datasourceId: datasourceId,
|
||||
quit: make(chan struct{}),
|
||||
rule: rule,
|
||||
processor: processor,
|
||||
|
||||
promClients: promClients,
|
||||
ctx: ctx,
|
||||
}
|
||||
|
||||
return arw
|
||||
}
|
||||
|
||||
func (arw *AlertRuleWorker) Key() string {
|
||||
return common.RuleKey(arw.datasourceId, arw.rule.Id)
|
||||
}
|
||||
|
||||
func (arw *AlertRuleWorker) Hash() string {
|
||||
return str.MD5(fmt.Sprintf("%d_%d_%s_%d",
|
||||
arw.rule.Id,
|
||||
arw.rule.PromEvalInterval,
|
||||
arw.rule.RuleConfig,
|
||||
arw.datasourceId,
|
||||
))
|
||||
}
|
||||
|
||||
func (arw *AlertRuleWorker) Prepare() {
|
||||
arw.processor.RecoverAlertCurEventFromDb()
|
||||
}
|
||||
|
||||
func (arw *AlertRuleWorker) Start() {
|
||||
logger.Infof("eval:%s started", arw.Key())
|
||||
interval := arw.rule.PromEvalInterval
|
||||
if interval <= 0 {
|
||||
interval = 10
|
||||
}
|
||||
go func() {
|
||||
for {
|
||||
select {
|
||||
case <-arw.quit:
|
||||
return
|
||||
default:
|
||||
arw.Eval()
|
||||
time.Sleep(time.Duration(interval) * time.Second)
|
||||
}
|
||||
}
|
||||
}()
|
||||
}
|
||||
|
||||
func (arw *AlertRuleWorker) Eval() {
|
||||
cachedRule := arw.rule
|
||||
if cachedRule == nil {
|
||||
//logger.Errorf("rule_eval:%s rule not found", arw.Key())
|
||||
return
|
||||
}
|
||||
|
||||
typ := cachedRule.GetRuleType()
|
||||
var lst []common.AnomalyPoint
|
||||
switch typ {
|
||||
case models.PROMETHEUS:
|
||||
lst = arw.GetPromAnomalyPoint(cachedRule.RuleConfig)
|
||||
case models.HOST:
|
||||
lst = arw.GetHostAnomalyPoint(cachedRule.RuleConfig)
|
||||
default:
|
||||
return
|
||||
}
|
||||
|
||||
if arw.processor == nil {
|
||||
logger.Warningf("rule_eval:%s processor is nil", arw.Key())
|
||||
return
|
||||
}
|
||||
|
||||
arw.processor.Handle(lst, "inner", arw.inhibit)
|
||||
}
|
||||
|
||||
func (arw *AlertRuleWorker) Stop() {
|
||||
logger.Infof("%s stopped", arw.Key())
|
||||
close(arw.quit)
|
||||
}
|
||||
|
||||
func (arw *AlertRuleWorker) GetPromAnomalyPoint(ruleConfig string) []common.AnomalyPoint {
|
||||
var lst []common.AnomalyPoint
|
||||
var severity int
|
||||
|
||||
var rule *models.PromRuleConfig
|
||||
if err := json.Unmarshal([]byte(ruleConfig), &rule); err != nil {
|
||||
logger.Errorf("rule_eval:%s rule_config:%s, error:%v", arw.Key(), ruleConfig, err)
|
||||
return lst
|
||||
}
|
||||
|
||||
if rule == nil {
|
||||
logger.Errorf("rule_eval:%s rule_config:%s, error:rule is nil", arw.Key(), ruleConfig)
|
||||
return lst
|
||||
}
|
||||
|
||||
arw.inhibit = rule.Inhibit
|
||||
for _, query := range rule.Queries {
|
||||
if query.Severity < severity {
|
||||
arw.severity = query.Severity
|
||||
}
|
||||
|
||||
promql := strings.TrimSpace(query.PromQl)
|
||||
if promql == "" {
|
||||
logger.Errorf("rule_eval:%s promql is blank", arw.Key())
|
||||
continue
|
||||
}
|
||||
|
||||
if arw.promClients.IsNil(arw.datasourceId) {
|
||||
logger.Errorf("rule_eval:%s error reader client is nil", arw.Key())
|
||||
continue
|
||||
}
|
||||
|
||||
readerClient := arw.promClients.GetCli(arw.datasourceId)
|
||||
|
||||
var warnings promsdk.Warnings
|
||||
value, warnings, err := readerClient.Query(context.Background(), promql, time.Now())
|
||||
if err != nil {
|
||||
logger.Errorf("rule_eval:%s promql:%s, error:%v", arw.Key(), promql, err)
|
||||
continue
|
||||
}
|
||||
|
||||
if len(warnings) > 0 {
|
||||
logger.Errorf("rule_eval:%s promql:%s, warnings:%v", arw.Key(), promql, warnings)
|
||||
continue
|
||||
}
|
||||
|
||||
logger.Debugf("rule_eval:%s query:%+v, value:%v", arw.Key(), query, value)
|
||||
points := common.ConvertAnomalyPoints(value)
|
||||
for i := 0; i < len(points); i++ {
|
||||
points[i].Severity = query.Severity
|
||||
}
|
||||
lst = append(lst, points...)
|
||||
}
|
||||
return lst
|
||||
}
|
||||
|
||||
func (arw *AlertRuleWorker) GetHostAnomalyPoint(ruleConfig string) []common.AnomalyPoint {
|
||||
var lst []common.AnomalyPoint
|
||||
var severity int
|
||||
|
||||
var rule *models.HostRuleConfig
|
||||
if err := json.Unmarshal([]byte(ruleConfig), &rule); err != nil {
|
||||
logger.Errorf("rule_eval:%s rule_config:%s, error:%v", arw.Key(), ruleConfig, err)
|
||||
return lst
|
||||
}
|
||||
|
||||
if rule == nil {
|
||||
logger.Errorf("rule_eval:%s rule_config:%s, error:rule is nil", arw.Key(), ruleConfig)
|
||||
return lst
|
||||
}
|
||||
|
||||
arw.inhibit = rule.Inhibit
|
||||
now := time.Now().Unix()
|
||||
for _, trigger := range rule.Triggers {
|
||||
if trigger.Severity < severity {
|
||||
arw.severity = trigger.Severity
|
||||
}
|
||||
|
||||
query := models.GetHostsQuery(rule.Queries)
|
||||
switch trigger.Type {
|
||||
case "target_miss":
|
||||
t := now - int64(trigger.Duration)
|
||||
targets, err := models.MissTargetGetsByFilter(arw.ctx, query, t)
|
||||
if err != nil {
|
||||
logger.Errorf("rule_eval:%s query:%v, error:%v", arw.Key(), query, err)
|
||||
continue
|
||||
}
|
||||
for _, target := range targets {
|
||||
m := make(map[string]string)
|
||||
target.FillTagsMap()
|
||||
for k, v := range target.TagsMap {
|
||||
m[k] = v
|
||||
}
|
||||
m["ident"] = target.Ident
|
||||
|
||||
bg := arw.processor.BusiGroupCache.GetByBusiGroupId(target.GroupId)
|
||||
if bg != nil && bg.LabelEnable == 1 {
|
||||
m["busigroup"] = bg.LabelValue
|
||||
}
|
||||
|
||||
lst = append(lst, common.NewAnomalyPoint(trigger.Type, m, now, float64(now-target.UpdateAt), trigger.Severity))
|
||||
}
|
||||
case "offset":
|
||||
targets, err := models.TargetGetsByFilter(arw.ctx, query, 0, 0)
|
||||
if err != nil {
|
||||
logger.Errorf("rule_eval:%s query:%v, error:%v", arw.Key(), query, err)
|
||||
continue
|
||||
}
|
||||
var targetMap = make(map[string]*models.Target)
|
||||
for _, target := range targets {
|
||||
targetMap[target.Ident] = target
|
||||
}
|
||||
|
||||
hostOffsetMap := arw.processor.TargetCache.GetOffsetHost(targets, now, int64(trigger.Duration))
|
||||
for host, offset := range hostOffsetMap {
|
||||
m := make(map[string]string)
|
||||
target, exists := targetMap[host]
|
||||
if exists {
|
||||
target.FillTagsMap()
|
||||
for k, v := range target.TagsMap {
|
||||
m[k] = v
|
||||
}
|
||||
}
|
||||
m["ident"] = host
|
||||
|
||||
bg := arw.processor.BusiGroupCache.GetByBusiGroupId(target.GroupId)
|
||||
if bg != nil && bg.LabelEnable == 1 {
|
||||
m["busigroup"] = bg.LabelValue
|
||||
}
|
||||
|
||||
lst = append(lst, common.NewAnomalyPoint(trigger.Type, m, now, float64(offset), trigger.Severity))
|
||||
}
|
||||
case "pct_target_miss":
|
||||
t := now - int64(trigger.Duration)
|
||||
count, err := models.MissTargetCountByFilter(arw.ctx, query, t)
|
||||
if err != nil {
|
||||
logger.Errorf("rule_eval:%s query:%v, error:%v", arw.Key(), query, err)
|
||||
continue
|
||||
}
|
||||
|
||||
total, err := models.TargetCountByFilter(arw.ctx, query)
|
||||
if err != nil {
|
||||
logger.Errorf("rule_eval:%s query:%v, error:%v", arw.Key(), query, err)
|
||||
continue
|
||||
}
|
||||
pct := float64(count) / float64(total) * 100
|
||||
if pct >= float64(trigger.Percent) {
|
||||
lst = append(lst, common.NewAnomalyPoint(trigger.Type, nil, now, pct, trigger.Severity))
|
||||
}
|
||||
}
|
||||
}
|
||||
return lst
|
||||
}
|
||||
@@ -1,65 +0,0 @@
|
||||
package naming
|
||||
|
||||
import (
|
||||
"sync"
|
||||
|
||||
"github.com/toolkits/pkg/consistent"
|
||||
"github.com/toolkits/pkg/logger"
|
||||
)
|
||||
|
||||
const NodeReplicas = 500
|
||||
|
||||
type DatasourceHashRingType struct {
|
||||
sync.RWMutex
|
||||
Rings map[int64]*consistent.Consistent
|
||||
}
|
||||
|
||||
// for alert_rule sharding
|
||||
var HostDatasource int64 = 100000
|
||||
var DatasourceHashRing = DatasourceHashRingType{Rings: make(map[int64]*consistent.Consistent)}
|
||||
|
||||
func NewConsistentHashRing(replicas int32, nodes []string) *consistent.Consistent {
|
||||
ret := consistent.New()
|
||||
ret.NumberOfReplicas = int(replicas)
|
||||
for i := 0; i < len(nodes); i++ {
|
||||
ret.Add(nodes[i])
|
||||
}
|
||||
return ret
|
||||
}
|
||||
|
||||
func RebuildConsistentHashRing(datasourceId int64, nodes []string) {
|
||||
r := consistent.New()
|
||||
r.NumberOfReplicas = NodeReplicas
|
||||
for i := 0; i < len(nodes); i++ {
|
||||
r.Add(nodes[i])
|
||||
}
|
||||
|
||||
DatasourceHashRing.Set(datasourceId, r)
|
||||
logger.Infof("hash ring %d rebuild %+v", datasourceId, r.Members())
|
||||
}
|
||||
|
||||
func (chr *DatasourceHashRingType) GetNode(datasourceId int64, pk string) (string, error) {
|
||||
chr.RLock()
|
||||
defer chr.RUnlock()
|
||||
_, exists := chr.Rings[datasourceId]
|
||||
if !exists {
|
||||
chr.Rings[datasourceId] = NewConsistentHashRing(int32(NodeReplicas), []string{})
|
||||
}
|
||||
|
||||
return chr.Rings[datasourceId].Get(pk)
|
||||
}
|
||||
|
||||
func (chr *DatasourceHashRingType) IsHit(datasourceId int64, pk string, currentNode string) bool {
|
||||
node, err := chr.GetNode(datasourceId, pk)
|
||||
if err != nil {
|
||||
logger.Debugf("datasource id:%d pk:%s failed to get node from hashring:%v", datasourceId, pk, err)
|
||||
return false
|
||||
}
|
||||
return node == currentNode
|
||||
}
|
||||
|
||||
func (chr *DatasourceHashRingType) Set(datasourceId int64, r *consistent.Consistent) {
|
||||
chr.RLock()
|
||||
defer chr.RUnlock()
|
||||
chr.Rings[datasourceId] = r
|
||||
}
|
||||
@@ -1,151 +0,0 @@
|
||||
package naming
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"sort"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
"github.com/ccfos/nightingale/v6/alert/aconf"
|
||||
"github.com/ccfos/nightingale/v6/models"
|
||||
"github.com/ccfos/nightingale/v6/pkg/ctx"
|
||||
|
||||
"github.com/toolkits/pkg/logger"
|
||||
)
|
||||
|
||||
type Naming struct {
|
||||
ctx *ctx.Context
|
||||
heartbeatConfig aconf.HeartbeatConfig
|
||||
isCenter bool
|
||||
}
|
||||
|
||||
func NewNaming(ctx *ctx.Context, heartbeat aconf.HeartbeatConfig, isCenter bool) *Naming {
|
||||
naming := &Naming{
|
||||
ctx: ctx,
|
||||
heartbeatConfig: heartbeat,
|
||||
isCenter: isCenter,
|
||||
}
|
||||
naming.Heartbeats()
|
||||
return naming
|
||||
}
|
||||
|
||||
// local servers
|
||||
var localss map[int64]string
|
||||
|
||||
func (n *Naming) Heartbeats() error {
|
||||
localss = make(map[int64]string)
|
||||
if err := n.heartbeat(); err != nil {
|
||||
fmt.Println("failed to heartbeat:", err)
|
||||
return err
|
||||
}
|
||||
|
||||
go n.loopHeartbeat()
|
||||
go n.loopDeleteInactiveInstances()
|
||||
return nil
|
||||
}
|
||||
|
||||
func (n *Naming) loopDeleteInactiveInstances() {
|
||||
interval := time.Duration(10) * time.Minute
|
||||
for {
|
||||
time.Sleep(interval)
|
||||
n.DeleteInactiveInstances()
|
||||
}
|
||||
}
|
||||
|
||||
func (n *Naming) DeleteInactiveInstances() {
|
||||
err := models.DB(n.ctx).Where("clock < ?", time.Now().Unix()-600).Delete(new(models.AlertingEngines)).Error
|
||||
if err != nil {
|
||||
logger.Errorf("delete inactive instances err:%v", err)
|
||||
}
|
||||
}
|
||||
|
||||
func (n *Naming) loopHeartbeat() {
|
||||
interval := time.Duration(n.heartbeatConfig.Interval) * time.Millisecond
|
||||
for {
|
||||
time.Sleep(interval)
|
||||
if err := n.heartbeat(); err != nil {
|
||||
logger.Warning(err)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func (n *Naming) heartbeat() error {
|
||||
var datasourceIds []int64
|
||||
var err error
|
||||
|
||||
// 在页面上维护实例和集群的对应关系
|
||||
datasourceIds, err = models.GetDatasourceIdsByClusterName(n.ctx, n.heartbeatConfig.EngineName)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
if len(datasourceIds) == 0 {
|
||||
err := models.AlertingEngineHeartbeatWithCluster(n.ctx, n.heartbeatConfig.Endpoint, n.heartbeatConfig.EngineName, 0)
|
||||
if err != nil {
|
||||
logger.Warningf("heartbeat with cluster %s err:%v", "", err)
|
||||
}
|
||||
} else {
|
||||
for i := 0; i < len(datasourceIds); i++ {
|
||||
err := models.AlertingEngineHeartbeatWithCluster(n.ctx, n.heartbeatConfig.Endpoint, n.heartbeatConfig.EngineName, datasourceIds[i])
|
||||
if err != nil {
|
||||
logger.Warningf("heartbeat with cluster %d err:%v", datasourceIds[i], err)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
for i := 0; i < len(datasourceIds); i++ {
|
||||
servers, err := n.ActiveServers(datasourceIds[i])
|
||||
if err != nil {
|
||||
logger.Warningf("hearbeat %d get active server err:%v", datasourceIds[i], err)
|
||||
continue
|
||||
}
|
||||
|
||||
sort.Strings(servers)
|
||||
newss := strings.Join(servers, " ")
|
||||
|
||||
oldss, exists := localss[datasourceIds[i]]
|
||||
if exists && oldss == newss {
|
||||
continue
|
||||
}
|
||||
|
||||
RebuildConsistentHashRing(datasourceIds[i], servers)
|
||||
localss[datasourceIds[i]] = newss
|
||||
}
|
||||
|
||||
if n.isCenter {
|
||||
// 如果是中心节点,还需要处理 host 类型的告警规则,host 类型告警规则,和数据源无关,想复用下数据源的 hash ring,想用一个虚假的数据源 id 来处理
|
||||
// if is center node, we need to handle host type alerting rules, host type alerting rules are not related to datasource, we want to reuse the hash ring of datasource, we want to use a fake datasource id to handle it
|
||||
err := models.AlertingEngineHeartbeatWithCluster(n.ctx, n.heartbeatConfig.Endpoint, n.heartbeatConfig.EngineName, HostDatasource)
|
||||
if err != nil {
|
||||
logger.Warningf("heartbeat with cluster %s err:%v", "", err)
|
||||
}
|
||||
|
||||
servers, err := n.ActiveServers(HostDatasource)
|
||||
if err != nil {
|
||||
logger.Warningf("hearbeat %d get active server err:%v", HostDatasource, err)
|
||||
return nil
|
||||
}
|
||||
|
||||
sort.Strings(servers)
|
||||
newss := strings.Join(servers, " ")
|
||||
|
||||
oldss, exists := localss[HostDatasource]
|
||||
if exists && oldss == newss {
|
||||
return nil
|
||||
}
|
||||
|
||||
RebuildConsistentHashRing(HostDatasource, servers)
|
||||
localss[HostDatasource] = newss
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func (n *Naming) ActiveServers(datasourceId int64) ([]string, error) {
|
||||
if datasourceId == -1 {
|
||||
return nil, fmt.Errorf("cluster is empty")
|
||||
}
|
||||
|
||||
// 30秒内有心跳,就认为是活的
|
||||
return models.AlertingEngineGetsInstances(n.ctx, "datasource_id = ? and clock > ?", datasourceId, time.Now().Unix()-30)
|
||||
}
|
||||
@@ -1,74 +0,0 @@
|
||||
package process
|
||||
|
||||
import (
|
||||
"sync"
|
||||
|
||||
"github.com/ccfos/nightingale/v6/models"
|
||||
)
|
||||
|
||||
type AlertCurEventMap struct {
|
||||
sync.RWMutex
|
||||
Data map[string]*models.AlertCurEvent
|
||||
}
|
||||
|
||||
func NewAlertCurEventMap(data map[string]*models.AlertCurEvent) *AlertCurEventMap {
|
||||
if data == nil {
|
||||
return &AlertCurEventMap{
|
||||
Data: make(map[string]*models.AlertCurEvent),
|
||||
}
|
||||
}
|
||||
return &AlertCurEventMap{
|
||||
Data: data,
|
||||
}
|
||||
}
|
||||
|
||||
func (a *AlertCurEventMap) SetAll(data map[string]*models.AlertCurEvent) {
|
||||
a.Lock()
|
||||
defer a.Unlock()
|
||||
a.Data = data
|
||||
}
|
||||
|
||||
func (a *AlertCurEventMap) Set(key string, value *models.AlertCurEvent) {
|
||||
a.Lock()
|
||||
defer a.Unlock()
|
||||
a.Data[key] = value
|
||||
}
|
||||
|
||||
func (a *AlertCurEventMap) Get(key string) (*models.AlertCurEvent, bool) {
|
||||
a.RLock()
|
||||
defer a.RUnlock()
|
||||
event, exists := a.Data[key]
|
||||
return event, exists
|
||||
}
|
||||
|
||||
func (a *AlertCurEventMap) UpdateLastEvalTime(key string, lastEvalTime int64) {
|
||||
a.Lock()
|
||||
defer a.Unlock()
|
||||
event, exists := a.Data[key]
|
||||
if !exists {
|
||||
return
|
||||
}
|
||||
event.LastEvalTime = lastEvalTime
|
||||
}
|
||||
|
||||
func (a *AlertCurEventMap) Delete(key string) {
|
||||
a.Lock()
|
||||
defer a.Unlock()
|
||||
delete(a.Data, key)
|
||||
}
|
||||
|
||||
func (a *AlertCurEventMap) Keys() []string {
|
||||
a.RLock()
|
||||
defer a.RUnlock()
|
||||
keys := make([]string, 0, len(a.Data))
|
||||
for k := range a.Data {
|
||||
keys = append(keys, k)
|
||||
}
|
||||
return keys
|
||||
}
|
||||
|
||||
func (a *AlertCurEventMap) GetAll() map[string]*models.AlertCurEvent {
|
||||
a.RLock()
|
||||
defer a.RUnlock()
|
||||
return a.Data
|
||||
}
|
||||
@@ -1,441 +0,0 @@
|
||||
package process
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"fmt"
|
||||
"html/template"
|
||||
"sort"
|
||||
"strings"
|
||||
"sync"
|
||||
"time"
|
||||
|
||||
"github.com/ccfos/nightingale/v6/alert/astats"
|
||||
"github.com/ccfos/nightingale/v6/alert/common"
|
||||
"github.com/ccfos/nightingale/v6/alert/dispatch"
|
||||
"github.com/ccfos/nightingale/v6/alert/mute"
|
||||
"github.com/ccfos/nightingale/v6/alert/queue"
|
||||
"github.com/ccfos/nightingale/v6/memsto"
|
||||
"github.com/ccfos/nightingale/v6/models"
|
||||
"github.com/ccfos/nightingale/v6/pkg/ctx"
|
||||
"github.com/ccfos/nightingale/v6/pkg/tplx"
|
||||
"github.com/ccfos/nightingale/v6/prom"
|
||||
"github.com/toolkits/pkg/logger"
|
||||
"github.com/toolkits/pkg/str"
|
||||
)
|
||||
|
||||
type ExternalProcessorsType struct {
|
||||
ExternalLock sync.RWMutex
|
||||
Processors map[string]*Processor
|
||||
}
|
||||
|
||||
var ExternalProcessors ExternalProcessorsType
|
||||
|
||||
func NewExternalProcessors() *ExternalProcessorsType {
|
||||
return &ExternalProcessorsType{
|
||||
Processors: make(map[string]*Processor),
|
||||
}
|
||||
}
|
||||
|
||||
func (e *ExternalProcessorsType) GetExternalAlertRule(datasourceId, id int64) (*Processor, bool) {
|
||||
e.ExternalLock.RLock()
|
||||
defer e.ExternalLock.RUnlock()
|
||||
processor, has := e.Processors[common.RuleKey(datasourceId, id)]
|
||||
return processor, has
|
||||
}
|
||||
|
||||
type Processor struct {
|
||||
datasourceId int64
|
||||
|
||||
rule *models.AlertRule
|
||||
fires *AlertCurEventMap
|
||||
pendings *AlertCurEventMap
|
||||
inhibit bool
|
||||
|
||||
tagsMap map[string]string
|
||||
tagsArr []string
|
||||
target string
|
||||
targetNote string
|
||||
groupName string
|
||||
|
||||
atertRuleCache *memsto.AlertRuleCacheType
|
||||
TargetCache *memsto.TargetCacheType
|
||||
BusiGroupCache *memsto.BusiGroupCacheType
|
||||
alertMuteCache *memsto.AlertMuteCacheType
|
||||
datasourceCache *memsto.DatasourceCacheType
|
||||
|
||||
promClients *prom.PromClientMap
|
||||
ctx *ctx.Context
|
||||
stats *astats.Stats
|
||||
}
|
||||
|
||||
func (p *Processor) Key() string {
|
||||
return common.RuleKey(p.datasourceId, p.rule.Id)
|
||||
}
|
||||
|
||||
func (p *Processor) DatasourceId() int64 {
|
||||
return p.datasourceId
|
||||
}
|
||||
|
||||
func (p *Processor) Hash() string {
|
||||
return str.MD5(fmt.Sprintf("%d_%d_%s_%d",
|
||||
p.rule.Id,
|
||||
p.rule.PromEvalInterval,
|
||||
p.rule.RuleConfig,
|
||||
p.datasourceId,
|
||||
))
|
||||
}
|
||||
|
||||
func NewProcessor(rule *models.AlertRule, datasourceId int64, atertRuleCache *memsto.AlertRuleCacheType, targetCache *memsto.TargetCacheType,
|
||||
busiGroupCache *memsto.BusiGroupCacheType, alertMuteCache *memsto.AlertMuteCacheType, datasourceCache *memsto.DatasourceCacheType, promClients *prom.PromClientMap, ctx *ctx.Context,
|
||||
stats *astats.Stats) *Processor {
|
||||
|
||||
p := &Processor{
|
||||
datasourceId: datasourceId,
|
||||
rule: rule,
|
||||
|
||||
TargetCache: targetCache,
|
||||
BusiGroupCache: busiGroupCache,
|
||||
alertMuteCache: alertMuteCache,
|
||||
atertRuleCache: atertRuleCache,
|
||||
datasourceCache: datasourceCache,
|
||||
|
||||
promClients: promClients,
|
||||
ctx: ctx,
|
||||
stats: stats,
|
||||
}
|
||||
|
||||
p.mayHandleGroup()
|
||||
return p
|
||||
}
|
||||
|
||||
func (p *Processor) Handle(anomalyPoints []common.AnomalyPoint, from string, inhibit bool) {
|
||||
// 有可能rule的一些配置已经发生变化,比如告警接收人、callbacks等
|
||||
// 这些信息的修改是不会引起worker restart的,但是确实会影响告警处理逻辑
|
||||
// 所以,这里直接从memsto.AlertRuleCache中获取并覆盖
|
||||
p.inhibit = inhibit
|
||||
p.rule = p.atertRuleCache.Get(p.rule.Id)
|
||||
cachedRule := p.rule
|
||||
if cachedRule == nil {
|
||||
logger.Errorf("rule not found %+v", anomalyPoints)
|
||||
return
|
||||
}
|
||||
|
||||
now := time.Now().Unix()
|
||||
alertingKeys := map[string]struct{}{}
|
||||
|
||||
// 根据 event 的 tag 将 events 分组,处理告警抑制的情况
|
||||
eventsMap := make(map[string][]*models.AlertCurEvent)
|
||||
for _, anomalyPoint := range anomalyPoints {
|
||||
event := p.BuildEvent(anomalyPoint, from, now)
|
||||
// 如果 event 被 mute 了,本质也是 fire 的状态,这里无论如何都添加到 alertingKeys 中,防止 fire 的事件自动恢复了
|
||||
hash := event.Hash
|
||||
alertingKeys[hash] = struct{}{}
|
||||
if mute.IsMuted(cachedRule, event, p.TargetCache, p.alertMuteCache) {
|
||||
logger.Debugf("rule_eval:%s event:%v is muted", p.Key(), event)
|
||||
continue
|
||||
}
|
||||
tagHash := TagHash(anomalyPoint)
|
||||
eventsMap[tagHash] = append(eventsMap[tagHash], event)
|
||||
}
|
||||
|
||||
for _, events := range eventsMap {
|
||||
p.handleEvent(events)
|
||||
}
|
||||
|
||||
p.HandleRecover(alertingKeys, now)
|
||||
}
|
||||
|
||||
func (p *Processor) BuildEvent(anomalyPoint common.AnomalyPoint, from string, now int64) *models.AlertCurEvent {
|
||||
p.fillTags(anomalyPoint)
|
||||
p.mayHandleIdent()
|
||||
hash := Hash(p.rule.Id, p.datasourceId, anomalyPoint)
|
||||
ds := p.datasourceCache.GetById(p.datasourceId)
|
||||
var dsName string
|
||||
if ds != nil {
|
||||
dsName = ds.Name
|
||||
}
|
||||
|
||||
event := p.rule.GenerateNewEvent(p.ctx)
|
||||
event.TriggerTime = anomalyPoint.Timestamp
|
||||
event.TagsMap = p.tagsMap
|
||||
event.DatasourceId = p.datasourceId
|
||||
event.Cluster = dsName
|
||||
event.Hash = hash
|
||||
event.TargetIdent = p.target
|
||||
event.TargetNote = p.targetNote
|
||||
event.TriggerValue = anomalyPoint.ReadableValue()
|
||||
event.TagsJSON = p.tagsArr
|
||||
event.GroupName = p.groupName
|
||||
event.Tags = strings.Join(p.tagsArr, ",,")
|
||||
event.IsRecovered = false
|
||||
event.Callbacks = p.rule.Callbacks
|
||||
event.CallbacksJSON = p.rule.CallbacksJSON
|
||||
event.Annotations = p.rule.Annotations
|
||||
event.AnnotationsJSON = make(map[string]string)
|
||||
event.RuleConfig = p.rule.RuleConfig
|
||||
event.RuleConfigJson = p.rule.RuleConfigJson
|
||||
event.Severity = anomalyPoint.Severity
|
||||
|
||||
if from == "inner" {
|
||||
event.LastEvalTime = now
|
||||
} else {
|
||||
event.LastEvalTime = event.TriggerTime
|
||||
}
|
||||
return event
|
||||
}
|
||||
|
||||
func (p *Processor) HandleRecover(alertingKeys map[string]struct{}, now int64) {
|
||||
for _, hash := range p.pendings.Keys() {
|
||||
if _, has := alertingKeys[hash]; has {
|
||||
continue
|
||||
}
|
||||
p.pendings.Delete(hash)
|
||||
}
|
||||
|
||||
for hash := range p.fires.GetAll() {
|
||||
if _, has := alertingKeys[hash]; has {
|
||||
continue
|
||||
}
|
||||
p.RecoverSingle(hash, now, nil)
|
||||
}
|
||||
}
|
||||
|
||||
func (p *Processor) RecoverSingle(hash string, now int64, value *string) {
|
||||
cachedRule := p.rule
|
||||
if cachedRule == nil {
|
||||
return
|
||||
}
|
||||
event, has := p.fires.Get(hash)
|
||||
if !has {
|
||||
return
|
||||
}
|
||||
// 如果配置了留观时长,就不能立马恢复了
|
||||
if cachedRule.RecoverDuration > 0 && now-event.LastEvalTime < cachedRule.RecoverDuration {
|
||||
logger.Debugf("rule_eval:%s event:%v not recover", p.Key(), event)
|
||||
return
|
||||
}
|
||||
if value != nil {
|
||||
event.TriggerValue = *value
|
||||
}
|
||||
|
||||
// 没查到触发阈值的vector,姑且就认为这个vector的值恢复了
|
||||
// 我确实无法分辨,是prom中有值但是未满足阈值所以没返回,还是prom中确实丢了一些点导致没有数据可以返回,尴尬
|
||||
p.fires.Delete(hash)
|
||||
p.pendings.Delete(hash)
|
||||
|
||||
// 可能是因为调整了promql才恢复的,所以事件里边要体现最新的promql,否则用户会比较困惑
|
||||
// 当然,其实rule的各个字段都可能发生变化了,都更新一下吧
|
||||
cachedRule.UpdateEvent(event)
|
||||
event.IsRecovered = true
|
||||
event.LastEvalTime = now
|
||||
p.pushEventToQueue(event)
|
||||
}
|
||||
|
||||
func (p *Processor) handleEvent(events []*models.AlertCurEvent) {
|
||||
var fireEvents []*models.AlertCurEvent
|
||||
// severity 初始为 4, 一定为遇到比自己优先级高的事件
|
||||
severity := 4
|
||||
for _, event := range events {
|
||||
if event == nil {
|
||||
continue
|
||||
}
|
||||
if p.rule.PromForDuration == 0 {
|
||||
fireEvents = append(fireEvents, event)
|
||||
if severity > event.Severity {
|
||||
severity = event.Severity
|
||||
}
|
||||
continue
|
||||
}
|
||||
|
||||
var preTriggerTime int64
|
||||
preEvent, has := p.pendings.Get(event.Hash)
|
||||
if has {
|
||||
p.pendings.UpdateLastEvalTime(event.Hash, event.LastEvalTime)
|
||||
preTriggerTime = preEvent.TriggerTime
|
||||
} else {
|
||||
p.pendings.Set(event.Hash, event)
|
||||
preTriggerTime = event.TriggerTime
|
||||
}
|
||||
|
||||
if event.LastEvalTime-preTriggerTime+int64(event.PromEvalInterval) >= int64(p.rule.PromForDuration) {
|
||||
fireEvents = append(fireEvents, event)
|
||||
if severity > event.Severity {
|
||||
severity = event.Severity
|
||||
}
|
||||
continue
|
||||
}
|
||||
}
|
||||
|
||||
p.inhibitEvent(fireEvents, severity)
|
||||
}
|
||||
|
||||
func (p *Processor) inhibitEvent(events []*models.AlertCurEvent, highSeverity int) {
|
||||
for _, event := range events {
|
||||
if p.inhibit && event.Severity > highSeverity {
|
||||
logger.Debugf("rule_eval:%s event:%+v inhibit highSeverity:%d", p.Key(), event, highSeverity)
|
||||
continue
|
||||
}
|
||||
p.fireEvent(event)
|
||||
}
|
||||
}
|
||||
|
||||
func (p *Processor) fireEvent(event *models.AlertCurEvent) {
|
||||
// As p.rule maybe outdated, use rule from cache
|
||||
cachedRule := p.rule
|
||||
if cachedRule == nil {
|
||||
return
|
||||
}
|
||||
logger.Debugf("rule_eval:%s event:%+v fire", p.Key(), event)
|
||||
if fired, has := p.fires.Get(event.Hash); has {
|
||||
p.fires.UpdateLastEvalTime(event.Hash, event.LastEvalTime)
|
||||
|
||||
if cachedRule.NotifyRepeatStep == 0 {
|
||||
logger.Debugf("rule_eval:%s event:%+v repeat is zero nothing to do", p.Key(), event)
|
||||
// 说明不想重复通知,那就直接返回了,nothing to do
|
||||
// do not need to send alert again
|
||||
return
|
||||
}
|
||||
|
||||
// 之前发送过告警了,这次是否要继续发送,要看是否过了通道静默时间
|
||||
if event.LastEvalTime > fired.LastSentTime+int64(cachedRule.NotifyRepeatStep)*60 {
|
||||
if cachedRule.NotifyMaxNumber == 0 {
|
||||
// 最大可以发送次数如果是0,表示不想限制最大发送次数,一直发即可
|
||||
event.NotifyCurNumber = fired.NotifyCurNumber + 1
|
||||
event.FirstTriggerTime = fired.FirstTriggerTime
|
||||
p.pushEventToQueue(event)
|
||||
} else {
|
||||
// 有最大发送次数的限制,就要看已经发了几次了,是否达到了最大发送次数
|
||||
if fired.NotifyCurNumber >= cachedRule.NotifyMaxNumber {
|
||||
logger.Debugf("rule_eval:%s event:%+v reach max number", p.Key(), event)
|
||||
return
|
||||
} else {
|
||||
event.NotifyCurNumber = fired.NotifyCurNumber + 1
|
||||
event.FirstTriggerTime = fired.FirstTriggerTime
|
||||
p.pushEventToQueue(event)
|
||||
}
|
||||
}
|
||||
}
|
||||
} else {
|
||||
event.NotifyCurNumber = 1
|
||||
event.FirstTriggerTime = event.TriggerTime
|
||||
p.pushEventToQueue(event)
|
||||
}
|
||||
}
|
||||
|
||||
func (p *Processor) pushEventToQueue(e *models.AlertCurEvent) {
|
||||
if !e.IsRecovered {
|
||||
e.LastSentTime = e.LastEvalTime
|
||||
p.fires.Set(e.Hash, e)
|
||||
}
|
||||
|
||||
p.stats.CounterAlertsTotal.WithLabelValues(fmt.Sprintf("%d", e.DatasourceId)).Inc()
|
||||
dispatch.LogEvent(e, "push_queue")
|
||||
if !queue.EventQueue.PushFront(e) {
|
||||
logger.Warningf("event_push_queue: queue is full, event:%+v", e)
|
||||
}
|
||||
}
|
||||
|
||||
func (p *Processor) RecoverAlertCurEventFromDb() {
|
||||
p.pendings = NewAlertCurEventMap(nil)
|
||||
|
||||
curEvents, err := models.AlertCurEventGetByRuleIdAndCluster(p.ctx, p.rule.Id, p.datasourceId)
|
||||
if err != nil {
|
||||
logger.Errorf("recover event from db for rule:%s failed, err:%s", p.Key(), err)
|
||||
p.fires = NewAlertCurEventMap(nil)
|
||||
return
|
||||
}
|
||||
|
||||
fireMap := make(map[string]*models.AlertCurEvent)
|
||||
for _, event := range curEvents {
|
||||
event.DB2Mem()
|
||||
fireMap[event.Hash] = event
|
||||
}
|
||||
|
||||
p.fires = NewAlertCurEventMap(fireMap)
|
||||
}
|
||||
|
||||
func (p *Processor) fillTags(anomalyPoint common.AnomalyPoint) {
|
||||
// handle series tags
|
||||
tagsMap := make(map[string]string)
|
||||
for label, value := range anomalyPoint.Labels {
|
||||
tagsMap[string(label)] = string(value)
|
||||
}
|
||||
|
||||
var e = &models.AlertCurEvent{
|
||||
TagsMap: tagsMap,
|
||||
}
|
||||
|
||||
// handle rule tags
|
||||
for _, tag := range p.rule.AppendTagsJSON {
|
||||
arr := strings.SplitN(tag, "=", 2)
|
||||
|
||||
var defs = []string{
|
||||
"{{$labels := .TagsMap}}",
|
||||
"{{$value := .TriggerValue}}",
|
||||
}
|
||||
tagValue := arr[1]
|
||||
text := strings.Join(append(defs, tagValue), "")
|
||||
t, err := template.New(fmt.Sprint(p.rule.Id)).Funcs(template.FuncMap(tplx.TemplateFuncMap)).Parse(text)
|
||||
if err != nil {
|
||||
tagValue = fmt.Sprintf("parse tag value failed, err:%s", err)
|
||||
tagsMap[arr[0]] = tagValue
|
||||
continue
|
||||
}
|
||||
|
||||
var body bytes.Buffer
|
||||
err = t.Execute(&body, e)
|
||||
if err != nil {
|
||||
tagValue = fmt.Sprintf("parse tag value failed, err:%s", err)
|
||||
tagsMap[arr[0]] = tagValue
|
||||
continue
|
||||
}
|
||||
|
||||
tagsMap[arr[0]] = body.String()
|
||||
}
|
||||
|
||||
tagsMap["rulename"] = p.rule.Name
|
||||
p.tagsMap = tagsMap
|
||||
|
||||
// handle tagsArr
|
||||
p.tagsArr = labelMapToArr(tagsMap)
|
||||
}
|
||||
|
||||
func (p *Processor) mayHandleIdent() {
|
||||
// handle ident
|
||||
if ident, has := p.tagsMap["ident"]; has {
|
||||
if target, exists := p.TargetCache.Get(ident); exists {
|
||||
p.target = target.Ident
|
||||
p.targetNote = target.Note
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func (p *Processor) mayHandleGroup() {
|
||||
// handle bg
|
||||
bg := p.BusiGroupCache.GetByBusiGroupId(p.rule.GroupId)
|
||||
if bg != nil {
|
||||
p.groupName = bg.Name
|
||||
}
|
||||
}
|
||||
|
||||
func labelMapToArr(m map[string]string) []string {
|
||||
numLabels := len(m)
|
||||
|
||||
labelStrings := make([]string, 0, numLabels)
|
||||
for label, value := range m {
|
||||
labelStrings = append(labelStrings, fmt.Sprintf("%s=%s", label, value))
|
||||
}
|
||||
|
||||
if numLabels > 1 {
|
||||
sort.Strings(labelStrings)
|
||||
}
|
||||
return labelStrings
|
||||
}
|
||||
|
||||
func Hash(ruleId, datasourceId int64, vector common.AnomalyPoint) string {
|
||||
return str.MD5(fmt.Sprintf("%d_%s_%d_%d", ruleId, vector.Labels.String(), datasourceId, vector.Severity))
|
||||
}
|
||||
|
||||
func TagHash(vector common.AnomalyPoint) string {
|
||||
return str.MD5(vector.Labels.String())
|
||||
}
|
||||
@@ -1,18 +0,0 @@
|
||||
package queue
|
||||
|
||||
import (
|
||||
"time"
|
||||
|
||||
"github.com/ccfos/nightingale/v6/alert/astats"
|
||||
"github.com/toolkits/pkg/container/list"
|
||||
)
|
||||
|
||||
var EventQueue = list.NewSafeListLimited(10000000)
|
||||
|
||||
func ReportQueueSize(stats *astats.Stats) {
|
||||
for {
|
||||
time.Sleep(time.Second)
|
||||
|
||||
stats.GaugeAlertQueueSize.Set(float64(EventQueue.Len()))
|
||||
}
|
||||
}
|
||||
@@ -1,94 +0,0 @@
|
||||
package record
|
||||
|
||||
import (
|
||||
"context"
|
||||
"fmt"
|
||||
"time"
|
||||
|
||||
"github.com/ccfos/nightingale/v6/alert/aconf"
|
||||
"github.com/ccfos/nightingale/v6/alert/astats"
|
||||
"github.com/ccfos/nightingale/v6/alert/naming"
|
||||
"github.com/ccfos/nightingale/v6/memsto"
|
||||
"github.com/ccfos/nightingale/v6/prom"
|
||||
"github.com/ccfos/nightingale/v6/pushgw/writer"
|
||||
)
|
||||
|
||||
type Scheduler struct {
|
||||
// key: hash
|
||||
recordRules map[string]*RecordRuleContext
|
||||
|
||||
aconf aconf.Alert
|
||||
|
||||
recordingRuleCache *memsto.RecordingRuleCacheType
|
||||
|
||||
promClients *prom.PromClientMap
|
||||
writers *writer.WritersType
|
||||
|
||||
stats *astats.Stats
|
||||
}
|
||||
|
||||
func NewScheduler(aconf aconf.Alert, rrc *memsto.RecordingRuleCacheType, promClients *prom.PromClientMap, writers *writer.WritersType, stats *astats.Stats) *Scheduler {
|
||||
scheduler := &Scheduler{
|
||||
aconf: aconf,
|
||||
recordRules: make(map[string]*RecordRuleContext),
|
||||
|
||||
recordingRuleCache: rrc,
|
||||
|
||||
promClients: promClients,
|
||||
writers: writers,
|
||||
|
||||
stats: stats,
|
||||
}
|
||||
|
||||
go scheduler.LoopSyncRules(context.Background())
|
||||
return scheduler
|
||||
}
|
||||
|
||||
func (s *Scheduler) LoopSyncRules(ctx context.Context) {
|
||||
time.Sleep(time.Duration(s.aconf.EngineDelay) * time.Second)
|
||||
duration := 9000 * time.Millisecond
|
||||
for {
|
||||
select {
|
||||
case <-ctx.Done():
|
||||
return
|
||||
case <-time.After(duration):
|
||||
s.syncRecordRules()
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func (s *Scheduler) syncRecordRules() {
|
||||
ids := s.recordingRuleCache.GetRuleIds()
|
||||
recordRules := make(map[string]*RecordRuleContext)
|
||||
for _, id := range ids {
|
||||
rule := s.recordingRuleCache.Get(id)
|
||||
if rule == nil {
|
||||
continue
|
||||
}
|
||||
|
||||
datasourceIds := s.promClients.Hit(rule.DatasourceIdsJson)
|
||||
for _, dsId := range datasourceIds {
|
||||
if !naming.DatasourceHashRing.IsHit(dsId, fmt.Sprintf("%d", rule.Id), s.aconf.Heartbeat.Endpoint) {
|
||||
continue
|
||||
}
|
||||
|
||||
recordRule := NewRecordRuleContext(rule, dsId, s.promClients, s.writers)
|
||||
recordRules[recordRule.Hash()] = recordRule
|
||||
}
|
||||
}
|
||||
|
||||
for hash, rule := range recordRules {
|
||||
if _, has := s.recordRules[hash]; !has {
|
||||
rule.Prepare()
|
||||
rule.Start()
|
||||
s.recordRules[hash] = rule
|
||||
}
|
||||
}
|
||||
|
||||
for hash, rule := range s.recordRules {
|
||||
if _, has := recordRules[hash]; !has {
|
||||
rule.Stop()
|
||||
delete(s.recordRules, hash)
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -1,78 +0,0 @@
|
||||
package router
|
||||
|
||||
import (
|
||||
"net/http"
|
||||
|
||||
"github.com/ccfos/nightingale/v6/alert/aconf"
|
||||
"github.com/ccfos/nightingale/v6/alert/astats"
|
||||
"github.com/ccfos/nightingale/v6/alert/process"
|
||||
"github.com/ccfos/nightingale/v6/memsto"
|
||||
"github.com/ccfos/nightingale/v6/pkg/ctx"
|
||||
"github.com/ccfos/nightingale/v6/pkg/httpx"
|
||||
|
||||
"github.com/gin-gonic/gin"
|
||||
)
|
||||
|
||||
type Router struct {
|
||||
HTTP httpx.Config
|
||||
Alert aconf.Alert
|
||||
AlertMuteCache *memsto.AlertMuteCacheType
|
||||
TargetCache *memsto.TargetCacheType
|
||||
BusiGroupCache *memsto.BusiGroupCacheType
|
||||
AlertStats *astats.Stats
|
||||
Ctx *ctx.Context
|
||||
ExternalProcessors *process.ExternalProcessorsType
|
||||
}
|
||||
|
||||
func New(httpConfig httpx.Config, alert aconf.Alert, amc *memsto.AlertMuteCacheType, tc *memsto.TargetCacheType, bgc *memsto.BusiGroupCacheType,
|
||||
astats *astats.Stats, ctx *ctx.Context, externalProcessors *process.ExternalProcessorsType) *Router {
|
||||
return &Router{
|
||||
HTTP: httpConfig,
|
||||
Alert: alert,
|
||||
AlertMuteCache: amc,
|
||||
TargetCache: tc,
|
||||
BusiGroupCache: bgc,
|
||||
AlertStats: astats,
|
||||
Ctx: ctx,
|
||||
ExternalProcessors: externalProcessors,
|
||||
}
|
||||
}
|
||||
|
||||
func (rt *Router) Config(r *gin.Engine) {
|
||||
if !rt.HTTP.Alert.Enable {
|
||||
return
|
||||
}
|
||||
|
||||
service := r.Group("/v1/n9e")
|
||||
if len(rt.HTTP.Alert.BasicAuth) > 0 {
|
||||
service.Use(gin.BasicAuth(rt.HTTP.Alert.BasicAuth))
|
||||
}
|
||||
service.POST("/event", rt.pushEventToQueue)
|
||||
service.POST("/make-event", rt.makeEvent)
|
||||
}
|
||||
|
||||
func Render(c *gin.Context, data, msg interface{}) {
|
||||
if msg == nil {
|
||||
if data == nil {
|
||||
data = struct{}{}
|
||||
}
|
||||
c.JSON(http.StatusOK, gin.H{"data": data, "error": ""})
|
||||
} else {
|
||||
c.JSON(http.StatusOK, gin.H{"error": gin.H{"message": msg}})
|
||||
}
|
||||
}
|
||||
|
||||
func Dangerous(c *gin.Context, v interface{}, code ...int) {
|
||||
if v == nil {
|
||||
return
|
||||
}
|
||||
|
||||
switch t := v.(type) {
|
||||
case string:
|
||||
if t != "" {
|
||||
c.JSON(http.StatusOK, gin.H{"error": gin.H{"message": v}})
|
||||
}
|
||||
case error:
|
||||
c.JSON(http.StatusOK, gin.H{"error": gin.H{"message": t.Error()}})
|
||||
}
|
||||
}
|
||||
@@ -1,97 +0,0 @@
|
||||
package sender
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"os"
|
||||
"os/exec"
|
||||
"time"
|
||||
|
||||
"github.com/ccfos/nightingale/v6/models"
|
||||
|
||||
"github.com/toolkits/pkg/file"
|
||||
"github.com/toolkits/pkg/logger"
|
||||
"github.com/toolkits/pkg/sys"
|
||||
)
|
||||
|
||||
func MayPluginNotify(noticeBytes []byte, notifyScript models.NotifyScript) {
|
||||
if len(noticeBytes) == 0 {
|
||||
return
|
||||
}
|
||||
alertingCallScript(noticeBytes, notifyScript)
|
||||
}
|
||||
|
||||
func alertingCallScript(stdinBytes []byte, notifyScript models.NotifyScript) {
|
||||
// not enable or no notify.py? do nothing
|
||||
config := notifyScript
|
||||
if !config.Enable || config.Content == "" {
|
||||
return
|
||||
}
|
||||
|
||||
fpath := ".notify_scriptt"
|
||||
if config.Type == 1 {
|
||||
fpath = config.Content
|
||||
} else {
|
||||
rewrite := true
|
||||
if file.IsExist(fpath) {
|
||||
oldContent, err := file.ToString(fpath)
|
||||
if err != nil {
|
||||
logger.Errorf("event_notify: read script file err: %v", err)
|
||||
return
|
||||
}
|
||||
|
||||
if oldContent == config.Content {
|
||||
rewrite = false
|
||||
}
|
||||
}
|
||||
|
||||
if rewrite {
|
||||
_, err := file.WriteString(fpath, config.Content)
|
||||
if err != nil {
|
||||
logger.Errorf("event_notify: write script file err: %v", err)
|
||||
return
|
||||
}
|
||||
|
||||
err = os.Chmod(fpath, 0777)
|
||||
if err != nil {
|
||||
logger.Errorf("event_notify: chmod script file err: %v", err)
|
||||
return
|
||||
}
|
||||
}
|
||||
fpath = "./" + fpath
|
||||
}
|
||||
|
||||
cmd := exec.Command(fpath)
|
||||
cmd.Stdin = bytes.NewReader(stdinBytes)
|
||||
|
||||
// combine stdout and stderr
|
||||
var buf bytes.Buffer
|
||||
cmd.Stdout = &buf
|
||||
cmd.Stderr = &buf
|
||||
|
||||
err := startCmd(cmd)
|
||||
if err != nil {
|
||||
logger.Errorf("event_notify: run cmd err: %v", err)
|
||||
return
|
||||
}
|
||||
|
||||
err, isTimeout := sys.WrapTimeout(cmd, time.Duration(config.Timeout)*time.Second)
|
||||
|
||||
if isTimeout {
|
||||
if err == nil {
|
||||
logger.Errorf("event_notify: timeout and killed process %s", fpath)
|
||||
}
|
||||
|
||||
if err != nil {
|
||||
logger.Errorf("event_notify: kill process %s occur error %v", fpath, err)
|
||||
}
|
||||
|
||||
return
|
||||
}
|
||||
|
||||
if err != nil {
|
||||
logger.Errorf("event_notify: exec script %s occur error: %v, output: %s", fpath, err, buf.String())
|
||||
return
|
||||
}
|
||||
|
||||
logger.Infof("event_notify: exec %s output: %s", fpath, buf.String())
|
||||
}
|
||||
@@ -1,62 +0,0 @@
|
||||
package sender
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"html/template"
|
||||
|
||||
"github.com/ccfos/nightingale/v6/alert/aconf"
|
||||
"github.com/ccfos/nightingale/v6/memsto"
|
||||
"github.com/ccfos/nightingale/v6/models"
|
||||
)
|
||||
|
||||
type (
|
||||
// Sender 发送消息通知的接口
|
||||
Sender interface {
|
||||
Send(ctx MessageContext)
|
||||
}
|
||||
|
||||
// MessageContext 一个event所生成的告警通知的上下文
|
||||
MessageContext struct {
|
||||
Users []*models.User
|
||||
Rule *models.AlertRule
|
||||
Event *models.AlertCurEvent
|
||||
}
|
||||
)
|
||||
|
||||
func NewSender(key string, tpls map[string]*template.Template, smtp aconf.SMTPConfig) Sender {
|
||||
switch key {
|
||||
case models.Dingtalk:
|
||||
return &DingtalkSender{tpl: tpls[models.Dingtalk]}
|
||||
case models.Wecom:
|
||||
return &WecomSender{tpl: tpls[models.Wecom]}
|
||||
case models.Feishu:
|
||||
return &FeishuSender{tpl: tpls[models.Feishu]}
|
||||
case models.Email:
|
||||
return &EmailSender{subjectTpl: tpls["mailsubject"], contentTpl: tpls[models.Email], smtp: smtp}
|
||||
case models.Mm:
|
||||
return &MmSender{tpl: tpls[models.Mm]}
|
||||
case models.Telegram:
|
||||
return &TelegramSender{tpl: tpls[models.Telegram]}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func BuildMessageContext(rule *models.AlertRule, event *models.AlertCurEvent, uids []int64, userCache *memsto.UserCacheType) MessageContext {
|
||||
users := userCache.GetByUserIds(uids)
|
||||
return MessageContext{
|
||||
Rule: rule,
|
||||
Event: event,
|
||||
Users: users,
|
||||
}
|
||||
}
|
||||
|
||||
func BuildTplMessage(tpl *template.Template, event *models.AlertCurEvent) string {
|
||||
if tpl == nil {
|
||||
return "tpl for current sender not found, please check configuration"
|
||||
}
|
||||
var body bytes.Buffer
|
||||
if err := tpl.Execute(&body, event); err != nil {
|
||||
return err.Error()
|
||||
}
|
||||
return body.String()
|
||||
}
|
||||
@@ -1,35 +0,0 @@
|
||||
package cconf
|
||||
|
||||
import (
|
||||
"github.com/gin-gonic/gin"
|
||||
)
|
||||
|
||||
type Center struct {
|
||||
Plugins []Plugin
|
||||
BasicAuth gin.Accounts
|
||||
MetricsYamlFile string
|
||||
OpsYamlFile string
|
||||
BuiltinIntegrationsDir string
|
||||
I18NHeaderKey string
|
||||
MetricDesc MetricDescType
|
||||
TargetMetrics map[string]string
|
||||
AnonymousAccess AnonymousAccess
|
||||
}
|
||||
|
||||
type Plugin struct {
|
||||
Id int64 `json:"id"`
|
||||
Category string `json:"category"`
|
||||
Type string `json:"plugin_type"`
|
||||
TypeName string `json:"plugin_type_name"`
|
||||
}
|
||||
|
||||
type AnonymousAccess struct {
|
||||
PromQuerier bool
|
||||
AlertDetail bool
|
||||
}
|
||||
|
||||
func (c *Center) PreCheck() {
|
||||
if len(c.Plugins) == 0 {
|
||||
c.Plugins = Plugins
|
||||
}
|
||||
}
|
||||
@@ -1,60 +0,0 @@
|
||||
package cconf
|
||||
|
||||
const EVENT_EXAMPLE = `
|
||||
{
|
||||
"id": 1000000,
|
||||
"cate": "prometheus",
|
||||
"datasource_id": 1,
|
||||
"group_id": 1,
|
||||
"group_name": "Default Busi Group",
|
||||
"hash": "2cb966f9ba1cdc7af94c3796e855955a",
|
||||
"rule_id": 23,
|
||||
"rule_name": "测试告警",
|
||||
"rule_note": "测试告警",
|
||||
"rule_prod": "metric",
|
||||
"rule_config": {
|
||||
"queries": [
|
||||
{
|
||||
"key": "all_hosts",
|
||||
"op": "==",
|
||||
"values": []
|
||||
}
|
||||
],
|
||||
"triggers": [
|
||||
{
|
||||
"duration": 3,
|
||||
"percent": 10,
|
||||
"severity": 3,
|
||||
"type": "pct_target_miss"
|
||||
}
|
||||
]
|
||||
},
|
||||
"prom_for_duration": 60,
|
||||
"prom_eval_interval": 30,
|
||||
"callbacks": ["https://n9e.github.io"],
|
||||
"notify_recovered": 1,
|
||||
"notify_channels": ["dingtalk"],
|
||||
"notify_groups": [],
|
||||
"notify_groups_obj": null,
|
||||
"target_ident": "host01",
|
||||
"target_note": "机器备注",
|
||||
"trigger_time": 1677229517,
|
||||
"trigger_value": "2273533952",
|
||||
"tags": [
|
||||
"__name__=disk_free",
|
||||
"dc=qcloud-dev",
|
||||
"device=vda1",
|
||||
"fstype=ext4",
|
||||
"ident=tt-fc-dev00.nj"
|
||||
],
|
||||
"is_recovered": false,
|
||||
"notify_users_obj": null,
|
||||
"last_eval_time": 1677229517,
|
||||
"last_sent_time": 1677229517,
|
||||
"notify_cur_number": 1,
|
||||
"first_trigger_time": 1677229517,
|
||||
"annotations": {
|
||||
"summary": "测试告警"
|
||||
}
|
||||
}
|
||||
`
|
||||
@@ -1,39 +0,0 @@
|
||||
package cconf
|
||||
|
||||
import (
|
||||
"path"
|
||||
|
||||
"github.com/toolkits/pkg/file"
|
||||
"github.com/toolkits/pkg/runner"
|
||||
)
|
||||
|
||||
var Operations = Operation{}
|
||||
|
||||
type Operation struct {
|
||||
Ops []Ops `yaml:"ops"`
|
||||
}
|
||||
|
||||
type Ops struct {
|
||||
Name string `yaml:"name" json:"name"`
|
||||
Cname string `yaml:"cname" json:"cname"`
|
||||
Ops []string `yaml:"ops" json:"ops"`
|
||||
}
|
||||
|
||||
func LoadOpsYaml(opsYamlFile string) error {
|
||||
fp := opsYamlFile
|
||||
if fp == "" {
|
||||
fp = path.Join(runner.Cwd, "etc", "ops.yaml")
|
||||
}
|
||||
if !file.IsExist(fp) {
|
||||
return nil
|
||||
}
|
||||
return file.ReadYaml(fp, &Operations)
|
||||
}
|
||||
|
||||
func GetAllOps(ops []Ops) []string {
|
||||
var ret []string
|
||||
for _, op := range ops {
|
||||
ret = append(ret, op.Ops...)
|
||||
}
|
||||
return ret
|
||||
}
|
||||
@@ -1,22 +0,0 @@
|
||||
package cconf
|
||||
|
||||
var Plugins = []Plugin{
|
||||
{
|
||||
Id: 1,
|
||||
Category: "timeseries",
|
||||
Type: "prometheus",
|
||||
TypeName: "Prometheus Like",
|
||||
},
|
||||
{
|
||||
Id: 2,
|
||||
Category: "logging",
|
||||
Type: "elasticsearch",
|
||||
TypeName: "Elasticsearch",
|
||||
},
|
||||
{
|
||||
Id: 3,
|
||||
Category: "logging",
|
||||
Type: "jaeger",
|
||||
TypeName: "Jaeger",
|
||||
},
|
||||
}
|
||||
@@ -1,96 +0,0 @@
|
||||
package center
|
||||
|
||||
import (
|
||||
"context"
|
||||
"fmt"
|
||||
|
||||
"github.com/ccfos/nightingale/v6/alert"
|
||||
"github.com/ccfos/nightingale/v6/alert/astats"
|
||||
"github.com/ccfos/nightingale/v6/alert/process"
|
||||
"github.com/ccfos/nightingale/v6/center/cconf"
|
||||
"github.com/ccfos/nightingale/v6/center/metas"
|
||||
"github.com/ccfos/nightingale/v6/center/sso"
|
||||
"github.com/ccfos/nightingale/v6/conf"
|
||||
"github.com/ccfos/nightingale/v6/memsto"
|
||||
"github.com/ccfos/nightingale/v6/models"
|
||||
"github.com/ccfos/nightingale/v6/pkg/ctx"
|
||||
"github.com/ccfos/nightingale/v6/pkg/httpx"
|
||||
"github.com/ccfos/nightingale/v6/pkg/i18nx"
|
||||
"github.com/ccfos/nightingale/v6/pkg/logx"
|
||||
"github.com/ccfos/nightingale/v6/prom"
|
||||
"github.com/ccfos/nightingale/v6/pushgw/idents"
|
||||
"github.com/ccfos/nightingale/v6/pushgw/writer"
|
||||
"github.com/ccfos/nightingale/v6/storage"
|
||||
|
||||
alertrt "github.com/ccfos/nightingale/v6/alert/router"
|
||||
centerrt "github.com/ccfos/nightingale/v6/center/router"
|
||||
pushgwrt "github.com/ccfos/nightingale/v6/pushgw/router"
|
||||
)
|
||||
|
||||
func Initialize(configDir string, cryptoKey string) (func(), error) {
|
||||
config, err := conf.InitConfig(configDir, cryptoKey)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to init config: %v", err)
|
||||
}
|
||||
|
||||
cconf.LoadMetricsYaml(config.Center.MetricsYamlFile)
|
||||
cconf.LoadOpsYaml(config.Center.OpsYamlFile)
|
||||
|
||||
logxClean, err := logx.Init(config.Log)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
i18nx.Init()
|
||||
|
||||
db, err := storage.New(config.DB)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
ctx := ctx.NewContext(context.Background(), db)
|
||||
models.InitRoot(ctx)
|
||||
|
||||
redis, err := storage.NewRedis(config.Redis)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
metas := metas.New(redis)
|
||||
idents := idents.New(db)
|
||||
|
||||
syncStats := memsto.NewSyncStats()
|
||||
alertStats := astats.NewSyncStats()
|
||||
|
||||
sso := sso.Init(config.Center, ctx)
|
||||
|
||||
busiGroupCache := memsto.NewBusiGroupCache(ctx, syncStats)
|
||||
targetCache := memsto.NewTargetCache(ctx, syncStats, redis)
|
||||
dsCache := memsto.NewDatasourceCache(ctx, syncStats)
|
||||
alertMuteCache := memsto.NewAlertMuteCache(ctx, syncStats)
|
||||
alertRuleCache := memsto.NewAlertRuleCache(ctx, syncStats)
|
||||
notifyConfigCache := memsto.NewNotifyConfigCache(ctx)
|
||||
|
||||
promClients := prom.NewPromClient(ctx, config.Alert.Heartbeat)
|
||||
|
||||
externalProcessors := process.NewExternalProcessors()
|
||||
alert.Start(config.Alert, config.Pushgw, syncStats, alertStats, externalProcessors, targetCache, busiGroupCache, alertMuteCache, alertRuleCache, notifyConfigCache, dsCache, ctx, promClients, true)
|
||||
|
||||
writers := writer.NewWriters(config.Pushgw)
|
||||
|
||||
alertrtRouter := alertrt.New(config.HTTP, config.Alert, alertMuteCache, targetCache, busiGroupCache, alertStats, ctx, externalProcessors)
|
||||
centerRouter := centerrt.New(config.HTTP, config.Center, cconf.Operations, dsCache, notifyConfigCache, promClients, redis, sso, ctx, metas, targetCache)
|
||||
pushgwRouter := pushgwrt.New(config.HTTP, config.Pushgw, targetCache, busiGroupCache, idents, writers, ctx)
|
||||
|
||||
r := httpx.GinEngine(config.Global.RunMode, config.HTTP)
|
||||
|
||||
centerRouter.Config(r)
|
||||
alertrtRouter.Config(r)
|
||||
pushgwRouter.Config(r)
|
||||
|
||||
httpClean := httpx.Init(config.HTTP, r)
|
||||
|
||||
return func() {
|
||||
logxClean()
|
||||
httpClean()
|
||||
}, nil
|
||||
}
|
||||
@@ -1,104 +0,0 @@
|
||||
package metas
|
||||
|
||||
import (
|
||||
"context"
|
||||
"sync"
|
||||
"time"
|
||||
|
||||
"github.com/ccfos/nightingale/v6/models"
|
||||
"github.com/ccfos/nightingale/v6/storage"
|
||||
|
||||
"github.com/toolkits/pkg/logger"
|
||||
)
|
||||
|
||||
type Set struct {
|
||||
sync.RWMutex
|
||||
items map[string]models.HostMeta
|
||||
redis storage.Redis
|
||||
}
|
||||
|
||||
func New(redis storage.Redis) *Set {
|
||||
set := &Set{
|
||||
items: make(map[string]models.HostMeta),
|
||||
redis: redis,
|
||||
}
|
||||
|
||||
set.Init()
|
||||
return set
|
||||
}
|
||||
|
||||
func (s *Set) Init() {
|
||||
go s.LoopPersist()
|
||||
}
|
||||
|
||||
func (s *Set) MSet(items map[string]models.HostMeta) {
|
||||
s.Lock()
|
||||
defer s.Unlock()
|
||||
for ident, meta := range items {
|
||||
s.items[ident] = meta
|
||||
}
|
||||
}
|
||||
|
||||
func (s *Set) Set(ident string, meta models.HostMeta) {
|
||||
s.Lock()
|
||||
defer s.Unlock()
|
||||
s.items[ident] = meta
|
||||
}
|
||||
|
||||
func (s *Set) LoopPersist() {
|
||||
for {
|
||||
time.Sleep(time.Second)
|
||||
s.persist()
|
||||
}
|
||||
}
|
||||
|
||||
func (s *Set) persist() {
|
||||
var items map[string]models.HostMeta
|
||||
|
||||
s.Lock()
|
||||
if len(s.items) == 0 {
|
||||
s.Unlock()
|
||||
return
|
||||
}
|
||||
|
||||
items = s.items
|
||||
s.items = make(map[string]models.HostMeta)
|
||||
s.Unlock()
|
||||
|
||||
s.updateMeta(items)
|
||||
}
|
||||
|
||||
func (s *Set) updateMeta(items map[string]models.HostMeta) {
|
||||
m := make(map[string]models.HostMeta, 100)
|
||||
num := 0
|
||||
|
||||
for _, meta := range items {
|
||||
m[meta.Hostname] = meta
|
||||
num++
|
||||
if num == 100 {
|
||||
if err := s.updateTargets(m); err != nil {
|
||||
logger.Errorf("failed to update targets: %v", err)
|
||||
}
|
||||
m = make(map[string]models.HostMeta, 100)
|
||||
num = 0
|
||||
}
|
||||
}
|
||||
|
||||
if err := s.updateTargets(m); err != nil {
|
||||
logger.Errorf("failed to update targets: %v", err)
|
||||
}
|
||||
}
|
||||
|
||||
func (s *Set) updateTargets(m map[string]models.HostMeta) error {
|
||||
count := int64(len(m))
|
||||
if count == 0 {
|
||||
return nil
|
||||
}
|
||||
|
||||
newMap := make(map[string]interface{}, count)
|
||||
for ident, meta := range m {
|
||||
newMap[models.WrapIdent(ident)] = meta
|
||||
}
|
||||
err := storage.MSet(context.Background(), s.redis, newMap)
|
||||
return err
|
||||
}
|
||||
@@ -1,410 +0,0 @@
|
||||
package router
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"net/http"
|
||||
"path"
|
||||
"runtime"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
"github.com/ccfos/nightingale/v6/center/cconf"
|
||||
"github.com/ccfos/nightingale/v6/center/cstats"
|
||||
"github.com/ccfos/nightingale/v6/center/metas"
|
||||
"github.com/ccfos/nightingale/v6/center/sso"
|
||||
"github.com/ccfos/nightingale/v6/memsto"
|
||||
"github.com/ccfos/nightingale/v6/pkg/aop"
|
||||
"github.com/ccfos/nightingale/v6/pkg/ctx"
|
||||
"github.com/ccfos/nightingale/v6/pkg/httpx"
|
||||
"github.com/ccfos/nightingale/v6/prom"
|
||||
"github.com/ccfos/nightingale/v6/storage"
|
||||
"github.com/toolkits/pkg/runner"
|
||||
|
||||
"github.com/gin-gonic/gin"
|
||||
)
|
||||
|
||||
type Router struct {
|
||||
HTTP httpx.Config
|
||||
Center cconf.Center
|
||||
Operations cconf.Operation
|
||||
DatasourceCache *memsto.DatasourceCacheType
|
||||
NotifyConfigCache *memsto.NotifyConfigCacheType
|
||||
PromClients *prom.PromClientMap
|
||||
Redis storage.Redis
|
||||
MetaSet *metas.Set
|
||||
TargetCache *memsto.TargetCacheType
|
||||
Sso *sso.SsoClient
|
||||
Ctx *ctx.Context
|
||||
}
|
||||
|
||||
func New(httpConfig httpx.Config, center cconf.Center, operations cconf.Operation, ds *memsto.DatasourceCacheType, ncc *memsto.NotifyConfigCacheType,
|
||||
pc *prom.PromClientMap, redis storage.Redis, sso *sso.SsoClient, ctx *ctx.Context, metaSet *metas.Set, tc *memsto.TargetCacheType) *Router {
|
||||
return &Router{
|
||||
HTTP: httpConfig,
|
||||
Center: center,
|
||||
Operations: operations,
|
||||
DatasourceCache: ds,
|
||||
NotifyConfigCache: ncc,
|
||||
PromClients: pc,
|
||||
Redis: redis,
|
||||
MetaSet: metaSet,
|
||||
TargetCache: tc,
|
||||
Sso: sso,
|
||||
Ctx: ctx,
|
||||
}
|
||||
}
|
||||
|
||||
func stat() gin.HandlerFunc {
|
||||
return func(c *gin.Context) {
|
||||
start := time.Now()
|
||||
c.Next()
|
||||
|
||||
code := fmt.Sprintf("%d", c.Writer.Status())
|
||||
method := c.Request.Method
|
||||
labels := []string{cstats.Service, code, c.FullPath(), method}
|
||||
|
||||
cstats.RequestCounter.WithLabelValues(labels...).Inc()
|
||||
cstats.RequestDuration.WithLabelValues(labels...).Observe(float64(time.Since(start).Seconds()))
|
||||
}
|
||||
}
|
||||
|
||||
func languageDetector(i18NHeaderKey string) gin.HandlerFunc {
|
||||
headerKey := i18NHeaderKey
|
||||
return func(c *gin.Context) {
|
||||
if headerKey != "" {
|
||||
lang := c.GetHeader(headerKey)
|
||||
if lang != "" {
|
||||
if strings.HasPrefix(lang, "zh") {
|
||||
c.Request.Header.Set("X-Language", "zh")
|
||||
} else if strings.HasPrefix(lang, "en") {
|
||||
c.Request.Header.Set("X-Language", "en")
|
||||
} else {
|
||||
c.Request.Header.Set("X-Language", lang)
|
||||
}
|
||||
} else {
|
||||
c.Request.Header.Set("X-Language", "en")
|
||||
}
|
||||
}
|
||||
c.Next()
|
||||
}
|
||||
}
|
||||
|
||||
func (rt *Router) configNoRoute(r *gin.Engine) {
|
||||
r.NoRoute(func(c *gin.Context) {
|
||||
arr := strings.Split(c.Request.URL.Path, ".")
|
||||
suffix := arr[len(arr)-1]
|
||||
switch suffix {
|
||||
case "png", "jpeg", "jpg", "svg", "ico", "gif", "css", "js", "html", "htm", "gz", "zip", "map":
|
||||
cwdarr := []string{"/"}
|
||||
if runtime.GOOS == "windows" {
|
||||
cwdarr[0] = ""
|
||||
}
|
||||
cwdarr = append(cwdarr, strings.Split(runner.Cwd, "/")...)
|
||||
cwdarr = append(cwdarr, "pub")
|
||||
cwdarr = append(cwdarr, strings.Split(c.Request.URL.Path, "/")...)
|
||||
c.File(path.Join(cwdarr...))
|
||||
default:
|
||||
cwdarr := []string{"/"}
|
||||
if runtime.GOOS == "windows" {
|
||||
cwdarr[0] = ""
|
||||
}
|
||||
cwdarr = append(cwdarr, strings.Split(runner.Cwd, "/")...)
|
||||
cwdarr = append(cwdarr, "pub")
|
||||
cwdarr = append(cwdarr, "index.html")
|
||||
c.File(path.Join(cwdarr...))
|
||||
}
|
||||
})
|
||||
}
|
||||
|
||||
func (rt *Router) Config(r *gin.Engine) {
|
||||
r.Use(stat())
|
||||
r.Use(languageDetector(rt.Center.I18NHeaderKey))
|
||||
r.Use(aop.Recovery())
|
||||
|
||||
pagesPrefix := "/api/n9e"
|
||||
pages := r.Group(pagesPrefix)
|
||||
{
|
||||
|
||||
if rt.Center.AnonymousAccess.PromQuerier {
|
||||
pages.Any("/proxy/:id/*url", rt.dsProxy)
|
||||
pages.POST("/query-range-batch", rt.promBatchQueryRange)
|
||||
pages.POST("/query-instant-batch", rt.promBatchQueryInstant)
|
||||
pages.GET("/datasource/brief", rt.datasourceBriefs)
|
||||
} else {
|
||||
pages.Any("/proxy/:id/*url", rt.auth(), rt.dsProxy)
|
||||
pages.POST("/query-range-batch", rt.auth(), rt.promBatchQueryRange)
|
||||
pages.POST("/query-instant-batch", rt.auth(), rt.promBatchQueryInstant)
|
||||
pages.GET("/datasource/brief", rt.auth(), rt.datasourceBriefs)
|
||||
}
|
||||
|
||||
pages.POST("/auth/login", rt.jwtMock(), rt.loginPost)
|
||||
pages.POST("/auth/logout", rt.jwtMock(), rt.logoutPost)
|
||||
pages.POST("/auth/refresh", rt.jwtMock(), rt.refreshPost)
|
||||
|
||||
pages.GET("/auth/sso-config", rt.ssoConfigNameGet)
|
||||
pages.GET("/auth/redirect", rt.loginRedirect)
|
||||
pages.GET("/auth/redirect/cas", rt.loginRedirectCas)
|
||||
pages.GET("/auth/redirect/oauth", rt.loginRedirectOAuth)
|
||||
pages.GET("/auth/callback", rt.loginCallback)
|
||||
pages.GET("/auth/callback/cas", rt.loginCallbackCas)
|
||||
pages.GET("/auth/callback/oauth", rt.loginCallbackOAuth)
|
||||
|
||||
pages.GET("/metrics/desc", rt.metricsDescGetFile)
|
||||
pages.POST("/metrics/desc", rt.metricsDescGetMap)
|
||||
|
||||
pages.GET("/notify-channels", rt.notifyChannelsGets)
|
||||
pages.GET("/contact-keys", rt.contactKeysGets)
|
||||
|
||||
pages.GET("/self/perms", rt.auth(), rt.user(), rt.permsGets)
|
||||
pages.GET("/self/profile", rt.auth(), rt.user(), rt.selfProfileGet)
|
||||
pages.PUT("/self/profile", rt.auth(), rt.user(), rt.selfProfilePut)
|
||||
pages.PUT("/self/password", rt.auth(), rt.user(), rt.selfPasswordPut)
|
||||
|
||||
pages.GET("/users", rt.auth(), rt.user(), rt.perm("/users"), rt.userGets)
|
||||
pages.POST("/users", rt.auth(), rt.admin(), rt.userAddPost)
|
||||
pages.GET("/user/:id/profile", rt.auth(), rt.userProfileGet)
|
||||
pages.PUT("/user/:id/profile", rt.auth(), rt.admin(), rt.userProfilePut)
|
||||
pages.PUT("/user/:id/password", rt.auth(), rt.admin(), rt.userPasswordPut)
|
||||
pages.DELETE("/user/:id", rt.auth(), rt.admin(), rt.userDel)
|
||||
|
||||
pages.GET("/metric-views", rt.auth(), rt.metricViewGets)
|
||||
pages.DELETE("/metric-views", rt.auth(), rt.user(), rt.metricViewDel)
|
||||
pages.POST("/metric-views", rt.auth(), rt.user(), rt.metricViewAdd)
|
||||
pages.PUT("/metric-views", rt.auth(), rt.user(), rt.metricViewPut)
|
||||
|
||||
pages.GET("/user-groups", rt.auth(), rt.user(), rt.userGroupGets)
|
||||
pages.POST("/user-groups", rt.auth(), rt.user(), rt.perm("/user-groups/add"), rt.userGroupAdd)
|
||||
pages.GET("/user-group/:id", rt.auth(), rt.user(), rt.userGroupGet)
|
||||
pages.PUT("/user-group/:id", rt.auth(), rt.user(), rt.perm("/user-groups/put"), rt.userGroupWrite(), rt.userGroupPut)
|
||||
pages.DELETE("/user-group/:id", rt.auth(), rt.user(), rt.perm("/user-groups/del"), rt.userGroupWrite(), rt.userGroupDel)
|
||||
pages.POST("/user-group/:id/members", rt.auth(), rt.user(), rt.perm("/user-groups/put"), rt.userGroupWrite(), rt.userGroupMemberAdd)
|
||||
pages.DELETE("/user-group/:id/members", rt.auth(), rt.user(), rt.perm("/user-groups/put"), rt.userGroupWrite(), rt.userGroupMemberDel)
|
||||
|
||||
pages.GET("/busi-groups", rt.auth(), rt.user(), rt.busiGroupGets)
|
||||
pages.POST("/busi-groups", rt.auth(), rt.user(), rt.perm("/busi-groups/add"), rt.busiGroupAdd)
|
||||
pages.GET("/busi-groups/alertings", rt.auth(), rt.busiGroupAlertingsGets)
|
||||
pages.GET("/busi-group/:id", rt.auth(), rt.user(), rt.bgro(), rt.busiGroupGet)
|
||||
pages.PUT("/busi-group/:id", rt.auth(), rt.user(), rt.perm("/busi-groups/put"), rt.bgrw(), rt.busiGroupPut)
|
||||
pages.POST("/busi-group/:id/members", rt.auth(), rt.user(), rt.perm("/busi-groups/put"), rt.bgrw(), rt.busiGroupMemberAdd)
|
||||
pages.DELETE("/busi-group/:id/members", rt.auth(), rt.user(), rt.perm("/busi-groups/put"), rt.bgrw(), rt.busiGroupMemberDel)
|
||||
pages.DELETE("/busi-group/:id", rt.auth(), rt.user(), rt.perm("/busi-groups/del"), rt.bgrw(), rt.busiGroupDel)
|
||||
pages.GET("/busi-group/:id/perm/:perm", rt.auth(), rt.user(), rt.checkBusiGroupPerm)
|
||||
|
||||
pages.GET("/targets", rt.auth(), rt.user(), rt.targetGets)
|
||||
pages.POST("/target/list", rt.auth(), rt.user(), rt.targetGetsByHostFilter)
|
||||
pages.DELETE("/targets", rt.auth(), rt.user(), rt.perm("/targets/del"), rt.targetDel)
|
||||
pages.GET("/targets/tags", rt.auth(), rt.user(), rt.targetGetTags)
|
||||
pages.POST("/targets/tags", rt.auth(), rt.user(), rt.perm("/targets/put"), rt.targetBindTagsByFE)
|
||||
pages.DELETE("/targets/tags", rt.auth(), rt.user(), rt.perm("/targets/put"), rt.targetUnbindTagsByFE)
|
||||
pages.PUT("/targets/note", rt.auth(), rt.user(), rt.perm("/targets/put"), rt.targetUpdateNote)
|
||||
pages.PUT("/targets/bgid", rt.auth(), rt.user(), rt.perm("/targets/put"), rt.targetUpdateBgid)
|
||||
|
||||
pages.POST("/builtin-cate-favorite", rt.auth(), rt.user(), rt.builtinCateFavoriteAdd)
|
||||
pages.DELETE("/builtin-cate-favorite/:name", rt.auth(), rt.user(), rt.builtinCateFavoriteDel)
|
||||
|
||||
pages.GET("/builtin-boards", rt.builtinBoardGets)
|
||||
pages.GET("/builtin-board/:name", rt.builtinBoardGet)
|
||||
pages.GET("/dashboards/builtin/list", rt.builtinBoardGets)
|
||||
pages.GET("/builtin-boards-cates", rt.auth(), rt.user(), rt.builtinBoardCateGets)
|
||||
pages.POST("/builtin-boards-detail", rt.auth(), rt.user(), rt.builtinBoardDetailGets)
|
||||
pages.GET("/integrations/icon/:cate/:name", rt.builtinIcon)
|
||||
|
||||
pages.GET("/busi-group/:id/boards", rt.auth(), rt.user(), rt.perm("/dashboards"), rt.bgro(), rt.boardGets)
|
||||
pages.POST("/busi-group/:id/boards", rt.auth(), rt.user(), rt.perm("/dashboards/add"), rt.bgrw(), rt.boardAdd)
|
||||
pages.POST("/busi-group/:id/board/:bid/clone", rt.auth(), rt.user(), rt.perm("/dashboards/add"), rt.bgrw(), rt.boardClone)
|
||||
|
||||
pages.GET("/board/:bid", rt.boardGet)
|
||||
pages.GET("/board/:bid/pure", rt.boardPureGet)
|
||||
pages.PUT("/board/:bid", rt.auth(), rt.user(), rt.perm("/dashboards/put"), rt.boardPut)
|
||||
pages.PUT("/board/:bid/configs", rt.auth(), rt.user(), rt.perm("/dashboards/put"), rt.boardPutConfigs)
|
||||
pages.PUT("/board/:bid/public", rt.auth(), rt.user(), rt.perm("/dashboards/put"), rt.boardPutPublic)
|
||||
pages.DELETE("/boards", rt.auth(), rt.user(), rt.perm("/dashboards/del"), rt.boardDel)
|
||||
|
||||
pages.GET("/share-charts", rt.chartShareGets)
|
||||
pages.POST("/share-charts", rt.auth(), rt.chartShareAdd)
|
||||
|
||||
pages.GET("/alert-rules/builtin/alerts-cates", rt.auth(), rt.user(), rt.builtinAlertCateGets)
|
||||
pages.GET("/alert-rules/builtin/list", rt.auth(), rt.user(), rt.builtinAlertRules)
|
||||
|
||||
pages.GET("/busi-group/:id/alert-rules", rt.auth(), rt.user(), rt.perm("/alert-rules"), rt.alertRuleGets)
|
||||
pages.POST("/busi-group/:id/alert-rules", rt.auth(), rt.user(), rt.perm("/alert-rules/add"), rt.bgrw(), rt.alertRuleAddByFE)
|
||||
pages.POST("/busi-group/:id/alert-rules/import", rt.auth(), rt.user(), rt.perm("/alert-rules/add"), rt.bgrw(), rt.alertRuleAddByImport)
|
||||
pages.DELETE("/busi-group/:id/alert-rules", rt.auth(), rt.user(), rt.perm("/alert-rules/del"), rt.bgrw(), rt.alertRuleDel)
|
||||
pages.PUT("/busi-group/:id/alert-rules/fields", rt.auth(), rt.user(), rt.perm("/alert-rules/put"), rt.bgrw(), rt.alertRulePutFields)
|
||||
pages.PUT("/busi-group/:id/alert-rule/:arid", rt.auth(), rt.user(), rt.perm("/alert-rules/put"), rt.alertRulePutByFE)
|
||||
pages.GET("/alert-rule/:arid", rt.auth(), rt.user(), rt.perm("/alert-rules"), rt.alertRuleGet)
|
||||
|
||||
pages.GET("/busi-group/:id/recording-rules", rt.auth(), rt.user(), rt.perm("/recording-rules"), rt.recordingRuleGets)
|
||||
pages.POST("/busi-group/:id/recording-rules", rt.auth(), rt.user(), rt.perm("/recording-rules/add"), rt.bgrw(), rt.recordingRuleAddByFE)
|
||||
pages.DELETE("/busi-group/:id/recording-rules", rt.auth(), rt.user(), rt.perm("/recording-rules/del"), rt.bgrw(), rt.recordingRuleDel)
|
||||
pages.PUT("/busi-group/:id/recording-rule/:rrid", rt.auth(), rt.user(), rt.perm("/recording-rules/put"), rt.bgrw(), rt.recordingRulePutByFE)
|
||||
pages.GET("/recording-rule/:rrid", rt.auth(), rt.user(), rt.perm("/recording-rules"), rt.recordingRuleGet)
|
||||
pages.PUT("/busi-group/:id/recording-rules/fields", rt.auth(), rt.user(), rt.perm("/recording-rules/put"), rt.recordingRulePutFields)
|
||||
|
||||
pages.GET("/busi-group/:id/alert-mutes", rt.auth(), rt.user(), rt.perm("/alert-mutes"), rt.bgro(), rt.alertMuteGetsByBG)
|
||||
pages.POST("/busi-group/:id/alert-mutes", rt.auth(), rt.user(), rt.perm("/alert-mutes/add"), rt.bgrw(), rt.alertMuteAdd)
|
||||
pages.DELETE("/busi-group/:id/alert-mutes", rt.auth(), rt.user(), rt.perm("/alert-mutes/del"), rt.bgrw(), rt.alertMuteDel)
|
||||
pages.PUT("/busi-group/:id/alert-mute/:amid", rt.auth(), rt.user(), rt.perm("/alert-mutes/put"), rt.alertMutePutByFE)
|
||||
pages.PUT("/busi-group/:id/alert-mutes/fields", rt.auth(), rt.user(), rt.perm("/alert-mutes/put"), rt.bgrw(), rt.alertMutePutFields)
|
||||
|
||||
pages.GET("/busi-group/:id/alert-subscribes", rt.auth(), rt.user(), rt.perm("/alert-subscribes"), rt.bgro(), rt.alertSubscribeGets)
|
||||
pages.GET("/alert-subscribe/:sid", rt.auth(), rt.user(), rt.perm("/alert-subscribes"), rt.alertSubscribeGet)
|
||||
pages.POST("/busi-group/:id/alert-subscribes", rt.auth(), rt.user(), rt.perm("/alert-subscribes/add"), rt.bgrw(), rt.alertSubscribeAdd)
|
||||
pages.PUT("/busi-group/:id/alert-subscribes", rt.auth(), rt.user(), rt.perm("/alert-subscribes/put"), rt.bgrw(), rt.alertSubscribePut)
|
||||
pages.DELETE("/busi-group/:id/alert-subscribes", rt.auth(), rt.user(), rt.perm("/alert-subscribes/del"), rt.bgrw(), rt.alertSubscribeDel)
|
||||
|
||||
if rt.Center.AnonymousAccess.AlertDetail {
|
||||
pages.GET("/alert-cur-event/:eid", rt.alertCurEventGet)
|
||||
pages.GET("/alert-his-event/:eid", rt.alertHisEventGet)
|
||||
} else {
|
||||
pages.GET("/alert-cur-event/:eid", rt.auth(), rt.alertCurEventGet)
|
||||
pages.GET("/alert-his-event/:eid", rt.auth(), rt.alertHisEventGet)
|
||||
}
|
||||
|
||||
// card logic
|
||||
pages.GET("/alert-cur-events/list", rt.auth(), rt.alertCurEventsList)
|
||||
pages.GET("/alert-cur-events/card", rt.auth(), rt.alertCurEventsCard)
|
||||
pages.POST("/alert-cur-events/card/details", rt.auth(), rt.alertCurEventsCardDetails)
|
||||
pages.GET("/alert-his-events/list", rt.auth(), rt.alertHisEventsList)
|
||||
pages.DELETE("/alert-cur-events", rt.auth(), rt.user(), rt.perm("/alert-cur-events/del"), rt.alertCurEventDel)
|
||||
|
||||
pages.GET("/alert-aggr-views", rt.auth(), rt.alertAggrViewGets)
|
||||
pages.DELETE("/alert-aggr-views", rt.auth(), rt.user(), rt.alertAggrViewDel)
|
||||
pages.POST("/alert-aggr-views", rt.auth(), rt.user(), rt.alertAggrViewAdd)
|
||||
pages.PUT("/alert-aggr-views", rt.auth(), rt.user(), rt.alertAggrViewPut)
|
||||
|
||||
pages.GET("/busi-group/:id/task-tpls", rt.auth(), rt.user(), rt.perm("/job-tpls"), rt.bgro(), rt.taskTplGets)
|
||||
pages.POST("/busi-group/:id/task-tpls", rt.auth(), rt.user(), rt.perm("/job-tpls/add"), rt.bgrw(), rt.taskTplAdd)
|
||||
pages.DELETE("/busi-group/:id/task-tpl/:tid", rt.auth(), rt.user(), rt.perm("/job-tpls/del"), rt.bgrw(), rt.taskTplDel)
|
||||
pages.POST("/busi-group/:id/task-tpls/tags", rt.auth(), rt.user(), rt.perm("/job-tpls/put"), rt.bgrw(), rt.taskTplBindTags)
|
||||
pages.DELETE("/busi-group/:id/task-tpls/tags", rt.auth(), rt.user(), rt.perm("/job-tpls/put"), rt.bgrw(), rt.taskTplUnbindTags)
|
||||
pages.GET("/busi-group/:id/task-tpl/:tid", rt.auth(), rt.user(), rt.perm("/job-tpls"), rt.bgro(), rt.taskTplGet)
|
||||
pages.PUT("/busi-group/:id/task-tpl/:tid", rt.auth(), rt.user(), rt.perm("/job-tpls/put"), rt.bgrw(), rt.taskTplPut)
|
||||
|
||||
pages.GET("/busi-group/:id/tasks", rt.auth(), rt.user(), rt.perm("/job-tasks"), rt.bgro(), rt.taskGets)
|
||||
pages.POST("/busi-group/:id/tasks", rt.auth(), rt.user(), rt.perm("/job-tasks/add"), rt.bgrw(), rt.taskAdd)
|
||||
pages.GET("/busi-group/:id/task/*url", rt.auth(), rt.user(), rt.perm("/job-tasks"), rt.taskProxy)
|
||||
pages.PUT("/busi-group/:id/task/*url", rt.auth(), rt.user(), rt.perm("/job-tasks/put"), rt.bgrw(), rt.taskProxy)
|
||||
|
||||
pages.GET("/servers", rt.auth(), rt.admin(), rt.serversGet)
|
||||
pages.GET("/server-clusters", rt.auth(), rt.admin(), rt.serverClustersGet)
|
||||
|
||||
pages.POST("/datasource/list", rt.auth(), rt.datasourceList)
|
||||
pages.POST("/datasource/plugin/list", rt.auth(), rt.pluginList)
|
||||
pages.POST("/datasource/upsert", rt.auth(), rt.admin(), rt.datasourceUpsert)
|
||||
pages.POST("/datasource/desc", rt.auth(), rt.admin(), rt.datasourceGet)
|
||||
pages.POST("/datasource/status/update", rt.auth(), rt.admin(), rt.datasourceUpdataStatus)
|
||||
pages.DELETE("/datasource/", rt.auth(), rt.admin(), rt.datasourceDel)
|
||||
|
||||
pages.GET("/roles", rt.auth(), rt.admin(), rt.roleGets)
|
||||
pages.POST("/roles", rt.auth(), rt.admin(), rt.roleAdd)
|
||||
pages.PUT("/roles", rt.auth(), rt.admin(), rt.rolePut)
|
||||
pages.DELETE("/role/:id", rt.auth(), rt.admin(), rt.roleDel)
|
||||
|
||||
pages.GET("/role/:id/ops", rt.auth(), rt.admin(), rt.operationOfRole)
|
||||
pages.PUT("/role/:id/ops", rt.auth(), rt.admin(), rt.roleBindOperation)
|
||||
pages.GET("operation", rt.operations)
|
||||
|
||||
pages.GET("/notify-tpls", rt.auth(), rt.admin(), rt.notifyTplGets)
|
||||
pages.PUT("/notify-tpl/content", rt.auth(), rt.admin(), rt.notifyTplUpdateContent)
|
||||
pages.PUT("/notify-tpl", rt.auth(), rt.admin(), rt.notifyTplUpdate)
|
||||
pages.POST("/notify-tpl/preview", rt.auth(), rt.admin(), rt.notifyTplPreview)
|
||||
|
||||
pages.GET("/sso-configs", rt.auth(), rt.admin(), rt.ssoConfigGets)
|
||||
pages.PUT("/sso-config", rt.auth(), rt.admin(), rt.ssoConfigUpdate)
|
||||
|
||||
pages.GET("/webhooks", rt.auth(), rt.admin(), rt.webhookGets)
|
||||
pages.PUT("/webhooks", rt.auth(), rt.admin(), rt.webhookPuts)
|
||||
|
||||
pages.GET("/notify-script", rt.auth(), rt.admin(), rt.notifyScriptGet)
|
||||
pages.PUT("/notify-script", rt.auth(), rt.admin(), rt.notifyScriptPut)
|
||||
|
||||
pages.GET("/notify-channel", rt.auth(), rt.admin(), rt.notifyChannelGets)
|
||||
pages.PUT("/notify-channel", rt.auth(), rt.admin(), rt.notifyChannelPuts)
|
||||
|
||||
pages.GET("/notify-contact", rt.auth(), rt.admin(), rt.notifyContactGets)
|
||||
pages.PUT("/notify-contact", rt.auth(), rt.admin(), rt.notifyContactPuts)
|
||||
|
||||
pages.GET("/notify-config", rt.auth(), rt.admin(), rt.notifyConfigGet)
|
||||
pages.PUT("/notify-config", rt.auth(), rt.admin(), rt.notifyConfigPut)
|
||||
}
|
||||
|
||||
if rt.HTTP.Service.Enable {
|
||||
service := r.Group("/v1/n9e")
|
||||
if len(rt.HTTP.Service.BasicAuth) > 0 {
|
||||
service.Use(gin.BasicAuth(rt.HTTP.Service.BasicAuth))
|
||||
}
|
||||
{
|
||||
service.Any("/prometheus/*url", rt.dsProxy)
|
||||
service.POST("/users", rt.userAddPost)
|
||||
service.GET("/users", rt.userFindAll)
|
||||
|
||||
service.GET("/targets", rt.targetGets)
|
||||
service.GET("/targets/tags", rt.targetGetTags)
|
||||
service.POST("/targets/tags", rt.targetBindTagsByService)
|
||||
service.DELETE("/targets/tags", rt.targetUnbindTagsByService)
|
||||
service.PUT("/targets/note", rt.targetUpdateNoteByService)
|
||||
|
||||
service.POST("/alert-rules", rt.alertRuleAddByService)
|
||||
service.DELETE("/alert-rules", rt.alertRuleDelByService)
|
||||
service.PUT("/alert-rule/:arid", rt.alertRulePutByService)
|
||||
service.GET("/alert-rule/:arid", rt.alertRuleGet)
|
||||
service.GET("/alert-rules", rt.alertRulesGetByService)
|
||||
|
||||
service.GET("/alert-mutes", rt.alertMuteGets)
|
||||
service.POST("/alert-mutes", rt.alertMuteAddByService)
|
||||
service.DELETE("/alert-mutes", rt.alertMuteDel)
|
||||
|
||||
service.GET("/alert-cur-events", rt.alertCurEventsList)
|
||||
service.GET("/alert-his-events", rt.alertHisEventsList)
|
||||
service.GET("/alert-his-event/:eid", rt.alertHisEventGet)
|
||||
|
||||
service.GET("/config/:id", rt.configGet)
|
||||
service.GET("/configs", rt.configsGet)
|
||||
service.PUT("/configs", rt.configsPut)
|
||||
service.POST("/configs", rt.configsPost)
|
||||
service.DELETE("/configs", rt.configsDel)
|
||||
|
||||
service.POST("/conf-prop/encrypt", rt.confPropEncrypt)
|
||||
service.POST("/conf-prop/decrypt", rt.confPropDecrypt)
|
||||
}
|
||||
}
|
||||
|
||||
if rt.HTTP.Heartbeat.Enable {
|
||||
heartbeat := r.Group("/v1/n9e")
|
||||
{
|
||||
if len(rt.HTTP.Heartbeat.BasicAuth) > 0 {
|
||||
heartbeat.Use(gin.BasicAuth(rt.HTTP.Heartbeat.BasicAuth))
|
||||
}
|
||||
heartbeat.POST("/heartbeat", rt.heartbeat)
|
||||
}
|
||||
}
|
||||
|
||||
rt.configNoRoute(r)
|
||||
}
|
||||
|
||||
func Render(c *gin.Context, data, msg interface{}) {
|
||||
if msg == nil {
|
||||
if data == nil {
|
||||
data = struct{}{}
|
||||
}
|
||||
c.JSON(http.StatusOK, gin.H{"data": data, "error": ""})
|
||||
} else {
|
||||
c.JSON(http.StatusOK, gin.H{"error": gin.H{"message": msg}})
|
||||
}
|
||||
}
|
||||
|
||||
func Dangerous(c *gin.Context, v interface{}, code ...int) {
|
||||
if v == nil {
|
||||
return
|
||||
}
|
||||
|
||||
switch t := v.(type) {
|
||||
case string:
|
||||
if t != "" {
|
||||
c.JSON(http.StatusOK, gin.H{"error": v})
|
||||
}
|
||||
case error:
|
||||
c.JSON(http.StatusOK, gin.H{"error": t.Error()})
|
||||
}
|
||||
}
|
||||
@@ -1,311 +0,0 @@
|
||||
package router
|
||||
|
||||
import (
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"net/http"
|
||||
"path"
|
||||
"strings"
|
||||
|
||||
"github.com/ccfos/nightingale/v6/models"
|
||||
|
||||
"github.com/gin-gonic/gin"
|
||||
"github.com/toolkits/pkg/file"
|
||||
"github.com/toolkits/pkg/ginx"
|
||||
"github.com/toolkits/pkg/logger"
|
||||
"github.com/toolkits/pkg/runner"
|
||||
)
|
||||
|
||||
// 创建 builtin_cate
|
||||
func (rt *Router) builtinCateFavoriteAdd(c *gin.Context) {
|
||||
var f models.BuiltinCate
|
||||
ginx.BindJSON(c, &f)
|
||||
|
||||
if f.Name == "" {
|
||||
ginx.Bomb(http.StatusBadRequest, "name is empty")
|
||||
}
|
||||
|
||||
me := c.MustGet("user").(*models.User)
|
||||
f.UserId = me.Id
|
||||
|
||||
ginx.NewRender(c).Message(f.Create(rt.Ctx))
|
||||
}
|
||||
|
||||
// 删除 builtin_cate
|
||||
func (rt *Router) builtinCateFavoriteDel(c *gin.Context) {
|
||||
name := ginx.UrlParamStr(c, "name")
|
||||
me := c.MustGet("user").(*models.User)
|
||||
|
||||
ginx.NewRender(c).Message(models.BuiltinCateDelete(rt.Ctx, name, me.Id))
|
||||
}
|
||||
|
||||
type Payload struct {
|
||||
Cate string `json:"cate"`
|
||||
Fname string `json:"fname"`
|
||||
Name string `json:"name"`
|
||||
Configs interface{} `json:"configs"`
|
||||
Tags string `json:"tags"`
|
||||
}
|
||||
|
||||
type BoardCate struct {
|
||||
Name string `json:"name"`
|
||||
IconUrl string `json:"icon_url"`
|
||||
Boards []Payload `json:"boards"`
|
||||
Favorite bool `json:"favorite"`
|
||||
}
|
||||
|
||||
func (rt *Router) builtinBoardDetailGets(c *gin.Context) {
|
||||
var payload Payload
|
||||
ginx.BindJSON(c, &payload)
|
||||
|
||||
fp := rt.Center.BuiltinIntegrationsDir
|
||||
if fp == "" {
|
||||
fp = path.Join(runner.Cwd, "integrations")
|
||||
}
|
||||
|
||||
fn := fp + "/" + payload.Cate + "/dashboards/" + payload.Fname
|
||||
content, err := file.ReadBytes(fn)
|
||||
ginx.Dangerous(err)
|
||||
|
||||
err = json.Unmarshal(content, &payload)
|
||||
ginx.NewRender(c).Data(payload, err)
|
||||
}
|
||||
|
||||
func (rt *Router) builtinBoardCateGets(c *gin.Context) {
|
||||
fp := rt.Center.BuiltinIntegrationsDir
|
||||
if fp == "" {
|
||||
fp = path.Join(runner.Cwd, "integrations")
|
||||
}
|
||||
|
||||
me := c.MustGet("user").(*models.User)
|
||||
buildinFavoritesMap, err := models.BuiltinCateGetByUserId(rt.Ctx, me.Id)
|
||||
if err != nil {
|
||||
logger.Warningf("get builtin favorites fail: %v", err)
|
||||
}
|
||||
|
||||
var boardCates []BoardCate
|
||||
dirList, err := file.DirsUnder(fp)
|
||||
ginx.Dangerous(err)
|
||||
for _, dir := range dirList {
|
||||
var boardCate BoardCate
|
||||
boardCate.Name = dir
|
||||
files, err := file.FilesUnder(fp + "/" + dir + "/dashboards")
|
||||
ginx.Dangerous(err)
|
||||
|
||||
var boards []Payload
|
||||
for _, f := range files {
|
||||
fn := fp + "/" + dir + "/dashboards/" + f
|
||||
content, err := file.ReadBytes(fn)
|
||||
if err != nil {
|
||||
logger.Warningf("add board fail: %v", err)
|
||||
continue
|
||||
}
|
||||
|
||||
var payload Payload
|
||||
err = json.Unmarshal(content, &payload)
|
||||
if err != nil {
|
||||
logger.Warningf("add board:%s fail: %v", fn, err)
|
||||
continue
|
||||
}
|
||||
payload.Cate = dir
|
||||
payload.Fname = f
|
||||
payload.Configs = ""
|
||||
boards = append(boards, payload)
|
||||
}
|
||||
boardCate.Boards = boards
|
||||
|
||||
if _, ok := buildinFavoritesMap[dir]; ok {
|
||||
boardCate.Favorite = true
|
||||
}
|
||||
|
||||
iconFiles, _ := file.FilesUnder(fp + "/" + dir + "/icon")
|
||||
if len(iconFiles) > 0 {
|
||||
boardCate.IconUrl = fmt.Sprintf("/api/n9e/integrations/icon/%s/%s", dir, iconFiles[0])
|
||||
}
|
||||
|
||||
boardCates = append(boardCates, boardCate)
|
||||
}
|
||||
ginx.NewRender(c).Data(boardCates, nil)
|
||||
}
|
||||
|
||||
func (rt *Router) builtinBoardGets(c *gin.Context) {
|
||||
fp := rt.Center.BuiltinIntegrationsDir
|
||||
if fp == "" {
|
||||
fp = path.Join(runner.Cwd, "integrations")
|
||||
}
|
||||
|
||||
var fileList []string
|
||||
dirList, err := file.DirsUnder(fp)
|
||||
ginx.Dangerous(err)
|
||||
for _, dir := range dirList {
|
||||
files, err := file.FilesUnder(fp + "/" + dir + "/dashboards")
|
||||
ginx.Dangerous(err)
|
||||
fileList = append(fileList, files...)
|
||||
}
|
||||
|
||||
names := make([]string, 0, len(fileList))
|
||||
for _, f := range fileList {
|
||||
if !strings.HasSuffix(f, ".json") {
|
||||
continue
|
||||
}
|
||||
|
||||
name := strings.TrimSuffix(f, ".json")
|
||||
names = append(names, name)
|
||||
}
|
||||
|
||||
ginx.NewRender(c).Data(names, nil)
|
||||
}
|
||||
|
||||
type AlertCate struct {
|
||||
Name string `json:"name"`
|
||||
IconUrl string `json:"icon_url"`
|
||||
AlertRules []models.AlertRule `json:"alert_rules"`
|
||||
Favorite bool `json:"favorite"`
|
||||
}
|
||||
|
||||
func (rt *Router) builtinAlertCateGets(c *gin.Context) {
|
||||
fp := rt.Center.BuiltinIntegrationsDir
|
||||
if fp == "" {
|
||||
fp = path.Join(runner.Cwd, "integrations")
|
||||
}
|
||||
|
||||
me := c.MustGet("user").(*models.User)
|
||||
buildinFavoritesMap, err := models.BuiltinCateGetByUserId(rt.Ctx, me.Id)
|
||||
if err != nil {
|
||||
logger.Warningf("get builtin favorites fail: %v", err)
|
||||
}
|
||||
|
||||
var alertCates []AlertCate
|
||||
dirList, err := file.DirsUnder(fp)
|
||||
ginx.Dangerous(err)
|
||||
for _, dir := range dirList {
|
||||
var alertCate AlertCate
|
||||
alertCate.Name = dir
|
||||
files, err := file.FilesUnder(fp + "/" + dir + "/alerts")
|
||||
ginx.Dangerous(err)
|
||||
|
||||
var alertRules []models.AlertRule
|
||||
for _, f := range files {
|
||||
fn := fp + "/" + dir + "/alerts/" + f
|
||||
content, err := file.ReadBytes(fn)
|
||||
if err != nil {
|
||||
logger.Warningf("add board fail: %v", err)
|
||||
continue
|
||||
}
|
||||
|
||||
var ars []models.AlertRule
|
||||
err = json.Unmarshal(content, &ars)
|
||||
if err != nil {
|
||||
logger.Warningf("add board:%s fail: %v", fn, err)
|
||||
continue
|
||||
}
|
||||
alertRules = append(alertRules, ars...)
|
||||
}
|
||||
alertCate.AlertRules = alertRules
|
||||
iconFiles, _ := file.FilesUnder(fp + "/" + dir + "/icon")
|
||||
if len(iconFiles) > 0 {
|
||||
alertCate.IconUrl = fmt.Sprintf("/api/n9e/integrations/icon/%s/%s", dir, iconFiles[0])
|
||||
}
|
||||
|
||||
if _, ok := buildinFavoritesMap[dir]; ok {
|
||||
alertCate.Favorite = true
|
||||
}
|
||||
|
||||
alertCates = append(alertCates, alertCate)
|
||||
}
|
||||
ginx.NewRender(c).Data(alertCates, nil)
|
||||
}
|
||||
|
||||
type builtinAlertRulesList struct {
|
||||
Name string `json:"name"`
|
||||
IconUrl string `json:"icon_url"`
|
||||
AlertRules map[string][]models.AlertRule `json:"alert_rules"`
|
||||
Favorite bool `json:"favorite"`
|
||||
}
|
||||
|
||||
func (rt *Router) builtinAlertRules(c *gin.Context) {
|
||||
fp := rt.Center.BuiltinIntegrationsDir
|
||||
if fp == "" {
|
||||
fp = path.Join(runner.Cwd, "integrations")
|
||||
}
|
||||
|
||||
me := c.MustGet("user").(*models.User)
|
||||
buildinFavoritesMap, err := models.BuiltinCateGetByUserId(rt.Ctx, me.Id)
|
||||
if err != nil {
|
||||
logger.Warningf("get builtin favorites fail: %v", err)
|
||||
}
|
||||
|
||||
var alertCates []builtinAlertRulesList
|
||||
dirList, err := file.DirsUnder(fp)
|
||||
ginx.Dangerous(err)
|
||||
for _, dir := range dirList {
|
||||
var alertCate builtinAlertRulesList
|
||||
alertCate.Name = dir
|
||||
files, err := file.FilesUnder(fp + "/" + dir + "/alerts")
|
||||
ginx.Dangerous(err)
|
||||
|
||||
alertRules := make(map[string][]models.AlertRule)
|
||||
for _, f := range files {
|
||||
fn := fp + "/" + dir + "/alerts/" + f
|
||||
content, err := file.ReadBytes(fn)
|
||||
if err != nil {
|
||||
logger.Warningf("add board fail: %v", err)
|
||||
continue
|
||||
}
|
||||
|
||||
var ars []models.AlertRule
|
||||
err = json.Unmarshal(content, &ars)
|
||||
if err != nil {
|
||||
logger.Warningf("add board:%s fail: %v", fn, err)
|
||||
continue
|
||||
}
|
||||
alertRules[strings.TrimSuffix(f, ".json")] = ars
|
||||
}
|
||||
|
||||
alertCate.AlertRules = alertRules
|
||||
iconFiles, _ := file.FilesUnder(fp + "/" + dir + "/icon")
|
||||
if len(iconFiles) > 0 {
|
||||
alertCate.IconUrl = fmt.Sprintf("/api/n9e/integrations/icon/%s/%s", dir, iconFiles[0])
|
||||
}
|
||||
|
||||
if _, ok := buildinFavoritesMap[dir]; ok {
|
||||
alertCate.Favorite = true
|
||||
}
|
||||
|
||||
alertCates = append(alertCates, alertCate)
|
||||
}
|
||||
ginx.NewRender(c).Data(alertCates, nil)
|
||||
}
|
||||
|
||||
// read the json file content
|
||||
func (rt *Router) builtinBoardGet(c *gin.Context) {
|
||||
name := ginx.UrlParamStr(c, "name")
|
||||
dirpath := rt.Center.BuiltinIntegrationsDir
|
||||
if dirpath == "" {
|
||||
dirpath = path.Join(runner.Cwd, "integrations")
|
||||
}
|
||||
|
||||
dirList, err := file.DirsUnder(dirpath)
|
||||
ginx.Dangerous(err)
|
||||
for _, dir := range dirList {
|
||||
jsonFile := dirpath + "/" + dir + "/dashboards/" + name + ".json"
|
||||
if file.IsExist(jsonFile) {
|
||||
body, err := file.ReadString(jsonFile)
|
||||
ginx.NewRender(c).Data(body, err)
|
||||
return
|
||||
}
|
||||
}
|
||||
|
||||
ginx.Bomb(http.StatusBadRequest, "%s not found", name)
|
||||
}
|
||||
|
||||
func (rt *Router) builtinIcon(c *gin.Context) {
|
||||
fp := rt.Center.BuiltinIntegrationsDir
|
||||
if fp == "" {
|
||||
fp = path.Join(runner.Cwd, "integrations")
|
||||
}
|
||||
|
||||
cate := ginx.UrlParamStr(c, "cate")
|
||||
iconPath := fp + "/" + cate + "/icon/" + ginx.UrlParamStr(c, "name")
|
||||
c.File(path.Join(iconPath))
|
||||
}
|
||||
@@ -1,64 +0,0 @@
|
||||
package router
|
||||
|
||||
import (
|
||||
"encoding/json"
|
||||
|
||||
"github.com/ccfos/nightingale/v6/models"
|
||||
|
||||
"github.com/gin-gonic/gin"
|
||||
"github.com/toolkits/pkg/ginx"
|
||||
)
|
||||
|
||||
func (rt *Router) notifyChannelsGets(c *gin.Context) {
|
||||
var labelAndKeys []models.LabelAndKey
|
||||
cval, err := models.ConfigsGet(rt.Ctx, models.NOTIFYCHANNEL)
|
||||
ginx.Dangerous(err)
|
||||
|
||||
if cval == "" {
|
||||
ginx.NewRender(c).Data(labelAndKeys, nil)
|
||||
return
|
||||
}
|
||||
|
||||
var notifyChannels []models.NotifyChannel
|
||||
err = json.Unmarshal([]byte(cval), ¬ifyChannels)
|
||||
ginx.Dangerous(err)
|
||||
|
||||
for _, v := range notifyChannels {
|
||||
if v.Hide {
|
||||
continue
|
||||
}
|
||||
var labelAndKey models.LabelAndKey
|
||||
labelAndKey.Label = v.Name
|
||||
labelAndKey.Key = v.Ident
|
||||
labelAndKeys = append(labelAndKeys, labelAndKey)
|
||||
}
|
||||
|
||||
ginx.NewRender(c).Data(labelAndKeys, nil)
|
||||
}
|
||||
|
||||
func (rt *Router) contactKeysGets(c *gin.Context) {
|
||||
var labelAndKeys []models.LabelAndKey
|
||||
cval, err := models.ConfigsGet(rt.Ctx, models.NOTIFYCONTACT)
|
||||
ginx.Dangerous(err)
|
||||
|
||||
if cval == "" {
|
||||
ginx.NewRender(c).Data(labelAndKeys, nil)
|
||||
return
|
||||
}
|
||||
|
||||
var notifyContacts []models.NotifyContact
|
||||
err = json.Unmarshal([]byte(cval), ¬ifyContacts)
|
||||
ginx.Dangerous(err)
|
||||
|
||||
for _, v := range notifyContacts {
|
||||
if v.Hide {
|
||||
continue
|
||||
}
|
||||
var labelAndKey models.LabelAndKey
|
||||
labelAndKey.Label = v.Name
|
||||
labelAndKey.Key = v.Ident
|
||||
labelAndKeys = append(labelAndKeys, labelAndKey)
|
||||
}
|
||||
|
||||
ginx.NewRender(c).Data(labelAndKeys, nil)
|
||||
}
|
||||
@@ -1,19 +0,0 @@
|
||||
package router
|
||||
|
||||
type ChartPure struct {
|
||||
Configs string `json:"configs"`
|
||||
Weight int `json:"weight"`
|
||||
}
|
||||
|
||||
type ChartGroupPure struct {
|
||||
Name string `json:"name"`
|
||||
Weight int `json:"weight"`
|
||||
Charts []ChartPure `json:"charts"`
|
||||
}
|
||||
|
||||
type DashboardPure struct {
|
||||
Name string `json:"name"`
|
||||
Tags string `json:"tags"`
|
||||
Configs string `json:"configs"`
|
||||
ChartGroups []ChartGroupPure `json:"chart_groups"`
|
||||
}
|
||||
@@ -1,178 +0,0 @@
|
||||
package router
|
||||
|
||||
import (
|
||||
"crypto/tls"
|
||||
"fmt"
|
||||
"net/http"
|
||||
"net/url"
|
||||
|
||||
"github.com/ccfos/nightingale/v6/models"
|
||||
|
||||
"github.com/gin-gonic/gin"
|
||||
"github.com/toolkits/pkg/ginx"
|
||||
"github.com/toolkits/pkg/logger"
|
||||
)
|
||||
|
||||
func (rt *Router) pluginList(c *gin.Context) {
|
||||
Render(c, rt.Center.Plugins, nil)
|
||||
}
|
||||
|
||||
type listReq struct {
|
||||
Name string `json:"name"`
|
||||
Type string `json:"plugin_type"`
|
||||
Category string `json:"category"`
|
||||
}
|
||||
|
||||
func (rt *Router) datasourceList(c *gin.Context) {
|
||||
var req listReq
|
||||
ginx.BindJSON(c, &req)
|
||||
|
||||
typ := req.Type
|
||||
category := req.Category
|
||||
name := req.Name
|
||||
|
||||
list, err := models.GetDatasourcesGetsBy(rt.Ctx, typ, category, name, "")
|
||||
Render(c, list, err)
|
||||
}
|
||||
|
||||
type datasourceBrief struct {
|
||||
Id int64 `json:"id"`
|
||||
Name string `json:"name"`
|
||||
PluginType string `json:"plugin_type"`
|
||||
}
|
||||
|
||||
func (rt *Router) datasourceBriefs(c *gin.Context) {
|
||||
var dss []datasourceBrief
|
||||
list, err := models.GetDatasourcesGetsBy(rt.Ctx, "", "", "", "")
|
||||
ginx.Dangerous(err)
|
||||
|
||||
for i := range list {
|
||||
dss = append(dss, datasourceBrief{
|
||||
Id: list[i].Id,
|
||||
Name: list[i].Name,
|
||||
PluginType: list[i].PluginType,
|
||||
})
|
||||
}
|
||||
|
||||
ginx.NewRender(c).Data(dss, err)
|
||||
}
|
||||
|
||||
func (rt *Router) datasourceUpsert(c *gin.Context) {
|
||||
var req models.Datasource
|
||||
ginx.BindJSON(c, &req)
|
||||
username := Username(c)
|
||||
req.UpdatedBy = username
|
||||
|
||||
var err error
|
||||
var count int64
|
||||
|
||||
err = DatasourceCheck(req)
|
||||
if err != nil {
|
||||
Dangerous(c, err)
|
||||
return
|
||||
}
|
||||
|
||||
if req.Id == 0 {
|
||||
req.CreatedBy = username
|
||||
req.Status = "enabled"
|
||||
count, err = models.GetDatasourcesCountBy(rt.Ctx, "", "", req.Name)
|
||||
if err != nil {
|
||||
Render(c, nil, err)
|
||||
return
|
||||
}
|
||||
|
||||
if count > 0 {
|
||||
Render(c, nil, "name already exists")
|
||||
return
|
||||
}
|
||||
err = req.Add(rt.Ctx)
|
||||
} else {
|
||||
err = req.Update(rt.Ctx, "name", "description", "cluster_name", "settings", "http", "auth", "updated_by", "updated_at")
|
||||
}
|
||||
|
||||
Render(c, nil, err)
|
||||
}
|
||||
|
||||
func DatasourceCheck(ds models.Datasource) error {
|
||||
if ds.HTTPJson.Url == "" {
|
||||
return fmt.Errorf("url is empty")
|
||||
}
|
||||
|
||||
client := &http.Client{
|
||||
Transport: &http.Transport{
|
||||
TLSClientConfig: &tls.Config{
|
||||
InsecureSkipVerify: ds.HTTPJson.TLS.SkipTlsVerify,
|
||||
},
|
||||
},
|
||||
}
|
||||
|
||||
fullURL := ds.HTTPJson.Url
|
||||
req, err := http.NewRequest("GET", fullURL, nil)
|
||||
if err != nil {
|
||||
logger.Errorf("Error creating request: %v", err)
|
||||
return fmt.Errorf("request url:%s failed", fullURL)
|
||||
}
|
||||
|
||||
if ds.PluginType == models.PROMETHEUS {
|
||||
subPath := "/api/v1/query"
|
||||
query := url.Values{}
|
||||
query.Add("query", "1+1")
|
||||
fullURL = fmt.Sprintf("%s%s?%s", ds.HTTPJson.Url, subPath, query.Encode())
|
||||
|
||||
req, err = http.NewRequest("POST", fullURL, nil)
|
||||
if err != nil {
|
||||
logger.Errorf("Error creating request: %v", err)
|
||||
return fmt.Errorf("request url:%s failed", fullURL)
|
||||
}
|
||||
}
|
||||
|
||||
if ds.AuthJson.BasicAuthUser != "" {
|
||||
req.SetBasicAuth(ds.AuthJson.BasicAuthUser, ds.AuthJson.BasicAuthPassword)
|
||||
}
|
||||
|
||||
for k, v := range ds.HTTPJson.Headers {
|
||||
req.Header.Set(k, v)
|
||||
}
|
||||
|
||||
resp, err := client.Do(req)
|
||||
if err != nil {
|
||||
logger.Errorf("Error making request: %v\n", err)
|
||||
return fmt.Errorf("request url:%s failed", fullURL)
|
||||
}
|
||||
defer resp.Body.Close()
|
||||
|
||||
if resp.StatusCode != 200 {
|
||||
logger.Errorf("Error making request: %v\n", resp.StatusCode)
|
||||
return fmt.Errorf("request url:%s failed code:%d", fullURL, resp.StatusCode)
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func (rt *Router) datasourceGet(c *gin.Context) {
|
||||
var req models.Datasource
|
||||
ginx.BindJSON(c, &req)
|
||||
err := req.Get(rt.Ctx)
|
||||
Render(c, req, err)
|
||||
}
|
||||
|
||||
func (rt *Router) datasourceUpdataStatus(c *gin.Context) {
|
||||
var req models.Datasource
|
||||
ginx.BindJSON(c, &req)
|
||||
username := Username(c)
|
||||
req.UpdatedBy = username
|
||||
err := req.Update(rt.Ctx, "status", "updated_by", "updated_at")
|
||||
Render(c, req, err)
|
||||
}
|
||||
|
||||
func (rt *Router) datasourceDel(c *gin.Context) {
|
||||
var ids []int64
|
||||
ginx.BindJSON(c, &ids)
|
||||
err := models.DatasourceDel(rt.Ctx, ids)
|
||||
Render(c, nil, err)
|
||||
}
|
||||
|
||||
func Username(c *gin.Context) string {
|
||||
|
||||
return c.MustGet("username").(string)
|
||||
}
|
||||
@@ -1,121 +0,0 @@
|
||||
package router
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"net/http"
|
||||
"strconv"
|
||||
"strings"
|
||||
|
||||
"github.com/ccfos/nightingale/v6/alert/aconf"
|
||||
"github.com/ccfos/nightingale/v6/models"
|
||||
"github.com/ccfos/nightingale/v6/pkg/ctx"
|
||||
"github.com/ccfos/nightingale/v6/pkg/ibex"
|
||||
"github.com/gin-gonic/gin"
|
||||
|
||||
"github.com/toolkits/pkg/ginx"
|
||||
)
|
||||
|
||||
const defaultLimit = 300
|
||||
|
||||
func queryDatasourceIds(c *gin.Context) []int64 {
|
||||
datasourceIds := ginx.QueryStr(c, "datasource_ids", "")
|
||||
datasourceIds = strings.ReplaceAll(datasourceIds, ",", " ")
|
||||
idsStr := strings.Fields(datasourceIds)
|
||||
ids := make([]int64, len(idsStr))
|
||||
for i, idStr := range idsStr {
|
||||
id, _ := strconv.ParseInt(idStr, 10, 64)
|
||||
ids[i] = id
|
||||
}
|
||||
return ids
|
||||
}
|
||||
|
||||
type idsForm struct {
|
||||
Ids []int64 `json:"ids"`
|
||||
}
|
||||
|
||||
func (f idsForm) Verify() {
|
||||
if len(f.Ids) == 0 {
|
||||
ginx.Bomb(http.StatusBadRequest, "ids empty")
|
||||
}
|
||||
}
|
||||
|
||||
func User(ctx *ctx.Context, id int64) *models.User {
|
||||
obj, err := models.UserGetById(ctx, id)
|
||||
ginx.Dangerous(err)
|
||||
|
||||
if obj == nil {
|
||||
ginx.Bomb(http.StatusNotFound, "No such user")
|
||||
}
|
||||
|
||||
return obj
|
||||
}
|
||||
|
||||
func UserGroup(ctx *ctx.Context, id int64) *models.UserGroup {
|
||||
obj, err := models.UserGroupGetById(ctx, id)
|
||||
ginx.Dangerous(err)
|
||||
|
||||
if obj == nil {
|
||||
ginx.Bomb(http.StatusNotFound, "No such UserGroup")
|
||||
}
|
||||
|
||||
return obj
|
||||
}
|
||||
|
||||
func BusiGroup(ctx *ctx.Context, id int64) *models.BusiGroup {
|
||||
obj, err := models.BusiGroupGetById(ctx, id)
|
||||
ginx.Dangerous(err)
|
||||
|
||||
if obj == nil {
|
||||
ginx.Bomb(http.StatusNotFound, "No such BusiGroup")
|
||||
}
|
||||
|
||||
return obj
|
||||
}
|
||||
|
||||
func Dashboard(ctx *ctx.Context, id int64) *models.Dashboard {
|
||||
obj, err := models.DashboardGet(ctx, "id=?", id)
|
||||
ginx.Dangerous(err)
|
||||
|
||||
if obj == nil {
|
||||
ginx.Bomb(http.StatusNotFound, "No such dashboard")
|
||||
}
|
||||
|
||||
return obj
|
||||
}
|
||||
|
||||
type DoneIdsReply struct {
|
||||
Err string `json:"err"`
|
||||
Dat struct {
|
||||
List []int64 `json:"list"`
|
||||
} `json:"dat"`
|
||||
}
|
||||
|
||||
type TaskCreateReply struct {
|
||||
Err string `json:"err"`
|
||||
Dat int64 `json:"dat"` // task.id
|
||||
}
|
||||
|
||||
// return task.id, error
|
||||
func TaskCreate(v interface{}, ibexc aconf.Ibex) (int64, error) {
|
||||
var res TaskCreateReply
|
||||
err := ibex.New(
|
||||
ibexc.Address,
|
||||
ibexc.BasicAuthUser,
|
||||
ibexc.BasicAuthPass,
|
||||
ibexc.Timeout,
|
||||
).
|
||||
Path("/ibex/v1/tasks").
|
||||
In(v).
|
||||
Out(&res).
|
||||
POST()
|
||||
|
||||
if err != nil {
|
||||
return 0, err
|
||||
}
|
||||
|
||||
if res.Err != "" {
|
||||
return 0, fmt.Errorf("response.err: %v", res.Err)
|
||||
}
|
||||
|
||||
return res.Dat, nil
|
||||
}
|
||||
@@ -1,52 +0,0 @@
|
||||
package router
|
||||
|
||||
import (
|
||||
"compress/gzip"
|
||||
"encoding/json"
|
||||
"io/ioutil"
|
||||
"time"
|
||||
|
||||
"github.com/ccfos/nightingale/v6/models"
|
||||
|
||||
"github.com/gin-gonic/gin"
|
||||
"github.com/toolkits/pkg/ginx"
|
||||
)
|
||||
|
||||
func (rt *Router) heartbeat(c *gin.Context) {
|
||||
var bs []byte
|
||||
var err error
|
||||
var r *gzip.Reader
|
||||
var req models.HostMeta
|
||||
if c.GetHeader("Content-Encoding") == "gzip" {
|
||||
r, err = gzip.NewReader(c.Request.Body)
|
||||
if err != nil {
|
||||
c.String(400, err.Error())
|
||||
return
|
||||
}
|
||||
defer r.Close()
|
||||
bs, err = ioutil.ReadAll(r)
|
||||
ginx.Dangerous(err)
|
||||
} else {
|
||||
defer c.Request.Body.Close()
|
||||
bs, err = ioutil.ReadAll(c.Request.Body)
|
||||
ginx.Dangerous(err)
|
||||
}
|
||||
|
||||
err = json.Unmarshal(bs, &req)
|
||||
ginx.Dangerous(err)
|
||||
|
||||
req.Offset = (time.Now().UnixMilli() - req.UnixTime)
|
||||
req.RemoteAddr = c.ClientIP()
|
||||
rt.MetaSet.Set(req.Hostname, req)
|
||||
|
||||
gid := ginx.QueryInt64(c, "gid", 0)
|
||||
|
||||
if gid != 0 {
|
||||
target, has := rt.TargetCache.Get(req.Hostname)
|
||||
if has && target.GroupId != gid {
|
||||
err = models.TargetUpdateBgid(rt.Ctx, []string{req.Hostname}, gid, false)
|
||||
}
|
||||
}
|
||||
|
||||
ginx.NewRender(c).Message(err)
|
||||
}
|
||||
@@ -1,190 +0,0 @@
|
||||
package router
|
||||
|
||||
import (
|
||||
"encoding/json"
|
||||
|
||||
"github.com/ccfos/nightingale/v6/alert/aconf"
|
||||
"github.com/ccfos/nightingale/v6/alert/sender"
|
||||
"github.com/ccfos/nightingale/v6/memsto"
|
||||
"github.com/ccfos/nightingale/v6/models"
|
||||
"github.com/pelletier/go-toml/v2"
|
||||
|
||||
"github.com/gin-gonic/gin"
|
||||
"github.com/toolkits/pkg/ginx"
|
||||
)
|
||||
|
||||
func (rt *Router) webhookGets(c *gin.Context) {
|
||||
var webhooks []models.Webhook
|
||||
cval, err := models.ConfigsGet(rt.Ctx, models.WEBHOOKKEY)
|
||||
ginx.Dangerous(err)
|
||||
if cval == "" {
|
||||
ginx.NewRender(c).Data(webhooks, nil)
|
||||
return
|
||||
}
|
||||
|
||||
err = json.Unmarshal([]byte(cval), &webhooks)
|
||||
ginx.NewRender(c).Data(webhooks, err)
|
||||
}
|
||||
|
||||
func (rt *Router) webhookPuts(c *gin.Context) {
|
||||
var webhooks []models.Webhook
|
||||
ginx.BindJSON(c, &webhooks)
|
||||
for i := 0; i < len(webhooks); i++ {
|
||||
for k, v := range webhooks[i].HeaderMap {
|
||||
webhooks[i].Headers = append(webhooks[i].Headers, k)
|
||||
webhooks[i].Headers = append(webhooks[i].Headers, v)
|
||||
}
|
||||
}
|
||||
|
||||
data, err := json.Marshal(webhooks)
|
||||
ginx.Dangerous(err)
|
||||
|
||||
ginx.NewRender(c).Message(models.ConfigsSet(rt.Ctx, models.WEBHOOKKEY, string(data)))
|
||||
}
|
||||
|
||||
func (rt *Router) notifyScriptGet(c *gin.Context) {
|
||||
var notifyScript models.NotifyScript
|
||||
cval, err := models.ConfigsGet(rt.Ctx, models.NOTIFYSCRIPT)
|
||||
ginx.Dangerous(err)
|
||||
|
||||
if cval == "" {
|
||||
ginx.NewRender(c).Data(notifyScript, nil)
|
||||
return
|
||||
}
|
||||
|
||||
err = json.Unmarshal([]byte(cval), ¬ifyScript)
|
||||
ginx.NewRender(c).Data(notifyScript, err)
|
||||
}
|
||||
|
||||
func (rt *Router) notifyScriptPut(c *gin.Context) {
|
||||
var notifyScript models.NotifyScript
|
||||
ginx.BindJSON(c, ¬ifyScript)
|
||||
|
||||
data, err := json.Marshal(notifyScript)
|
||||
ginx.Dangerous(err)
|
||||
|
||||
ginx.NewRender(c).Message(models.ConfigsSet(rt.Ctx, models.NOTIFYSCRIPT, string(data)))
|
||||
}
|
||||
|
||||
func (rt *Router) notifyChannelGets(c *gin.Context) {
|
||||
var notifyChannels []models.NotifyChannel
|
||||
cval, err := models.ConfigsGet(rt.Ctx, models.NOTIFYCHANNEL)
|
||||
ginx.Dangerous(err)
|
||||
if cval == "" {
|
||||
ginx.NewRender(c).Data(notifyChannels, nil)
|
||||
return
|
||||
}
|
||||
|
||||
err = json.Unmarshal([]byte(cval), ¬ifyChannels)
|
||||
ginx.NewRender(c).Data(notifyChannels, err)
|
||||
}
|
||||
|
||||
func (rt *Router) notifyChannelPuts(c *gin.Context) {
|
||||
var notifyChannels []models.NotifyChannel
|
||||
ginx.BindJSON(c, ¬ifyChannels)
|
||||
|
||||
channels := []string{models.Dingtalk, models.Wecom, models.Feishu, models.Mm, models.Telegram, models.Email}
|
||||
|
||||
m := make(map[string]struct{})
|
||||
for _, v := range notifyChannels {
|
||||
m[v.Ident] = struct{}{}
|
||||
}
|
||||
|
||||
for _, v := range channels {
|
||||
if _, ok := m[v]; !ok {
|
||||
ginx.Bomb(200, "channel %s ident can not modify", v)
|
||||
}
|
||||
}
|
||||
|
||||
data, err := json.Marshal(notifyChannels)
|
||||
ginx.Dangerous(err)
|
||||
|
||||
ginx.NewRender(c).Message(models.ConfigsSet(rt.Ctx, models.NOTIFYCHANNEL, string(data)))
|
||||
}
|
||||
|
||||
func (rt *Router) notifyContactGets(c *gin.Context) {
|
||||
var notifyContacts []models.NotifyContact
|
||||
cval, err := models.ConfigsGet(rt.Ctx, models.NOTIFYCONTACT)
|
||||
ginx.Dangerous(err)
|
||||
if cval == "" {
|
||||
ginx.NewRender(c).Data(notifyContacts, nil)
|
||||
return
|
||||
}
|
||||
|
||||
err = json.Unmarshal([]byte(cval), ¬ifyContacts)
|
||||
ginx.NewRender(c).Data(notifyContacts, err)
|
||||
}
|
||||
|
||||
func (rt *Router) notifyContactPuts(c *gin.Context) {
|
||||
var notifyContacts []models.NotifyContact
|
||||
ginx.BindJSON(c, ¬ifyContacts)
|
||||
|
||||
keys := []string{models.DingtalkKey, models.WecomKey, models.FeishuKey, models.MmKey, models.TelegramKey}
|
||||
|
||||
m := make(map[string]struct{})
|
||||
for _, v := range notifyContacts {
|
||||
m[v.Ident] = struct{}{}
|
||||
}
|
||||
|
||||
for _, v := range keys {
|
||||
if _, ok := m[v]; !ok {
|
||||
ginx.Bomb(200, "contact %s ident can not modify", v)
|
||||
}
|
||||
}
|
||||
|
||||
data, err := json.Marshal(notifyContacts)
|
||||
ginx.Dangerous(err)
|
||||
|
||||
ginx.NewRender(c).Message(models.ConfigsSet(rt.Ctx, models.NOTIFYCONTACT, string(data)))
|
||||
}
|
||||
|
||||
func (rt *Router) notifyConfigGet(c *gin.Context) {
|
||||
key := ginx.QueryStr(c, "ckey")
|
||||
cval, err := models.ConfigsGet(rt.Ctx, key)
|
||||
if cval == "" {
|
||||
switch key {
|
||||
case models.IBEX:
|
||||
cval = memsto.DefaultIbex
|
||||
case models.SMTP:
|
||||
cval = memsto.DefaultSMTP
|
||||
}
|
||||
}
|
||||
ginx.NewRender(c).Data(cval, err)
|
||||
}
|
||||
|
||||
func (rt *Router) notifyConfigPut(c *gin.Context) {
|
||||
var f models.Configs
|
||||
ginx.BindJSON(c, &f)
|
||||
switch f.Ckey {
|
||||
case models.SMTP:
|
||||
var smtp aconf.SMTPConfig
|
||||
err := toml.Unmarshal([]byte(f.Cval), &smtp)
|
||||
ginx.Dangerous(err)
|
||||
case models.IBEX:
|
||||
var ibex aconf.Ibex
|
||||
err := toml.Unmarshal([]byte(f.Cval), &ibex)
|
||||
ginx.Dangerous(err)
|
||||
default:
|
||||
ginx.Bomb(200, "key %s can not modify", f.Ckey)
|
||||
}
|
||||
|
||||
err := models.ConfigsSet(rt.Ctx, f.Ckey, f.Cval)
|
||||
if err != nil {
|
||||
ginx.Bomb(200, err.Error())
|
||||
}
|
||||
|
||||
if f.Ckey == models.SMTP {
|
||||
// 重置邮件发送器
|
||||
var smtp aconf.SMTPConfig
|
||||
err := toml.Unmarshal([]byte(f.Cval), &smtp)
|
||||
ginx.Dangerous(err)
|
||||
|
||||
if smtp.Host == "" || smtp.Port == 0 {
|
||||
ginx.Bomb(200, "smtp host or port can not be empty")
|
||||
}
|
||||
|
||||
go sender.RestartEmailSender(smtp)
|
||||
}
|
||||
|
||||
ginx.NewRender(c).Message(nil)
|
||||
}
|
||||
@@ -1,80 +0,0 @@
|
||||
package router
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"html/template"
|
||||
|
||||
"github.com/ccfos/nightingale/v6/center/cconf"
|
||||
"github.com/ccfos/nightingale/v6/models"
|
||||
"github.com/ccfos/nightingale/v6/pkg/tplx"
|
||||
"github.com/gin-gonic/gin"
|
||||
"github.com/toolkits/pkg/ginx"
|
||||
)
|
||||
|
||||
func (rt *Router) notifyTplGets(c *gin.Context) {
|
||||
lst, err := models.NotifyTplGets(rt.Ctx)
|
||||
|
||||
ginx.NewRender(c).Data(lst, err)
|
||||
}
|
||||
|
||||
func (rt *Router) notifyTplUpdateContent(c *gin.Context) {
|
||||
var f models.NotifyTpl
|
||||
ginx.BindJSON(c, &f)
|
||||
|
||||
if err := templateValidate(f); err != nil {
|
||||
ginx.NewRender(c).Message(err.Error())
|
||||
return
|
||||
}
|
||||
|
||||
ginx.NewRender(c).Message(f.UpdateContent(rt.Ctx))
|
||||
}
|
||||
|
||||
func (rt *Router) notifyTplUpdate(c *gin.Context) {
|
||||
var f models.NotifyTpl
|
||||
ginx.BindJSON(c, &f)
|
||||
|
||||
if err := templateValidate(f); err != nil {
|
||||
ginx.NewRender(c).Message(err.Error())
|
||||
return
|
||||
}
|
||||
|
||||
ginx.NewRender(c).Message(f.Update(rt.Ctx))
|
||||
}
|
||||
|
||||
func templateValidate(f models.NotifyTpl) error {
|
||||
if f.Content == "" {
|
||||
return nil
|
||||
}
|
||||
if _, err := template.New(f.Channel).Funcs(tplx.TemplateFuncMap).Parse(f.Content); err != nil {
|
||||
return fmt.Errorf("notify template verify illegal:%s", err.Error())
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func (rt *Router) notifyTplPreview(c *gin.Context) {
|
||||
var event models.AlertCurEvent
|
||||
err := json.Unmarshal([]byte(cconf.EVENT_EXAMPLE), &event)
|
||||
if err != nil {
|
||||
ginx.NewRender(c).Message(err.Error())
|
||||
return
|
||||
}
|
||||
|
||||
var f models.NotifyTpl
|
||||
ginx.BindJSON(c, &f)
|
||||
|
||||
tpl, err := template.New(f.Channel).Funcs(tplx.TemplateFuncMap).Parse(f.Content)
|
||||
ginx.Dangerous(err)
|
||||
|
||||
var body bytes.Buffer
|
||||
var ret string
|
||||
if err := tpl.Execute(&body, event); err != nil {
|
||||
ret = err.Error()
|
||||
} else {
|
||||
ret = body.String()
|
||||
}
|
||||
|
||||
ginx.NewRender(c).Data(ret, nil)
|
||||
}
|
||||
@@ -1,159 +0,0 @@
|
||||
package router
|
||||
|
||||
import (
|
||||
"context"
|
||||
"crypto/tls"
|
||||
"net"
|
||||
"net/http"
|
||||
"net/http/httputil"
|
||||
"net/url"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
pkgprom "github.com/ccfos/nightingale/v6/pkg/prom"
|
||||
"github.com/gin-gonic/gin"
|
||||
"github.com/prometheus/common/model"
|
||||
"github.com/toolkits/pkg/ginx"
|
||||
)
|
||||
|
||||
type queryFormItem struct {
|
||||
Start int64 `json:"start" binding:"required"`
|
||||
End int64 `json:"end" binding:"required"`
|
||||
Step int64 `json:"step" binding:"required"`
|
||||
Query string `json:"query" binding:"required"`
|
||||
}
|
||||
|
||||
type batchQueryForm struct {
|
||||
DatasourceId int64 `json:"datasource_id" binding:"required"`
|
||||
Queries []queryFormItem `json:"queries" binding:"required"`
|
||||
}
|
||||
|
||||
func (rt *Router) promBatchQueryRange(c *gin.Context) {
|
||||
var f batchQueryForm
|
||||
ginx.Dangerous(c.BindJSON(&f))
|
||||
|
||||
cli := rt.PromClients.GetCli(f.DatasourceId)
|
||||
|
||||
var lst []model.Value
|
||||
|
||||
for _, item := range f.Queries {
|
||||
r := pkgprom.Range{
|
||||
Start: time.Unix(item.Start, 0),
|
||||
End: time.Unix(item.End, 0),
|
||||
Step: time.Duration(item.Step) * time.Second,
|
||||
}
|
||||
|
||||
resp, _, err := cli.QueryRange(context.Background(), item.Query, r)
|
||||
ginx.Dangerous(err)
|
||||
|
||||
lst = append(lst, resp)
|
||||
}
|
||||
|
||||
ginx.NewRender(c).Data(lst, nil)
|
||||
}
|
||||
|
||||
type batchInstantForm struct {
|
||||
DatasourceId int64 `json:"datasource_id" binding:"required"`
|
||||
Queries []InstantFormItem `json:"queries" binding:"required"`
|
||||
}
|
||||
|
||||
type InstantFormItem struct {
|
||||
Time int64 `json:"time" binding:"required"`
|
||||
Query string `json:"query" binding:"required"`
|
||||
}
|
||||
|
||||
func (rt *Router) promBatchQueryInstant(c *gin.Context) {
|
||||
var f batchInstantForm
|
||||
ginx.Dangerous(c.BindJSON(&f))
|
||||
|
||||
cli := rt.PromClients.GetCli(f.DatasourceId)
|
||||
|
||||
var lst []model.Value
|
||||
|
||||
for _, item := range f.Queries {
|
||||
resp, _, err := cli.Query(context.Background(), item.Query, time.Unix(item.Time, 0))
|
||||
ginx.Dangerous(err)
|
||||
|
||||
lst = append(lst, resp)
|
||||
}
|
||||
|
||||
ginx.NewRender(c).Data(lst, nil)
|
||||
}
|
||||
|
||||
func (rt *Router) dsProxy(c *gin.Context) {
|
||||
dsId := ginx.UrlParamInt64(c, "id")
|
||||
ds := rt.DatasourceCache.GetById(dsId)
|
||||
|
||||
if ds == nil {
|
||||
c.String(http.StatusBadRequest, "no such datasource")
|
||||
return
|
||||
}
|
||||
|
||||
target, err := url.Parse(ds.HTTPJson.Url)
|
||||
if err != nil {
|
||||
c.String(http.StatusInternalServerError, "invalid url: %s", ds.HTTPJson.Url)
|
||||
return
|
||||
}
|
||||
|
||||
director := func(req *http.Request) {
|
||||
req.URL.Scheme = target.Scheme
|
||||
req.URL.Host = target.Host
|
||||
req.Host = target.Host
|
||||
|
||||
req.Header.Set("Host", target.Host)
|
||||
|
||||
// fe request e.g. /api/n9e/proxy/:id/*
|
||||
arr := strings.Split(req.URL.Path, "/")
|
||||
if len(arr) < 6 {
|
||||
c.String(http.StatusBadRequest, "invalid url path")
|
||||
return
|
||||
}
|
||||
|
||||
req.URL.Path = strings.TrimRight(target.Path, "/") + "/" + strings.Join(arr[5:], "/")
|
||||
if target.RawQuery == "" || req.URL.RawQuery == "" {
|
||||
req.URL.RawQuery = target.RawQuery + req.URL.RawQuery
|
||||
} else {
|
||||
req.URL.RawQuery = target.RawQuery + "&" + req.URL.RawQuery
|
||||
}
|
||||
|
||||
if _, ok := req.Header["User-Agent"]; !ok {
|
||||
req.Header.Set("User-Agent", "")
|
||||
}
|
||||
|
||||
if ds.AuthJson.BasicAuthUser != "" {
|
||||
req.SetBasicAuth(ds.AuthJson.BasicAuthUser, ds.AuthJson.BasicAuthPassword)
|
||||
}
|
||||
|
||||
headerCount := len(ds.HTTPJson.Headers)
|
||||
if headerCount > 0 {
|
||||
for key, value := range ds.HTTPJson.Headers {
|
||||
req.Header.Set(key, value)
|
||||
if key == "Host" {
|
||||
req.Host = value
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
errFunc := func(w http.ResponseWriter, r *http.Request, err error) {
|
||||
http.Error(w, err.Error(), http.StatusBadGateway)
|
||||
}
|
||||
|
||||
transport := &http.Transport{
|
||||
TLSClientConfig: &tls.Config{InsecureSkipVerify: ds.HTTPJson.TLS.SkipTlsVerify},
|
||||
Proxy: http.ProxyFromEnvironment,
|
||||
DialContext: (&net.Dialer{
|
||||
Timeout: time.Duration(ds.HTTPJson.DialTimeout) * time.Millisecond,
|
||||
}).DialContext,
|
||||
ResponseHeaderTimeout: time.Duration(ds.HTTPJson.Timeout) * time.Millisecond,
|
||||
MaxIdleConnsPerHost: ds.HTTPJson.MaxIdleConnsPerHost,
|
||||
}
|
||||
|
||||
proxy := &httputil.ReverseProxy{
|
||||
Director: director,
|
||||
Transport: transport,
|
||||
ErrorHandler: errFunc,
|
||||
}
|
||||
|
||||
proxy.ServeHTTP(c.Writer, c.Request)
|
||||
}
|
||||
@@ -1,85 +0,0 @@
|
||||
package router
|
||||
|
||||
import (
|
||||
"net/http"
|
||||
"strings"
|
||||
|
||||
"github.com/ccfos/nightingale/v6/models"
|
||||
|
||||
"github.com/gin-gonic/gin"
|
||||
"github.com/toolkits/pkg/ginx"
|
||||
)
|
||||
|
||||
func (rt *Router) rolesGets(c *gin.Context) {
|
||||
lst, err := models.RoleGetsAll(rt.Ctx)
|
||||
ginx.NewRender(c).Data(lst, err)
|
||||
}
|
||||
|
||||
func (rt *Router) permsGets(c *gin.Context) {
|
||||
user := c.MustGet("user").(*models.User)
|
||||
lst, err := models.OperationsOfRole(rt.Ctx, strings.Fields(user.Roles))
|
||||
ginx.NewRender(c).Data(lst, err)
|
||||
}
|
||||
|
||||
// 创建角色
|
||||
func (rt *Router) roleAdd(c *gin.Context) {
|
||||
var f models.Role
|
||||
ginx.BindJSON(c, &f)
|
||||
|
||||
err := f.Add(rt.Ctx)
|
||||
ginx.NewRender(c).Message(err)
|
||||
}
|
||||
|
||||
// 更新角色
|
||||
func (rt *Router) rolePut(c *gin.Context) {
|
||||
var f models.Role
|
||||
ginx.BindJSON(c, &f)
|
||||
oldRule, err := models.RoleGet(rt.Ctx, "id=?", f.Id)
|
||||
ginx.Dangerous(err)
|
||||
|
||||
if oldRule == nil {
|
||||
ginx.Bomb(http.StatusOK, "role not found")
|
||||
}
|
||||
|
||||
if oldRule.Name == "Admin" {
|
||||
ginx.Bomb(http.StatusOK, "admin role can not be modified")
|
||||
}
|
||||
|
||||
if oldRule.Name != f.Name {
|
||||
// name changed, check duplication
|
||||
num, err := models.RoleCount(rt.Ctx, "name=? and id<>?", f.Name, oldRule.Id)
|
||||
ginx.Dangerous(err)
|
||||
|
||||
if num > 0 {
|
||||
ginx.Bomb(http.StatusOK, "role name already exists")
|
||||
}
|
||||
}
|
||||
|
||||
oldRule.Name = f.Name
|
||||
oldRule.Note = f.Note
|
||||
|
||||
ginx.NewRender(c).Message(oldRule.Update(rt.Ctx, "name", "note"))
|
||||
}
|
||||
|
||||
func (rt *Router) roleDel(c *gin.Context) {
|
||||
id := ginx.UrlParamInt64(c, "id")
|
||||
target, err := models.RoleGet(rt.Ctx, "id=?", id)
|
||||
ginx.Dangerous(err)
|
||||
|
||||
if target.Name == "Admin" {
|
||||
ginx.Bomb(http.StatusOK, "admin role can not be modified")
|
||||
}
|
||||
|
||||
if target == nil {
|
||||
ginx.NewRender(c).Message(nil)
|
||||
return
|
||||
}
|
||||
|
||||
ginx.NewRender(c).Message(target.Del(rt.Ctx))
|
||||
}
|
||||
|
||||
// 角色列表
|
||||
func (rt *Router) roleGets(c *gin.Context) {
|
||||
lst, err := models.RoleGetsAll(rt.Ctx)
|
||||
ginx.NewRender(c).Data(lst, err)
|
||||
}
|
||||
@@ -1,43 +0,0 @@
|
||||
package router
|
||||
|
||||
import (
|
||||
"net/http"
|
||||
|
||||
"github.com/ccfos/nightingale/v6/models"
|
||||
"github.com/gin-gonic/gin"
|
||||
"github.com/toolkits/pkg/ginx"
|
||||
)
|
||||
|
||||
func (rt *Router) operationOfRole(c *gin.Context) {
|
||||
id := ginx.UrlParamInt64(c, "id")
|
||||
role, err := models.RoleGet(rt.Ctx, "id=?", id)
|
||||
ginx.Dangerous(err)
|
||||
if role == nil {
|
||||
ginx.Bomb(http.StatusOK, "role not found")
|
||||
}
|
||||
|
||||
ops, err := models.OperationsOfRole(rt.Ctx, []string{role.Name})
|
||||
ginx.NewRender(c).Data(ops, err)
|
||||
}
|
||||
|
||||
func (rt *Router) roleBindOperation(c *gin.Context) {
|
||||
id := ginx.UrlParamInt64(c, "id")
|
||||
role, err := models.RoleGet(rt.Ctx, "id=?", id)
|
||||
ginx.Dangerous(err)
|
||||
if role == nil {
|
||||
ginx.Bomb(http.StatusOK, "role not found")
|
||||
}
|
||||
|
||||
if role.Name == "Admin" {
|
||||
ginx.Bomb(http.StatusOK, "admin role can not be modified")
|
||||
}
|
||||
|
||||
var ops []string
|
||||
ginx.BindJSON(c, &ops)
|
||||
|
||||
ginx.NewRender(c).Message(models.RoleOperationBind(rt.Ctx, role.Name, ops))
|
||||
}
|
||||
|
||||
func (rt *Router) operations(c *gin.Context) {
|
||||
ginx.NewRender(c).Data(rt.Operations.Ops, nil)
|
||||
}
|
||||
@@ -1,18 +0,0 @@
|
||||
package router
|
||||
|
||||
import (
|
||||
"github.com/ccfos/nightingale/v6/models"
|
||||
|
||||
"github.com/gin-gonic/gin"
|
||||
"github.com/toolkits/pkg/ginx"
|
||||
)
|
||||
|
||||
func (rt *Router) serversGet(c *gin.Context) {
|
||||
list, err := models.AlertingEngineGets(rt.Ctx, "")
|
||||
ginx.NewRender(c).Data(list, err)
|
||||
}
|
||||
|
||||
func (rt *Router) serverClustersGet(c *gin.Context) {
|
||||
list, err := models.AlertingEngineGetsClusters(rt.Ctx, "")
|
||||
ginx.NewRender(c).Data(list, err)
|
||||
}
|
||||
@@ -1,169 +0,0 @@
|
||||
package sso
|
||||
|
||||
import (
|
||||
"log"
|
||||
|
||||
"github.com/BurntSushi/toml"
|
||||
"github.com/ccfos/nightingale/v6/center/cconf"
|
||||
"github.com/ccfos/nightingale/v6/models"
|
||||
"github.com/ccfos/nightingale/v6/pkg/cas"
|
||||
"github.com/ccfos/nightingale/v6/pkg/ctx"
|
||||
"github.com/ccfos/nightingale/v6/pkg/ldapx"
|
||||
"github.com/ccfos/nightingale/v6/pkg/oauth2x"
|
||||
"github.com/ccfos/nightingale/v6/pkg/oidcx"
|
||||
|
||||
"github.com/toolkits/pkg/logger"
|
||||
)
|
||||
|
||||
type SsoClient struct {
|
||||
OIDC *oidcx.SsoClient
|
||||
LDAP *ldapx.SsoClient
|
||||
CAS *cas.SsoClient
|
||||
OAuth2 *oauth2x.SsoClient
|
||||
}
|
||||
|
||||
const LDAP = `
|
||||
Enable = false
|
||||
Host = 'ldap.example.org'
|
||||
Port = 389
|
||||
BaseDn = 'dc=example,dc=org'
|
||||
BindUser = 'cn=manager,dc=example,dc=org'
|
||||
BindPass = '*******'
|
||||
AuthFilter = '(&(uid=%s))'
|
||||
CoverAttributes = true
|
||||
TLS = false
|
||||
StartTLS = true
|
||||
DefaultRoles = ['Standard']
|
||||
|
||||
[Attributes]
|
||||
Nickname = 'cn'
|
||||
Phone = 'mobile'
|
||||
Email = 'mail'
|
||||
`
|
||||
|
||||
const OAuth2 = `
|
||||
Enable = false
|
||||
DisplayName = 'OAuth2登录'
|
||||
RedirectURL = 'http://127.0.0.1:18000/callback/oauth'
|
||||
SsoAddr = 'https://sso.example.com/oauth2/authorize'
|
||||
TokenAddr = 'https://sso.example.com/oauth2/token'
|
||||
UserInfoAddr = 'https://api.example.com/api/v1/user/info'
|
||||
TranTokenMethod = 'header'
|
||||
ClientId = ''
|
||||
ClientSecret = ''
|
||||
CoverAttributes = true
|
||||
DefaultRoles = ['Standard']
|
||||
UserinfoIsArray = false
|
||||
UserinfoPrefix = 'data'
|
||||
Scopes = ['profile', 'email', 'phone']
|
||||
|
||||
[Attributes]
|
||||
Username = 'username'
|
||||
Nickname = 'nickname'
|
||||
Phone = 'phone_number'
|
||||
Email = 'email'
|
||||
`
|
||||
|
||||
const CAS = `
|
||||
Enable = false
|
||||
SsoAddr = 'https://cas.example.com/cas/'
|
||||
RedirectURL = 'http://127.0.0.1:18000/callback/cas'
|
||||
DisplayName = 'CAS登录'
|
||||
CoverAttributes = false
|
||||
DefaultRoles = ['Standard']
|
||||
|
||||
[Attributes]
|
||||
Nickname = 'nickname'
|
||||
Phone = 'phone_number'
|
||||
Email = 'email'
|
||||
`
|
||||
const OIDC = `
|
||||
Enable = false
|
||||
DisplayName = 'OIDC登录'
|
||||
RedirectURL = 'http://n9e.com/callback'
|
||||
SsoAddr = 'http://sso.example.org'
|
||||
ClientId = ''
|
||||
ClientSecret = ''
|
||||
CoverAttributes = true
|
||||
DefaultRoles = ['Standard']
|
||||
|
||||
[Attributes]
|
||||
Nickname = 'nickname'
|
||||
Phone = 'phone_number'
|
||||
Email = 'email'
|
||||
`
|
||||
|
||||
func Init(center cconf.Center, ctx *ctx.Context) *SsoClient {
|
||||
ssoClient := new(SsoClient)
|
||||
m := make(map[string]string)
|
||||
m["LDAP"] = LDAP
|
||||
m["CAS"] = CAS
|
||||
m["OIDC"] = OIDC
|
||||
m["OAuth2"] = OAuth2
|
||||
|
||||
for name, config := range m {
|
||||
count, err := models.SsoConfigCountByName(ctx, name)
|
||||
if err != nil {
|
||||
logger.Error(err)
|
||||
continue
|
||||
}
|
||||
|
||||
if count > 0 {
|
||||
continue
|
||||
}
|
||||
|
||||
ssoConfig := models.SsoConfig{
|
||||
Name: name,
|
||||
Content: config,
|
||||
}
|
||||
|
||||
err = ssoConfig.Create(ctx)
|
||||
if err != nil {
|
||||
log.Fatalln(err)
|
||||
}
|
||||
}
|
||||
|
||||
configs, err := models.SsoConfigGets(ctx)
|
||||
if err != nil {
|
||||
log.Fatalln(err)
|
||||
}
|
||||
|
||||
for _, cfg := range configs {
|
||||
switch cfg.Name {
|
||||
case "LDAP":
|
||||
var config ldapx.Config
|
||||
err := toml.Unmarshal([]byte(cfg.Content), &config)
|
||||
if err != nil {
|
||||
log.Fatalln("init ldap failed", err)
|
||||
}
|
||||
ssoClient.LDAP = ldapx.New(config)
|
||||
case "OIDC":
|
||||
var config oidcx.Config
|
||||
err := toml.Unmarshal([]byte(cfg.Content), &config)
|
||||
if err != nil {
|
||||
log.Fatalln("init oidc failed:", err)
|
||||
}
|
||||
oidcClient, err := oidcx.New(config)
|
||||
if err != nil {
|
||||
logger.Error("init oidc failed:", err)
|
||||
} else {
|
||||
ssoClient.OIDC = oidcClient
|
||||
}
|
||||
case "CAS":
|
||||
var config cas.Config
|
||||
err := toml.Unmarshal([]byte(cfg.Content), &config)
|
||||
if err != nil {
|
||||
log.Fatalln("init cas failed:", err)
|
||||
}
|
||||
ssoClient.CAS = cas.New(config)
|
||||
case "OAuth2":
|
||||
var config oauth2x.Config
|
||||
err := toml.Unmarshal([]byte(cfg.Content), &config)
|
||||
if err != nil {
|
||||
log.Fatalln("init oauth2 failed:", err)
|
||||
}
|
||||
ssoClient.OAuth2 = oauth2x.New(config)
|
||||
}
|
||||
}
|
||||
return ssoClient
|
||||
}
|
||||
@@ -1,9 +0,0 @@
|
||||
package cli
|
||||
|
||||
import (
|
||||
"github.com/ccfos/nightingale/v6/cli/upgrade"
|
||||
)
|
||||
|
||||
func Upgrade(configFile string) error {
|
||||
return upgrade.Upgrade(configFile)
|
||||
}
|
||||
@@ -1,63 +0,0 @@
|
||||
package upgrade
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"path"
|
||||
|
||||
"github.com/ccfos/nightingale/v6/pkg/cfg"
|
||||
"github.com/ccfos/nightingale/v6/pkg/ormx"
|
||||
"github.com/ccfos/nightingale/v6/pkg/tlsx"
|
||||
"github.com/koding/multiconfig"
|
||||
)
|
||||
|
||||
type Config struct {
|
||||
DB ormx.DBConfig
|
||||
Clusters []ClusterOptions
|
||||
}
|
||||
|
||||
type ClusterOptions struct {
|
||||
Name string
|
||||
Prom string
|
||||
|
||||
BasicAuthUser string
|
||||
BasicAuthPass string
|
||||
|
||||
Headers []string
|
||||
|
||||
Timeout int64
|
||||
DialTimeout int64
|
||||
|
||||
UseTLS bool
|
||||
tlsx.ClientConfig
|
||||
|
||||
MaxIdleConnsPerHost int
|
||||
}
|
||||
|
||||
func Parse(fpath string, configPtr interface{}) error {
|
||||
var (
|
||||
tBuf []byte
|
||||
)
|
||||
loaders := []multiconfig.Loader{
|
||||
&multiconfig.TagLoader{},
|
||||
&multiconfig.EnvironmentLoader{},
|
||||
}
|
||||
s := cfg.NewFileScanner()
|
||||
|
||||
s.Read(path.Join(fpath))
|
||||
tBuf = append(tBuf, s.Data()...)
|
||||
tBuf = append(tBuf, []byte("\n")...)
|
||||
|
||||
if s.Err() != nil {
|
||||
return s.Err()
|
||||
}
|
||||
|
||||
if len(tBuf) != 0 {
|
||||
loaders = append(loaders, &multiconfig.TOMLLoader{Reader: bytes.NewReader(tBuf)})
|
||||
}
|
||||
|
||||
m := multiconfig.DefaultLoader{
|
||||
Loader: multiconfig.MultiLoader(loaders...),
|
||||
Validator: multiconfig.MultiValidator(&multiconfig.RequiredValidator{}),
|
||||
}
|
||||
return m.Load(configPtr)
|
||||
}
|
||||
@@ -1,21 +0,0 @@
|
||||
# v5 升级 v6 手册
|
||||
0. 操作之前,记得备注下数据库!
|
||||
|
||||
1. 需要先将你正在使用的夜莺数据源表结构更新到和 v5.15.0 一致,[release](https://github.com/ccfos/nightingale/releases) 页面有每个版本表结构的更新说明,可以根据你正在使用的版本,按照说明,逐个执行的更新表结构的语句
|
||||
|
||||
2. 解压 n9e 安装包,导入 upgrade.sql 到 n9e_v5 数据库
|
||||
```
|
||||
mysql -h 127.0.0.1 -u root -p1234 < cli/upgrade/upgrade.sql
|
||||
```
|
||||
|
||||
3. 执行 n9e-cli 完成数据库表结构升级, webapi.conf 为 v5 版本 n9e-webapi 正在使用的配置文件
|
||||
```
|
||||
./n9e-cli --upgrade --config webapi.conf
|
||||
```
|
||||
|
||||
4. 修改 n9e 配置文件中的数据库为 n9e_v5,启动 n9e 进程
|
||||
```
|
||||
nohup ./n9e &> n9e.log &
|
||||
```
|
||||
|
||||
5. n9e 监听的端口为 17000,需要将之前的 web 端口和数据上报的端口,都调整为 17000
|
||||
@@ -1,117 +0,0 @@
|
||||
package upgrade
|
||||
|
||||
import (
|
||||
"context"
|
||||
|
||||
"github.com/ccfos/nightingale/v6/models"
|
||||
"github.com/ccfos/nightingale/v6/pkg/ctx"
|
||||
"github.com/ccfos/nightingale/v6/storage"
|
||||
"github.com/toolkits/pkg/logger"
|
||||
)
|
||||
|
||||
func Upgrade(configFile string) error {
|
||||
var config Config
|
||||
Parse(configFile, &config)
|
||||
|
||||
db, err := storage.New(config.DB)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
ctx := ctx.NewContext(context.Background(), db)
|
||||
for _, cluster := range config.Clusters {
|
||||
count, err := models.GetDatasourcesCountBy(ctx, "", "", cluster.Name)
|
||||
if err != nil {
|
||||
logger.Errorf("get datasource %s count error: %v", cluster.Name, err)
|
||||
continue
|
||||
}
|
||||
if count > 0 {
|
||||
continue
|
||||
}
|
||||
|
||||
header := make(map[string]string)
|
||||
headerCount := len(cluster.Headers)
|
||||
if headerCount > 0 && headerCount%2 == 0 {
|
||||
for i := 0; i < len(cluster.Headers); i += 2 {
|
||||
header[cluster.Headers[i]] = cluster.Headers[i+1]
|
||||
}
|
||||
}
|
||||
|
||||
authJosn := models.Auth{
|
||||
BasicAuthUser: cluster.BasicAuthUser,
|
||||
BasicAuthPassword: cluster.BasicAuthPass,
|
||||
}
|
||||
|
||||
httpJson := models.HTTP{
|
||||
Timeout: cluster.Timeout,
|
||||
DialTimeout: cluster.DialTimeout,
|
||||
TLS: models.TLS{
|
||||
SkipTlsVerify: cluster.UseTLS,
|
||||
},
|
||||
MaxIdleConnsPerHost: cluster.MaxIdleConnsPerHost,
|
||||
Url: cluster.Prom,
|
||||
Headers: header,
|
||||
}
|
||||
|
||||
datasrouce := models.Datasource{
|
||||
PluginId: 1,
|
||||
PluginType: "prometheus",
|
||||
PluginTypeName: "Prometheus Like",
|
||||
Name: cluster.Name,
|
||||
HTTPJson: httpJson,
|
||||
AuthJson: authJosn,
|
||||
ClusterName: "default",
|
||||
Status: "enabled",
|
||||
}
|
||||
|
||||
err = datasrouce.Add(ctx)
|
||||
if err != nil {
|
||||
logger.Errorf("add datasource %s error: %v", cluster.Name, err)
|
||||
}
|
||||
}
|
||||
|
||||
datasources, err := models.GetDatasources(ctx)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
m := make(map[string]models.Datasource)
|
||||
for i := 0; i < len(datasources); i++ {
|
||||
m[datasources[i].Name] = datasources[i]
|
||||
}
|
||||
|
||||
err = models.AlertRuleUpgradeToV6(ctx, m)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
// alert mute
|
||||
err = models.AlertMuteUpgradeToV6(ctx, m)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
// alert subscribe
|
||||
err = models.AlertSubscribeUpgradeToV6(ctx, m)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
// recoding rule
|
||||
err = models.RecordingRuleUpgradeToV6(ctx, m)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
// alert cur event
|
||||
err = models.AlertCurEventUpgradeToV6(ctx, m)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
// alert his event
|
||||
err = models.AlertHisEventUpgradeToV6(ctx, m)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
return nil
|
||||
}
|
||||
@@ -1,92 +0,0 @@
|
||||
use n9e_v5;
|
||||
|
||||
insert into `role_operation`(role_name, operation) values('Guest', '/log/explorer');
|
||||
insert into `role_operation`(role_name, operation) values('Guest', '/trace/explorer');
|
||||
|
||||
insert into `role_operation`(role_name, operation) values('Standard', '/log/explorer');
|
||||
insert into `role_operation`(role_name, operation) values('Standard', '/trace/explorer');
|
||||
insert into `role_operation`(role_name, operation) values('Standard', '/alert-rules-built-in');
|
||||
insert into `role_operation`(role_name, operation) values('Standard', '/dashboards-built-in');
|
||||
insert into `role_operation`(role_name, operation) values('Standard', '/trace/dependencies');
|
||||
|
||||
alter table `board` add built_in tinyint(1) not null default 0 comment '0:false 1:true';
|
||||
alter table `board` add hide tinyint(1) not null default 0 comment '0:false 1:true';
|
||||
|
||||
alter table `chart_share` add datasource_id bigint unsigned not null default 0;
|
||||
alter table `chart_share` drop dashboard_id;
|
||||
|
||||
alter table `alert_rule` add datasource_ids varchar(255) not null default '';
|
||||
alter table `alert_rule` add rule_config text not null comment 'rule_config';
|
||||
alter table `alert_rule` add annotations text not null comment 'annotations';
|
||||
|
||||
alter table `alert_mute` add datasource_ids varchar(255) not null default '';
|
||||
alter table `alert_mute` add periodic_mutes varchar(4096) not null default '[]';
|
||||
alter table `alert_mute` add mute_time_type tinyint(1) not null default 0;
|
||||
|
||||
alter table `alert_subscribe` add datasource_ids varchar(255) not null default '';
|
||||
alter table `alert_subscribe` add prod varchar(255) not null default '';
|
||||
alter table `alert_subscribe` add webhooks text;
|
||||
alter table `alert_subscribe` add redefine_webhooks tinyint(1) default 0;
|
||||
alter table `alert_subscribe` add for_duration bigint not null default 0;
|
||||
|
||||
alter table `recording_rule` add datasource_ids varchar(255) default '';
|
||||
|
||||
alter table `target` modify cluster varchar(128) not null default '';
|
||||
|
||||
alter table `alert_cur_event` add datasource_id bigint unsigned not null default 0;
|
||||
alter table `alert_cur_event` add annotations text not null comment 'annotations';
|
||||
alter table `alert_cur_event` add rule_config text not null comment 'rule_config';
|
||||
|
||||
alter table `alert_his_event` add datasource_id bigint unsigned not null default 0;
|
||||
alter table `alert_his_event` add annotations text not null comment 'annotations';
|
||||
alter table `alert_his_event` add rule_config text not null comment 'rule_config';
|
||||
|
||||
alter table `alerting_engines` add datasource_id bigint unsigned not null default 0;
|
||||
alter table `alerting_engines` change cluster engine_cluster varchar(128) not null default '' comment 'n9e engine cluster';
|
||||
|
||||
alter table `task_record` add event_id bigint not null comment 'event id' default 0;
|
||||
|
||||
CREATE TABLE `datasource`
|
||||
(
|
||||
`id` int unsigned NOT NULL AUTO_INCREMENT,
|
||||
`name` varchar(255) not null default '',
|
||||
`description` varchar(255) not null default '',
|
||||
`category` varchar(255) not null default '',
|
||||
`plugin_id` int unsigned not null default 0,
|
||||
`plugin_type` varchar(255) not null default '',
|
||||
`plugin_type_name` varchar(255) not null default '',
|
||||
`cluster_name` varchar(255) not null default '',
|
||||
`settings` text not null,
|
||||
`status` varchar(255) not null default '',
|
||||
`http` varchar(4096) not null default '',
|
||||
`auth` varchar(8192) not null default '',
|
||||
`created_at` bigint not null default 0,
|
||||
`created_by` varchar(64) not null default '',
|
||||
`updated_at` bigint not null default 0,
|
||||
`updated_by` varchar(64) not null default '',
|
||||
PRIMARY KEY (`id`)
|
||||
) ENGINE = InnoDB DEFAULT CHARSET = utf8mb4;
|
||||
|
||||
CREATE TABLE `builtin_cate` (
|
||||
`id` bigint unsigned not null auto_increment,
|
||||
`name` varchar(191) not null,
|
||||
`user_id` bigint not null default 0,
|
||||
PRIMARY KEY (`id`)
|
||||
) ENGINE = InnoDB DEFAULT CHARSET = utf8mb4;
|
||||
|
||||
CREATE TABLE `notify_tpl` (
|
||||
`id` bigint unsigned not null auto_increment,
|
||||
`channel` varchar(32) not null,
|
||||
`name` varchar(255) not null,
|
||||
`content` text not null,
|
||||
PRIMARY KEY (`id`),
|
||||
UNIQUE KEY (`channel`)
|
||||
) ENGINE = InnoDB DEFAULT CHARSET = utf8mb4;
|
||||
|
||||
CREATE TABLE `sso_config` (
|
||||
`id` bigint unsigned not null auto_increment,
|
||||
`name` varchar(191) not null,
|
||||
`content` text not null,
|
||||
PRIMARY KEY (`id`),
|
||||
UNIQUE KEY (`name`)
|
||||
) ENGINE = InnoDB DEFAULT CHARSET = utf8mb4;
|
||||
@@ -1,69 +0,0 @@
|
||||
package main
|
||||
|
||||
import (
|
||||
"flag"
|
||||
"fmt"
|
||||
"log"
|
||||
"os"
|
||||
"os/signal"
|
||||
"syscall"
|
||||
|
||||
"github.com/ccfos/nightingale/v6/alert"
|
||||
"github.com/ccfos/nightingale/v6/pkg/osx"
|
||||
"github.com/ccfos/nightingale/v6/pkg/version"
|
||||
|
||||
"github.com/toolkits/pkg/runner"
|
||||
)
|
||||
|
||||
var (
|
||||
showVersion = flag.Bool("version", false, "Show version.")
|
||||
configDir = flag.String("configs", osx.GetEnv("N9E_CONFIGS", "etc"), "Specify configuration directory.(env:N9E_CONFIGS)")
|
||||
cryptoKey = flag.String("crypto-key", "", "Specify the secret key for configuration file field encryption.")
|
||||
)
|
||||
|
||||
func main() {
|
||||
flag.Parse()
|
||||
|
||||
if *showVersion {
|
||||
fmt.Println(version.Version)
|
||||
os.Exit(0)
|
||||
}
|
||||
|
||||
printEnv()
|
||||
|
||||
cleanFunc, err := alert.Initialize(*configDir, *cryptoKey)
|
||||
if err != nil {
|
||||
log.Fatalln("failed to initialize:", err)
|
||||
}
|
||||
|
||||
code := 1
|
||||
sc := make(chan os.Signal, 1)
|
||||
signal.Notify(sc, syscall.SIGHUP, syscall.SIGINT, syscall.SIGTERM, syscall.SIGQUIT)
|
||||
|
||||
EXIT:
|
||||
for {
|
||||
sig := <-sc
|
||||
fmt.Println("received signal:", sig.String())
|
||||
switch sig {
|
||||
case syscall.SIGQUIT, syscall.SIGTERM, syscall.SIGINT:
|
||||
code = 0
|
||||
break EXIT
|
||||
case syscall.SIGHUP:
|
||||
// reload configuration?
|
||||
default:
|
||||
break EXIT
|
||||
}
|
||||
}
|
||||
|
||||
cleanFunc()
|
||||
fmt.Println("process exited")
|
||||
os.Exit(code)
|
||||
}
|
||||
|
||||
func printEnv() {
|
||||
runner.Init()
|
||||
fmt.Println("runner.cwd:", runner.Cwd)
|
||||
fmt.Println("runner.hostname:", runner.Hostname)
|
||||
fmt.Println("runner.fd_limits:", runner.FdLimits())
|
||||
fmt.Println("runner.vm_limits:", runner.VMLimits())
|
||||
}
|
||||
@@ -1,69 +0,0 @@
|
||||
package main
|
||||
|
||||
import (
|
||||
"flag"
|
||||
"fmt"
|
||||
"log"
|
||||
"os"
|
||||
"os/signal"
|
||||
"syscall"
|
||||
|
||||
"github.com/ccfos/nightingale/v6/center"
|
||||
"github.com/ccfos/nightingale/v6/pkg/osx"
|
||||
"github.com/ccfos/nightingale/v6/pkg/version"
|
||||
|
||||
"github.com/toolkits/pkg/runner"
|
||||
)
|
||||
|
||||
var (
|
||||
showVersion = flag.Bool("version", false, "Show version.")
|
||||
configDir = flag.String("configs", osx.GetEnv("N9E_CONFIGS", "etc"), "Specify configuration directory.(env:N9E_CONFIGS)")
|
||||
cryptoKey = flag.String("crypto-key", "", "Specify the secret key for configuration file field encryption.")
|
||||
)
|
||||
|
||||
func main() {
|
||||
flag.Parse()
|
||||
|
||||
if *showVersion {
|
||||
fmt.Println(version.Version)
|
||||
os.Exit(0)
|
||||
}
|
||||
|
||||
printEnv()
|
||||
|
||||
cleanFunc, err := center.Initialize(*configDir, *cryptoKey)
|
||||
if err != nil {
|
||||
log.Fatalln("failed to initialize:", err)
|
||||
}
|
||||
|
||||
code := 1
|
||||
sc := make(chan os.Signal, 1)
|
||||
signal.Notify(sc, syscall.SIGHUP, syscall.SIGINT, syscall.SIGTERM, syscall.SIGQUIT)
|
||||
|
||||
EXIT:
|
||||
for {
|
||||
sig := <-sc
|
||||
fmt.Println("received signal:", sig.String())
|
||||
switch sig {
|
||||
case syscall.SIGQUIT, syscall.SIGTERM, syscall.SIGINT:
|
||||
code = 0
|
||||
break EXIT
|
||||
case syscall.SIGHUP:
|
||||
// reload configuration?
|
||||
default:
|
||||
break EXIT
|
||||
}
|
||||
}
|
||||
|
||||
cleanFunc()
|
||||
fmt.Println("process exited")
|
||||
os.Exit(code)
|
||||
}
|
||||
|
||||
func printEnv() {
|
||||
runner.Init()
|
||||
fmt.Println("runner.cwd:", runner.Cwd)
|
||||
fmt.Println("runner.hostname:", runner.Hostname)
|
||||
fmt.Println("runner.fd_limits:", runner.FdLimits())
|
||||
fmt.Println("runner.vm_limits:", runner.VMLimits())
|
||||
}
|
||||
@@ -1,40 +0,0 @@
|
||||
package main
|
||||
|
||||
import (
|
||||
"flag"
|
||||
"fmt"
|
||||
"os"
|
||||
|
||||
"github.com/ccfos/nightingale/v6/cli"
|
||||
"github.com/ccfos/nightingale/v6/pkg/version"
|
||||
)
|
||||
|
||||
var (
|
||||
upgrade = flag.Bool("upgrade", false, "Upgrade the database.")
|
||||
showVersion = flag.Bool("version", false, "Show version.")
|
||||
configFile = flag.String("config", "", "Specify webapi.conf of v5.x version")
|
||||
)
|
||||
|
||||
func main() {
|
||||
flag.Parse()
|
||||
|
||||
if *showVersion {
|
||||
fmt.Println(version.Version)
|
||||
os.Exit(0)
|
||||
}
|
||||
|
||||
if *upgrade {
|
||||
if *configFile == "" {
|
||||
fmt.Println("Please specify the configuration directory.")
|
||||
os.Exit(1)
|
||||
}
|
||||
|
||||
err := cli.Upgrade(*configFile)
|
||||
if err != nil {
|
||||
fmt.Println(err)
|
||||
os.Exit(1)
|
||||
}
|
||||
fmt.Print("Upgrade successfully.")
|
||||
os.Exit(0)
|
||||
}
|
||||
}
|
||||
@@ -1,69 +0,0 @@
|
||||
package main
|
||||
|
||||
import (
|
||||
"flag"
|
||||
"fmt"
|
||||
"log"
|
||||
"os"
|
||||
"os/signal"
|
||||
"syscall"
|
||||
|
||||
"github.com/ccfos/nightingale/v6/pkg/osx"
|
||||
"github.com/ccfos/nightingale/v6/pkg/version"
|
||||
"github.com/ccfos/nightingale/v6/pushgw"
|
||||
|
||||
"github.com/toolkits/pkg/runner"
|
||||
)
|
||||
|
||||
var (
|
||||
showVersion = flag.Bool("version", false, "Show version.")
|
||||
configDir = flag.String("configs", osx.GetEnv("N9E_CONFIGS", "etc"), "Specify configuration directory.(env:N9E_CONFIGS)")
|
||||
cryptoKey = flag.String("crypto-key", "", "Specify the secret key for configuration file field encryption.")
|
||||
)
|
||||
|
||||
func main() {
|
||||
flag.Parse()
|
||||
|
||||
if *showVersion {
|
||||
fmt.Println(version.Version)
|
||||
os.Exit(0)
|
||||
}
|
||||
|
||||
printEnv()
|
||||
|
||||
cleanFunc, err := pushgw.Initialize(*configDir, *cryptoKey)
|
||||
if err != nil {
|
||||
log.Fatalln("failed to initialize:", err)
|
||||
}
|
||||
|
||||
code := 1
|
||||
sc := make(chan os.Signal, 1)
|
||||
signal.Notify(sc, syscall.SIGHUP, syscall.SIGINT, syscall.SIGTERM, syscall.SIGQUIT)
|
||||
|
||||
EXIT:
|
||||
for {
|
||||
sig := <-sc
|
||||
fmt.Println("received signal:", sig.String())
|
||||
switch sig {
|
||||
case syscall.SIGQUIT, syscall.SIGTERM, syscall.SIGINT:
|
||||
code = 0
|
||||
break EXIT
|
||||
case syscall.SIGHUP:
|
||||
// reload configuration?
|
||||
default:
|
||||
break EXIT
|
||||
}
|
||||
}
|
||||
|
||||
cleanFunc()
|
||||
fmt.Println("process exited")
|
||||
os.Exit(code)
|
||||
}
|
||||
|
||||
func printEnv() {
|
||||
runner.Init()
|
||||
fmt.Println("runner.cwd:", runner.Cwd)
|
||||
fmt.Println("runner.hostname:", runner.Hostname)
|
||||
fmt.Println("runner.fd_limits:", runner.FdLimits())
|
||||
fmt.Println("runner.vm_limits:", runner.VMLimits())
|
||||
}
|
||||
76
conf/conf.go
76
conf/conf.go
@@ -1,76 +0,0 @@
|
||||
package conf
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"os"
|
||||
"strings"
|
||||
|
||||
"github.com/ccfos/nightingale/v6/alert/aconf"
|
||||
"github.com/ccfos/nightingale/v6/center/cconf"
|
||||
"github.com/ccfos/nightingale/v6/pkg/cfg"
|
||||
"github.com/ccfos/nightingale/v6/pkg/httpx"
|
||||
"github.com/ccfos/nightingale/v6/pkg/logx"
|
||||
"github.com/ccfos/nightingale/v6/pkg/ormx"
|
||||
"github.com/ccfos/nightingale/v6/pushgw/pconf"
|
||||
"github.com/ccfos/nightingale/v6/storage"
|
||||
)
|
||||
|
||||
type ConfigType struct {
|
||||
Global GlobalConfig
|
||||
Log logx.Config
|
||||
HTTP httpx.Config
|
||||
DB ormx.DBConfig
|
||||
Redis storage.RedisConfig
|
||||
|
||||
Pushgw pconf.Pushgw
|
||||
Alert aconf.Alert
|
||||
Center cconf.Center
|
||||
}
|
||||
|
||||
type GlobalConfig struct {
|
||||
RunMode string
|
||||
}
|
||||
|
||||
func InitConfig(configDir, cryptoKey string) (*ConfigType, error) {
|
||||
var config = new(ConfigType)
|
||||
|
||||
if err := cfg.LoadConfigByDir(configDir, config); err != nil {
|
||||
return nil, fmt.Errorf("failed to load configs of directory: %s error: %s", configDir, err)
|
||||
}
|
||||
|
||||
config.Pushgw.PreCheck()
|
||||
config.Alert.PreCheck()
|
||||
config.Center.PreCheck()
|
||||
|
||||
err := decryptConfig(config, cryptoKey)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
if config.Alert.Heartbeat.IP == "" {
|
||||
// auto detect
|
||||
// config.Alert.Heartbeat.IP = fmt.Sprint(GetOutboundIP())
|
||||
// 自动获取IP在有些环境下容易出错,这里用hostname+pid来作唯一标识
|
||||
|
||||
hostname, err := os.Hostname()
|
||||
if err != nil {
|
||||
fmt.Println("failed to get hostname:", err)
|
||||
os.Exit(1)
|
||||
}
|
||||
|
||||
if strings.Contains(hostname, "localhost") {
|
||||
fmt.Println("Warning! hostname contains substring localhost, setting a more unique hostname is recommended")
|
||||
}
|
||||
|
||||
config.Alert.Heartbeat.IP = hostname
|
||||
|
||||
// if config.Alert.Heartbeat.IP == "" {
|
||||
// fmt.Println("heartbeat ip auto got is blank")
|
||||
// os.Exit(1)
|
||||
// }
|
||||
}
|
||||
|
||||
config.Alert.Heartbeat.Endpoint = fmt.Sprintf("%s:%d", config.Alert.Heartbeat.IP, config.HTTP.Port)
|
||||
|
||||
return config, nil
|
||||
}
|
||||
@@ -1,62 +0,0 @@
|
||||
package conf
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
|
||||
"github.com/ccfos/nightingale/v6/pkg/secu"
|
||||
)
|
||||
|
||||
func decryptConfig(config *ConfigType, cryptoKey string) error {
|
||||
decryptDsn, err := secu.DealWithDecrypt(config.DB.DSN, cryptoKey)
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to decrypt the db dsn: %s", err)
|
||||
}
|
||||
|
||||
config.DB.DSN = decryptDsn
|
||||
|
||||
for k := range config.HTTP.Alert.BasicAuth {
|
||||
decryptPwd, err := secu.DealWithDecrypt(config.HTTP.Alert.BasicAuth[k], cryptoKey)
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to decrypt http basic auth password: %s", err)
|
||||
}
|
||||
|
||||
config.HTTP.Alert.BasicAuth[k] = decryptPwd
|
||||
}
|
||||
|
||||
for k := range config.HTTP.Pushgw.BasicAuth {
|
||||
decryptPwd, err := secu.DealWithDecrypt(config.HTTP.Pushgw.BasicAuth[k], cryptoKey)
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to decrypt http basic auth password: %s", err)
|
||||
}
|
||||
|
||||
config.HTTP.Pushgw.BasicAuth[k] = decryptPwd
|
||||
}
|
||||
|
||||
for k := range config.HTTP.Heartbeat.BasicAuth {
|
||||
decryptPwd, err := secu.DealWithDecrypt(config.HTTP.Heartbeat.BasicAuth[k], cryptoKey)
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to decrypt http basic auth password: %s", err)
|
||||
}
|
||||
|
||||
config.HTTP.Heartbeat.BasicAuth[k] = decryptPwd
|
||||
}
|
||||
|
||||
for k := range config.HTTP.Service.BasicAuth {
|
||||
decryptPwd, err := secu.DealWithDecrypt(config.HTTP.Service.BasicAuth[k], cryptoKey)
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to decrypt http basic auth password: %s", err)
|
||||
}
|
||||
config.HTTP.Service.BasicAuth[k] = decryptPwd
|
||||
}
|
||||
|
||||
for i, v := range config.Pushgw.Writers {
|
||||
decryptWriterPwd, err := secu.DealWithDecrypt(v.BasicAuthPass, cryptoKey)
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to decrypt writer basic auth password: %s", err)
|
||||
}
|
||||
|
||||
config.Pushgw.Writers[i].BasicAuthPass = decryptWriterPwd
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
Binary file not shown.
|
Before Width: | Height: | Size: 625 KiB |
@@ -1,4 +1,5 @@
|
||||
FROM python:3-slim
|
||||
FROM python:2.7.8-slim
|
||||
#FROM python:2
|
||||
#FROM ubuntu:21.04
|
||||
|
||||
WORKDIR /app
|
||||
@@ -8,6 +9,7 @@ RUN mkdir -p /app/pub && chmod +x /wait
|
||||
ADD pub /app/pub/
|
||||
RUN chmod +x n9e
|
||||
|
||||
EXPOSE 17000
|
||||
EXPOSE 19000
|
||||
EXPOSE 18000
|
||||
|
||||
CMD ["/app/n9e", "-h"]
|
||||
|
||||
@@ -1,13 +1,14 @@
|
||||
FROM --platform=$TARGETPLATFORM python:3-slim
|
||||
FROM --platform=$BUILDPLATFORM python:2.7.8-slim
|
||||
|
||||
|
||||
WORKDIR /app
|
||||
ADD n9e /app/
|
||||
ADD etc /app/
|
||||
ADD integrations /app/integrations/
|
||||
ADD --chmod=755 https://github.com/ufoscout/docker-compose-wait/releases/download/2.11.0/wait_x86_64 /wait
|
||||
ADD n9e /app
|
||||
ADD http://download.flashcat.cloud/wait /wait
|
||||
RUN mkdir -p /app/pub && chmod +x /wait
|
||||
ADD pub /app/pub/
|
||||
RUN chmod +x n9e
|
||||
|
||||
EXPOSE 17000
|
||||
EXPOSE 19000
|
||||
EXPOSE 18000
|
||||
|
||||
CMD ["/app/n9e", "-h"]
|
||||
|
||||
@@ -1,13 +0,0 @@
|
||||
FROM --platform=$TARGETPLATFORM python:3-slim
|
||||
|
||||
|
||||
WORKDIR /app
|
||||
ADD n9e /app/
|
||||
ADD etc /app/
|
||||
ADD integrations /app/integrations/
|
||||
ADD --chmod=755 https://github.com/ufoscout/docker-compose-wait/releases/download/2.11.0/wait_aarch64 /wait
|
||||
ADD pub /app/pub/
|
||||
|
||||
EXPOSE 17000
|
||||
|
||||
CMD ["/app/n9e", "-h"]
|
||||
@@ -31,7 +31,7 @@ batch = 2000
|
||||
chan_size = 10000
|
||||
|
||||
[[writers]]
|
||||
url = "http://127.0.0.1:17000/prometheus/v1/write"
|
||||
url = "http://nserver:19000/prometheus/v1/write"
|
||||
|
||||
# Basic auth username
|
||||
basic_auth_user = ""
|
||||
@@ -49,35 +49,3 @@ enable = false
|
||||
address = ":9100"
|
||||
print_access = false
|
||||
run_mode = "release"
|
||||
|
||||
[heartbeat]
|
||||
enable = true
|
||||
|
||||
# report os version cpu.util mem.util metadata
|
||||
url = "http://127.0.0.1:17000/v1/n9e/heartbeat"
|
||||
|
||||
# interval, unit: s
|
||||
interval = 10
|
||||
|
||||
# Basic auth username
|
||||
basic_auth_user = ""
|
||||
|
||||
# Basic auth password
|
||||
basic_auth_pass = ""
|
||||
|
||||
## Optional headers
|
||||
# headers = ["X-From", "categraf", "X-Xyz", "abc"]
|
||||
|
||||
# timeout settings, unit: ms
|
||||
timeout = 5000
|
||||
dial_timeout = 2500
|
||||
max_idle_conns_per_host = 100
|
||||
|
||||
[ibex]
|
||||
enable = true
|
||||
## ibex flush interval
|
||||
interval = "1000ms"
|
||||
## n9e ibex server rpc address
|
||||
servers = ["127.0.0.1:20090"]
|
||||
## temp script dir
|
||||
meta_dir = "./meta"
|
||||
|
||||
35
docker/categraf/conf/logs.toml
Normal file
35
docker/categraf/conf/logs.toml
Normal file
@@ -0,0 +1,35 @@
|
||||
[logs]
|
||||
## key 占位符
|
||||
api_key = "ef4ahfbwzwwtlwfpbertgq1i6mq0ab1q"
|
||||
## 是否开启日志采集
|
||||
enable = false
|
||||
## 接受日志的server地址
|
||||
send_to = "127.0.0.1:17878"
|
||||
## 发送日志的协议 http/tcp
|
||||
send_type = "http"
|
||||
## 是否压缩发送
|
||||
use_compress = false
|
||||
## 是否采用ssl
|
||||
send_with_tls = false
|
||||
##
|
||||
batch_wait = 5
|
||||
## 日志offset信息保存目录
|
||||
run_path = "/opt/categraf/run"
|
||||
## 最多同时采集多少个日志文件
|
||||
open_files_limit = 100
|
||||
## 定期扫描目录下是否有新增日志
|
||||
scan_period = 10
|
||||
##
|
||||
frame_size = 10
|
||||
##
|
||||
collect_container_all = true
|
||||
## 全局的处理规则
|
||||
[[logs.Processing_rules]]
|
||||
## 单个日志采集配置
|
||||
[[logs.items]]
|
||||
## file/journald
|
||||
type = "file"
|
||||
## type=file时 path必填,type=journald时 port必填
|
||||
path = "/opt/tomcat/logs/*.txt"
|
||||
source = "tomcat"
|
||||
service = "my_service"
|
||||
@@ -1,12 +1,18 @@
|
||||
version: "3.7"
|
||||
|
||||
networks:
|
||||
nightingale:
|
||||
driver: bridge
|
||||
|
||||
services:
|
||||
mysql:
|
||||
# platform: linux/x86_64
|
||||
platform: linux/x86_64
|
||||
image: "mysql:5.7"
|
||||
container_name: mysql
|
||||
hostname: mysql
|
||||
restart: always
|
||||
ports:
|
||||
- "3306:3306"
|
||||
environment:
|
||||
TZ: Asia/Shanghai
|
||||
MYSQL_ROOT_PASSWORD: 1234
|
||||
@@ -14,19 +20,23 @@ services:
|
||||
- ./mysqldata:/var/lib/mysql/
|
||||
- ./initsql:/docker-entrypoint-initdb.d/
|
||||
- ./mysqletc/my.cnf:/etc/my.cnf
|
||||
network_mode: host
|
||||
networks:
|
||||
- nightingale
|
||||
|
||||
redis:
|
||||
image: "redis:6.2"
|
||||
container_name: redis
|
||||
hostname: redis
|
||||
restart: always
|
||||
ports:
|
||||
- "6379:6379"
|
||||
environment:
|
||||
TZ: Asia/Shanghai
|
||||
network_mode: host
|
||||
networks:
|
||||
- nightingale
|
||||
|
||||
prometheus:
|
||||
image: prom/prometheus
|
||||
image: prom/prometheus:v2.37.5
|
||||
container_name: prometheus
|
||||
hostname: prometheus
|
||||
restart: always
|
||||
@@ -34,13 +44,16 @@ services:
|
||||
TZ: Asia/Shanghai
|
||||
volumes:
|
||||
- ./prometc:/etc/prometheus
|
||||
network_mode: host
|
||||
ports:
|
||||
- "9090:9090"
|
||||
networks:
|
||||
- nightingale
|
||||
command:
|
||||
- "--config.file=/etc/prometheus/prometheus.yml"
|
||||
- "--storage.tsdb.path=/prometheus"
|
||||
- "--web.console.libraries=/usr/share/prometheus/console_libraries"
|
||||
- "--web.console.templates=/usr/share/prometheus/consoles"
|
||||
- "--enable-feature=remote-write-receiver"
|
||||
- "--web.enable-remote-write-receiver"
|
||||
- "--query.lookback-delta=2m"
|
||||
|
||||
ibex:
|
||||
@@ -51,34 +64,76 @@ services:
|
||||
environment:
|
||||
GIN_MODE: release
|
||||
TZ: Asia/Shanghai
|
||||
WAIT_HOSTS: 127.0.0.1:3306
|
||||
WAIT_HOSTS: mysql:3306
|
||||
ports:
|
||||
- "10090:10090"
|
||||
- "20090:20090"
|
||||
volumes:
|
||||
- ./ibexetc:/app/etc
|
||||
network_mode: host
|
||||
networks:
|
||||
- nightingale
|
||||
depends_on:
|
||||
- mysql
|
||||
links:
|
||||
- mysql:mysql
|
||||
command: >
|
||||
sh -c "/wait && /app/ibex server"
|
||||
|
||||
n9e:
|
||||
nwebapi:
|
||||
image: flashcatcloud/nightingale:latest
|
||||
container_name: n9e
|
||||
hostname: n9e
|
||||
container_name: nwebapi
|
||||
hostname: nwebapi
|
||||
restart: always
|
||||
environment:
|
||||
GIN_MODE: release
|
||||
TZ: Asia/Shanghai
|
||||
WAIT_HOSTS: 127.0.0.1:3306, 127.0.0.1:6379
|
||||
WAIT_HOSTS: mysql:3306, redis:6379
|
||||
volumes:
|
||||
- ../etc:/app/etc
|
||||
network_mode: host
|
||||
- ./n9eetc:/app/etc
|
||||
ports:
|
||||
- "18000:18000"
|
||||
networks:
|
||||
- nightingale
|
||||
depends_on:
|
||||
- mysql
|
||||
- redis
|
||||
- prometheus
|
||||
- ibex
|
||||
links:
|
||||
- mysql:mysql
|
||||
- redis:redis
|
||||
- prometheus:prometheus
|
||||
- ibex:ibex
|
||||
command: >
|
||||
sh -c "/wait && /app/n9e"
|
||||
sh -c "/wait && /app/n9e webapi"
|
||||
|
||||
nserver:
|
||||
image: flashcatcloud/nightingale:latest
|
||||
container_name: nserver
|
||||
hostname: nserver
|
||||
restart: always
|
||||
environment:
|
||||
GIN_MODE: release
|
||||
TZ: Asia/Shanghai
|
||||
WAIT_HOSTS: mysql:3306, redis:6379
|
||||
volumes:
|
||||
- ./n9eetc:/app/etc
|
||||
ports:
|
||||
- "19000:19000"
|
||||
networks:
|
||||
- nightingale
|
||||
depends_on:
|
||||
- mysql
|
||||
- redis
|
||||
- prometheus
|
||||
- ibex
|
||||
links:
|
||||
- mysql:mysql
|
||||
- redis:redis
|
||||
- prometheus:prometheus
|
||||
- ibex:ibex
|
||||
command: >
|
||||
sh -c "/wait && /app/n9e server"
|
||||
|
||||
categraf:
|
||||
image: "flashcatcloud/categraf:latest"
|
||||
@@ -94,7 +149,31 @@ services:
|
||||
- ./categraf/conf:/etc/categraf/conf
|
||||
- /:/hostfs
|
||||
- /var/run/docker.sock:/var/run/docker.sock
|
||||
network_mode: host
|
||||
ports:
|
||||
- "9100:9100/tcp"
|
||||
networks:
|
||||
- nightingale
|
||||
depends_on:
|
||||
- n9e
|
||||
- ibex
|
||||
- nserver
|
||||
links:
|
||||
- nserver:nserver
|
||||
|
||||
agentd:
|
||||
image: ulric2019/ibex:0.3
|
||||
container_name: agentd
|
||||
hostname: agentd
|
||||
restart: always
|
||||
environment:
|
||||
GIN_MODE: release
|
||||
TZ: Asia/Shanghai
|
||||
volumes:
|
||||
- ./ibexetc:/app/etc
|
||||
networks:
|
||||
- nightingale
|
||||
depends_on:
|
||||
- ibex
|
||||
links:
|
||||
- ibex:ibex
|
||||
command:
|
||||
- "/app/ibex"
|
||||
- "agentd"
|
||||
|
||||
@@ -1,83 +0,0 @@
|
||||
[global]
|
||||
# whether print configs
|
||||
print_configs = false
|
||||
|
||||
# add label(agent_hostname) to series
|
||||
# "" -> auto detect hostname
|
||||
# "xx" -> use specified string xx
|
||||
# "$hostname" -> auto detect hostname
|
||||
# "$ip" -> auto detect ip
|
||||
# "$hostname-$ip" -> auto detect hostname and ip to replace the vars
|
||||
hostname = "$HOSTNAME"
|
||||
|
||||
# will not add label(agent_hostname) if true
|
||||
omit_hostname = false
|
||||
|
||||
# s | ms
|
||||
precision = "ms"
|
||||
|
||||
# global collect interval
|
||||
interval = 15
|
||||
|
||||
[global.labels]
|
||||
source="categraf"
|
||||
# region = "shanghai"
|
||||
# env = "localhost"
|
||||
|
||||
[writer_opt]
|
||||
# default: 2000
|
||||
batch = 2000
|
||||
# channel(as queue) size
|
||||
chan_size = 10000
|
||||
|
||||
[[writers]]
|
||||
url = "http://n9e:17000/prometheus/v1/write"
|
||||
|
||||
# Basic auth username
|
||||
basic_auth_user = ""
|
||||
|
||||
# Basic auth password
|
||||
basic_auth_pass = ""
|
||||
|
||||
# timeout settings, unit: ms
|
||||
timeout = 5000
|
||||
dial_timeout = 2500
|
||||
max_idle_conns_per_host = 100
|
||||
|
||||
[http]
|
||||
enable = false
|
||||
address = ":9100"
|
||||
print_access = false
|
||||
run_mode = "release"
|
||||
|
||||
[heartbeat]
|
||||
enable = true
|
||||
|
||||
# report os version cpu.util mem.util metadata
|
||||
url = "http://n9e:17000/v1/n9e/heartbeat"
|
||||
|
||||
# interval, unit: s
|
||||
interval = 10
|
||||
|
||||
# Basic auth username
|
||||
basic_auth_user = ""
|
||||
|
||||
# Basic auth password
|
||||
basic_auth_pass = ""
|
||||
|
||||
## Optional headers
|
||||
# headers = ["X-From", "categraf", "X-Xyz", "abc"]
|
||||
|
||||
# timeout settings, unit: ms
|
||||
timeout = 5000
|
||||
dial_timeout = 2500
|
||||
max_idle_conns_per_host = 100
|
||||
|
||||
[ibex]
|
||||
enable = true
|
||||
## ibex flush interval
|
||||
interval = "1000ms"
|
||||
## n9e ibex server rpc address
|
||||
servers = ["ibex:20090"]
|
||||
## temp script dir
|
||||
meta_dir = "./meta"
|
||||
@@ -1,5 +0,0 @@
|
||||
# # collect interval
|
||||
# interval = 15
|
||||
|
||||
# # whether collect per cpu
|
||||
# collect_per_cpu = false
|
||||
@@ -1,11 +0,0 @@
|
||||
# # collect interval
|
||||
# interval = 15
|
||||
|
||||
# # By default stats will be gathered for all mount points.
|
||||
# # Set mount_points will restrict the stats to only the specified mount points.
|
||||
# mount_points = ["/"]
|
||||
|
||||
# Ignore mount points by filesystem type.
|
||||
ignore_fs = ["tmpfs", "devtmpfs", "devfs", "iso9660", "overlay", "aufs", "squashfs"]
|
||||
|
||||
ignore_mount_points = ["/boot"]
|
||||
@@ -1,6 +0,0 @@
|
||||
# # collect interval
|
||||
# interval = 15
|
||||
|
||||
# # By default, categraf will gather stats for all devices including disk partitions.
|
||||
# # Setting devices will restrict the stats to the specified devices.
|
||||
# devices = ["sda", "sdb", "vd*"]
|
||||
@@ -1,63 +0,0 @@
|
||||
# # collect interval
|
||||
# interval = 15
|
||||
|
||||
[[instances]]
|
||||
# # append some labels for series
|
||||
# labels = { region="cloud", product="n9e" }
|
||||
|
||||
# # interval = global.interval * interval_times
|
||||
# interval_times = 1
|
||||
|
||||
## Docker Endpoint
|
||||
## To use TCP, set endpoint = "tcp://[ip]:[port]"
|
||||
## To use environment variables (ie, docker-machine), set endpoint = "ENV"
|
||||
endpoint = "unix:///var/run/docker.sock"
|
||||
|
||||
## Set to true to collect Swarm metrics(desired_replicas, running_replicas)
|
||||
gather_services = false
|
||||
gather_extend_memstats = false
|
||||
|
||||
container_id_label_enable = true
|
||||
container_id_label_short_style = true
|
||||
|
||||
## Containers to include and exclude. Globs accepted.
|
||||
## Note that an empty array for both will include all containers
|
||||
container_name_include = []
|
||||
container_name_exclude = []
|
||||
|
||||
## Container states to include and exclude. Globs accepted.
|
||||
## When empty only containers in the "running" state will be captured.
|
||||
## example: container_state_include = ["created", "restarting", "running", "removing", "paused", "exited", "dead"]
|
||||
## example: container_state_exclude = ["created", "restarting", "running", "removing", "paused", "exited", "dead"]
|
||||
# container_state_include = []
|
||||
# container_state_exclude = []
|
||||
|
||||
## Timeout for docker list, info, and stats commands
|
||||
timeout = "5s"
|
||||
|
||||
## Specifies for which classes a per-device metric should be issued
|
||||
## Possible values are 'cpu' (cpu0, cpu1, ...), 'blkio' (8:0, 8:1, ...) and 'network' (eth0, eth1, ...)
|
||||
## Please note that this setting has no effect if 'perdevice' is set to 'true'
|
||||
perdevice_include = []
|
||||
|
||||
## Specifies for which classes a total metric should be issued. Total is an aggregated of the 'perdevice' values.
|
||||
## Possible values are 'cpu', 'blkio' and 'network'
|
||||
## Total 'cpu' is reported directly by Docker daemon, and 'network' and 'blkio' totals are aggregated by this plugin.
|
||||
## Please note that this setting has no effect if 'total' is set to 'false'
|
||||
total_include = ["cpu", "blkio", "network"]
|
||||
|
||||
## Which environment variables should we use as a tag
|
||||
##tag_env = ["JAVA_HOME", "HEAP_SIZE"]
|
||||
|
||||
## docker labels to include and exclude as tags. Globs accepted.
|
||||
## Note that an empty array for both will include all labels as tags
|
||||
docker_label_include = []
|
||||
docker_label_exclude = ["annotation*", "io.kubernetes*", "*description*", "*maintainer*", "*hash", "*author*"]
|
||||
|
||||
## Optional TLS Config
|
||||
# use_tls = false
|
||||
# tls_ca = "/etc/telegraf/ca.pem"
|
||||
# tls_cert = "/etc/telegraf/cert.pem"
|
||||
# tls_key = "/etc/telegraf/key.pem"
|
||||
## Use TLS but skip chain & host verification
|
||||
# insecure_skip_verify = false
|
||||
@@ -1,2 +0,0 @@
|
||||
# # collect interval
|
||||
# interval = 15
|
||||
@@ -1,124 +0,0 @@
|
||||
# # collect interval
|
||||
# interval = 15
|
||||
|
||||
# file: /proc/vmstat
|
||||
[white_list]
|
||||
oom_kill = 1
|
||||
nr_free_pages = 0
|
||||
nr_alloc_batch = 0
|
||||
nr_inactive_anon = 0
|
||||
nr_active_anon = 0
|
||||
nr_inactive_file = 0
|
||||
nr_active_file = 0
|
||||
nr_unevictable = 0
|
||||
nr_mlock = 0
|
||||
nr_anon_pages = 0
|
||||
nr_mapped = 0
|
||||
nr_file_pages = 0
|
||||
nr_dirty = 0
|
||||
nr_writeback = 0
|
||||
nr_slab_reclaimable = 0
|
||||
nr_slab_unreclaimable = 0
|
||||
nr_page_table_pages = 0
|
||||
nr_kernel_stack = 0
|
||||
nr_unstable = 0
|
||||
nr_bounce = 0
|
||||
nr_vmscan_write = 0
|
||||
nr_vmscan_immediate_reclaim = 0
|
||||
nr_writeback_temp = 0
|
||||
nr_isolated_anon = 0
|
||||
nr_isolated_file = 0
|
||||
nr_shmem = 0
|
||||
nr_dirtied = 0
|
||||
nr_written = 0
|
||||
numa_hit = 0
|
||||
numa_miss = 0
|
||||
numa_foreign = 0
|
||||
numa_interleave = 0
|
||||
numa_local = 0
|
||||
numa_other = 0
|
||||
workingset_refault = 0
|
||||
workingset_activate = 0
|
||||
workingset_nodereclaim = 0
|
||||
nr_anon_transparent_hugepages = 0
|
||||
nr_free_cma = 0
|
||||
nr_dirty_threshold = 0
|
||||
nr_dirty_background_threshold = 0
|
||||
pgpgin = 0
|
||||
pgpgout = 0
|
||||
pswpin = 0
|
||||
pswpout = 0
|
||||
pgalloc_dma = 0
|
||||
pgalloc_dma32 = 0
|
||||
pgalloc_normal = 0
|
||||
pgalloc_movable = 0
|
||||
pgfree = 0
|
||||
pgactivate = 0
|
||||
pgdeactivate = 0
|
||||
pgfault = 0
|
||||
pgmajfault = 0
|
||||
pglazyfreed = 0
|
||||
pgrefill_dma = 0
|
||||
pgrefill_dma32 = 0
|
||||
pgrefill_normal = 0
|
||||
pgrefill_movable = 0
|
||||
pgsteal_kswapd_dma = 0
|
||||
pgsteal_kswapd_dma32 = 0
|
||||
pgsteal_kswapd_normal = 0
|
||||
pgsteal_kswapd_movable = 0
|
||||
pgsteal_direct_dma = 0
|
||||
pgsteal_direct_dma32 = 0
|
||||
pgsteal_direct_normal = 0
|
||||
pgsteal_direct_movable = 0
|
||||
pgscan_kswapd_dma = 0
|
||||
pgscan_kswapd_dma32 = 0
|
||||
pgscan_kswapd_normal = 0
|
||||
pgscan_kswapd_movable = 0
|
||||
pgscan_direct_dma = 0
|
||||
pgscan_direct_dma32 = 0
|
||||
pgscan_direct_normal = 0
|
||||
pgscan_direct_movable = 0
|
||||
pgscan_direct_throttle = 0
|
||||
zone_reclaim_failed = 0
|
||||
pginodesteal = 0
|
||||
slabs_scanned = 0
|
||||
kswapd_inodesteal = 0
|
||||
kswapd_low_wmark_hit_quickly = 0
|
||||
kswapd_high_wmark_hit_quickly = 0
|
||||
pageoutrun = 0
|
||||
allocstall = 0
|
||||
pgrotated = 0
|
||||
drop_pagecache = 0
|
||||
drop_slab = 0
|
||||
numa_pte_updates = 0
|
||||
numa_huge_pte_updates = 0
|
||||
numa_hint_faults = 0
|
||||
numa_hint_faults_local = 0
|
||||
numa_pages_migrated = 0
|
||||
pgmigrate_success = 0
|
||||
pgmigrate_fail = 0
|
||||
compact_migrate_scanned = 0
|
||||
compact_free_scanned = 0
|
||||
compact_isolated = 0
|
||||
compact_stall = 0
|
||||
compact_fail = 0
|
||||
compact_success = 0
|
||||
htlb_buddy_alloc_success = 0
|
||||
htlb_buddy_alloc_fail = 0
|
||||
unevictable_pgs_culled = 0
|
||||
unevictable_pgs_scanned = 0
|
||||
unevictable_pgs_rescued = 0
|
||||
unevictable_pgs_mlocked = 0
|
||||
unevictable_pgs_munlocked = 0
|
||||
unevictable_pgs_cleared = 0
|
||||
unevictable_pgs_stranded = 0
|
||||
thp_fault_alloc = 0
|
||||
thp_fault_fallback = 0
|
||||
thp_collapse_alloc = 0
|
||||
thp_collapse_alloc_failed = 0
|
||||
thp_split = 0
|
||||
thp_zero_page_alloc = 0
|
||||
thp_zero_page_alloc_failed = 0
|
||||
balloon_inflate = 0
|
||||
balloon_deflate = 0
|
||||
balloon_migrate = 0
|
||||
@@ -1,2 +0,0 @@
|
||||
# # collect interval
|
||||
# interval = 15
|
||||
@@ -1,5 +0,0 @@
|
||||
# # collect interval
|
||||
# interval = 15
|
||||
|
||||
# # whether collect platform specified metrics
|
||||
collect_platform_fields = true
|
||||
@@ -1,8 +0,0 @@
|
||||
# # collect interval
|
||||
# interval = 15
|
||||
|
||||
# # whether collect protocol stats on Linux
|
||||
# collect_protocol_stats = false
|
||||
|
||||
# # setting interfaces will tell categraf to gather these explicit interfaces
|
||||
# interfaces = ["eth0"]
|
||||
@@ -1,2 +0,0 @@
|
||||
# # collect interval
|
||||
# interval = 15
|
||||
@@ -1,8 +0,0 @@
|
||||
# # collect interval
|
||||
# interval = 15
|
||||
|
||||
# # force use ps command to gather
|
||||
# force_ps = false
|
||||
|
||||
# # force use /proc to gather
|
||||
# force_proc = false
|
||||
@@ -1,5 +0,0 @@
|
||||
# # collect interval
|
||||
# interval = 15
|
||||
|
||||
# # whether collect metric: system_n_users
|
||||
# collect_user_number = false
|
||||
@@ -1,10 +0,0 @@
|
||||
[prometheus]
|
||||
enable=true
|
||||
scrape_config_file="/etc/prometheus/prometheus.yml"
|
||||
## log level, debug warn info error
|
||||
log_level="info"
|
||||
## wal file storage path ,default ./data-agent
|
||||
# wal_storage_path="/path/to/storage"
|
||||
## wal reserve time duration, default value is 2 hour
|
||||
# wal_min_duration=2
|
||||
|
||||
@@ -1,129 +0,0 @@
|
||||
version: "3.7"
|
||||
|
||||
networks:
|
||||
nightingale:
|
||||
driver: bridge
|
||||
|
||||
services:
|
||||
postgres:
|
||||
# platform: linux/x86_64
|
||||
image: "postgres:12-alpine"
|
||||
container_name: postgres
|
||||
hostname: postgres
|
||||
restart: always
|
||||
ports:
|
||||
- "5432:5432"
|
||||
environment:
|
||||
TZ: Asia/Shanghai
|
||||
POSTGRES_USER: root
|
||||
POSTGRES_PASSWORD: 1234
|
||||
POSTGRES_DB: n9e_v6
|
||||
PGDATA: /var/lib/postgresql/data/pgdata
|
||||
volumes:
|
||||
- ./pgdata:/var/lib/postgresql/data
|
||||
- ./initsql_for_postgres:/docker-entrypoint-initdb.d/
|
||||
networks:
|
||||
- nightingale
|
||||
|
||||
redis:
|
||||
image: "redis:7.0-alpine"
|
||||
container_name: redis
|
||||
hostname: redis
|
||||
restart: always
|
||||
ports:
|
||||
- "6379:6379"
|
||||
environment:
|
||||
TZ: Asia/Shanghai
|
||||
networks:
|
||||
- nightingale
|
||||
|
||||
victoriametrics:
|
||||
image: victoriametrics/victoria-metrics:v1.79.12
|
||||
container_name: victoriametrics
|
||||
hostname: victoriametrics
|
||||
restart: always
|
||||
environment:
|
||||
TZ: Asia/Shanghai
|
||||
ports:
|
||||
- "8428:8428"
|
||||
networks:
|
||||
- nightingale
|
||||
command:
|
||||
- "--loggerTimezone=Asia/Shanghai"
|
||||
|
||||
ibex:
|
||||
image: ulric2019/ibex:0.3
|
||||
container_name: ibex
|
||||
hostname: ibex
|
||||
restart: always
|
||||
environment:
|
||||
GIN_MODE: release
|
||||
TZ: Asia/Shanghai
|
||||
WAIT_HOSTS: postgres:5432
|
||||
ports:
|
||||
- "10090:10090"
|
||||
- "20090:20090"
|
||||
volumes:
|
||||
- ./ibexetc_pg:/app/etc
|
||||
networks:
|
||||
- nightingale
|
||||
depends_on:
|
||||
- postgres
|
||||
links:
|
||||
- postgres:postgres
|
||||
command: >
|
||||
sh -c "/wait && /app/ibex server"
|
||||
|
||||
n9e:
|
||||
image: flashcatcloud/nightingale:latest
|
||||
container_name: n9e
|
||||
hostname: n9e
|
||||
restart: always
|
||||
environment:
|
||||
GIN_MODE: release
|
||||
TZ: Asia/Shanghai
|
||||
WAIT_HOSTS: postgres:5432, redis:6379
|
||||
volumes:
|
||||
- ./n9eetc_pg:/app/etc
|
||||
ports:
|
||||
- "17000:17000"
|
||||
networks:
|
||||
- nightingale
|
||||
depends_on:
|
||||
- postgres
|
||||
- redis
|
||||
- victoriametrics
|
||||
- ibex
|
||||
links:
|
||||
- postgres:postgres
|
||||
- redis:redis
|
||||
- victoriametrics:victoriametrics
|
||||
- ibex:ibex
|
||||
command: >
|
||||
sh -c "/wait && /app/n9e"
|
||||
|
||||
categraf:
|
||||
image: "flashcatcloud/categraf:latest"
|
||||
container_name: "categraf"
|
||||
hostname: "categraf01"
|
||||
restart: always
|
||||
environment:
|
||||
TZ: Asia/Shanghai
|
||||
HOST_PROC: /hostfs/proc
|
||||
HOST_SYS: /hostfs/sys
|
||||
HOST_MOUNT_PREFIX: /hostfs
|
||||
volumes:
|
||||
- ./categraf/conf:/etc/categraf/conf
|
||||
- /:/hostfs
|
||||
- /var/run/docker.sock:/var/run/docker.sock
|
||||
- ./prometc_vm:/etc/prometheus
|
||||
# ports:
|
||||
# - "9100:9100/tcp"
|
||||
networks:
|
||||
- nightingale
|
||||
depends_on:
|
||||
- n9e
|
||||
- ibex
|
||||
links:
|
||||
- n9e:n9e
|
||||
- ibex:ibex
|
||||
@@ -1,97 +0,0 @@
|
||||
# debug, release
|
||||
RunMode = "release"
|
||||
|
||||
[Log]
|
||||
# log write dir
|
||||
Dir = "logs-server"
|
||||
# log level: DEBUG INFO WARNING ERROR
|
||||
Level = "DEBUG"
|
||||
# stdout, stderr, file
|
||||
Output = "stdout"
|
||||
# # rotate by time
|
||||
# KeepHours: 4
|
||||
# # rotate by size
|
||||
# RotateNum = 3
|
||||
# # unit: MB
|
||||
# RotateSize = 256
|
||||
|
||||
[HTTP]
|
||||
Enable = true
|
||||
# http listening address
|
||||
Host = "0.0.0.0"
|
||||
# http listening port
|
||||
Port = 10090
|
||||
# https cert file path
|
||||
CertFile = ""
|
||||
# https key file path
|
||||
KeyFile = ""
|
||||
# whether print access log
|
||||
PrintAccessLog = true
|
||||
# whether enable pprof
|
||||
PProf = false
|
||||
# http graceful shutdown timeout, unit: s
|
||||
ShutdownTimeout = 30
|
||||
# max content length: 64M
|
||||
MaxContentLength = 67108864
|
||||
# http server read timeout, unit: s
|
||||
ReadTimeout = 20
|
||||
# http server write timeout, unit: s
|
||||
WriteTimeout = 40
|
||||
# http server idle timeout, unit: s
|
||||
IdleTimeout = 120
|
||||
|
||||
[BasicAuth]
|
||||
# using when call apis
|
||||
ibex = "ibex"
|
||||
|
||||
[RPC]
|
||||
Listen = "0.0.0.0:20090"
|
||||
|
||||
[Heartbeat]
|
||||
# auto detect if blank
|
||||
IP = ""
|
||||
# unit: ms
|
||||
Interval = 1000
|
||||
|
||||
[Output]
|
||||
# database | remote
|
||||
ComeFrom = "database"
|
||||
AgtdPort = 2090
|
||||
|
||||
[Gorm]
|
||||
# enable debug mode or not
|
||||
Debug = false
|
||||
# mysql postgres
|
||||
DBType = "postgres"
|
||||
# unit: s
|
||||
MaxLifetime = 7200
|
||||
# max open connections
|
||||
MaxOpenConns = 150
|
||||
# max idle connections
|
||||
MaxIdleConns = 50
|
||||
# table prefix
|
||||
TablePrefix = ""
|
||||
|
||||
[MySQL]
|
||||
# mysql address host:port
|
||||
Address = "mysql:3306"
|
||||
# mysql username
|
||||
User = "root"
|
||||
# mysql password
|
||||
Password = "1234"
|
||||
# database name
|
||||
DBName = "ibex"
|
||||
# connection params
|
||||
Parameters = "charset=utf8mb4&parseTime=True&loc=Local&allowNativePasswords=true"
|
||||
|
||||
[Postgres]
|
||||
# pg address host:port
|
||||
Address = "postgres:5432"
|
||||
# pg user
|
||||
User = "root"
|
||||
# pg password
|
||||
Password = "1234"
|
||||
# database name
|
||||
DBName = "n9e_v6"
|
||||
# ssl mode
|
||||
SSLMode = "disable"
|
||||
File diff suppressed because it is too large
Load Diff
@@ -1,143 +0,0 @@
|
||||
[Global]
|
||||
RunMode = "release"
|
||||
|
||||
[Log]
|
||||
# log write dir
|
||||
Dir = "logs"
|
||||
# log level: DEBUG INFO WARNING ERROR
|
||||
Level = "DEBUG"
|
||||
# stdout, stderr, file
|
||||
Output = "stdout"
|
||||
# # rotate by time
|
||||
# KeepHours: 4
|
||||
# # rotate by size
|
||||
# RotateNum = 3
|
||||
# # unit: MB
|
||||
# RotateSize = 256
|
||||
|
||||
[HTTP]
|
||||
# http listening address
|
||||
Host = "0.0.0.0"
|
||||
# http listening port
|
||||
Port = 17000
|
||||
# https cert file path
|
||||
CertFile = ""
|
||||
# https key file path
|
||||
KeyFile = ""
|
||||
# whether print access log
|
||||
PrintAccessLog = false
|
||||
# whether enable pprof
|
||||
PProf = false
|
||||
# expose prometheus /metrics?
|
||||
ExposeMetrics = true
|
||||
# http graceful shutdown timeout, unit: s
|
||||
ShutdownTimeout = 30
|
||||
# max content length: 64M
|
||||
MaxContentLength = 67108864
|
||||
# http server read timeout, unit: s
|
||||
ReadTimeout = 20
|
||||
# http server write timeout, unit: s
|
||||
WriteTimeout = 40
|
||||
# http server idle timeout, unit: s
|
||||
IdleTimeout = 120
|
||||
|
||||
[HTTP.Pushgw]
|
||||
Enable = true
|
||||
# [HTTP.Pushgw.BasicAuth]
|
||||
# user001 = "ccc26da7b9aba533cbb263a36c07dcc5"
|
||||
|
||||
[HTTP.Alert]
|
||||
Enable = true
|
||||
[HTTP.Alert.BasicAuth]
|
||||
user001 = "ccc26da7b9aba533cbb263a36c07dcc5"
|
||||
|
||||
[HTTP.Heartbeat]
|
||||
Enable = true
|
||||
# [HTTP.Heartbeat.BasicAuth]
|
||||
# user001 = "ccc26da7b9aba533cbb263a36c07dcc5"
|
||||
|
||||
[HTTP.Service]
|
||||
Enable = true
|
||||
[HTTP.Service.BasicAuth]
|
||||
user001 = "ccc26da7b9aba533cbb263a36c07dcc5"
|
||||
|
||||
[HTTP.JWTAuth]
|
||||
# signing key
|
||||
SigningKey = "5b94a0fd640fe2765af826acfe42d151"
|
||||
# unit: min
|
||||
AccessExpired = 1500
|
||||
# unit: min
|
||||
RefreshExpired = 10080
|
||||
RedisKeyPrefix = "/jwt/"
|
||||
|
||||
[HTTP.ProxyAuth]
|
||||
# if proxy auth enabled, jwt auth is disabled
|
||||
Enable = false
|
||||
# username key in http proxy header
|
||||
HeaderUserNameKey = "X-User-Name"
|
||||
DefaultRoles = ["Standard"]
|
||||
|
||||
[DB]
|
||||
DSN="host=postgres port=5432 user=root dbname=n9e_v6 password=1234 sslmode=disable"
|
||||
# enable debug mode or not
|
||||
Debug = false
|
||||
# mysql postgres
|
||||
DBType = "postgres"
|
||||
# unit: s
|
||||
MaxLifetime = 7200
|
||||
# max open connections
|
||||
MaxOpenConns = 150
|
||||
# max idle connections
|
||||
MaxIdleConns = 50
|
||||
# table prefix
|
||||
TablePrefix = ""
|
||||
# enable auto migrate or not
|
||||
# EnableAutoMigrate = false
|
||||
|
||||
[Redis]
|
||||
# address, ip:port or ip1:port,ip2:port for cluster and sentinel(SentinelAddrs)
|
||||
Address = "redis:6379"
|
||||
# Username = ""
|
||||
# Password = ""
|
||||
# DB = 0
|
||||
# UseTLS = false
|
||||
# TLSMinVersion = "1.2"
|
||||
# standalone cluster sentinel
|
||||
RedisType = "standalone"
|
||||
# Mastername for sentinel type
|
||||
# MasterName = "mymaster"
|
||||
# SentinelUsername = ""
|
||||
# SentinelPassword = ""
|
||||
|
||||
[Alert]
|
||||
[Alert.Heartbeat]
|
||||
# auto detect if blank
|
||||
IP = ""
|
||||
# unit ms
|
||||
Interval = 1000
|
||||
ClusterName = "default"
|
||||
|
||||
# [Alert.Alerting]
|
||||
# NotifyConcurrency = 10
|
||||
|
||||
[Center]
|
||||
MetricsYamlFile = "./etc/metrics.yaml"
|
||||
I18NHeaderKey = "X-Language"
|
||||
|
||||
[Center.AnonymousAccess]
|
||||
PromQuerier = true
|
||||
AlertDetail = true
|
||||
|
||||
[Center.Ibex]
|
||||
Address = "http://ibex:10090"
|
||||
# basic auth
|
||||
BasicAuthUser = "ibex"
|
||||
BasicAuthPass = "ibex"
|
||||
# unit: ms
|
||||
Timeout = 3000
|
||||
|
||||
[Pushgw]
|
||||
LabelRewrite = true
|
||||
|
||||
[[Pushgw.Writers]]
|
||||
Url = "http://victoriametrics:8428/api/v1/write"
|
||||
@@ -1,110 +0,0 @@
|
||||
ops:
|
||||
- name: dashboards
|
||||
cname: 仪表盘
|
||||
ops:
|
||||
- "/dashboards"
|
||||
- "/dashboards/add"
|
||||
- "/dashboards/put"
|
||||
- "/dashboards/del"
|
||||
- "/dashboards-built-in"
|
||||
|
||||
- name: alert
|
||||
cname: 告警规则
|
||||
ops:
|
||||
- "/alert-rules"
|
||||
- "/alert-rules/add"
|
||||
- "/alert-rules/put"
|
||||
- "/alert-rules/del"
|
||||
- "/alert-rules-built-in"
|
||||
- name: alert-mutes
|
||||
cname: 告警静默管理
|
||||
ops:
|
||||
- "/alert-mutes"
|
||||
- "/alert-mutes/add"
|
||||
- "/alert-mutes/del"
|
||||
|
||||
- name: alert-subscribes
|
||||
cname: 告警订阅管理
|
||||
ops:
|
||||
- "/alert-subscribes"
|
||||
- "/alert-subscribes/add"
|
||||
- "/alert-subscribes/put"
|
||||
- "/alert-subscribes/del"
|
||||
|
||||
- name: alert-events
|
||||
cname: 告警事件管理
|
||||
ops:
|
||||
- "/alert-cur-events"
|
||||
- "/alert-cur-events/del"
|
||||
- "/alert-his-events"
|
||||
|
||||
- name: recording-rules
|
||||
cname: 记录规则管理
|
||||
ops:
|
||||
- "/recording-rules"
|
||||
- "/recording-rules/add"
|
||||
- "/recording-rules/put"
|
||||
- "/recording-rules/del"
|
||||
|
||||
- name: metric
|
||||
cname: 时序指标
|
||||
ops:
|
||||
- "/metric/explorer"
|
||||
- "/object/explorer"
|
||||
|
||||
- name: log
|
||||
cname: 日志分析
|
||||
ops:
|
||||
- "/log/explorer"
|
||||
|
||||
- name: trace
|
||||
cname: 链路追踪
|
||||
ops:
|
||||
- "/trace/explorer"
|
||||
- "/trace/dependencies"
|
||||
|
||||
- name: targets
|
||||
cname: 基础设施
|
||||
ops:
|
||||
- "/targets"
|
||||
- "/targets/add"
|
||||
- "/targets/put"
|
||||
- "/targets/del"
|
||||
|
||||
- name: job
|
||||
cname: 任务管理
|
||||
ops:
|
||||
- "/job-tpls"
|
||||
- "/job-tpls/add"
|
||||
- "/job-tpls/put"
|
||||
- "/job-tpls/del"
|
||||
- "/job-tasks"
|
||||
- "/job-tasks/add"
|
||||
- "/job-tasks/put"
|
||||
|
||||
- name: user
|
||||
cname: 用户管理
|
||||
ops:
|
||||
- "/users"
|
||||
- "/user-groups"
|
||||
- "/user-groups/add"
|
||||
- "/user-groups/put"
|
||||
- "/user-groups/del"
|
||||
|
||||
- name: busi-groups
|
||||
cname: 业务分组管理
|
||||
ops:
|
||||
- "/busi-groups"
|
||||
- "/busi-groups/add"
|
||||
- "/busi-groups/put"
|
||||
- "/busi-groups/del"
|
||||
|
||||
- name: system
|
||||
cname: 系统信息
|
||||
ops:
|
||||
- "/help/version"
|
||||
- "/help/servers"
|
||||
- "/help/source"
|
||||
- "/help/sso"
|
||||
- "/help/notification-tpls"
|
||||
- "/help/notification-settings"
|
||||
@@ -1,7 +0,0 @@
|
||||
级别状态: S{{.Severity}} {{if .IsRecovered}}Recovered{{else}}Triggered{{end}}
|
||||
规则名称: {{.RuleName}}{{if .RuleNote}}
|
||||
规则备注: {{.RuleNote}}{{end}}
|
||||
监控指标: {{.TagsJSON}}
|
||||
{{if .IsRecovered}}恢复时间:{{timeformat .LastEvalTime}}{{else}}触发时间: {{timeformat .TriggerTime}}
|
||||
触发时值: {{.TriggerValue}}{{end}}
|
||||
发送时间: {{timestamp}}
|
||||
@@ -1,25 +0,0 @@
|
||||
# my global config
|
||||
global:
|
||||
scrape_interval: 15s # Set the scrape interval to every 15 seconds. Default is every 1 minute.
|
||||
evaluation_interval: 15s # Evaluate rules every 15 seconds. The default is every 1 minute.
|
||||
# scrape_timeout is set to the global default (10s).
|
||||
|
||||
# A scrape configuration containing exactly one endpoint to scrape:
|
||||
# Here it's Prometheus itself.
|
||||
scrape_configs:
|
||||
# The job name is added as a label `job=<job_name>` to any timeseries scraped from this config.
|
||||
- job_name: 'victoriametrics'
|
||||
# metrics_path defaults to '/metrics'
|
||||
# scheme defaults to 'http'.
|
||||
static_configs:
|
||||
- targets: ['victoriametrics:8428']
|
||||
|
||||
- job_name: 'n9e'
|
||||
# static_configs:
|
||||
# - targets: ['n9e:17000']
|
||||
file_sd_configs:
|
||||
- files:
|
||||
- targets.json
|
||||
|
||||
remote_write:
|
||||
- url: 'http://n9e:17000/prometheus/v1/write'
|
||||
@@ -1,7 +0,0 @@
|
||||
[
|
||||
{
|
||||
"targets": [
|
||||
"n9e:17000"
|
||||
]
|
||||
}
|
||||
]
|
||||
38
docker/ibexetc/agentd.conf
Normal file
38
docker/ibexetc/agentd.conf
Normal file
@@ -0,0 +1,38 @@
|
||||
# debug, release
|
||||
RunMode = "release"
|
||||
|
||||
# task meta storage dir
|
||||
MetaDir = "./meta"
|
||||
|
||||
[HTTP]
|
||||
Enable = true
|
||||
# http listening address
|
||||
Host = "0.0.0.0"
|
||||
# http listening port
|
||||
Port = 2090
|
||||
# https cert file path
|
||||
CertFile = ""
|
||||
# https key file path
|
||||
KeyFile = ""
|
||||
# whether print access log
|
||||
PrintAccessLog = true
|
||||
# whether enable pprof
|
||||
PProf = false
|
||||
# http graceful shutdown timeout, unit: s
|
||||
ShutdownTimeout = 30
|
||||
# max content length: 64M
|
||||
MaxContentLength = 67108864
|
||||
# http server read timeout, unit: s
|
||||
ReadTimeout = 20
|
||||
# http server write timeout, unit: s
|
||||
WriteTimeout = 40
|
||||
# http server idle timeout, unit: s
|
||||
IdleTimeout = 120
|
||||
|
||||
[Heartbeat]
|
||||
# unit: ms
|
||||
Interval = 1000
|
||||
# rpc servers
|
||||
Servers = ["ibex:20090"]
|
||||
# $ip or $hostname or specified string
|
||||
Host = "categraf01"
|
||||
@@ -74,7 +74,7 @@ TablePrefix = ""
|
||||
|
||||
[MySQL]
|
||||
# mysql address host:port
|
||||
Address = "127.0.0.1:3306"
|
||||
Address = "mysql:3306"
|
||||
# mysql username
|
||||
User = "root"
|
||||
# mysql password
|
||||
@@ -86,7 +86,7 @@ Parameters = "charset=utf8mb4&parseTime=True&loc=Local&allowNativePasswords=true
|
||||
|
||||
[Postgres]
|
||||
# pg address host:port
|
||||
Address = "127.0.0.1:5432"
|
||||
Address = "postgres:5432"
|
||||
# pg user
|
||||
User = "root"
|
||||
# pg password
|
||||
|
||||
@@ -1,8 +1,8 @@
|
||||
set names utf8mb4;
|
||||
|
||||
drop database if exists n9e_v6;
|
||||
create database n9e_v6;
|
||||
use n9e_v6;
|
||||
drop database if exists n9e_v5;
|
||||
create database n9e_v5;
|
||||
use n9e_v5;
|
||||
|
||||
CREATE TABLE `users` (
|
||||
`id` bigint unsigned not null auto_increment,
|
||||
@@ -83,21 +83,12 @@ CREATE TABLE `role_operation`(
|
||||
-- Admin is special, who has no concrete operation but can do anything.
|
||||
insert into `role_operation`(role_name, operation) values('Guest', '/metric/explorer');
|
||||
insert into `role_operation`(role_name, operation) values('Guest', '/object/explorer');
|
||||
insert into `role_operation`(role_name, operation) values('Guest', '/log/explorer');
|
||||
insert into `role_operation`(role_name, operation) values('Guest', '/trace/explorer');
|
||||
insert into `role_operation`(role_name, operation) values('Guest', '/help/version');
|
||||
insert into `role_operation`(role_name, operation) values('Guest', '/help/contact');
|
||||
|
||||
insert into `role_operation`(role_name, operation) values('Standard', '/metric/explorer');
|
||||
insert into `role_operation`(role_name, operation) values('Standard', '/object/explorer');
|
||||
insert into `role_operation`(role_name, operation) values('Standard', '/log/explorer');
|
||||
insert into `role_operation`(role_name, operation) values('Standard', '/trace/explorer');
|
||||
insert into `role_operation`(role_name, operation) values('Standard', '/help/version');
|
||||
insert into `role_operation`(role_name, operation) values('Standard', '/help/contact');
|
||||
insert into `role_operation`(role_name, operation) values('Standard', '/alert-rules-built-in');
|
||||
insert into `role_operation`(role_name, operation) values('Standard', '/dashboards-built-in');
|
||||
insert into `role_operation`(role_name, operation) values('Standard', '/trace/dependencies');
|
||||
|
||||
insert into `role_operation`(role_name, operation) values('Standard', '/users');
|
||||
insert into `role_operation`(role_name, operation) values('Standard', '/user-groups');
|
||||
insert into `role_operation`(role_name, operation) values('Standard', '/user-groups/add');
|
||||
@@ -177,8 +168,6 @@ CREATE TABLE `board` (
|
||||
`ident` varchar(200) not null default '',
|
||||
`tags` varchar(255) not null comment 'split by space',
|
||||
`public` tinyint(1) not null default 0 comment '0:false 1:true',
|
||||
`built_in` tinyint(1) not null default 0 comment '0:false 1:true',
|
||||
`hide` tinyint(1) not null default 0 comment '0:false 1:true',
|
||||
`create_at` bigint not null default 0,
|
||||
`create_by` varchar(64) not null default '',
|
||||
`update_at` bigint not null default 0,
|
||||
@@ -234,7 +223,6 @@ CREATE TABLE `chart` (
|
||||
CREATE TABLE `chart_share` (
|
||||
`id` bigint unsigned not null auto_increment,
|
||||
`cluster` varchar(128) not null,
|
||||
`datasource_id` bigint unsigned not null default 0,
|
||||
`configs` text,
|
||||
`create_at` bigint not null default 0,
|
||||
`create_by` varchar(64) not null default '',
|
||||
@@ -246,7 +234,6 @@ CREATE TABLE `alert_rule` (
|
||||
`id` bigint unsigned not null auto_increment,
|
||||
`group_id` bigint not null default 0 comment 'busi group id',
|
||||
`cate` varchar(128) not null,
|
||||
`datasource_ids` varchar(255) not null default '' comment 'datasource ids',
|
||||
`cluster` varchar(128) not null,
|
||||
`name` varchar(255) not null,
|
||||
`note` varchar(1024) not null default '',
|
||||
@@ -257,12 +244,11 @@ CREATE TABLE `alert_rule` (
|
||||
`severity` tinyint(1) not null comment '1:Emergency 2:Warning 3:Notice',
|
||||
`disabled` tinyint(1) not null comment '0:enabled 1:disabled',
|
||||
`prom_for_duration` int not null comment 'prometheus for, unit:s',
|
||||
`rule_config` text not null comment 'rule_config',
|
||||
`prom_ql` text not null comment 'promql',
|
||||
`prom_eval_interval` int not null comment 'evaluate interval',
|
||||
`enable_stime` varchar(255) not null default '00:00',
|
||||
`enable_etime` varchar(255) not null default '23:59',
|
||||
`enable_days_of_week` varchar(255) not null default '' comment 'split by space: 0 1 2 3 4 5 6',
|
||||
`enable_stime` char(255) not null default '00:00',
|
||||
`enable_etime` char(255) not null default '23:59',
|
||||
`enable_days_of_week` varchar(255) not null default '' comment 'eg: "0 1 2 3 4 5 6 ; 0 1 2"',
|
||||
`enable_in_bg` tinyint(1) not null default 0 comment '1: only this bg 0: global',
|
||||
`notify_recovered` tinyint(1) not null comment 'whether notify when recovery',
|
||||
`notify_channels` varchar(255) not null default '' comment 'split by space: sms voice email dingtalk wecom',
|
||||
@@ -273,7 +259,6 @@ CREATE TABLE `alert_rule` (
|
||||
`callbacks` varchar(255) not null default '' comment 'split by space: http://a.com/api/x http://a.com/api/y',
|
||||
`runbook_url` varchar(255),
|
||||
`append_tags` varchar(255) not null default '' comment 'split by space: service=n9e mod=api',
|
||||
`annotations` text not null comment 'annotations',
|
||||
`create_at` bigint not null default 0,
|
||||
`create_by` varchar(64) not null default '',
|
||||
`update_at` bigint not null default 0,
|
||||
@@ -290,14 +275,11 @@ CREATE TABLE `alert_mute` (
|
||||
`note` varchar(1024) not null default '',
|
||||
`cate` varchar(128) not null,
|
||||
`cluster` varchar(128) not null,
|
||||
`datasource_ids` varchar(255) not null default '' comment 'datasource ids',
|
||||
`tags` varchar(4096) not null default '' comment 'json,map,tagkey->regexp|value',
|
||||
`cause` varchar(255) not null default '',
|
||||
`btime` bigint not null default 0 comment 'begin time',
|
||||
`etime` bigint not null default 0 comment 'end time',
|
||||
`disabled` tinyint(1) not null default 0 comment '0:enabled 1:disabled',
|
||||
`mute_time_type` tinyint(1) not null default 0,
|
||||
`periodic_mutes` varchar(4096) not null default '',
|
||||
`create_at` bigint not null default 0,
|
||||
`create_by` varchar(64) not null default '',
|
||||
`update_at` bigint not null default 0,
|
||||
@@ -312,9 +294,7 @@ CREATE TABLE `alert_subscribe` (
|
||||
`name` varchar(255) not null default '',
|
||||
`disabled` tinyint(1) not null default 0 comment '0:enabled 1:disabled',
|
||||
`group_id` bigint not null default 0 comment 'busi group id',
|
||||
`prod` varchar(255) not null default '',
|
||||
`cate` varchar(128) not null,
|
||||
`datasource_ids` varchar(255) not null default '' comment 'datasource ids',
|
||||
`cluster` varchar(128) not null,
|
||||
`rule_id` bigint not null default 0,
|
||||
`tags` varchar(4096) not null default '' comment 'json,map,tagkey->regexp|value',
|
||||
@@ -323,9 +303,6 @@ CREATE TABLE `alert_subscribe` (
|
||||
`redefine_channels` tinyint(1) default 0 comment 'is redefine channels?',
|
||||
`new_channels` varchar(255) not null default '' comment 'split by space: sms voice email dingtalk wecom',
|
||||
`user_group_ids` varchar(250) not null comment 'split by space 1 34 5, notify cc to user_group_ids',
|
||||
`webhooks` text not null,
|
||||
`redefine_webhooks` tinyint(1) default 0,
|
||||
`for_duration` bigint not null default 0,
|
||||
`create_at` bigint not null default 0,
|
||||
`create_by` varchar(64) not null default '',
|
||||
`update_at` bigint not null default 0,
|
||||
@@ -334,10 +311,11 @@ CREATE TABLE `alert_subscribe` (
|
||||
KEY (`update_at`),
|
||||
KEY (`group_id`)
|
||||
) ENGINE = InnoDB DEFAULT CHARSET = utf8mb4;
|
||||
|
||||
|
||||
CREATE TABLE `target` (
|
||||
`id` bigint unsigned not null auto_increment,
|
||||
`group_id` bigint not null default 0 comment 'busi group id',
|
||||
`cluster` varchar(128) not null comment 'append to alert event as field',
|
||||
`ident` varchar(191) not null comment 'target id',
|
||||
`note` varchar(255) not null default '' comment 'append to alert event as field',
|
||||
`tags` varchar(512) not null default '' comment 'append to series data as tags, split by space, append external space at suffix',
|
||||
@@ -347,8 +325,6 @@ CREATE TABLE `target` (
|
||||
KEY (`group_id`)
|
||||
) ENGINE = InnoDB DEFAULT CHARSET = utf8mb4;
|
||||
|
||||
|
||||
|
||||
-- case1: target_idents; case2: target_tags
|
||||
-- CREATE TABLE `collect_rule` (
|
||||
-- `id` bigint unsigned not null auto_increment,
|
||||
@@ -383,11 +359,10 @@ CREATE TABLE `metric_view` (
|
||||
) ENGINE=InnoDB DEFAULT CHARSET = utf8mb4;
|
||||
|
||||
insert into metric_view(name, cate, configs) values('Host View', 0, '{"filters":[{"oper":"=","label":"__name__","value":"cpu_usage_idle"}],"dynamicLabels":[],"dimensionLabels":[{"label":"ident","value":""}]}');
|
||||
|
||||
|
||||
CREATE TABLE `recording_rule` (
|
||||
`id` bigint unsigned not null auto_increment,
|
||||
`group_id` bigint not null default '0' comment 'group_id',
|
||||
`datasource_ids` varchar(255) not null default '' comment 'datasource ids',
|
||||
`cluster` varchar(128) not null,
|
||||
`name` varchar(255) not null comment 'new metric name',
|
||||
`note` varchar(255) not null comment 'rule note',
|
||||
@@ -422,7 +397,6 @@ insert into alert_aggr_view(name, rule, cate) values('By RuleName', 'field:rule_
|
||||
CREATE TABLE `alert_cur_event` (
|
||||
`id` bigint unsigned not null comment 'use alert_his_event.id',
|
||||
`cate` varchar(128) not null,
|
||||
`datasource_id` bigint not null default 0 comment 'datasource id',
|
||||
`cluster` varchar(128) not null,
|
||||
`group_id` bigint unsigned not null comment 'busi group id of rule',
|
||||
`group_name` varchar(255) not null default '' comment 'busi group name',
|
||||
@@ -448,8 +422,6 @@ CREATE TABLE `alert_cur_event` (
|
||||
`first_trigger_time` bigint,
|
||||
`trigger_time` bigint not null,
|
||||
`trigger_value` varchar(255) not null,
|
||||
`annotations` text not null comment 'annotations',
|
||||
`rule_config` text not null comment 'annotations',
|
||||
`tags` varchar(1024) not null default '' comment 'merge data_tags rule_tags, split by ,,',
|
||||
PRIMARY KEY (`id`),
|
||||
KEY (`hash`),
|
||||
@@ -462,7 +434,6 @@ CREATE TABLE `alert_his_event` (
|
||||
`id` bigint unsigned not null AUTO_INCREMENT,
|
||||
`is_recovered` tinyint(1) not null,
|
||||
`cate` varchar(128) not null,
|
||||
`datasource_id` bigint not null default 0 comment 'datasource id',
|
||||
`cluster` varchar(128) not null,
|
||||
`group_id` bigint unsigned not null comment 'busi group id of rule',
|
||||
`group_name` varchar(255) not null default '' comment 'busi group name',
|
||||
@@ -490,8 +461,6 @@ CREATE TABLE `alert_his_event` (
|
||||
`recover_time` bigint not null default 0,
|
||||
`last_eval_time` bigint not null default 0 comment 'for time filter',
|
||||
`tags` varchar(1024) not null default '' comment 'merge data_tags rule_tags, split by ,,',
|
||||
`annotations` text not null comment 'annotations',
|
||||
`rule_config` text not null comment 'annotations',
|
||||
PRIMARY KEY (`id`),
|
||||
KEY (`hash`),
|
||||
KEY (`rule_id`),
|
||||
@@ -531,7 +500,6 @@ CREATE TABLE `task_tpl_host`
|
||||
CREATE TABLE `task_record`
|
||||
(
|
||||
`id` bigint unsigned not null comment 'ibex task id',
|
||||
`event_id` bigint not null comment 'event id' default 0,
|
||||
`group_id` bigint not null comment 'busi group id',
|
||||
`ibex_address` varchar(128) not null,
|
||||
`ibex_auth_user` varchar(128) not null default '',
|
||||
@@ -548,62 +516,14 @@ CREATE TABLE `task_record`
|
||||
`create_by` varchar(64) not null default '',
|
||||
PRIMARY KEY (`id`),
|
||||
KEY (`create_at`, `group_id`),
|
||||
KEY (`create_by`),
|
||||
KEY (`event_id`)
|
||||
KEY (`create_by`)
|
||||
) ENGINE = InnoDB DEFAULT CHARSET = utf8mb4;
|
||||
|
||||
CREATE TABLE `alerting_engines`
|
||||
(
|
||||
`id` int unsigned NOT NULL AUTO_INCREMENT,
|
||||
`instance` varchar(128) not null default '' comment 'instance identification, e.g. 10.9.0.9:9090',
|
||||
`datasource_id` bigint not null default 0 comment 'datasource id',
|
||||
`engine_cluster` varchar(128) not null default '' comment 'n9e-alert cluster',
|
||||
`cluster` varchar(128) not null default '' comment 'target reader cluster',
|
||||
`clock` bigint not null,
|
||||
PRIMARY KEY (`id`)
|
||||
) ENGINE = InnoDB DEFAULT CHARSET = utf8mb4;
|
||||
|
||||
CREATE TABLE `datasource`
|
||||
(
|
||||
`id` int unsigned NOT NULL AUTO_INCREMENT,
|
||||
`name` varchar(191) not null default '',
|
||||
`description` varchar(255) not null default '',
|
||||
`category` varchar(255) not null default '',
|
||||
`plugin_id` int unsigned not null default 0,
|
||||
`plugin_type` varchar(255) not null default '',
|
||||
`plugin_type_name` varchar(255) not null default '',
|
||||
`cluster_name` varchar(255) not null default '',
|
||||
`settings` text not null,
|
||||
`status` varchar(255) not null default '',
|
||||
`http` varchar(4096) not null default '',
|
||||
`auth` varchar(8192) not null default '',
|
||||
`created_at` bigint not null default 0,
|
||||
`created_by` varchar(64) not null default '',
|
||||
`updated_at` bigint not null default 0,
|
||||
`updated_by` varchar(64) not null default '',
|
||||
UNIQUE KEY (`name`),
|
||||
PRIMARY KEY (`id`)
|
||||
) ENGINE = InnoDB DEFAULT CHARSET = utf8mb4;
|
||||
|
||||
CREATE TABLE `builtin_cate` (
|
||||
`id` bigint unsigned not null auto_increment,
|
||||
`name` varchar(191) not null,
|
||||
`user_id` bigint not null default 0,
|
||||
PRIMARY KEY (`id`)
|
||||
) ENGINE = InnoDB DEFAULT CHARSET = utf8mb4;
|
||||
|
||||
CREATE TABLE `notify_tpl` (
|
||||
`id` bigint unsigned not null auto_increment,
|
||||
`channel` varchar(32) not null,
|
||||
`name` varchar(255) not null,
|
||||
`content` text not null,
|
||||
PRIMARY KEY (`id`),
|
||||
UNIQUE KEY (`channel`)
|
||||
) ENGINE = InnoDB DEFAULT CHARSET = utf8mb4;
|
||||
|
||||
CREATE TABLE `sso_config` (
|
||||
`id` bigint unsigned not null auto_increment,
|
||||
`name` varchar(191) not null,
|
||||
`content` text not null,
|
||||
PRIMARY KEY (`id`),
|
||||
UNIQUE KEY (`name`)
|
||||
) ENGINE = InnoDB DEFAULT CHARSET = utf8mb4;
|
||||
@@ -1,3 +1,4 @@
|
||||
-- n9e version 5.8 for postgres
|
||||
CREATE TABLE users (
|
||||
id bigserial,
|
||||
username varchar(64) not null ,
|
||||
@@ -12,10 +13,11 @@ CREATE TABLE users (
|
||||
create_at bigint not null default 0,
|
||||
create_by varchar(64) not null default '',
|
||||
update_at bigint not null default 0,
|
||||
update_by varchar(64) not null default '',
|
||||
PRIMARY KEY (id),
|
||||
UNIQUE (username)
|
||||
update_by varchar(64) not null default ''
|
||||
) ;
|
||||
ALTER TABLE users ADD CONSTRAINT users_pk PRIMARY KEY (id);
|
||||
ALTER TABLE users ADD CONSTRAINT users_un UNIQUE (username);
|
||||
|
||||
COMMENT ON COLUMN users.username IS 'login name, cannot rename';
|
||||
COMMENT ON COLUMN users.nickname IS 'display name, chinese name';
|
||||
COMMENT ON COLUMN users.portrait IS 'portrait image url';
|
||||
@@ -24,6 +26,7 @@ COMMENT ON COLUMN users.contacts IS 'json e.g. {wecom:xx, dingtalk_robot_token:y
|
||||
|
||||
insert into users(id, username, nickname, password, roles, create_at, create_by, update_at, update_by) values(1, 'root', '超管', 'root.2020', 'Admin', date_part('epoch',current_timestamp)::int, 'system', date_part('epoch',current_timestamp)::int, 'system');
|
||||
|
||||
|
||||
CREATE TABLE user_group (
|
||||
id bigserial,
|
||||
name varchar(128) not null default '',
|
||||
@@ -31,9 +34,9 @@ CREATE TABLE user_group (
|
||||
create_at bigint not null default 0,
|
||||
create_by varchar(64) not null default '',
|
||||
update_at bigint not null default 0,
|
||||
update_by varchar(64) not null default '',
|
||||
PRIMARY KEY (id)
|
||||
update_by varchar(64) not null default ''
|
||||
) ;
|
||||
ALTER TABLE user_group ADD CONSTRAINT user_group_pk PRIMARY KEY (id);
|
||||
CREATE INDEX user_group_create_by_idx ON user_group (create_by);
|
||||
CREATE INDEX user_group_update_at_idx ON user_group (update_at);
|
||||
|
||||
@@ -42,9 +45,9 @@ insert into user_group(id, name, create_at, create_by, update_at, update_by) val
|
||||
CREATE TABLE user_group_member (
|
||||
id bigserial,
|
||||
group_id bigint not null,
|
||||
user_id bigint not null,
|
||||
PRIMARY KEY(id)
|
||||
user_id bigint not null
|
||||
) ;
|
||||
ALTER TABLE user_group_member ADD CONSTRAINT user_group_member_pk PRIMARY KEY (id);
|
||||
CREATE INDEX user_group_member_group_id_idx ON user_group_member (group_id);
|
||||
CREATE INDEX user_group_member_user_id_idx ON user_group_member (user_id);
|
||||
|
||||
@@ -53,18 +56,18 @@ insert into user_group_member(group_id, user_id) values(1, 1);
|
||||
CREATE TABLE configs (
|
||||
id bigserial,
|
||||
ckey varchar(191) not null,
|
||||
cval varchar(4096) not null default '',
|
||||
PRIMARY KEY (id),
|
||||
UNIQUE (ckey)
|
||||
cval varchar(4096) not null default ''
|
||||
) ;
|
||||
ALTER TABLE configs ADD CONSTRAINT configs_pk PRIMARY KEY (id);
|
||||
ALTER TABLE configs ADD CONSTRAINT configs_un UNIQUE (ckey);
|
||||
|
||||
CREATE TABLE role (
|
||||
id bigserial,
|
||||
name varchar(191) not null default '',
|
||||
note varchar(255) not null default '',
|
||||
PRIMARY KEY (id),
|
||||
UNIQUE (name)
|
||||
note varchar(255) not null default ''
|
||||
) ;
|
||||
ALTER TABLE role ADD CONSTRAINT role_pk PRIMARY KEY (id);
|
||||
ALTER TABLE role ADD CONSTRAINT role_un UNIQUE ("name");
|
||||
|
||||
insert into role(name, note) values('Admin', 'Administrator role');
|
||||
insert into role(name, note) values('Standard', 'Ordinary user role');
|
||||
@@ -73,9 +76,9 @@ insert into role(name, note) values('Guest', 'Readonly user role');
|
||||
CREATE TABLE role_operation(
|
||||
id bigserial,
|
||||
role_name varchar(128) not null,
|
||||
operation varchar(191) not null,
|
||||
PRIMARY KEY(id)
|
||||
operation varchar(191) not null
|
||||
) ;
|
||||
ALTER TABLE role_operation ADD CONSTRAINT role_operation_pk PRIMARY KEY (id);
|
||||
CREATE INDEX role_operation_role_name_idx ON role_operation (role_name);
|
||||
CREATE INDEX role_operation_operation_idx ON role_operation (operation);
|
||||
|
||||
@@ -83,21 +86,12 @@ CREATE INDEX role_operation_operation_idx ON role_operation (operation);
|
||||
-- Admin is special, who has no concrete operation but can do anything.
|
||||
insert into role_operation(role_name, operation) values('Guest', '/metric/explorer');
|
||||
insert into role_operation(role_name, operation) values('Guest', '/object/explorer');
|
||||
insert into role_operation(role_name, operation) values('Guest', '/log/explorer');
|
||||
insert into role_operation(role_name, operation) values('Guest', '/trace/explorer');
|
||||
insert into role_operation(role_name, operation) values('Guest', '/help/version');
|
||||
insert into role_operation(role_name, operation) values('Guest', '/help/contact');
|
||||
|
||||
insert into role_operation(role_name, operation) values('Standard', '/metric/explorer');
|
||||
insert into role_operation(role_name, operation) values('Standard', '/object/explorer');
|
||||
insert into role_operation(role_name, operation) values('Standard', '/log/explorer');
|
||||
insert into role_operation(role_name, operation) values('Standard', '/trace/explorer');
|
||||
insert into role_operation(role_name, operation) values('Standard', '/help/version');
|
||||
insert into role_operation(role_name, operation) values('Standard', '/help/contact');
|
||||
insert into role_operation(role_name, operation) values('Standard', '/alert-rules-built-in');
|
||||
insert into role_operation(role_name, operation) values('Standard', '/dashboards-built-in');
|
||||
insert into role_operation(role_name, operation) values('Standard', '/trace/dependencies');
|
||||
|
||||
insert into role_operation(role_name, operation) values('Standard', '/users');
|
||||
insert into role_operation(role_name, operation) values('Standard', '/user-groups');
|
||||
insert into role_operation(role_name, operation) values('Standard', '/user-groups/add');
|
||||
@@ -141,6 +135,32 @@ insert into role_operation(role_name, operation) values('Standard', '/recording-
|
||||
insert into role_operation(role_name, operation) values('Standard', '/recording-rules/put');
|
||||
insert into role_operation(role_name, operation) values('Standard', '/recording-rules/del');
|
||||
|
||||
CREATE TABLE recording_rule (
|
||||
id bigserial NOT NULL,
|
||||
group_id bigint not null default 0,
|
||||
cluster varchar(128) not null,
|
||||
name varchar(255) not null,
|
||||
note varchar(255) not null,
|
||||
disabled smallint not null,
|
||||
prom_ql varchar(8192) not null,
|
||||
prom_eval_interval int not null,
|
||||
append_tags varchar(255) default '',
|
||||
create_at bigint default 0,
|
||||
create_by varchar(64) default '',
|
||||
update_at bigint default 0,
|
||||
update_by varchar(64) default '',
|
||||
CONSTRAINT recording_rule_pk PRIMARY KEY (id)
|
||||
) ;
|
||||
CREATE INDEX recording_rule_group_id_idx ON recording_rule (group_id);
|
||||
CREATE INDEX recording_rule_update_at_idx ON recording_rule (update_at);
|
||||
COMMENT ON COLUMN recording_rule.group_id IS 'group_id';
|
||||
COMMENT ON COLUMN recording_rule.name IS 'new metric name';
|
||||
COMMENT ON COLUMN recording_rule.note IS 'rule note';
|
||||
COMMENT ON COLUMN recording_rule.disabled IS '0:enabled 1:disabled';
|
||||
COMMENT ON COLUMN recording_rule.prom_ql IS 'promql';
|
||||
COMMENT ON COLUMN recording_rule.prom_eval_interval IS 'evaluate interval';
|
||||
COMMENT ON COLUMN recording_rule.append_tags IS 'split by space: service=n9e mod=api';
|
||||
|
||||
-- for alert_rule | collect_rule | mute | dashboard grouping
|
||||
CREATE TABLE busi_group (
|
||||
id bigserial,
|
||||
@@ -150,10 +170,10 @@ CREATE TABLE busi_group (
|
||||
create_at bigint not null default 0,
|
||||
create_by varchar(64) not null default '',
|
||||
update_at bigint not null default 0,
|
||||
update_by varchar(64) not null default '',
|
||||
PRIMARY KEY (id),
|
||||
UNIQUE (name)
|
||||
update_by varchar(64) not null default ''
|
||||
) ;
|
||||
ALTER TABLE busi_group ADD CONSTRAINT busi_group_pk PRIMARY KEY (id);
|
||||
ALTER TABLE busi_group ADD CONSTRAINT busi_group_un UNIQUE ("name");
|
||||
COMMENT ON COLUMN busi_group.label_value IS 'if label_enable: label_value can not be blank';
|
||||
|
||||
insert into busi_group(id, name, create_at, create_by, update_at, update_by) values(1, 'Default Busi Group', date_part('epoch',current_timestamp)::int, 'root', date_part('epoch',current_timestamp)::int, 'root');
|
||||
@@ -162,49 +182,43 @@ CREATE TABLE busi_group_member (
|
||||
id bigserial,
|
||||
busi_group_id bigint not null ,
|
||||
user_group_id bigint not null ,
|
||||
perm_flag char(2) not null ,
|
||||
PRIMARY KEY (id)
|
||||
perm_flag char(2) not null
|
||||
) ;
|
||||
ALTER TABLE busi_group_member ADD CONSTRAINT busi_group_member_pk PRIMARY KEY (id);
|
||||
CREATE INDEX busi_group_member_busi_group_id_idx ON busi_group_member (busi_group_id);
|
||||
CREATE INDEX busi_group_member_user_group_id_idx ON busi_group_member (user_group_id);
|
||||
COMMENT ON COLUMN busi_group_member.busi_group_id IS 'busi group id';
|
||||
COMMENT ON COLUMN busi_group_member.user_group_id IS 'user group id';
|
||||
COMMENT ON COLUMN busi_group_member.perm_flag IS 'ro | rw';
|
||||
|
||||
|
||||
insert into busi_group_member(busi_group_id, user_group_id, perm_flag) values(1, 1, 'rw');
|
||||
|
||||
-- for dashboard new version
|
||||
CREATE TABLE board (
|
||||
id bigserial,
|
||||
id bigserial not null ,
|
||||
group_id bigint not null default 0 ,
|
||||
name varchar(191) not null,
|
||||
ident varchar(200) not null default '',
|
||||
tags varchar(255) not null ,
|
||||
public smallint not null default 0 ,
|
||||
built_in smallint not null default 0 ,
|
||||
hide smallint not null default 0 ,
|
||||
public smallint not null default 0,
|
||||
create_at bigint not null default 0,
|
||||
create_by varchar(64) not null default '',
|
||||
update_at bigint not null default 0,
|
||||
update_by varchar(64) not null default '',
|
||||
PRIMARY KEY (id),
|
||||
UNIQUE (group_id, name)
|
||||
update_by varchar(64) not null default ''
|
||||
) ;
|
||||
CREATE INDEX board_ident_idx ON board (ident);
|
||||
ALTER TABLE board ADD CONSTRAINT board_pk PRIMARY KEY (id);
|
||||
ALTER TABLE board ADD CONSTRAINT board_un UNIQUE (group_id,"name");
|
||||
COMMENT ON COLUMN board.group_id IS 'busi group id';
|
||||
COMMENT ON COLUMN board.tags IS 'split by space';
|
||||
COMMENT ON COLUMN board.public IS '0:false 1:true';
|
||||
COMMENT ON COLUMN board.built_in IS '0:false 1:true';
|
||||
COMMENT ON COLUMN board.hide IS '0:false 1:true';
|
||||
|
||||
CREATE INDEX board_ident_idx ON board (ident);
|
||||
|
||||
-- for dashboard new version
|
||||
CREATE TABLE board_payload (
|
||||
id bigint not null ,
|
||||
payload text not null,
|
||||
UNIQUE (id)
|
||||
) ;
|
||||
payload text not null
|
||||
);
|
||||
ALTER TABLE board_payload ADD CONSTRAINT board_payload_un UNIQUE (id);
|
||||
COMMENT ON COLUMN board_payload.id IS 'dashboard id';
|
||||
|
||||
-- deprecated
|
||||
@@ -217,13 +231,9 @@ CREATE TABLE dashboard (
|
||||
create_at bigint not null default 0,
|
||||
create_by varchar(64) not null default '',
|
||||
update_at bigint not null default 0,
|
||||
update_by varchar(64) not null default '',
|
||||
PRIMARY KEY (id),
|
||||
UNIQUE (group_id, name)
|
||||
update_by varchar(64) not null default ''
|
||||
) ;
|
||||
COMMENT ON COLUMN dashboard.group_id IS 'busi group id';
|
||||
COMMENT ON COLUMN dashboard.tags IS 'split by space';
|
||||
COMMENT ON COLUMN dashboard.configs IS 'dashboard variables';
|
||||
ALTER TABLE dashboard ADD CONSTRAINT dashboard_pk PRIMARY KEY (id);
|
||||
|
||||
-- deprecated
|
||||
-- auto create the first subclass 'Default chart group' of dashboard
|
||||
@@ -231,82 +241,71 @@ CREATE TABLE chart_group (
|
||||
id bigserial,
|
||||
dashboard_id bigint not null,
|
||||
name varchar(255) not null,
|
||||
weight int not null default 0,
|
||||
PRIMARY KEY (id)
|
||||
weight int not null default 0
|
||||
) ;
|
||||
CREATE INDEX chart_group_dashboard_id_idx ON chart_group (dashboard_id);
|
||||
ALTER TABLE chart_group ADD CONSTRAINT chart_group_pk PRIMARY KEY (id);
|
||||
|
||||
-- deprecated
|
||||
CREATE TABLE chart (
|
||||
id bigserial,
|
||||
group_id bigint not null ,
|
||||
configs text,
|
||||
weight int not null default 0,
|
||||
PRIMARY KEY (id)
|
||||
weight int not null default 0
|
||||
) ;
|
||||
CREATE INDEX chart_group_id_idx ON chart (group_id);
|
||||
COMMENT ON COLUMN chart.group_id IS 'chart group id';
|
||||
|
||||
ALTER TABLE chart ADD CONSTRAINT chart_pk PRIMARY KEY (id);
|
||||
|
||||
CREATE TABLE chart_share (
|
||||
id bigserial,
|
||||
cluster varchar(128) not null,
|
||||
dashboard_id bigint not null,
|
||||
datasource_id bigint not null default 0,
|
||||
configs text,
|
||||
create_at bigint not null default 0,
|
||||
create_by varchar(64) not null default '',
|
||||
primary key (id)
|
||||
create_by varchar(64) not null default ''
|
||||
) ;
|
||||
ALTER TABLE chart_share ADD CONSTRAINT chart_share_pk PRIMARY KEY (id);
|
||||
CREATE INDEX chart_share_create_at_idx ON chart_share (create_at);
|
||||
|
||||
|
||||
CREATE TABLE alert_rule (
|
||||
id bigserial,
|
||||
group_id bigint not null default 0 ,
|
||||
cate varchar(128) not null,
|
||||
datasource_ids varchar(255) not null default '' ,
|
||||
cluster varchar(128) not null,
|
||||
name varchar(255) not null,
|
||||
note varchar(1024) not null default '',
|
||||
prod varchar(255) not null default '',
|
||||
algorithm varchar(255) not null default '',
|
||||
algo_params varchar(255),
|
||||
delay int not null default 0,
|
||||
severity smallint not null ,
|
||||
disabled smallint not null ,
|
||||
prom_for_duration int not null ,
|
||||
rule_config text not null ,
|
||||
prom_ql text not null ,
|
||||
prom_eval_interval int not null ,
|
||||
enable_stime varchar(255) not null default '00:00',
|
||||
enable_etime varchar(255) not null default '23:59',
|
||||
enable_days_of_week varchar(255) not null default '' ,
|
||||
enable_in_bg smallint not null default 0 ,
|
||||
notify_recovered smallint not null ,
|
||||
notify_channels varchar(255) not null default '' ,
|
||||
notify_groups varchar(255) not null default '' ,
|
||||
notify_repeat_step int not null default 0 ,
|
||||
notify_max_number int not null default 0 ,
|
||||
recover_duration int not null default 0 ,
|
||||
callbacks varchar(255) not null default '' ,
|
||||
runbook_url varchar(255),
|
||||
append_tags varchar(255) not null default '' ,
|
||||
annotations text not null ,
|
||||
create_at bigint not null default 0,
|
||||
create_by varchar(64) not null default '',
|
||||
update_at bigint not null default 0,
|
||||
update_by varchar(64) not null default '',
|
||||
PRIMARY KEY (id)
|
||||
) ;
|
||||
CREATE INDEX alert_rule_group_id_idx ON alert_rule (group_id);
|
||||
CREATE INDEX alert_rule_update_at_idx ON alert_rule (update_at);
|
||||
id bigserial NOT NULL,
|
||||
group_id int8 NOT NULL DEFAULT 0,
|
||||
cate varchar(128) not null default '' ,
|
||||
"cluster" varchar(128) NOT NULL,
|
||||
"name" varchar(255) NOT NULL,
|
||||
note varchar(1024) NOT NULL,
|
||||
severity int2 NOT NULL,
|
||||
disabled int2 NOT NULL,
|
||||
prom_for_duration int4 NOT NULL,
|
||||
prom_ql text NOT NULL,
|
||||
prom_eval_interval int4 NOT NULL,
|
||||
enable_stime bpchar(5) NOT NULL DEFAULT '00:00'::bpchar,
|
||||
enable_etime bpchar(5) NOT NULL DEFAULT '23:59'::bpchar,
|
||||
enable_days_of_week varchar(32) NOT NULL DEFAULT ''::character varying,
|
||||
enable_in_bg int2 NOT NULL DEFAULT 0,
|
||||
notify_recovered int2 NOT NULL,
|
||||
notify_channels varchar(255) NOT NULL DEFAULT ''::character varying,
|
||||
notify_groups varchar(255) NOT NULL DEFAULT ''::character varying,
|
||||
notify_repeat_step int4 NOT NULL DEFAULT 0,
|
||||
notify_max_number int4 not null default 0,
|
||||
recover_duration int4 NOT NULL DEFAULT 0,
|
||||
callbacks varchar(255) NOT NULL DEFAULT ''::character varying,
|
||||
runbook_url varchar(255) NULL,
|
||||
append_tags varchar(255) NOT NULL DEFAULT ''::character varying,
|
||||
create_at int8 NOT NULL DEFAULT 0,
|
||||
create_by varchar(64) NOT NULL DEFAULT ''::character varying,
|
||||
update_at int8 NOT NULL DEFAULT 0,
|
||||
update_by varchar(64) NOT NULL DEFAULT ''::character varying,
|
||||
prod varchar(255) NOT NULL DEFAULT ''::character varying,
|
||||
algorithm varchar(255) NOT NULL DEFAULT ''::character varying,
|
||||
algo_params varchar(255) NULL,
|
||||
delay int4 NOT NULL DEFAULT 0,
|
||||
CONSTRAINT alert_rule_pk PRIMARY KEY (id)
|
||||
);
|
||||
CREATE INDEX alert_rule_group_id_idx ON alert_rule USING btree (group_id);
|
||||
CREATE INDEX alert_rule_update_at_idx ON alert_rule USING btree (update_at);
|
||||
|
||||
COMMENT ON COLUMN alert_rule.group_id IS 'busi group id';
|
||||
COMMENT ON COLUMN alert_rule.datasource_ids IS 'datasource ids';
|
||||
COMMENT ON COLUMN alert_rule.severity IS '1:Emergency 2:Warning 3:Notice';
|
||||
COMMENT ON COLUMN alert_rule.severity IS '0:Emergency 1:Warning 2:Notice';
|
||||
COMMENT ON COLUMN alert_rule.disabled IS '0:enabled 1:disabled';
|
||||
COMMENT ON COLUMN alert_rule.prom_for_duration IS 'prometheus for, unit:s';
|
||||
COMMENT ON COLUMN alert_rule.rule_config IS 'rule_config';
|
||||
COMMENT ON COLUMN alert_rule.prom_ql IS 'promql';
|
||||
COMMENT ON COLUMN alert_rule.prom_eval_interval IS 'evaluate interval';
|
||||
COMMENT ON COLUMN alert_rule.enable_stime IS '00:00';
|
||||
@@ -320,70 +319,57 @@ COMMENT ON COLUMN alert_rule.notify_repeat_step IS 'unit: min';
|
||||
COMMENT ON COLUMN alert_rule.recover_duration IS 'unit: s';
|
||||
COMMENT ON COLUMN alert_rule.callbacks IS 'split by space: http://a.com/api/x http://a.com/api/y';
|
||||
COMMENT ON COLUMN alert_rule.append_tags IS 'split by space: service=n9e mod=api';
|
||||
COMMENT ON COLUMN alert_rule.annotations IS 'annotations';
|
||||
|
||||
|
||||
CREATE TABLE alert_mute (
|
||||
id bigserial,
|
||||
group_id bigint not null default 0 ,
|
||||
prod varchar(255) not null default '',
|
||||
cate varchar(128) not null default '' ,
|
||||
prod varchar(255) NOT NULL DEFAULT '' ,
|
||||
note varchar(1024) not null default '',
|
||||
cate varchar(128) not null,
|
||||
cluster varchar(128) not null,
|
||||
datasource_ids varchar(255) not null default '' ,
|
||||
tags jsonb NOT NULL ,
|
||||
tags varchar(4096) not null default '' ,
|
||||
cause varchar(255) not null default '',
|
||||
btime bigint not null default 0 ,
|
||||
etime bigint not null default 0 ,
|
||||
disabled smallint not null default 0 ,
|
||||
mute_time_type smallint not null default 0,
|
||||
periodic_mutes varchar(4096) not null default '',
|
||||
create_at bigint not null default 0,
|
||||
create_by varchar(64) not null default '',
|
||||
update_at bigint not null default 0,
|
||||
update_by varchar(64) not null default '',
|
||||
PRIMARY KEY (id)
|
||||
update_by varchar(64) not null default ''
|
||||
) ;
|
||||
ALTER TABLE alert_mute ADD CONSTRAINT alert_mute_pk PRIMARY KEY (id);
|
||||
CREATE INDEX alert_mute_create_at_idx ON alert_mute (create_at);
|
||||
CREATE INDEX alert_mute_group_id_idx ON alert_mute (group_id);
|
||||
CREATE INDEX alert_mute_update_at_idx ON alert_mute (update_at);
|
||||
COMMENT ON COLUMN alert_mute.group_id IS 'busi group id';
|
||||
COMMENT ON COLUMN alert_mute.datasource_ids IS 'datasource ids';
|
||||
COMMENT ON COLUMN alert_mute.tags IS 'json,map,tagkey->regexp|value';
|
||||
COMMENT ON COLUMN alert_mute.btime IS 'begin time';
|
||||
COMMENT ON COLUMN alert_mute.etime IS 'end time';
|
||||
COMMENT ON COLUMN alert_mute.disabled IS '0:enabled 1:disabled';
|
||||
|
||||
|
||||
CREATE TABLE alert_subscribe (
|
||||
id bigserial,
|
||||
name varchar(255) not null default '',
|
||||
disabled smallint not null default 0 ,
|
||||
"name" varchar(255) NOT NULL default '',
|
||||
disabled int2 NOT NULL default 0 ,
|
||||
group_id bigint not null default 0 ,
|
||||
prod varchar(255) not null default '',
|
||||
cate varchar(128) not null,
|
||||
datasource_ids varchar(255) not null default '' ,
|
||||
cate varchar(128) not null default '' ,
|
||||
cluster varchar(128) not null,
|
||||
rule_id bigint not null default 0,
|
||||
tags varchar(4096) not null default '' ,
|
||||
tags jsonb not null ,
|
||||
redefine_severity smallint default 0 ,
|
||||
new_severity smallint not null ,
|
||||
redefine_channels smallint default 0 ,
|
||||
new_channels varchar(255) not null default '' ,
|
||||
user_group_ids varchar(250) not null ,
|
||||
webhooks text not null,
|
||||
redefine_webhooks smallint default 0,
|
||||
for_duration bigint not null default 0,
|
||||
create_at bigint not null default 0,
|
||||
create_by varchar(64) not null default '',
|
||||
update_at bigint not null default 0,
|
||||
update_by varchar(64) not null default '',
|
||||
PRIMARY KEY (id)
|
||||
update_by varchar(64) not null default ''
|
||||
) ;
|
||||
ALTER TABLE alert_subscribe ADD CONSTRAINT alert_subscribe_pk PRIMARY KEY (id);
|
||||
CREATE INDEX alert_subscribe_group_id_idx ON alert_subscribe (group_id);
|
||||
CREATE INDEX alert_subscribe_update_at_idx ON alert_subscribe (update_at);
|
||||
COMMENT ON COLUMN alert_subscribe.disabled IS '0:enabled 1:disabled';
|
||||
COMMENT ON COLUMN alert_subscribe.group_id IS 'busi group id';
|
||||
COMMENT ON COLUMN alert_subscribe.datasource_ids IS 'datasource ids';
|
||||
COMMENT ON COLUMN alert_subscribe.tags IS 'json,map,tagkey->regexp|value';
|
||||
COMMENT ON COLUMN alert_subscribe.redefine_severity IS 'is redefine severity?';
|
||||
COMMENT ON COLUMN alert_subscribe.new_severity IS '0:Emergency 1:Warning 2:Notice';
|
||||
@@ -391,43 +377,26 @@ COMMENT ON COLUMN alert_subscribe.redefine_channels IS 'is redefine channels?';
|
||||
COMMENT ON COLUMN alert_subscribe.new_channels IS 'split by space: sms voice email dingtalk wecom';
|
||||
COMMENT ON COLUMN alert_subscribe.user_group_ids IS 'split by space 1 34 5, notify cc to user_group_ids';
|
||||
|
||||
|
||||
|
||||
CREATE TABLE target (
|
||||
id bigserial,
|
||||
group_id bigint not null default 0 ,
|
||||
cluster varchar(128) not null ,
|
||||
ident varchar(191) not null ,
|
||||
note varchar(255) not null default '' ,
|
||||
tags varchar(512) not null default '' ,
|
||||
update_at bigint not null default 0,
|
||||
PRIMARY KEY (id),
|
||||
UNIQUE (ident)
|
||||
update_at bigint not null default 0
|
||||
) ;
|
||||
ALTER TABLE target ADD CONSTRAINT target_pk PRIMARY KEY (id);
|
||||
ALTER TABLE target ADD CONSTRAINT target_un UNIQUE (ident);
|
||||
CREATE INDEX target_group_id_idx ON target (group_id);
|
||||
|
||||
COMMENT ON COLUMN target.group_id IS 'busi group id';
|
||||
COMMENT ON COLUMN target."cluster" IS 'append to alert event as field';
|
||||
COMMENT ON COLUMN target.ident IS 'target id';
|
||||
COMMENT ON COLUMN target.note IS 'append to alert event as field';
|
||||
COMMENT ON COLUMN target.tags IS 'append to series data as tags, split by space, append external space at suffix';
|
||||
|
||||
-- case1: target_idents; case2: target_tags
|
||||
-- CREATE TABLE collect_rule (
|
||||
-- id bigserial,
|
||||
-- group_id bigint not null default 0 comment 'busi group id',
|
||||
-- cluster varchar(128) not null,
|
||||
-- target_idents varchar(512) not null default '' comment 'ident list, split by space',
|
||||
-- target_tags varchar(512) not null default '' comment 'filter targets by tags, split by space',
|
||||
-- name varchar(191) not null default '',
|
||||
-- note varchar(255) not null default '',
|
||||
-- step int not null,
|
||||
-- type varchar(64) not null comment 'e.g. port proc log plugin',
|
||||
-- data text not null,
|
||||
-- append_tags varchar(255) not null default '' comment 'split by space: e.g. mod=n9e dept=cloud',
|
||||
-- create_at bigint not null default 0,
|
||||
-- create_by varchar(64) not null default '',
|
||||
-- update_at bigint not null default 0,
|
||||
-- update_by varchar(64) not null default '',
|
||||
-- PRIMARY KEY (id),
|
||||
-- KEY (group_id, type, name)
|
||||
-- ) ;
|
||||
|
||||
CREATE TABLE metric_view (
|
||||
id bigserial,
|
||||
@@ -435,45 +404,16 @@ CREATE TABLE metric_view (
|
||||
cate smallint not null ,
|
||||
configs varchar(8192) not null default '',
|
||||
create_at bigint not null default 0,
|
||||
create_by bigint not null default 0,
|
||||
update_at bigint not null default 0,
|
||||
PRIMARY KEY (id)
|
||||
create_by bigint not null default 0 ,
|
||||
update_at bigint not null default 0
|
||||
) ;
|
||||
ALTER TABLE metric_view ADD CONSTRAINT metric_view_pk PRIMARY KEY (id);
|
||||
CREATE INDEX metric_view_create_by_idx ON metric_view (create_by);
|
||||
|
||||
COMMENT ON COLUMN metric_view.cate IS '0: preset 1: custom';
|
||||
COMMENT ON COLUMN metric_view.create_by IS 'user id';
|
||||
|
||||
|
||||
insert into metric_view(name, cate, configs) values('Host View', 0, '{"filters":[{"oper":"=","label":"__name__","value":"cpu_usage_idle"}],"dynamicLabels":[],"dimensionLabels":[{"label":"ident","value":""}]}');
|
||||
|
||||
CREATE TABLE recording_rule (
|
||||
id bigserial,
|
||||
group_id bigint not null default '0',
|
||||
datasource_ids varchar(255) not null default '',
|
||||
cluster varchar(128) not null,
|
||||
name varchar(255) not null ,
|
||||
note varchar(255) not null ,
|
||||
disabled smallint not null default 0 ,
|
||||
prom_ql varchar(8192) not null ,
|
||||
prom_eval_interval int not null ,
|
||||
append_tags varchar(255) default '' ,
|
||||
create_at bigint default '0',
|
||||
create_by varchar(64) default '',
|
||||
update_at bigint default '0',
|
||||
update_by varchar(64) default '',
|
||||
PRIMARY KEY (id)
|
||||
) ;
|
||||
CREATE INDEX recording_rule_group_id_idx ON recording_rule (group_id);
|
||||
CREATE INDEX recording_rule_update_at_idx ON recording_rule (update_at);
|
||||
COMMENT ON COLUMN recording_rule.group_id IS 'group_id';
|
||||
COMMENT ON COLUMN recording_rule.datasource_ids IS 'datasource ids';
|
||||
COMMENT ON COLUMN recording_rule.name IS 'new metric name';
|
||||
COMMENT ON COLUMN recording_rule.note IS 'rule note';
|
||||
COMMENT ON COLUMN recording_rule.disabled IS '0:enabled 1:disabled';
|
||||
COMMENT ON COLUMN recording_rule.prom_ql IS 'promql';
|
||||
COMMENT ON COLUMN recording_rule.prom_eval_interval IS 'evaluate interval';
|
||||
COMMENT ON COLUMN recording_rule.append_tags IS 'split by space: service=n9e mod=api';
|
||||
|
||||
|
||||
CREATE TABLE alert_aggr_view (
|
||||
id bigserial,
|
||||
@@ -481,58 +421,54 @@ CREATE TABLE alert_aggr_view (
|
||||
rule varchar(2048) not null default '',
|
||||
cate smallint not null ,
|
||||
create_at bigint not null default 0,
|
||||
create_by bigint not null default 0,
|
||||
update_at bigint not null default 0,
|
||||
PRIMARY KEY (id)
|
||||
create_by bigint not null default 0 ,
|
||||
update_at bigint not null default 0
|
||||
) ;
|
||||
ALTER TABLE alert_aggr_view ADD CONSTRAINT alert_aggr_view_pk PRIMARY KEY (id);
|
||||
CREATE INDEX alert_aggr_view_create_by_idx ON alert_aggr_view (create_by);
|
||||
|
||||
COMMENT ON COLUMN alert_aggr_view.cate IS '0: preset 1: custom';
|
||||
COMMENT ON COLUMN alert_aggr_view.create_by IS 'user id';
|
||||
|
||||
|
||||
insert into alert_aggr_view(name, rule, cate) values('By BusiGroup, Severity', 'field:group_name::field:severity', 0);
|
||||
insert into alert_aggr_view(name, rule, cate) values('By RuleName', 'field:rule_name', 0);
|
||||
|
||||
CREATE TABLE alert_cur_event (
|
||||
id bigint not null ,
|
||||
cate varchar(128) not null,
|
||||
datasource_id bigint not null default 0 ,
|
||||
cluster varchar(128) not null,
|
||||
group_id bigint not null ,
|
||||
group_name varchar(255) not null default '' ,
|
||||
hash varchar(64) not null ,
|
||||
rule_id bigint not null,
|
||||
rule_name varchar(255) not null,
|
||||
rule_note varchar(2048) not null ,
|
||||
rule_prod varchar(255) not null default '',
|
||||
rule_algo varchar(255) not null default '',
|
||||
severity smallint not null ,
|
||||
prom_for_duration int not null ,
|
||||
prom_ql varchar(8192) not null ,
|
||||
prom_eval_interval int not null ,
|
||||
callbacks varchar(255) not null default '' ,
|
||||
runbook_url varchar(255),
|
||||
notify_recovered smallint not null ,
|
||||
notify_channels varchar(255) not null default '' ,
|
||||
notify_groups varchar(255) not null default '' ,
|
||||
notify_repeat_next bigint not null default 0 ,
|
||||
notify_cur_number int not null default 0 ,
|
||||
target_ident varchar(191) not null default '' ,
|
||||
target_note varchar(191) not null default '' ,
|
||||
first_trigger_time bigint,
|
||||
trigger_time bigint not null,
|
||||
trigger_value varchar(255) not null,
|
||||
annotations text not null ,
|
||||
rule_config text not null ,
|
||||
tags varchar(1024) not null default '' ,
|
||||
PRIMARY KEY (id)
|
||||
) ;
|
||||
CREATE INDEX alert_cur_event_hash_idx ON alert_cur_event (hash);
|
||||
CREATE INDEX alert_cur_event_rule_id_idx ON alert_cur_event (rule_id);
|
||||
CREATE INDEX alert_cur_event_tg_idx ON alert_cur_event (trigger_time, group_id);
|
||||
CREATE INDEX alert_cur_event_nrn_idx ON alert_cur_event (notify_repeat_next);
|
||||
id bigserial NOT NULL,
|
||||
cate varchar(128) not null default '' ,
|
||||
"cluster" varchar(128) NOT NULL,
|
||||
group_id int8 NOT NULL,
|
||||
group_name varchar(255) NOT NULL DEFAULT ''::character varying,
|
||||
hash varchar(64) NOT NULL,
|
||||
rule_id int8 NOT NULL,
|
||||
rule_name varchar(255) NOT NULL,
|
||||
rule_note varchar(2048) NOT NULL DEFAULT 'alert rule note'::character varying,
|
||||
severity int2 NOT NULL,
|
||||
prom_for_duration int4 NOT NULL,
|
||||
prom_ql varchar(8192) NOT NULL,
|
||||
prom_eval_interval int4 NOT NULL,
|
||||
callbacks varchar(255) NOT NULL DEFAULT ''::character varying,
|
||||
runbook_url varchar(255) NULL,
|
||||
notify_recovered int2 NOT NULL,
|
||||
notify_channels varchar(255) NOT NULL DEFAULT ''::character varying,
|
||||
notify_groups varchar(255) NOT NULL DEFAULT ''::character varying,
|
||||
notify_repeat_next int8 NOT NULL DEFAULT 0,
|
||||
notify_cur_number int4 not null default 0,
|
||||
target_ident varchar(191) NOT NULL DEFAULT ''::character varying,
|
||||
target_note varchar(191) NOT NULL DEFAULT ''::character varying,
|
||||
first_trigger_time int8,
|
||||
trigger_time int8 NOT NULL,
|
||||
trigger_value varchar(255) NOT NULL,
|
||||
tags varchar(1024) NOT NULL DEFAULT ''::character varying,
|
||||
rule_prod varchar(255) NOT NULL DEFAULT ''::character varying,
|
||||
rule_algo varchar(255) NOT NULL DEFAULT ''::character varying,
|
||||
CONSTRAINT alert_cur_event_pk PRIMARY KEY (id)
|
||||
);
|
||||
CREATE INDEX alert_cur_event_hash_idx ON alert_cur_event USING btree (hash);
|
||||
CREATE INDEX alert_cur_event_notify_repeat_next_idx ON alert_cur_event USING btree (notify_repeat_next);
|
||||
CREATE INDEX alert_cur_event_rule_id_idx ON alert_cur_event USING btree (rule_id);
|
||||
CREATE INDEX alert_cur_event_trigger_time_idx ON alert_cur_event USING btree (trigger_time, group_id);
|
||||
COMMENT ON COLUMN alert_cur_event.id IS 'use alert_his_event.id';
|
||||
COMMENT ON COLUMN alert_cur_event.datasource_id IS 'datasource id';
|
||||
COMMENT ON COLUMN alert_cur_event.group_id IS 'busi group id of rule';
|
||||
COMMENT ON COLUMN alert_cur_event.group_name IS 'busi group name';
|
||||
COMMENT ON COLUMN alert_cur_event.hash IS 'rule_id + vector_pk';
|
||||
@@ -548,52 +484,46 @@ COMMENT ON COLUMN alert_cur_event.notify_groups IS 'split by space: 233 43';
|
||||
COMMENT ON COLUMN alert_cur_event.notify_repeat_next IS 'next timestamp to notify, get repeat settings from rule';
|
||||
COMMENT ON COLUMN alert_cur_event.target_ident IS 'target ident, also in tags';
|
||||
COMMENT ON COLUMN alert_cur_event.target_note IS 'target note';
|
||||
COMMENT ON COLUMN alert_cur_event.annotations IS 'annotations';
|
||||
COMMENT ON COLUMN alert_cur_event.rule_config IS 'rule_config';
|
||||
COMMENT ON COLUMN alert_cur_event.tags IS 'merge data_tags rule_tags, split by ,,';
|
||||
|
||||
|
||||
CREATE TABLE alert_his_event (
|
||||
id bigserial,
|
||||
is_recovered smallint not null,
|
||||
cate varchar(128) not null,
|
||||
datasource_id bigint not null default 0 ,
|
||||
cluster varchar(128) not null,
|
||||
group_id bigint not null ,
|
||||
group_name varchar(255) not null default '' ,
|
||||
hash varchar(64) not null ,
|
||||
rule_id bigint not null,
|
||||
rule_name varchar(255) not null,
|
||||
rule_note varchar(2048) not null default 'alert rule note',
|
||||
rule_prod varchar(255) not null default '',
|
||||
rule_algo varchar(255) not null default '',
|
||||
severity smallint not null ,
|
||||
prom_for_duration int not null ,
|
||||
prom_ql varchar(8192) not null ,
|
||||
prom_eval_interval int not null ,
|
||||
callbacks varchar(255) not null default '' ,
|
||||
runbook_url varchar(255),
|
||||
notify_recovered smallint not null ,
|
||||
notify_channels varchar(255) not null default '' ,
|
||||
notify_groups varchar(255) not null default '' ,
|
||||
notify_cur_number int not null default 0 ,
|
||||
target_ident varchar(191) not null default '' ,
|
||||
target_note varchar(191) not null default '' ,
|
||||
first_trigger_time bigint,
|
||||
trigger_time bigint not null,
|
||||
trigger_value varchar(255) not null,
|
||||
recover_time bigint not null default 0,
|
||||
last_eval_time bigint not null default 0 ,
|
||||
tags varchar(1024) not null default '' ,
|
||||
annotations text not null ,
|
||||
rule_config text not null ,
|
||||
PRIMARY KEY (id)
|
||||
) ;
|
||||
CREATE INDEX alert_his_event_hash_idx ON alert_his_event (hash);
|
||||
CREATE INDEX alert_his_event_rule_id_idx ON alert_his_event (rule_id);
|
||||
CREATE INDEX alert_his_event_tg_idx ON alert_his_event (trigger_time, group_id);
|
||||
id bigserial NOT NULL,
|
||||
is_recovered int2 NOT NULL,
|
||||
cate varchar(128) not null default '' ,
|
||||
"cluster" varchar(128) NOT NULL,
|
||||
group_id int8 NOT NULL,
|
||||
group_name varchar(255) NOT NULL DEFAULT ''::character varying,
|
||||
hash varchar(64) NOT NULL,
|
||||
rule_id int8 NOT NULL,
|
||||
rule_name varchar(255) NOT NULL,
|
||||
rule_note varchar(2048) NOT NULL DEFAULT 'alert rule note'::character varying,
|
||||
severity int2 NOT NULL,
|
||||
prom_for_duration int4 NOT NULL,
|
||||
prom_ql varchar(8192) NOT NULL,
|
||||
prom_eval_interval int4 NOT NULL,
|
||||
callbacks varchar(255) NOT NULL DEFAULT ''::character varying,
|
||||
runbook_url varchar(255) NULL,
|
||||
notify_recovered int2 NOT NULL,
|
||||
notify_channels varchar(255) NOT NULL DEFAULT ''::character varying,
|
||||
notify_groups varchar(255) NOT NULL DEFAULT ''::character varying,
|
||||
notify_cur_number int4 not null default 0,
|
||||
target_ident varchar(191) NOT NULL DEFAULT ''::character varying,
|
||||
target_note varchar(191) NOT NULL DEFAULT ''::character varying,
|
||||
first_trigger_time int8,
|
||||
trigger_time int8 NOT NULL,
|
||||
trigger_value varchar(255) NOT NULL,
|
||||
recover_time int8 NOT NULL DEFAULT 0,
|
||||
last_eval_time int8 NOT NULL DEFAULT 0,
|
||||
tags varchar(1024) NOT NULL DEFAULT ''::character varying,
|
||||
rule_prod varchar(255) NOT NULL DEFAULT ''::character varying,
|
||||
rule_algo varchar(255) NOT NULL DEFAULT ''::character varying,
|
||||
CONSTRAINT alert_his_event_pk PRIMARY KEY (id)
|
||||
);
|
||||
CREATE INDEX alert_his_event_hash_idx ON alert_his_event USING btree (hash);
|
||||
CREATE INDEX alert_his_event_rule_id_idx ON alert_his_event USING btree (rule_id);
|
||||
CREATE INDEX alert_his_event_trigger_time_idx ON alert_his_event USING btree (trigger_time, group_id);
|
||||
|
||||
COMMENT ON COLUMN alert_his_event.group_id IS 'busi group id of rule';
|
||||
COMMENT ON COLUMN alert_his_event.datasource_id IS 'datasource id';
|
||||
COMMENT ON COLUMN alert_his_event.group_name IS 'busi group name';
|
||||
COMMENT ON COLUMN alert_his_event.hash IS 'rule_id + vector_pk';
|
||||
COMMENT ON COLUMN alert_his_event.rule_note IS 'alert rule note';
|
||||
@@ -609,8 +539,6 @@ COMMENT ON COLUMN alert_his_event.target_ident IS 'target ident, also in tags';
|
||||
COMMENT ON COLUMN alert_his_event.target_note IS 'target note';
|
||||
COMMENT ON COLUMN alert_his_event.last_eval_time IS 'for time filter';
|
||||
COMMENT ON COLUMN alert_his_event.tags IS 'merge data_tags rule_tags, split by ,,';
|
||||
COMMENT ON COLUMN alert_his_event.annotations IS 'annotations';
|
||||
COMMENT ON COLUMN alert_his_event.rule_config IS 'rule_config';
|
||||
|
||||
CREATE TABLE task_tpl
|
||||
(
|
||||
@@ -628,10 +556,11 @@ CREATE TABLE task_tpl
|
||||
create_at bigint not null default 0,
|
||||
create_by varchar(64) not null default '',
|
||||
update_at bigint not null default 0,
|
||||
update_by varchar(64) not null default '',
|
||||
PRIMARY KEY (id)
|
||||
update_by varchar(64) not null default ''
|
||||
) ;
|
||||
ALTER TABLE task_tpl ADD CONSTRAINT task_tpl_pk PRIMARY KEY (id);
|
||||
CREATE INDEX task_tpl_group_id_idx ON task_tpl (group_id);
|
||||
|
||||
COMMENT ON COLUMN task_tpl.group_id IS 'busi group id';
|
||||
COMMENT ON COLUMN task_tpl.tags IS 'split by space';
|
||||
|
||||
@@ -640,18 +569,17 @@ CREATE TABLE task_tpl_host
|
||||
(
|
||||
ii serial,
|
||||
id int not null ,
|
||||
host varchar(128) not null ,
|
||||
PRIMARY KEY (ii)
|
||||
host varchar(128) not null
|
||||
) ;
|
||||
CREATE INDEX task_tpl_host_id_host_idx ON task_tpl_host (id, host);
|
||||
ALTER TABLE task_tpl_host ADD CONSTRAINT task_tpl_host_pk PRIMARY KEY (id);
|
||||
CREATE INDEX task_tpl_host_id_idx ON task_tpl_host (id,host);
|
||||
|
||||
COMMENT ON COLUMN task_tpl_host.id IS 'task tpl id';
|
||||
COMMENT ON COLUMN task_tpl_host.host IS 'ip or hostname';
|
||||
|
||||
|
||||
CREATE TABLE task_record
|
||||
(
|
||||
id bigint not null ,
|
||||
event_id bigint not null default 0,
|
||||
id bigserial,
|
||||
group_id bigint not null ,
|
||||
ibex_address varchar(128) not null,
|
||||
ibex_auth_user varchar(128) not null default '',
|
||||
@@ -665,72 +593,22 @@ CREATE TABLE task_record
|
||||
script text not null,
|
||||
args varchar(512) not null default '',
|
||||
create_at bigint not null default 0,
|
||||
create_by varchar(64) not null default '',
|
||||
PRIMARY KEY (id)
|
||||
create_by varchar(64) not null default ''
|
||||
) ;
|
||||
CREATE INDEX task_record_cg_idx ON task_record (create_at, group_id);
|
||||
ALTER TABLE task_record ADD CONSTRAINT task_record_pk PRIMARY KEY (id);
|
||||
CREATE INDEX task_record_create_at_idx ON task_record (create_at,group_id);
|
||||
CREATE INDEX task_record_create_by_idx ON task_record (create_by);
|
||||
CREATE INDEX task_record_event_id_idx ON task_record (event_id);
|
||||
|
||||
COMMENT ON COLUMN task_record.id IS 'ibex task id';
|
||||
COMMENT ON COLUMN task_record.group_id IS 'busi group id';
|
||||
COMMENT ON COLUMN task_record.event_id IS 'event id';
|
||||
|
||||
CREATE TABLE alerting_engines
|
||||
(
|
||||
id serial,
|
||||
instance varchar(128) not null default '' ,
|
||||
datasource_id bigint not null default 0 ,
|
||||
engine_cluster varchar(128) not null default '' ,
|
||||
clock bigint not null,
|
||||
PRIMARY KEY (id)
|
||||
id bigserial NOT NULL,
|
||||
instance varchar(128) not null default '',
|
||||
cluster varchar(128) not null default '',
|
||||
clock bigint not null
|
||||
) ;
|
||||
ALTER TABLE alerting_engines ADD CONSTRAINT alerting_engines_pk PRIMARY KEY (id);
|
||||
COMMENT ON COLUMN alerting_engines.instance IS 'instance identification, e.g. 10.9.0.9:9090';
|
||||
COMMENT ON COLUMN alerting_engines.datasource_id IS 'datasource id';
|
||||
COMMENT ON COLUMN alerting_engines.engine_cluster IS 'target reader cluster';
|
||||
|
||||
|
||||
CREATE TABLE datasource
|
||||
(
|
||||
id serial,
|
||||
name varchar(191) not null default '',
|
||||
description varchar(255) not null default '',
|
||||
category varchar(255) not null default '',
|
||||
plugin_id int not null default 0,
|
||||
plugin_type varchar(255) not null default '',
|
||||
plugin_type_name varchar(255) not null default '',
|
||||
cluster_name varchar(255) not null default '',
|
||||
settings text not null,
|
||||
status varchar(255) not null default '',
|
||||
http varchar(4096) not null default '',
|
||||
auth varchar(8192) not null default '',
|
||||
created_at bigint not null default 0,
|
||||
created_by varchar(64) not null default '',
|
||||
updated_at bigint not null default 0,
|
||||
updated_by varchar(64) not null default '',
|
||||
UNIQUE (name),
|
||||
PRIMARY KEY (id)
|
||||
) ;
|
||||
|
||||
CREATE TABLE builtin_cate (
|
||||
id bigserial,
|
||||
name varchar(191) not null,
|
||||
user_id bigint not null default 0,
|
||||
PRIMARY KEY (id)
|
||||
) ;
|
||||
|
||||
CREATE TABLE notify_tpl (
|
||||
id bigserial,
|
||||
channel varchar(32) not null,
|
||||
name varchar(255) not null,
|
||||
content text not null,
|
||||
PRIMARY KEY (id),
|
||||
UNIQUE (channel)
|
||||
) ;
|
||||
|
||||
CREATE TABLE sso_config (
|
||||
id bigserial,
|
||||
name varchar(191) not null,
|
||||
content text not null,
|
||||
PRIMARY KEY (id),
|
||||
UNIQUE (name)
|
||||
) ;
|
||||
COMMENT ON COLUMN alerting_engines.cluster IS 'target reader cluster';
|
||||
@@ -1,6 +1,6 @@
|
||||
[
|
||||
{
|
||||
"name": "http detect failed",
|
||||
"name": "HTTP地址探测失败",
|
||||
"note": "",
|
||||
"severity": 2,
|
||||
"disabled": 0,
|
||||
58
docker/n9eetc/alerts/kafka_by_exporter.json
Normal file
58
docker/n9eetc/alerts/kafka_by_exporter.json
Normal file
@@ -0,0 +1,58 @@
|
||||
[
|
||||
{
|
||||
"name": "数据有丢失风险-同步副本数小于3",
|
||||
"note": "",
|
||||
"severity": 2,
|
||||
"disabled": 0,
|
||||
"prom_for_duration": 60,
|
||||
"prom_ql": "sum(kafka_topic_partition_in_sync_replica) by (topic) < 3",
|
||||
"prom_eval_interval": 15,
|
||||
"enable_stime": "00:00",
|
||||
"enable_etime": "23:59",
|
||||
"enable_days_of_week": [
|
||||
"1",
|
||||
"2",
|
||||
"3",
|
||||
"4",
|
||||
"5",
|
||||
"6",
|
||||
"0"
|
||||
],
|
||||
"enable_in_bg": 0,
|
||||
"notify_recovered": 1,
|
||||
"notify_channels": [],
|
||||
"notify_repeat_step": 60,
|
||||
"recover_duration": 0,
|
||||
"callbacks": [],
|
||||
"runbook_url": "",
|
||||
"append_tags": []
|
||||
},
|
||||
{
|
||||
"name": "消费能力不足-积压消息数超过50条",
|
||||
"note": "",
|
||||
"severity": 2,
|
||||
"disabled": 0,
|
||||
"prom_for_duration": 60,
|
||||
"prom_ql": "sum(kafka_topic_partition_current_offset{instance=\"$instance\"}) by (topic) - sum(kafka_consumergroup_current_offset{instance=\"$instance\"}) by (topic) ",
|
||||
"prom_eval_interval": 15,
|
||||
"enable_stime": "00:00",
|
||||
"enable_etime": "23:59",
|
||||
"enable_days_of_week": [
|
||||
"1",
|
||||
"2",
|
||||
"3",
|
||||
"4",
|
||||
"5",
|
||||
"6",
|
||||
"0"
|
||||
],
|
||||
"enable_in_bg": 0,
|
||||
"notify_recovered": 1,
|
||||
"notify_channels": [],
|
||||
"notify_repeat_step": 60,
|
||||
"recover_duration": 0,
|
||||
"callbacks": [],
|
||||
"runbook_url": "",
|
||||
"append_tags": []
|
||||
}
|
||||
]
|
||||
@@ -1,54 +1,15 @@
|
||||
[
|
||||
{
|
||||
"cate": "host",
|
||||
"datasource_ids": null,
|
||||
"name": "Lost connection with monitoring target - categraf",
|
||||
"note": "",
|
||||
"prod": "host",
|
||||
"algorithm": "",
|
||||
"algo_params": null,
|
||||
"delay": 0,
|
||||
"severity": 0,
|
||||
"disabled": 0,
|
||||
"prom_for_duration": 0,
|
||||
"prom_ql": "",
|
||||
"rule_config": {
|
||||
"inhibit": false,
|
||||
"queries": [
|
||||
{
|
||||
"key": "all_hosts",
|
||||
"op": "==",
|
||||
"values": []
|
||||
}
|
||||
],
|
||||
"triggers": [
|
||||
{
|
||||
"duration": 60,
|
||||
"severity": 2,
|
||||
"type": "target_miss"
|
||||
}
|
||||
]
|
||||
},
|
||||
"prom_eval_interval": 15,
|
||||
"enable_stime": "00:00",
|
||||
"enable_stimes": [
|
||||
"00:00"
|
||||
],
|
||||
"enable_etime": "23:59",
|
||||
"enable_etimes": [
|
||||
"23:59"
|
||||
],
|
||||
"enable_days_of_week": [
|
||||
"1",
|
||||
"2",
|
||||
"3",
|
||||
"4",
|
||||
"5",
|
||||
"6",
|
||||
"0"
|
||||
],
|
||||
"enable_days_of_weeks": [
|
||||
[
|
||||
{
|
||||
"name": "监控对象失联",
|
||||
"note": "",
|
||||
"severity": 1,
|
||||
"disabled": 0,
|
||||
"prom_for_duration": 60,
|
||||
"prom_ql": "max_over_time(target_up[130s]) == 0",
|
||||
"prom_eval_interval": 15,
|
||||
"enable_stime": "00:00",
|
||||
"enable_etime": "23:59",
|
||||
"enable_days_of_week": [
|
||||
"1",
|
||||
"2",
|
||||
"3",
|
||||
@@ -56,21 +17,50 @@
|
||||
"5",
|
||||
"6",
|
||||
"0"
|
||||
]
|
||||
],
|
||||
"enable_in_bg": 0,
|
||||
"notify_recovered": 1,
|
||||
"notify_channels": [],
|
||||
"notify_repeat_step": 60,
|
||||
"notify_max_number": 0,
|
||||
"recover_duration": 0,
|
||||
"callbacks": [],
|
||||
"runbook_url": "",
|
||||
"append_tags": [],
|
||||
"annotations": {}
|
||||
},
|
||||
],
|
||||
"notify_recovered": 1,
|
||||
"notify_channels": [
|
||||
"email",
|
||||
"dingtalk",
|
||||
"wecom"
|
||||
],
|
||||
"notify_repeat_step": 60,
|
||||
"callbacks": [],
|
||||
"runbook_url": "",
|
||||
"append_tags": []
|
||||
},
|
||||
{
|
||||
"name": "Machine load - high memory, please pay attention - categraf",
|
||||
"name": "机器负载-CPU较高,请关注",
|
||||
"note": "",
|
||||
"severity": 3,
|
||||
"disabled": 0,
|
||||
"prom_for_duration": 60,
|
||||
"prom_ql": "cpu_usage_idle{cpu=\"cpu-total\"} < 25",
|
||||
"prom_eval_interval": 15,
|
||||
"enable_stime": "00:00",
|
||||
"enable_etime": "23:59",
|
||||
"enable_days_of_week": [
|
||||
"1",
|
||||
"2",
|
||||
"3",
|
||||
"4",
|
||||
"5",
|
||||
"6",
|
||||
"0"
|
||||
],
|
||||
"notify_recovered": 1,
|
||||
"notify_channels": [
|
||||
"email",
|
||||
"dingtalk",
|
||||
"wecom"
|
||||
],
|
||||
"notify_repeat_step": 60,
|
||||
"callbacks": [],
|
||||
"runbook_url": "",
|
||||
"append_tags": []
|
||||
},
|
||||
{
|
||||
"name": "机器负载-内存较高,请关注",
|
||||
"note": "",
|
||||
"severity": 2,
|
||||
"disabled": 0,
|
||||
@@ -100,7 +90,7 @@
|
||||
"append_tags": []
|
||||
},
|
||||
{
|
||||
"name": "Hard disk - IO is a bit busy - categraf",
|
||||
"name": "硬盘-IO有点繁忙",
|
||||
"note": "",
|
||||
"severity": 2,
|
||||
"disabled": 0,
|
||||
@@ -130,7 +120,7 @@
|
||||
"append_tags": []
|
||||
},
|
||||
{
|
||||
"name": "Hard disk - expected to be written full in 4 hours - categraf",
|
||||
"name": "硬盘-预计再有4小时写满",
|
||||
"note": "",
|
||||
"severity": 1,
|
||||
"disabled": 0,
|
||||
@@ -160,7 +150,7 @@
|
||||
"append_tags": []
|
||||
},
|
||||
{
|
||||
"name": "packet loss in the inbound direction - categraf",
|
||||
"name": "网卡-入向有丢包",
|
||||
"note": "",
|
||||
"severity": 3,
|
||||
"disabled": 0,
|
||||
@@ -190,7 +180,7 @@
|
||||
"append_tags": []
|
||||
},
|
||||
{
|
||||
"name": "packet loss in the outbound direction - categraf",
|
||||
"name": "网卡-出向有丢包",
|
||||
"note": "",
|
||||
"severity": 3,
|
||||
"disabled": 0,
|
||||
@@ -220,7 +210,7 @@
|
||||
"append_tags": []
|
||||
},
|
||||
{
|
||||
"name": "number of TME_WAIT exceeds 20,000 - categraf",
|
||||
"name": "网络连接-TME_WAIT数量超过2万",
|
||||
"note": "",
|
||||
"severity": 2,
|
||||
"disabled": 0,
|
||||
@@ -1,6 +1,6 @@
|
||||
[
|
||||
{
|
||||
"name": "Insufficient inode resources - usage exceeds 90% - node exporter",
|
||||
"name": "inode资源不足-使用率超过90",
|
||||
"note": "",
|
||||
"severity": 2,
|
||||
"disabled": 1,
|
||||
@@ -28,8 +28,8 @@
|
||||
"append_tags": []
|
||||
},
|
||||
{
|
||||
"name": "Insufficient memory resources - utilization is greater than 75% - node exporter",
|
||||
"note": "Expansion or upgrading of configuration is required",
|
||||
"name": "内存资源不足-利用率大于75%",
|
||||
"note": "需要扩容或者升级配置了",
|
||||
"severity": 2,
|
||||
"disabled": 1,
|
||||
"prom_for_duration": 60,
|
||||
@@ -58,8 +58,8 @@
|
||||
"append_tags": []
|
||||
},
|
||||
{
|
||||
"name": "Insufficient memory resources - utilization is greater than 95% - node exporter",
|
||||
"note": "",
|
||||
"name": "内存资源不足-利用率大于95%",
|
||||
"note": "需要扩容或者升级配置了",
|
||||
"severity": 1,
|
||||
"disabled": 1,
|
||||
"prom_for_duration": 60,
|
||||
@@ -88,8 +88,8 @@
|
||||
"append_tags": []
|
||||
},
|
||||
{
|
||||
"name": "Insufficient file handles - usage exceeds 90% - node exporter",
|
||||
"note": "You can increase the file handle limit or expand the capacity",
|
||||
"name": "文件句柄不足-使用率超过90%",
|
||||
"note": "可以将文件句柄limit调大,或者扩容",
|
||||
"severity": 2,
|
||||
"disabled": 1,
|
||||
"prom_for_duration": 60,
|
||||
@@ -116,7 +116,7 @@
|
||||
"append_tags": []
|
||||
},
|
||||
{
|
||||
"name": "A certain disk is unable to read/write normally",
|
||||
"name": "某磁盘无法正常读写",
|
||||
"note": "",
|
||||
"severity": 1,
|
||||
"disabled": 1,
|
||||
@@ -144,7 +144,7 @@
|
||||
"append_tags": []
|
||||
},
|
||||
{
|
||||
"name": "Disk needs to be cleaned - utilization has reached 92% - node exporter",
|
||||
"name": "磁盘需要清理了-利用率达到92%",
|
||||
"note": "",
|
||||
"severity": 1,
|
||||
"disabled": 1,
|
||||
@@ -174,7 +174,7 @@
|
||||
"append_tags": []
|
||||
},
|
||||
{
|
||||
"name": "System conntrack needs to be adjusted - usage exceeds 80% - node exporter",
|
||||
"name": "系统conntrack需要调整-使用率超过80%",
|
||||
"note": "",
|
||||
"severity": 2,
|
||||
"disabled": 1,
|
||||
@@ -202,7 +202,7 @@
|
||||
"append_tags": []
|
||||
},
|
||||
{
|
||||
"name": "System experiences OOM - node exporter",
|
||||
"name": "系统出现oom",
|
||||
"note": "",
|
||||
"severity": 2,
|
||||
"disabled": 1,
|
||||
@@ -230,7 +230,7 @@
|
||||
"append_tags": []
|
||||
},
|
||||
{
|
||||
"name": "Packet loss in the inbound direction of the network card - node exporter",
|
||||
"name": "网卡入方向丢包",
|
||||
"note": "",
|
||||
"severity": 2,
|
||||
"disabled": 1,
|
||||
@@ -258,7 +258,7 @@
|
||||
"append_tags": []
|
||||
},
|
||||
{
|
||||
"name": "Packet loss in the outbound direction of the network card - node exporter",
|
||||
"name": "网卡出方向丢包",
|
||||
"note": "",
|
||||
"severity": 2,
|
||||
"disabled": 1,
|
||||
@@ -286,8 +286,8 @@
|
||||
"append_tags": []
|
||||
},
|
||||
{
|
||||
"name": "Insufficient computational resources - average load per core of the machine is greater than 10 - node exporter",
|
||||
"note": "",
|
||||
"name": "计算资源不足-机器每个核平均负载大于10",
|
||||
"note": "需要扩容或者升级配置了",
|
||||
"severity": 2,
|
||||
"disabled": 1,
|
||||
"prom_for_duration": 60,
|
||||
@@ -314,8 +314,8 @@
|
||||
"append_tags": []
|
||||
},
|
||||
{
|
||||
"name": "Too many running processes - exceeding 3000 - node exporter",
|
||||
"note": "",
|
||||
"name": "运行进程数过多-超过3000",
|
||||
"note": "建议扩容",
|
||||
"severity": 2,
|
||||
"disabled": 1,
|
||||
"prom_for_duration": 60,
|
||||
@@ -1,6 +1,6 @@
|
||||
[
|
||||
{
|
||||
"name": "Address is unreachable by PING, please pay attention - telegraf",
|
||||
"name": "有地址PING不通,请注意",
|
||||
"note": "",
|
||||
"severity": 1,
|
||||
"disabled": 0,
|
||||
@@ -30,44 +30,15 @@
|
||||
"append_tags": []
|
||||
},
|
||||
{
|
||||
"cate": "host",
|
||||
"datasource_ids": null,
|
||||
"name": "Lost connection with monitoring target - telegraf",
|
||||
"name": "有监控对象失联",
|
||||
"note": "",
|
||||
"prod": "host",
|
||||
"algorithm": "",
|
||||
"algo_params": null,
|
||||
"delay": 0,
|
||||
"severity": 0,
|
||||
"severity": 1,
|
||||
"disabled": 0,
|
||||
"prom_for_duration": 0,
|
||||
"prom_ql": "",
|
||||
"rule_config": {
|
||||
"inhibit": false,
|
||||
"queries": [
|
||||
{
|
||||
"key": "all_hosts",
|
||||
"op": "==",
|
||||
"values": []
|
||||
}
|
||||
],
|
||||
"triggers": [
|
||||
{
|
||||
"duration": 60,
|
||||
"severity": 2,
|
||||
"type": "target_miss"
|
||||
}
|
||||
]
|
||||
},
|
||||
"prom_for_duration": 60,
|
||||
"prom_ql": "target_up != 1",
|
||||
"prom_eval_interval": 15,
|
||||
"enable_stime": "00:00",
|
||||
"enable_stimes": [
|
||||
"00:00"
|
||||
],
|
||||
"enable_etime": "23:59",
|
||||
"enable_etimes": [
|
||||
"23:59"
|
||||
],
|
||||
"enable_days_of_week": [
|
||||
"1",
|
||||
"2",
|
||||
@@ -77,30 +48,19 @@
|
||||
"6",
|
||||
"0"
|
||||
],
|
||||
"enable_days_of_weeks": [
|
||||
[
|
||||
"1",
|
||||
"2",
|
||||
"3",
|
||||
"4",
|
||||
"5",
|
||||
"6",
|
||||
"0"
|
||||
]
|
||||
],
|
||||
"enable_in_bg": 0,
|
||||
"notify_recovered": 1,
|
||||
"notify_channels": [],
|
||||
"notify_channels": [
|
||||
"email",
|
||||
"dingtalk",
|
||||
"wecom"
|
||||
],
|
||||
"notify_repeat_step": 60,
|
||||
"notify_max_number": 0,
|
||||
"recover_duration": 0,
|
||||
"callbacks": [],
|
||||
"runbook_url": "",
|
||||
"append_tags": [],
|
||||
"annotations": {}
|
||||
"append_tags": []
|
||||
},
|
||||
{
|
||||
"name": "Port detection failed, please pay attention - telegraf",
|
||||
"name": "有端口探测失败,请注意",
|
||||
"note": "",
|
||||
"severity": 1,
|
||||
"disabled": 0,
|
||||
@@ -130,7 +90,7 @@
|
||||
"append_tags": []
|
||||
},
|
||||
{
|
||||
"name": "Machine load - high CPU, please pay attention - telegraf",
|
||||
"name": "机器负载-CPU较高,请关注",
|
||||
"note": "",
|
||||
"severity": 3,
|
||||
"disabled": 0,
|
||||
@@ -160,7 +120,7 @@
|
||||
"append_tags": []
|
||||
},
|
||||
{
|
||||
"name": "Machine load - high memory, please pay attention - telegraf",
|
||||
"name": "机器负载-内存较高,请关注",
|
||||
"note": "",
|
||||
"severity": 2,
|
||||
"disabled": 0,
|
||||
@@ -190,7 +150,7 @@
|
||||
"append_tags": []
|
||||
},
|
||||
{
|
||||
"name": "Hard disk - IO is very busy - telegraf",
|
||||
"name": "硬盘-IO非常繁忙",
|
||||
"note": "",
|
||||
"severity": 2,
|
||||
"disabled": 0,
|
||||
@@ -220,7 +180,7 @@
|
||||
"append_tags": []
|
||||
},
|
||||
{
|
||||
"name": "Hard disk - expected to be written full in 4 hours - telegraf",
|
||||
"name": "硬盘-预计再有4小时写满",
|
||||
"note": "",
|
||||
"severity": 1,
|
||||
"disabled": 0,
|
||||
@@ -250,7 +210,7 @@
|
||||
"append_tags": []
|
||||
},
|
||||
{
|
||||
"name": "packet loss in the inbound direction - telegraf",
|
||||
"name": "网卡-入向有丢包",
|
||||
"note": "",
|
||||
"severity": 3,
|
||||
"disabled": 0,
|
||||
@@ -280,7 +240,7 @@
|
||||
"append_tags": []
|
||||
},
|
||||
{
|
||||
"name": "packet loss in the outbound direction - telegraf",
|
||||
"name": "网卡-出向有丢包",
|
||||
"note": "",
|
||||
"severity": 3,
|
||||
"disabled": 0,
|
||||
@@ -310,7 +270,7 @@
|
||||
"append_tags": []
|
||||
},
|
||||
{
|
||||
"name": "Network connection - number of TME_WAIT exceeds 20,000 - telegraf",
|
||||
"name": "网络连接-TME_WAIT数量超过2万",
|
||||
"note": "",
|
||||
"severity": 2,
|
||||
"disabled": 0,
|
||||
@@ -340,7 +300,7 @@
|
||||
"append_tags": []
|
||||
},
|
||||
{
|
||||
"name": "there are processes with 0 count, a certain process may have crashed - telegraf",
|
||||
"name": "进程监控-有进程数为0,某进程可能挂了",
|
||||
"note": "",
|
||||
"severity": 1,
|
||||
"disabled": 0,
|
||||
@@ -370,7 +330,7 @@
|
||||
"append_tags": []
|
||||
},
|
||||
{
|
||||
"name": "process handle limit is too small - telegraf",
|
||||
"name": "进程监控-进程句柄限制过小",
|
||||
"note": "",
|
||||
"severity": 3,
|
||||
"disabled": 0,
|
||||
@@ -400,7 +360,7 @@
|
||||
"append_tags": []
|
||||
},
|
||||
{
|
||||
"name": "Process monitoring - lookup failure - telegraf",
|
||||
"name": "进程监控-采集失败",
|
||||
"note": "",
|
||||
"severity": 1,
|
||||
"disabled": 0,
|
||||
@@ -1,6 +1,6 @@
|
||||
[
|
||||
{
|
||||
"name": "Mongo has encountered an Assert error - exporter",
|
||||
"name": "Mongo出现Assert错误",
|
||||
"note": "",
|
||||
"severity": 3,
|
||||
"disabled": 0,
|
||||
@@ -30,7 +30,7 @@
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "Mongo has encountered cursor timeout - exporter",
|
||||
"name": "Mongo出现游标超时",
|
||||
"note": "",
|
||||
"severity": 2,
|
||||
"disabled": 0,
|
||||
@@ -60,7 +60,7 @@
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "Mongo has encountered page fault interrupt - exporter",
|
||||
"name": "Mongo出现页错误中断",
|
||||
"note": "",
|
||||
"severity": 2,
|
||||
"disabled": 0,
|
||||
@@ -90,7 +90,7 @@
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "Mongo has just restarted, please pay attention - exporter",
|
||||
"name": "Mongo刚刚有重启,请注意",
|
||||
"note": "",
|
||||
"severity": 3,
|
||||
"disabled": 0,
|
||||
@@ -120,7 +120,7 @@
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "Mongo replica set master-slave delay exceeds 30 seconds - exporter",
|
||||
"name": "Mongo副本集主从延迟超过30s",
|
||||
"note": "",
|
||||
"severity": 1,
|
||||
"disabled": 0,
|
||||
@@ -150,7 +150,7 @@
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "Mongo instance has crashed - exporter",
|
||||
"name": "Mongo实例挂了",
|
||||
"note": "",
|
||||
"severity": 1,
|
||||
"disabled": 0,
|
||||
@@ -180,7 +180,7 @@
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "Average MongoDB operation time exceeds 250 seconds - exporter",
|
||||
"name": "Mongo操作平均耗时超过250秒",
|
||||
"note": "",
|
||||
"severity": 2,
|
||||
"disabled": 0,
|
||||
@@ -210,7 +210,7 @@
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "Mongo connection number has exceeded 80% - exporter",
|
||||
"name": "Mongo连接数已超过80%",
|
||||
"note": "",
|
||||
"severity": 2,
|
||||
"disabled": 0,
|
||||
@@ -1,6 +1,6 @@
|
||||
[
|
||||
{
|
||||
"name": "MysqlInnodbLogWaits - categraf",
|
||||
"name": "MysqlInnodbLogWaits",
|
||||
"note": "MySQL innodb log writes stalling",
|
||||
"severity": 2,
|
||||
"disabled": 0,
|
||||
@@ -30,7 +30,7 @@
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "MysqlSlaveIoThreadNotRunning - categraf",
|
||||
"name": "MysqlSlaveIoThreadNotRunning",
|
||||
"note": "MySQL Slave IO thread not running",
|
||||
"severity": 1,
|
||||
"disabled": 0,
|
||||
@@ -60,7 +60,7 @@
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "MysqlSlaveReplicationLag - categraf",
|
||||
"name": "MysqlSlaveReplicationLag",
|
||||
"note": "",
|
||||
"severity": 1,
|
||||
"disabled": 0,
|
||||
@@ -90,7 +90,7 @@
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "MysqlSlaveSqlThreadNotRunning - categraf",
|
||||
"name": "MysqlSlaveSqlThreadNotRunning",
|
||||
"note": "MySQL Slave SQL thread not running",
|
||||
"severity": 1,
|
||||
"disabled": 0,
|
||||
@@ -120,7 +120,7 @@
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "Mysql has just restarted. Please be advised - categraf",
|
||||
"name": "Mysql刚刚有重启,请注意",
|
||||
"note": "MySQL has just been restarted, less than one minute ago",
|
||||
"severity": 3,
|
||||
"disabled": 0,
|
||||
@@ -150,7 +150,7 @@
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "Mysql instance has crashed - categraf",
|
||||
"name": "Mysql实例挂了",
|
||||
"note": "",
|
||||
"severity": 1,
|
||||
"disabled": 0,
|
||||
@@ -180,7 +180,7 @@
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "Mysql has opened a large number of file handles. Please be aware - categraf",
|
||||
"name": "Mysql打开了很多文件句柄,请注意",
|
||||
"note": "More than 80% of MySQL files open",
|
||||
"severity": 2,
|
||||
"disabled": 0,
|
||||
@@ -210,7 +210,7 @@
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "A slow query has occurred in Mysql within the last minute - categraf",
|
||||
"name": "Mysql最近一分钟有慢查询出现",
|
||||
"note": "MySQL server mysql has some new slow query",
|
||||
"severity": 2,
|
||||
"disabled": 0,
|
||||
@@ -240,7 +240,7 @@
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "More than 60% of the connections in Mysql are in a running state - categraf",
|
||||
"name": "Mysql有超过60%的连接是running状态",
|
||||
"note": "",
|
||||
"severity": 2,
|
||||
"disabled": 0,
|
||||
@@ -270,7 +270,7 @@
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "The number of connections in Mysql has exceeded 80% - categraf",
|
||||
"name": "Mysql连接数已超过80%",
|
||||
"note": "More than 80% of MySQL connections are in use",
|
||||
"severity": 2,
|
||||
"disabled": 0,
|
||||
@@ -1,6 +1,6 @@
|
||||
[
|
||||
{
|
||||
"name": "MysqlInnodbLogWaits - exporter",
|
||||
"name": "MysqlInnodbLogWaits",
|
||||
"note": "MySQL innodb log writes stalling",
|
||||
"severity": 2,
|
||||
"disabled": 0,
|
||||
@@ -30,7 +30,7 @@
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "MysqlSlaveIoThreadNotRunning - exporter",
|
||||
"name": "MysqlSlaveIoThreadNotRunning",
|
||||
"note": "MySQL Slave IO thread not running",
|
||||
"severity": 1,
|
||||
"disabled": 0,
|
||||
@@ -60,7 +60,7 @@
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "MysqlSlaveReplicationLag - exporter",
|
||||
"name": "MysqlSlaveReplicationLag",
|
||||
"note": "",
|
||||
"severity": 1,
|
||||
"disabled": 0,
|
||||
@@ -90,7 +90,7 @@
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "MysqlSlaveSqlThreadNotRunning - exporter",
|
||||
"name": "MysqlSlaveSqlThreadNotRunning",
|
||||
"note": "MySQL Slave SQL thread not running",
|
||||
"severity": 1,
|
||||
"disabled": 0,
|
||||
@@ -120,7 +120,7 @@
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "Mysql has just restarted. Please be advised - exporter",
|
||||
"name": "Mysql刚刚有重启,请注意",
|
||||
"note": "MySQL has just been restarted, less than one minute ago",
|
||||
"severity": 3,
|
||||
"disabled": 0,
|
||||
@@ -150,7 +150,7 @@
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "The MySQL instance is down",
|
||||
"name": "Mysql实例挂了",
|
||||
"note": "",
|
||||
"severity": 1,
|
||||
"disabled": 0,
|
||||
@@ -180,7 +180,7 @@
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "More than 80% of MySQL files open",
|
||||
"name": "Mysql打开了很多文件句柄,请注意",
|
||||
"note": "More than 80% of MySQL files open",
|
||||
"severity": 2,
|
||||
"disabled": 0,
|
||||
@@ -210,8 +210,8 @@
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "MySQL server mysql has some new slow query",
|
||||
"note": "",
|
||||
"name": "Mysql最近一分钟有慢查询出现",
|
||||
"note": "MySQL server mysql has some new slow query",
|
||||
"severity": 2,
|
||||
"disabled": 0,
|
||||
"prom_for_duration": 120,
|
||||
@@ -240,7 +240,7 @@
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "More than 60% of the connections in MySQL are in a running state",
|
||||
"name": "Mysql有超过60%的连接是running状态",
|
||||
"note": "",
|
||||
"severity": 2,
|
||||
"disabled": 0,
|
||||
@@ -270,7 +270,7 @@
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "MySQL connection count has exceeded 80%",
|
||||
"name": "Mysql连接数已超过80%",
|
||||
"note": "More than 80% of MySQL connections are in use",
|
||||
"severity": 2,
|
||||
"disabled": 0,
|
||||
@@ -1,6 +1,6 @@
|
||||
[
|
||||
{
|
||||
"name": "Network address probe failed",
|
||||
"name": "网络地址探活失败",
|
||||
"note": "",
|
||||
"severity": 2,
|
||||
"disabled": 0,
|
||||
@@ -1,6 +1,6 @@
|
||||
[
|
||||
{
|
||||
"name": "NTP time offset is too large",
|
||||
"name": "NTP时间偏移太大",
|
||||
"note": "",
|
||||
"severity": 2,
|
||||
"disabled": 0,
|
||||
@@ -1,6 +1,6 @@
|
||||
[
|
||||
{
|
||||
"name": "PING address detection failed",
|
||||
"name": "PING地址探测失败",
|
||||
"note": "",
|
||||
"severity": 2,
|
||||
"disabled": 0,
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user