mirror of
https://github.com/ccfos/nightingale.git
synced 2026-03-03 06:29:16 +00:00
Compare commits
313 Commits
v6.7.0
...
webhook-ba
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
b057940134 | ||
|
|
4500c4aba8 | ||
|
|
726e994d58 | ||
|
|
71c4c24f00 | ||
|
|
d14a834149 | ||
|
|
5b2513b7a1 | ||
|
|
7cec16eaf0 | ||
|
|
5f895552a9 | ||
|
|
1e3f62b92f | ||
|
|
17dbb3ec77 | ||
|
|
00822c8404 | ||
|
|
55de30d6c7 | ||
|
|
8b7dbed27e | ||
|
|
71b8fa27d0 | ||
|
|
31174d719e | ||
|
|
5b5bb22ffd | ||
|
|
e98fe9ea2e | ||
|
|
32e9ded393 | ||
|
|
8293ca20be | ||
|
|
6c4ddfc349 | ||
|
|
cd0c478515 | ||
|
|
2cd25ac0e5 | ||
|
|
bb99ba3d1c | ||
|
|
64405dca5d | ||
|
|
69ea9ca8f8 | ||
|
|
41d0f2fcda | ||
|
|
93df1c0fbc | ||
|
|
86e952788d | ||
|
|
e890f2616f | ||
|
|
6c2ee584e5 | ||
|
|
5f07fc3010 | ||
|
|
20fa310ba9 | ||
|
|
0e3b08be9a | ||
|
|
b7d971d7c8 | ||
|
|
4373ae7f0b | ||
|
|
053325a691 | ||
|
|
c54267aa3a | ||
|
|
74dc430886 | ||
|
|
dc79ee4687 | ||
|
|
e154c946e6 | ||
|
|
08bfc0b388 | ||
|
|
5338270aef | ||
|
|
00550ba2c7 | ||
|
|
c58bec23bf | ||
|
|
a5b77be0ab | ||
|
|
f529681c35 | ||
|
|
e3042dd6d5 | ||
|
|
1ebab4fcb0 | ||
|
|
ccf38b6da7 | ||
|
|
9a0a687727 | ||
|
|
d00510978d | ||
|
|
9b478d98fd | ||
|
|
4845ca5bdb | ||
|
|
a844d2b091 | ||
|
|
69ca7f3b93 | ||
|
|
b9c6c33ceb | ||
|
|
5099d3c040 | ||
|
|
e34f8ac701 | ||
|
|
ab82a6f910 | ||
|
|
57f8bd3612 | ||
|
|
8ab96e2cea | ||
|
|
0a2e23c285 | ||
|
|
5c1d4077e2 | ||
|
|
2a46d9f98e | ||
|
|
ce5c213593 | ||
|
|
771a8d121b | ||
|
|
af88b0e283 | ||
|
|
8e5d7f2a5b | ||
|
|
1a22211a5d | ||
|
|
0a0049c6fb | ||
|
|
1b56ebe62e | ||
|
|
a5e92b95b0 | ||
|
|
8e9d06d43e | ||
|
|
ab289de785 | ||
|
|
8667b7743a | ||
|
|
45b9436f69 | ||
|
|
3d03bcf329 | ||
|
|
1851601889 | ||
|
|
fa9745decf | ||
|
|
6f007deeaa | ||
|
|
8fad705065 | ||
|
|
675076779e | ||
|
|
b9e78eee22 | ||
|
|
2219584abb | ||
|
|
ebe31fd6bc | ||
|
|
95ca69e170 | ||
|
|
ef1b5d8d16 | ||
|
|
5b375cf037 | ||
|
|
108b729cae | ||
|
|
a385972fa9 | ||
|
|
98a0a9d94c | ||
|
|
c79eec648d | ||
|
|
603eadd1f2 | ||
|
|
61a2f552be | ||
|
|
e3453328a7 | ||
|
|
4424a6b89c | ||
|
|
9fdb2f0753 | ||
|
|
3d358e367f | ||
|
|
5264874628 | ||
|
|
e0a3ff248c | ||
|
|
1fecf78ede | ||
|
|
839b45904b | ||
|
|
cd0f43f808 | ||
|
|
8047f3deee | ||
|
|
f209ed5bee | ||
|
|
8c61d8c14d | ||
|
|
f7372b1c3b | ||
|
|
a39ced86aa | ||
|
|
f365b7db2a | ||
|
|
7eaec13b6c | ||
|
|
2e824a165e | ||
|
|
f2909b6029 | ||
|
|
a543a5ad09 | ||
|
|
2ee34bf1f9 | ||
|
|
4623622dd0 | ||
|
|
4f259137e5 | ||
|
|
75f1e8a80b | ||
|
|
3648d8dc45 | ||
|
|
8c90d7ab33 | ||
|
|
c6ac3fb959 | ||
|
|
ce854b3166 | ||
|
|
a2be5230fa | ||
|
|
21276a77b6 | ||
|
|
cffd012ec6 | ||
|
|
a9ebdad1cd | ||
|
|
785c577728 | ||
|
|
0e2a66570e | ||
|
|
76583a6227 | ||
|
|
48e0e1a9f8 | ||
|
|
17bb7fa468 | ||
|
|
fc2638680a | ||
|
|
e01a899ae1 | ||
|
|
07c1ef6bd4 | ||
|
|
bfa7059098 | ||
|
|
096a2d3675 | ||
|
|
2232733922 | ||
|
|
b15f638688 | ||
|
|
4f818e3642 | ||
|
|
638c62da2f | ||
|
|
e1a9c995c2 | ||
|
|
1898675075 | ||
|
|
ce7f0272d8 | ||
|
|
93159f07fd | ||
|
|
7d410baa2d | ||
|
|
20b30c3e2c | ||
|
|
8805bf6598 | ||
|
|
fe6a64dae8 | ||
|
|
2c564a2c58 | ||
|
|
ae3c13224d | ||
|
|
9a4015f13f | ||
|
|
274ca09551 | ||
|
|
3d9b4fc14e | ||
|
|
07436a5e0d | ||
|
|
f7b2f1acb9 | ||
|
|
4f4287030a | ||
|
|
e25e712c48 | ||
|
|
66951d7e77 | ||
|
|
f5ff27cd18 | ||
|
|
9e3f6e6285 | ||
|
|
48e3df2cb4 | ||
|
|
ac5d69dba4 | ||
|
|
597351c424 | ||
|
|
1f6b2e341a | ||
|
|
035752ace2 | ||
|
|
60a1437207 | ||
|
|
e31414bc8c | ||
|
|
785a294845 | ||
|
|
98933eee34 | ||
|
|
20905810d7 | ||
|
|
c1bde83639 | ||
|
|
782a0e9616 | ||
|
|
6a3720bc8b | ||
|
|
de252359d6 | ||
|
|
deb313ca3d | ||
|
|
d119de56be | ||
|
|
f05417fa23 | ||
|
|
9ab2eb591f | ||
|
|
3f476d770f | ||
|
|
ced6759686 | ||
|
|
eba3014c59 | ||
|
|
3aeb4e16e9 | ||
|
|
3b62722251 | ||
|
|
fb1cc4868e | ||
|
|
4a0dcf0dbf | ||
|
|
4f913f146e | ||
|
|
533560f432 | ||
|
|
cf7b479a1b | ||
|
|
2e4c29a0de | ||
|
|
6f0ceb94c6 | ||
|
|
800d7ba04b | ||
|
|
fb6a6d2b93 | ||
|
|
cf2b19ae90 | ||
|
|
fb1cc93613 | ||
|
|
c2bba796c2 | ||
|
|
a02bf83842 | ||
|
|
cd9f129e2d | ||
|
|
e85c80bdcf | ||
|
|
7e83e0c482 | ||
|
|
92ac3125f3 | ||
|
|
a61feca369 | ||
|
|
8b0b811919 | ||
|
|
8742526c7f | ||
|
|
ee757cfd92 | ||
|
|
b12cfea379 | ||
|
|
45365e3e03 | ||
|
|
1b676eefd2 | ||
|
|
0092dc44fd | ||
|
|
4941b376f3 | ||
|
|
e46813cd17 | ||
|
|
58ebd224c2 | ||
|
|
95ece6e16f | ||
|
|
b82cbd06fa | ||
|
|
16210892da | ||
|
|
a452d63a56 | ||
|
|
51c7abedd3 | ||
|
|
6d0a2420a8 | ||
|
|
9cf687b73d | ||
|
|
49c9e41df5 | ||
|
|
2ec2e64213 | ||
|
|
867a61c8dc | ||
|
|
12263d1453 | ||
|
|
c0cacb2e64 | ||
|
|
0637b343b1 | ||
|
|
2473e144ef | ||
|
|
00a37d6de7 | ||
|
|
50c664e6bf | ||
|
|
22b7d20455 | ||
|
|
141262e5a5 | ||
|
|
4717abfa77 | ||
|
|
1bf1a01c32 | ||
|
|
05b714de38 | ||
|
|
11377d4e5f | ||
|
|
46ea46fdfe | ||
|
|
d4f0483238 | ||
|
|
a79610f5ea | ||
|
|
d9fb71b9a0 | ||
|
|
37057fa0cf | ||
|
|
b234128a45 | ||
|
|
67a2d57966 | ||
|
|
3a1516877e | ||
|
|
53f31d175f | ||
|
|
25323e9ce2 | ||
|
|
3136596add | ||
|
|
e7200b0b23 | ||
|
|
dfb19c1dde | ||
|
|
2363b35263 | ||
|
|
99367aaf88 | ||
|
|
ad17ef328f | ||
|
|
5f149f6a38 | ||
|
|
73ed57301b | ||
|
|
138b929db4 | ||
|
|
4585e94cd1 | ||
|
|
69ad6344f5 | ||
|
|
a55665bd14 | ||
|
|
b5e2053b0c | ||
|
|
94265eab9f | ||
|
|
eb79d473b0 | ||
|
|
c4e0a9962f | ||
|
|
ee613616ca | ||
|
|
6bbf00c371 | ||
|
|
f9f45d315d | ||
|
|
84f215b7f1 | ||
|
|
016220bb2a | ||
|
|
ba1eb73ace | ||
|
|
b304091fb3 | ||
|
|
840eaea667 | ||
|
|
956cc9fd68 | ||
|
|
e78e212f83 | ||
|
|
cdc2d4c039 | ||
|
|
cd4b0c4f94 | ||
|
|
53ada6cc40 | ||
|
|
2e6cb0f21d | ||
|
|
4287591a6b | ||
|
|
2fe0c21e36 | ||
|
|
bfa043aeba | ||
|
|
f4336ca5e9 | ||
|
|
8125cb7090 | ||
|
|
0ae1e7fbc4 | ||
|
|
88f8111a56 | ||
|
|
dbfaa519ba | ||
|
|
402e803146 | ||
|
|
5eae14a3c9 | ||
|
|
e0bfc45f5a | ||
|
|
7d8fb7aab7 | ||
|
|
846ef00aed | ||
|
|
f2f730e88c | ||
|
|
311a9405e4 | ||
|
|
6c53981883 | ||
|
|
f23f960368 | ||
|
|
f593c6d310 | ||
|
|
3fb5ea96bc | ||
|
|
30c697a3df | ||
|
|
1d50d05329 | ||
|
|
840221d9ec | ||
|
|
e52a76921f | ||
|
|
80fdb37129 | ||
|
|
bbef4aa8d9 | ||
|
|
35eba3b1e1 | ||
|
|
28a1230d26 | ||
|
|
86dd6a9608 | ||
|
|
f7a40b7324 | ||
|
|
e2e8eb837d | ||
|
|
020f7ae07e | ||
|
|
8311667930 | ||
|
|
741ab94150 | ||
|
|
5d6ca183be | ||
|
|
0f937ad6d0 | ||
|
|
ab38f220f7 | ||
|
|
a8c0b3bfd5 | ||
|
|
5d1629bf0b | ||
|
|
da7fa40c70 | ||
|
|
f1f0ee193f | ||
|
|
deccccead0 |
67
.github/ISSUE_TEMPLATE/bug_report.yml
vendored
67
.github/ISSUE_TEMPLATE/bug_report.yml
vendored
@@ -1,67 +0,0 @@
|
||||
name: Bug Report
|
||||
description: Report a bug encountered while running Nightingale
|
||||
labels: ["kind/bug"]
|
||||
|
||||
body:
|
||||
- type: markdown
|
||||
attributes:
|
||||
value: |
|
||||
Thanks for taking time to fill out this bug report!
|
||||
The more detailed the form is filled in, the easier the problem will be solved.
|
||||
- type: textarea
|
||||
id: config
|
||||
attributes:
|
||||
label: Relevant server.conf | webapi.conf
|
||||
description: Place config in the toml code section. This will be automatically formatted into toml, so no need for backticks.
|
||||
render: toml
|
||||
validations:
|
||||
required: true
|
||||
- type: textarea
|
||||
id: logs
|
||||
attributes:
|
||||
label: Relevant logs
|
||||
description: categraf | telegraf | server | webapi | prometheus | chrome request/response ...
|
||||
render: text
|
||||
validations:
|
||||
required: true
|
||||
- type: input
|
||||
id: system-info
|
||||
attributes:
|
||||
label: System info
|
||||
description: Include nightingale version, operating system, and other relevant details
|
||||
placeholder: ex. n9e 5.9.2, n9e-fe 5.5.0, categraf 0.1.0, Ubuntu 20.04, Docker 20.10.8
|
||||
validations:
|
||||
required: true
|
||||
- type: textarea
|
||||
id: reproduce
|
||||
attributes:
|
||||
label: Steps to reproduce
|
||||
description: Describe the steps to reproduce the bug.
|
||||
value: |
|
||||
1.
|
||||
2.
|
||||
3.
|
||||
...
|
||||
validations:
|
||||
required: true
|
||||
- type: textarea
|
||||
id: expected-behavior
|
||||
attributes:
|
||||
label: Expected behavior
|
||||
description: Describe what you expected to happen when you performed the above steps.
|
||||
validations:
|
||||
required: true
|
||||
- type: textarea
|
||||
id: actual-behavior
|
||||
attributes:
|
||||
label: Actual behavior
|
||||
description: Describe what actually happened when you performed the above steps.
|
||||
validations:
|
||||
required: true
|
||||
- type: textarea
|
||||
id: additional-info
|
||||
attributes:
|
||||
label: Additional info
|
||||
description: Include gist of relevant config, logs, etc.
|
||||
validations:
|
||||
required: false
|
||||
33
.github/ISSUE_TEMPLATE/question.yml
vendored
Normal file
33
.github/ISSUE_TEMPLATE/question.yml
vendored
Normal file
@@ -0,0 +1,33 @@
|
||||
name: Bug Report & Usage Question
|
||||
description: Reporting a bug or asking a question about how to use Nightingale
|
||||
labels: []
|
||||
|
||||
body:
|
||||
- type: markdown
|
||||
attributes:
|
||||
value: |
|
||||
The more detailed the form is filled in, the easier the problem will be solved.
|
||||
提供的信息越详细,问题解决的可能性就越大。另外, 提问之前请先搜索历史 issue (包括 close 的), 以免重复提问。
|
||||
- type: textarea
|
||||
id: question
|
||||
attributes:
|
||||
label: Question and Steps to reproduce
|
||||
description: Describe your question and steps to reproduce the bug. 描述问题以及复现步骤
|
||||
validations:
|
||||
required: true
|
||||
- type: textarea
|
||||
id: logs
|
||||
attributes:
|
||||
label: Relevant logs and configurations
|
||||
description: Relevant logs and configurations. 报错日志([查看方法](https://flashcat.cloud/docs/content/flashcat-monitor/nightingale-v6/faq/how-to-check-logs/))以及各个相关组件的配置信息
|
||||
render: text
|
||||
validations:
|
||||
required: true
|
||||
- type: textarea
|
||||
id: system-info
|
||||
attributes:
|
||||
label: Version
|
||||
description: Include nightingale version, operating system, and other relevant details. 请告知夜莺的版本、操作系统的版本、CPU架构等信息
|
||||
validations:
|
||||
required: true
|
||||
|
||||
3
.github/workflows/n9e.yml
vendored
3
.github/workflows/n9e.yml
vendored
@@ -26,7 +26,8 @@ jobs:
|
||||
- name: Run GoReleaser
|
||||
uses: goreleaser/goreleaser-action@v3
|
||||
with:
|
||||
version: latest
|
||||
distribution: goreleaser
|
||||
version: '~> v1'
|
||||
args: release --rm-dist
|
||||
env:
|
||||
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
|
||||
|
||||
2
.gitignore
vendored
2
.gitignore
vendored
@@ -46,6 +46,8 @@ _test
|
||||
/docker/n9e
|
||||
/docker/compose-bridge/mysqldata
|
||||
/docker/compose-host-network/mysqldata
|
||||
/docker/compose-host-network-metric-log/mysqldata
|
||||
/docker/compose-host-network-metric-log/n9e-logs
|
||||
/docker/compose-postgres/pgdata
|
||||
/etc.local*
|
||||
/front/statik/statik.go
|
||||
|
||||
120
README.md
120
README.md
@@ -1,104 +1,100 @@
|
||||
<p align="center">
|
||||
<a href="https://github.com/ccfos/nightingale">
|
||||
<img src="doc/img/Nightingale_L_V.png" alt="nightingale - cloud native monitoring" width="240" /></a>
|
||||
<img src="doc/img/Nightingale_L_V.png" alt="nightingale - cloud native monitoring" width="100" /></a>
|
||||
</p>
|
||||
<p align="center">
|
||||
<b>开源告警管理专家 一体化的可观测平台</b>
|
||||
</p>
|
||||
|
||||
<p align="center">
|
||||
<img alt="GitHub latest release" src="https://img.shields.io/github/v/release/ccfos/nightingale"/>
|
||||
<a href="https://n9e.github.io">
|
||||
<a href="https://flashcat.cloud/docs/">
|
||||
<img alt="Docs" src="https://img.shields.io/badge/docs-get%20started-brightgreen"/></a>
|
||||
<a href="https://hub.docker.com/u/flashcatcloud">
|
||||
<img alt="Docker pulls" src="https://img.shields.io/docker/pulls/flashcatcloud/nightingale"/></a>
|
||||
<img alt="GitHub Repo stars" src="https://img.shields.io/github/stars/ccfos/nightingale">
|
||||
<img alt="GitHub Repo issues" src="https://img.shields.io/github/issues/ccfos/nightingale">
|
||||
<img alt="GitHub Repo issues closed" src="https://img.shields.io/github/issues-closed/ccfos/nightingale">
|
||||
<img alt="GitHub forks" src="https://img.shields.io/github/forks/ccfos/nightingale">
|
||||
<a href="https://github.com/ccfos/nightingale/graphs/contributors">
|
||||
<img alt="GitHub contributors" src="https://img.shields.io/github/contributors-anon/ccfos/nightingale"/></a>
|
||||
<img alt="GitHub Repo stars" src="https://img.shields.io/github/stars/ccfos/nightingale">
|
||||
<img alt="GitHub forks" src="https://img.shields.io/github/forks/ccfos/nightingale">
|
||||
<br/><img alt="GitHub Repo issues" src="https://img.shields.io/github/issues/ccfos/nightingale">
|
||||
<img alt="GitHub Repo issues closed" src="https://img.shields.io/github/issues-closed/ccfos/nightingale">
|
||||
<img alt="GitHub latest release" src="https://img.shields.io/github/v/release/ccfos/nightingale"/>
|
||||
<img alt="License" src="https://img.shields.io/badge/license-Apache--2.0-blue"/>
|
||||
<a href="https://n9e-talk.slack.com/">
|
||||
<img alt="GitHub contributors" src="https://img.shields.io/badge/join%20slack-%23n9e-brightgreen.svg"/></a>
|
||||
<img alt="License" src="https://img.shields.io/badge/license-Apache--2.0-blue"/>
|
||||
</p>
|
||||
<p align="center">
|
||||
An open-source cloud-native monitoring system that is <b>all-in-one</b> <br/>
|
||||
<b>Out-of-the-box</b>, it integrates data collection, visualization, and monitoring alert <br/>
|
||||
We recommend upgrading your <b>Prometheus + AlertManager + Grafana</b> combination to Nightingale!
|
||||
</p>
|
||||
|
||||
[English](./README.md) | [中文](./README_zh.md)
|
||||
|
||||
|
||||
## Highlighted Features
|
||||
[English](./README_en.md) | [中文](./README.md)
|
||||
|
||||
- **Out-of-the-box**
|
||||
- Supports multiple deployment methods such as **Docker, Helm Chart, and cloud services**, integrates data collection, monitoring, and alerting into one system, and comes with various monitoring dashboards, quick views, and alert rule templates. **It greatly reduces the construction cost, learning cost, and usage cost of cloud-native monitoring systems**.
|
||||
- **Professional Alerting**
|
||||
- Provides visual alert configuration and management, supports various alert rules, offers the ability to configure silence and subscription rules, supports multiple alert delivery channels, and has features such as alert self-healing and event management.
|
||||
- **Cloud-Native**
|
||||
- Quickly builds an enterprise-level cloud-native monitoring system through a turnkey approach, supports multiple collectors such as [Categraf](https://github.com/flashcatcloud/categraf), Telegraf, and Grafana-agent, supports multiple data sources such as Prometheus, VictoriaMetrics, M3DB, ElasticSearch, and Jaeger, and is compatible with importing Grafana dashboards. **It seamlessly integrates with the cloud-native ecosystem**.
|
||||
- **High Performance and High Availability**
|
||||
- Due to the multi-data-source management engine of Nightingale and its excellent architecture design, and utilizing a high-performance time-series database, it can handle data collection, storage, and alert analysis scenarios with billions of time-series data, saving a lot of costs.
|
||||
- Nightingale components can be horizontally scaled with no single point of failure. It has been deployed in thousands of enterprises and tested in harsh production practices. Many leading Internet companies have used Nightingale for cluster machines with hundreds of nodes, processing billions of time-series data.
|
||||
- **Flexible Extension and Centralized Management**
|
||||
- Nightingale can be deployed on a 1-core 1G cloud host, deployed in a cluster of hundreds of machines, or run in Kubernetes. Time-series databases, alert engines, and other components can also be decentralized to various data centers and regions, balancing edge deployment with centralized management. **It solves the problem of data fragmentation and lack of unified views**.
|
||||
## 夜莺 Nightingale 是什么
|
||||
|
||||
夜莺监控是一款开源云原生观测分析工具,采用 All-in-One 的设计理念,集数据采集、可视化、监控告警、数据分析于一体,与云原生生态紧密集成,提供开箱即用的企业级监控分析和告警能力。夜莺于 2020 年 3 月 20 日,在 github 上发布 v1 版本,已累计迭代 100 多个版本。
|
||||
|
||||
夜莺最初由滴滴开发和开源,并于 2022 年 5 月 11 日,捐赠予中国计算机学会开源发展委员会(CCF ODC),为 CCF ODC 成立后接受捐赠的第一个开源项目。夜莺的核心研发团队,也是 Open-Falcon 项目原核心研发人员,从 2014 年(Open-Falcon 是 2014 年开源)算起来,也有 10 年了,只为把监控这个事情做好。
|
||||
|
||||
|
||||
#### If you are using Prometheus and have one or more of the following requirement scenarios, it is recommended that you upgrade to Nightingale:
|
||||
## 快速开始
|
||||
- 👉[文档中心](https://flashcat.cloud/docs/) | [下载中心](https://flashcat.cloud/download/nightingale/)
|
||||
- ❤️[报告 Bug](https://github.com/ccfos/nightingale/issues/new?assignees=&labels=&projects=&template=question.yml)
|
||||
- ℹ️为了提供更快速的访问体验,上述文档和下载站点托管于 [FlashcatCloud](https://flashcat.cloud)
|
||||
|
||||
- Multiple systems such as Prometheus, Alertmanager, Grafana, etc. are fragmented and lack a unified view and cannot be used out of the box;
|
||||
- The way to manage Prometheus and Alertmanager by modifying configuration files has a big learning curve and is difficult to collaborate;
|
||||
- Too much data to scale-up your Prometheus cluster;
|
||||
- Multiple Prometheus clusters running in production environments, which faced high management and usage costs;
|
||||
## 功能特点
|
||||
|
||||
#### If you are using Zabbix and have the following scenarios, it is recommended that you upgrade to Nightingale:
|
||||
|
||||
- Monitoring too much data and wanting a better scalable solution;
|
||||
- A high learning curve and a desire for better efficiency of collaborative use in a multi-person, multi-team model;
|
||||
- Microservice and cloud-native architectures with variable monitoring data lifecycles and high monitoring data dimension bases, which are not easily adaptable to the Zabbix data model;
|
||||
- 对接多种时序库:支持对接 Prometheus、VictoriaMetrics、Thanos、Mimir、M3DB、TDengine 等多种时序库,实现统一告警管理。
|
||||
- 专业告警能力:内置支持多种告警规则,可以扩展支持常见通知媒介,支持告警屏蔽/抑制/订阅/自愈、告警事件管理。
|
||||
- 高性能可视化引擎:支持多种图表样式,内置众多 Dashboard 模版,也可导入 Grafana 模版,开箱即用,开源协议商业友好。
|
||||
- 支持常见采集器:支持 [Categraf](https://flashcat.cloud/product/categraf)、Telegraf、Grafana-agent、Datadog-agent、各种 Exporter 作为采集器,没有什么数据是不能监控的。
|
||||
- 👀无缝搭配 [Flashduty](https://flashcat.cloud/product/flashcat-duty/):实现告警聚合收敛、认领、升级、排班、IM集成,确保告警处理不遗漏,减少打扰,高效协同。
|
||||
|
||||
|
||||
#### If you are using [open-falcon](https://github.com/open-falcon/falcon-plus), we recommend you to upgrade to Nightingale:
|
||||
- For more information about open-falcon and Nightingale, please refer to read [Ten features and trends of cloud-native monitoring](https://mp.weixin.qq.com/s?__biz=MzkzNjI5OTM5Nw==&mid=2247483738&idx=1&sn=e8bdbb974a2cd003c1abcc2b5405dd18&chksm=c2a19fb0f5d616a63185cd79277a79a6b80118ef2185890d0683d2bb20451bd9303c78d083c5#rd)。
|
||||
## 截图演示
|
||||
|
||||
## Getting Started
|
||||
即时查询,类似 Prometheus 内置的查询分析页面,做 ad-hoc 查询,夜莺做了一些 UI 优化,同时提供了一些内置 promql 指标,让不太了解 promql 的用户也可以快速查询。
|
||||
|
||||
[https://n9e.github.io/](https://n9e.github.io/)
|
||||

|
||||
|
||||
## Screenshots
|
||||
当然,也可以直接通过指标视图查看,有了指标视图,即时查询基本可以不用了,或者只有高端玩家使用即时查询,普通用户直接通过指标视图查询即可。
|
||||
|
||||
https://user-images.githubusercontent.com/792850/216888712-2565fcea-9df5-47bd-a49e-d60af9bd76e8.mp4
|
||||

|
||||
|
||||
## Architecture
|
||||
夜莺内置了常用仪表盘,可以直接导入使用。也可以导入 Grafana 仪表盘,不过只能兼容 Grafana 基本图表,如果已经习惯了 Grafana 建议继续使用 Grafana 看图,把夜莺作为一个告警引擎使用。
|
||||
|
||||
<img src="doc/img/arch-product.png" width="600">
|
||||

|
||||
|
||||
Nightingale monitoring can receive monitoring data reported by various collectors (such as [Categraf](https://github.com/flashcatcloud/categraf) , telegraf, grafana-agent, Prometheus, etc.) and write them to various popular time-series databases (such as Prometheus, M3DB, VictoriaMetrics, Thanos, TDEngine, etc.). It provides configuration capabilities for alert rules, silence rules, and subscription rules, as well as the ability to view monitoring data. It also provides automatic alarm self-healing mechanisms (such as automatically calling back to a webhook address or executing a script after an alarm is triggered), and the ability to store and manage historical alarm events and view them in groups.
|
||||
除了内置的仪表盘,也内置了很多告警规则,开箱即用。
|
||||
|
||||
If the performance of a standalone time-series database (such as Prometheus) has bottlenecks or poor disaster recovery, we recommend using [VictoriaMetrics](https://github.com/VictoriaMetrics/VictoriaMetrics). The VictoriaMetrics architecture is relatively simple, has excellent performance, and is easy to deploy and maintain. The architecture diagram is as shown above. For more detailed documentation on VictoriaMetrics, please refer to its [official website](https://victoriametrics.com/).
|
||||
|
||||
**We welcome you to participate in the Nightingale open-source project and community in various ways, including but not limited to**:
|
||||
- Adding and improving documentation => [n9e.github.io](https://n9e.github.io/)
|
||||
- Sharing your best practices and experience in using Nightingale monitoring => [Article sharing]((https://n9e.github.io/docs/prologue/share/))
|
||||
- Submitting product suggestions => [github issue](https://github.com/ccfos/nightingale/issues/new?assignees=&labels=kind%2Ffeature&template=enhancement.md)
|
||||
- Submitting code to make Nightingale monitoring faster, more stable, and easier to use => [github pull request](https://github.com/didi/nightingale/pulls)
|
||||

|
||||
|
||||
|
||||
**Respecting, recognizing, and recording the work of every contributor** is the first guiding principle of the Nightingale open-source community. We advocate effective questioning, which not only respects the developer's time but also contributes to the accumulation of knowledge in the entire community
|
||||
- Before asking a question, please first refer to the [FAQ](https://www.gitlink.org.cn/ccfos/nightingale/wiki/faq)
|
||||
- We use [GitHub Discussions](https://github.com/ccfos/nightingale/discussions) as the communication forum. You can search and ask questions here.
|
||||
- We also recommend that you join ours [Slack channel](https://n9e-talk.slack.com/) to exchange experiences with other Nightingale users.
|
||||
|
||||
## 产品架构
|
||||
|
||||
社区使用夜莺最多的场景就是使用夜莺做告警引擎,对接多套时序库,统一告警规则管理。绘图仍然使用 Grafana 居多。作为一个告警引擎,夜莺的产品架构如下:
|
||||
|
||||

|
||||
|
||||
对于个别边缘机房,如果和中心夜莺服务端网络链路不好,希望提升告警可用性,我们也提供边缘机房告警引擎下沉部署模式,这个模式下,即便网络割裂,告警功能也不受影响。
|
||||
|
||||

|
||||
|
||||
|
||||
## Who is using Nightingale
|
||||
You can register your usage and share your experience by posting on **[Who is Using Nightingale](https://github.com/ccfos/nightingale/issues/897)**.
|
||||
## 交流渠道
|
||||
- 报告Bug,优先推荐提交[夜莺GitHub Issue](https://github.com/ccfos/nightingale/issues/new?assignees=&labels=kind%2Fbug&projects=&template=bug_report.yml)
|
||||
- 推荐完整浏览[夜莺文档站点](https://flashcat.cloud/docs/content/flashcat-monitor/nightingale-v7/introduction/),了解更多信息
|
||||
- 推荐搜索关注夜莺公众号,第一时间获取社区动态:`夜莺监控Nightingale`
|
||||
- 日常问题交流推荐加入[知识星球](https://download.flashcat.cloud/ulric/20240319095409.png),也可以加我微信 `picobyte`,备注:`夜莺加群-<公司>-<姓名>` 拉入微信群,不过研发人员主要是关注 github issue 和星球,微信群关注较少
|
||||
|
||||
## Stargazers over time
|
||||
[](https://starchart.cc/ccfos/nightingale)
|
||||
## 广受关注
|
||||
[](https://star-history.com/#ccfos/nightingale&Date)
|
||||
|
||||
## Contributors
|
||||
|
||||
## 社区共建
|
||||
- ❇️请阅读浏览[夜莺开源项目和社区治理架构草案](./doc/community-governance.md),真诚欢迎每一位用户、开发者、公司以及组织,使用夜莺监控、积极反馈 Bug、提交功能需求、分享最佳实践,共建专业、活跃的夜莺开源社区。
|
||||
- 夜莺贡献者❤️
|
||||
<a href="https://github.com/ccfos/nightingale/graphs/contributors">
|
||||
<img src="https://contrib.rocks/image?repo=ccfos/nightingale" />
|
||||
</a>
|
||||
|
||||
## License
|
||||
[Apache License V2.0](https://github.com/didi/nightingale/blob/main/LICENSE)
|
||||
- [Apache License V2.0](https://github.com/didi/nightingale/blob/main/LICENSE)
|
||||
|
||||
104
README_en.md
Normal file
104
README_en.md
Normal file
@@ -0,0 +1,104 @@
|
||||
<p align="center">
|
||||
<a href="https://github.com/ccfos/nightingale">
|
||||
<img src="doc/img/Nightingale_L_V.png" alt="nightingale - cloud native monitoring" width="240" /></a>
|
||||
</p>
|
||||
|
||||
<p align="center">
|
||||
<img alt="GitHub latest release" src="https://img.shields.io/github/v/release/ccfos/nightingale"/>
|
||||
<a href="https://n9e.github.io">
|
||||
<img alt="Docs" src="https://img.shields.io/badge/docs-get%20started-brightgreen"/></a>
|
||||
<a href="https://hub.docker.com/u/flashcatcloud">
|
||||
<img alt="Docker pulls" src="https://img.shields.io/docker/pulls/flashcatcloud/nightingale"/></a>
|
||||
<img alt="GitHub Repo stars" src="https://img.shields.io/github/stars/ccfos/nightingale">
|
||||
<img alt="GitHub Repo issues" src="https://img.shields.io/github/issues/ccfos/nightingale">
|
||||
<img alt="GitHub Repo issues closed" src="https://img.shields.io/github/issues-closed/ccfos/nightingale">
|
||||
<img alt="GitHub forks" src="https://img.shields.io/github/forks/ccfos/nightingale">
|
||||
<a href="https://github.com/ccfos/nightingale/graphs/contributors">
|
||||
<img alt="GitHub contributors" src="https://img.shields.io/github/contributors-anon/ccfos/nightingale"/></a>
|
||||
<a href="https://n9e-talk.slack.com/">
|
||||
<img alt="GitHub contributors" src="https://img.shields.io/badge/join%20slack-%23n9e-brightgreen.svg"/></a>
|
||||
<img alt="License" src="https://img.shields.io/badge/license-Apache--2.0-blue"/>
|
||||
</p>
|
||||
<p align="center">
|
||||
An open-source cloud-native monitoring system that is <b>all-in-one</b> <br/>
|
||||
<b>Out-of-the-box</b>, it integrates data collection, visualization, and monitoring alert <br/>
|
||||
We recommend upgrading your <b>Prometheus + AlertManager + Grafana</b> combination to Nightingale!
|
||||
</p>
|
||||
|
||||
[English](./README_en.md) | [中文](./README.md)
|
||||
|
||||
|
||||
## Highlighted Features
|
||||
|
||||
- **Out-of-the-box**
|
||||
- Supports multiple deployment methods such as **Docker, Helm Chart, and cloud services**, integrates data collection, monitoring, and alerting into one system, and comes with various monitoring dashboards, quick views, and alert rule templates. **It greatly reduces the construction cost, learning cost, and usage cost of cloud-native monitoring systems**.
|
||||
- **Professional Alerting**
|
||||
- Provides visual alert configuration and management, supports various alert rules, offers the ability to configure silence and subscription rules, supports multiple alert delivery channels, and has features such as alert self-healing and event management.
|
||||
- **Cloud-Native**
|
||||
- Quickly builds an enterprise-level cloud-native monitoring system through a turnkey approach, supports multiple collectors such as [Categraf](https://github.com/flashcatcloud/categraf), Telegraf, and Grafana-agent, supports multiple data sources such as Prometheus, VictoriaMetrics, M3DB, ElasticSearch, and Jaeger, and is compatible with importing Grafana dashboards. **It seamlessly integrates with the cloud-native ecosystem**.
|
||||
- **High Performance and High Availability**
|
||||
- Due to the multi-data-source management engine of Nightingale and its excellent architecture design, and utilizing a high-performance time-series database, it can handle data collection, storage, and alert analysis scenarios with billions of time-series data, saving a lot of costs.
|
||||
- Nightingale components can be horizontally scaled with no single point of failure. It has been deployed in thousands of enterprises and tested in harsh production practices. Many leading Internet companies have used Nightingale for cluster machines with hundreds of nodes, processing billions of time-series data.
|
||||
- **Flexible Extension and Centralized Management**
|
||||
- Nightingale can be deployed on a 1-core 1G cloud host, deployed in a cluster of hundreds of machines, or run in Kubernetes. Time-series databases, alert engines, and other components can also be decentralized to various data centers and regions, balancing edge deployment with centralized management. **It solves the problem of data fragmentation and lack of unified views**.
|
||||
|
||||
|
||||
#### If you are using Prometheus and have one or more of the following requirement scenarios, it is recommended that you upgrade to Nightingale:
|
||||
|
||||
- Multiple systems such as Prometheus, Alertmanager, Grafana, etc. are fragmented and lack a unified view and cannot be used out of the box;
|
||||
- The way to manage Prometheus and Alertmanager by modifying configuration files has a big learning curve and is difficult to collaborate;
|
||||
- Too much data to scale-up your Prometheus cluster;
|
||||
- Multiple Prometheus clusters running in production environments, which faced high management and usage costs;
|
||||
|
||||
#### If you are using Zabbix and have the following scenarios, it is recommended that you upgrade to Nightingale:
|
||||
|
||||
- Monitoring too much data and wanting a better scalable solution;
|
||||
- A high learning curve and a desire for better efficiency of collaborative use in a multi-person, multi-team model;
|
||||
- Microservice and cloud-native architectures with variable monitoring data lifecycles and high monitoring data dimension bases, which are not easily adaptable to the Zabbix data model;
|
||||
|
||||
|
||||
#### If you are using [open-falcon](https://github.com/open-falcon/falcon-plus), we recommend you to upgrade to Nightingale:
|
||||
- For more information about open-falcon and Nightingale, please refer to read [Ten features and trends of cloud-native monitoring](https://mp.weixin.qq.com/s?__biz=MzkzNjI5OTM5Nw==&mid=2247483738&idx=1&sn=e8bdbb974a2cd003c1abcc2b5405dd18&chksm=c2a19fb0f5d616a63185cd79277a79a6b80118ef2185890d0683d2bb20451bd9303c78d083c5#rd)。
|
||||
|
||||
## Getting Started
|
||||
|
||||
[https://n9e.github.io/](https://n9e.github.io/)
|
||||
|
||||
## Screenshots
|
||||
|
||||
https://user-images.githubusercontent.com/792850/216888712-2565fcea-9df5-47bd-a49e-d60af9bd76e8.mp4
|
||||
|
||||
## Architecture
|
||||
|
||||
<img src="doc/img/arch-product.png" width="600">
|
||||
|
||||
Nightingale monitoring can receive monitoring data reported by various collectors (such as [Categraf](https://github.com/flashcatcloud/categraf) , telegraf, grafana-agent, Prometheus, etc.) and write them to various popular time-series databases (such as Prometheus, M3DB, VictoriaMetrics, Thanos, TDEngine, etc.). It provides configuration capabilities for alert rules, silence rules, and subscription rules, as well as the ability to view monitoring data. It also provides automatic alarm self-healing mechanisms (such as automatically calling back to a webhook address or executing a script after an alarm is triggered), and the ability to store and manage historical alarm events and view them in groups.
|
||||
|
||||
If the performance of a standalone time-series database (such as Prometheus) has bottlenecks or poor disaster recovery, we recommend using [VictoriaMetrics](https://github.com/VictoriaMetrics/VictoriaMetrics). The VictoriaMetrics architecture is relatively simple, has excellent performance, and is easy to deploy and maintain. The architecture diagram is as shown above. For more detailed documentation on VictoriaMetrics, please refer to its [official website](https://victoriametrics.com/).
|
||||
|
||||
**We welcome you to participate in the Nightingale open-source project and community in various ways, including but not limited to**:
|
||||
- Adding and improving documentation => [n9e.github.io](https://n9e.github.io/)
|
||||
- Sharing your best practices and experience in using Nightingale monitoring => [Article sharing]((https://n9e.github.io/docs/prologue/share/))
|
||||
- Submitting product suggestions => [github issue](https://github.com/ccfos/nightingale/issues/new?assignees=&labels=kind%2Ffeature&template=enhancement.md)
|
||||
- Submitting code to make Nightingale monitoring faster, more stable, and easier to use => [github pull request](https://github.com/didi/nightingale/pulls)
|
||||
|
||||
|
||||
**Respecting, recognizing, and recording the work of every contributor** is the first guiding principle of the Nightingale open-source community. We advocate effective questioning, which not only respects the developer's time but also contributes to the accumulation of knowledge in the entire community
|
||||
- Before asking a question, please first refer to the [FAQ](https://www.gitlink.org.cn/ccfos/nightingale/wiki/faq)
|
||||
- We use [GitHub Discussions](https://github.com/ccfos/nightingale/discussions) as the communication forum. You can search and ask questions here.
|
||||
- We also recommend that you join ours [Slack channel](https://n9e-talk.slack.com/) to exchange experiences with other Nightingale users.
|
||||
|
||||
|
||||
## Who is using Nightingale
|
||||
You can register your usage and share your experience by posting on **[Who is Using Nightingale](https://github.com/ccfos/nightingale/issues/897)**.
|
||||
|
||||
## Stargazers over time
|
||||
[](https://starchart.cc/ccfos/nightingale)
|
||||
|
||||
## Contributors
|
||||
<a href="https://github.com/ccfos/nightingale/graphs/contributors">
|
||||
<img src="https://contrib.rocks/image?repo=ccfos/nightingale" />
|
||||
</a>
|
||||
|
||||
## License
|
||||
[Apache License V2.0](https://github.com/didi/nightingale/blob/main/LICENSE)
|
||||
74
README_zh.md
74
README_zh.md
@@ -1,74 +0,0 @@
|
||||
<p align="center">
|
||||
<a href="https://github.com/ccfos/nightingale">
|
||||
<img src="doc/img/Nightingale_L_V.png" alt="nightingale - cloud native monitoring" width="240" /></a>
|
||||
</p>
|
||||
|
||||
<p align="center">
|
||||
<a href="https://flashcat.cloud/docs/">
|
||||
<img alt="Docs" src="https://img.shields.io/badge/docs-get%20started-brightgreen"/></a>
|
||||
<a href="https://hub.docker.com/u/flashcatcloud">
|
||||
<img alt="Docker pulls" src="https://img.shields.io/docker/pulls/flashcatcloud/nightingale"/></a>
|
||||
<a href="https://github.com/ccfos/nightingale/graphs/contributors">
|
||||
<img alt="GitHub contributors" src="https://img.shields.io/github/contributors-anon/ccfos/nightingale"/></a>
|
||||
<img alt="GitHub Repo stars" src="https://img.shields.io/github/stars/ccfos/nightingale">
|
||||
<br/><img alt="GitHub Repo issues" src="https://img.shields.io/github/issues/ccfos/nightingale">
|
||||
<img alt="GitHub Repo issues closed" src="https://img.shields.io/github/issues-closed/ccfos/nightingale">
|
||||
<img alt="GitHub forks" src="https://img.shields.io/github/forks/ccfos/nightingale">
|
||||
<img alt="GitHub latest release" src="https://img.shields.io/github/v/release/ccfos/nightingale"/>
|
||||
<img alt="License" src="https://img.shields.io/badge/license-Apache--2.0-blue"/>
|
||||
<a href="https://n9e-talk.slack.com/">
|
||||
<img alt="GitHub contributors" src="https://img.shields.io/badge/join%20slack-%23n9e-brightgreen.svg"/></a>
|
||||
</p>
|
||||
|
||||
<p align="center">
|
||||
告警管理专家,一体化的开源可观测平台
|
||||
</p>
|
||||
|
||||
[English](./README.md) | [中文](./README_zh.md)
|
||||
|
||||
夜莺Nightingale是中国计算机学会托管的开源云原生可观测工具,最早由滴滴于 2020 年孵化并开源,并于 2022 年正式捐赠予中国计算机学会。夜莺采用 All-in-One 的设计理念,集数据采集、可视化、监控告警、数据分析于一体,与云原生生态紧密集成,融入了顶级互联网公司可观测性最佳实践,沉淀了众多社区专家经验,开箱即用。
|
||||
|
||||
## 资料
|
||||
|
||||
- 文档:[flashcat.cloud/docs](https://flashcat.cloud/docs/)
|
||||
- 提问:[answer.flashcat.cloud](https://answer.flashcat.cloud/)
|
||||
- 报Bug:[github.com/ccfos/nightingale/issues](https://github.com/ccfos/nightingale/issues/new?assignees=&labels=kind%2Fbug&projects=&template=bug_report.yml)
|
||||
|
||||
|
||||
## 功能和特点
|
||||
|
||||
- 统一接入各种时序库:支持对接 Prometheus、VictoriaMetrics、Thanos、Mimir、M3DB 等多种时序库,实现统一告警管理
|
||||
- 专业告警能力:内置支持多种告警规则,可以扩展支持所有通知媒介,支持告警屏蔽、告警抑制、告警自愈、告警事件管理
|
||||
- 高性能可视化引擎:支持多种图表样式,内置众多Dashboard模版,也可导入Grafana模版,开箱即用,开源协议商业友好
|
||||
- 无缝搭配 [Flashduty](https://flashcat.cloud/product/flashcat-duty/):实现告警聚合收敛、认领、升级、排班、IM集成,确保告警处理不遗漏,减少打扰,更好协同
|
||||
- 支持所有常见采集器:支持 [Categraf](https://flashcat.cloud/product/categraf)、telegraf、grafana-agent、datadog-agent、各种 exporter 作为采集器,没有什么数据是不能监控的
|
||||
- 一体化观测平台:从 v6 版本开始,支持接入 ElasticSearch、Jaeger 数据源,实现日志、链路、指标多维度的统一可观测
|
||||
|
||||
|
||||
## 产品演示
|
||||
|
||||

|
||||
|
||||
## 部署架构
|
||||
|
||||

|
||||
|
||||
## 加入交流群
|
||||
|
||||
欢迎加入 QQ 交流群,群号:479290895,QQ 群适合群友互助,夜莺研发人员通常不在群里。如果要报 bug 请到[这里](https://github.com/ccfos/nightingale/issues/new?assignees=&labels=kind%2Fbug&projects=&template=bug_report.yml),提问到[这里](https://answer.flashcat.cloud/)。
|
||||
|
||||
## Stargazers over time
|
||||
|
||||
[](https://star-history.com/#ccfos/nightingale&Date)
|
||||
|
||||
|
||||
## Contributors
|
||||
<a href="https://github.com/ccfos/nightingale/graphs/contributors">
|
||||
<img src="https://contrib.rocks/image?repo=ccfos/nightingale" />
|
||||
</a>
|
||||
|
||||
## 社区治理
|
||||
[夜莺开源项目和社区治理架构(草案)](./doc/community-governance.md)
|
||||
|
||||
## License
|
||||
[Apache License V2.0](https://github.com/didi/nightingale/blob/main/LICENSE)
|
||||
@@ -32,6 +32,7 @@ type Alerting struct {
|
||||
Timeout int64
|
||||
TemplatesDir string
|
||||
NotifyConcurrency int
|
||||
WebhookBatchSend bool
|
||||
}
|
||||
|
||||
type CallPlugin struct {
|
||||
@@ -46,13 +47,6 @@ type RedisPub struct {
|
||||
ChannelKey string
|
||||
}
|
||||
|
||||
type Ibex struct {
|
||||
Address string
|
||||
BasicAuthUser string
|
||||
BasicAuthPass string
|
||||
Timeout int64
|
||||
}
|
||||
|
||||
func (a *Alert) PreCheck(configDir string) {
|
||||
if a.Alerting.TemplatesDir == "" {
|
||||
a.Alerting.TemplatesDir = path.Join(configDir, "template")
|
||||
|
||||
@@ -24,7 +24,10 @@ import (
|
||||
"github.com/ccfos/nightingale/v6/prom"
|
||||
"github.com/ccfos/nightingale/v6/pushgw/pconf"
|
||||
"github.com/ccfos/nightingale/v6/pushgw/writer"
|
||||
"github.com/ccfos/nightingale/v6/storage"
|
||||
"github.com/ccfos/nightingale/v6/tdengine"
|
||||
|
||||
"github.com/flashcatcloud/ibex/src/cmd/ibex"
|
||||
)
|
||||
|
||||
func Initialize(configDir string, cryptoKey string) (func(), error) {
|
||||
@@ -40,11 +43,17 @@ func Initialize(configDir string, cryptoKey string) (func(), error) {
|
||||
|
||||
ctx := ctx.NewContext(context.Background(), nil, false, config.CenterApi)
|
||||
|
||||
var redis storage.Redis
|
||||
redis, err = storage.NewRedis(config.Redis)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
syncStats := memsto.NewSyncStats()
|
||||
alertStats := astats.NewSyncStats()
|
||||
|
||||
configCache := memsto.NewConfigCache(ctx, syncStats, nil, "")
|
||||
targetCache := memsto.NewTargetCache(ctx, syncStats, nil)
|
||||
targetCache := memsto.NewTargetCache(ctx, syncStats, redis)
|
||||
busiGroupCache := memsto.NewBusiGroupCache(ctx, syncStats)
|
||||
alertMuteCache := memsto.NewAlertMuteCache(ctx, syncStats)
|
||||
alertRuleCache := memsto.NewAlertRuleCache(ctx, syncStats)
|
||||
@@ -52,16 +61,22 @@ func Initialize(configDir string, cryptoKey string) (func(), error) {
|
||||
dsCache := memsto.NewDatasourceCache(ctx, syncStats)
|
||||
userCache := memsto.NewUserCache(ctx, syncStats)
|
||||
userGroupCache := memsto.NewUserGroupCache(ctx, syncStats)
|
||||
taskTplsCache := memsto.NewTaskTplCache(ctx)
|
||||
|
||||
promClients := prom.NewPromClient(ctx)
|
||||
tdengineClients := tdengine.NewTdengineClient(ctx, config.Alert.Heartbeat)
|
||||
|
||||
externalProcessors := process.NewExternalProcessors()
|
||||
|
||||
Start(config.Alert, config.Pushgw, syncStats, alertStats, externalProcessors, targetCache, busiGroupCache, alertMuteCache, alertRuleCache, notifyConfigCache, dsCache, ctx, promClients, tdengineClients, userCache, userGroupCache)
|
||||
Start(config.Alert, config.Pushgw, syncStats, alertStats, externalProcessors, targetCache, busiGroupCache, alertMuteCache, alertRuleCache, notifyConfigCache, taskTplsCache, dsCache, ctx, promClients, tdengineClients, userCache, userGroupCache)
|
||||
|
||||
r := httpx.GinEngine(config.Global.RunMode, config.HTTP)
|
||||
rt := router.New(config.HTTP, config.Alert, alertMuteCache, targetCache, busiGroupCache, alertStats, ctx, externalProcessors)
|
||||
|
||||
if config.Ibex.Enable {
|
||||
ibex.ServerStart(false, nil, redis, config.HTTP.APIForService.BasicAuth, config.Alert.Heartbeat, &config.CenterApi, r, nil, config.Ibex, config.HTTP.Port)
|
||||
}
|
||||
|
||||
rt.Config(r)
|
||||
dumper.ConfigRouter(r)
|
||||
|
||||
@@ -74,10 +89,11 @@ func Initialize(configDir string, cryptoKey string) (func(), error) {
|
||||
}
|
||||
|
||||
func Start(alertc aconf.Alert, pushgwc pconf.Pushgw, syncStats *memsto.Stats, alertStats *astats.Stats, externalProcessors *process.ExternalProcessorsType, targetCache *memsto.TargetCacheType, busiGroupCache *memsto.BusiGroupCacheType,
|
||||
alertMuteCache *memsto.AlertMuteCacheType, alertRuleCache *memsto.AlertRuleCacheType, notifyConfigCache *memsto.NotifyConfigCacheType, datasourceCache *memsto.DatasourceCacheType, ctx *ctx.Context,
|
||||
alertMuteCache *memsto.AlertMuteCacheType, alertRuleCache *memsto.AlertRuleCacheType, notifyConfigCache *memsto.NotifyConfigCacheType, taskTplsCache *memsto.TaskTplCache, datasourceCache *memsto.DatasourceCacheType, ctx *ctx.Context,
|
||||
promClients *prom.PromClientMap, tdendgineClients *tdengine.TdengineClientMap, userCache *memsto.UserCacheType, userGroupCache *memsto.UserGroupCacheType) {
|
||||
alertSubscribeCache := memsto.NewAlertSubscribeCache(ctx, syncStats)
|
||||
recordingRuleCache := memsto.NewRecordingRuleCache(ctx, syncStats)
|
||||
targetsOfAlertRulesCache := memsto.NewTargetOfAlertRuleCache(ctx, alertc.Heartbeat.EngineName, syncStats)
|
||||
|
||||
go models.InitNotifyConfig(ctx, alertc.Alerting.TemplatesDir)
|
||||
|
||||
@@ -86,14 +102,15 @@ func Start(alertc aconf.Alert, pushgwc pconf.Pushgw, syncStats *memsto.Stats, al
|
||||
writers := writer.NewWriters(pushgwc)
|
||||
record.NewScheduler(alertc, recordingRuleCache, promClients, writers, alertStats)
|
||||
|
||||
eval.NewScheduler(alertc, externalProcessors, alertRuleCache, targetCache, busiGroupCache, alertMuteCache, datasourceCache, promClients, tdendgineClients, naming, ctx, alertStats)
|
||||
eval.NewScheduler(alertc, externalProcessors, alertRuleCache, targetCache, targetsOfAlertRulesCache,
|
||||
busiGroupCache, alertMuteCache, datasourceCache, promClients, tdendgineClients, naming, ctx, alertStats)
|
||||
|
||||
dp := dispatch.NewDispatch(alertRuleCache, userCache, userGroupCache, alertSubscribeCache, targetCache, notifyConfigCache, alertc.Alerting, ctx, alertStats)
|
||||
consumer := dispatch.NewConsumer(alertc.Alerting, ctx, dp)
|
||||
dp := dispatch.NewDispatch(alertRuleCache, userCache, userGroupCache, alertSubscribeCache, targetCache, notifyConfigCache, taskTplsCache, alertc.Alerting, ctx, alertStats)
|
||||
consumer := dispatch.NewConsumer(alertc.Alerting, ctx, dp, promClients)
|
||||
|
||||
go dp.ReloadTpls()
|
||||
go consumer.LoopConsume()
|
||||
|
||||
go queue.ReportQueueSize(alertStats)
|
||||
go sender.InitEmailSender(notifyConfigCache.GetSMTP())
|
||||
go sender.InitEmailSender(notifyConfigCache)
|
||||
}
|
||||
|
||||
@@ -16,6 +16,7 @@ type AnomalyPoint struct {
|
||||
Severity int `json:"severity"`
|
||||
Triggered bool `json:"triggered"`
|
||||
Query string `json:"query"`
|
||||
Values string `json:"values"`
|
||||
}
|
||||
|
||||
func NewAnomalyPoint(key string, labels map[string]string, ts int64, value float64, severity int) AnomalyPoint {
|
||||
|
||||
@@ -1,14 +1,19 @@
|
||||
package dispatch
|
||||
|
||||
import (
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
"github.com/ccfos/nightingale/v6/alert/aconf"
|
||||
"github.com/ccfos/nightingale/v6/alert/common"
|
||||
"github.com/ccfos/nightingale/v6/alert/queue"
|
||||
"github.com/ccfos/nightingale/v6/models"
|
||||
"github.com/ccfos/nightingale/v6/pkg/ctx"
|
||||
"github.com/ccfos/nightingale/v6/pkg/poster"
|
||||
promsdk "github.com/ccfos/nightingale/v6/pkg/prom"
|
||||
"github.com/ccfos/nightingale/v6/prom"
|
||||
|
||||
"github.com/toolkits/pkg/concurrent/semaphore"
|
||||
"github.com/toolkits/pkg/logger"
|
||||
@@ -18,15 +23,17 @@ type Consumer struct {
|
||||
alerting aconf.Alerting
|
||||
ctx *ctx.Context
|
||||
|
||||
dispatch *Dispatch
|
||||
dispatch *Dispatch
|
||||
promClients *prom.PromClientMap
|
||||
}
|
||||
|
||||
// 创建一个 Consumer 实例
|
||||
func NewConsumer(alerting aconf.Alerting, ctx *ctx.Context, dispatch *Dispatch) *Consumer {
|
||||
func NewConsumer(alerting aconf.Alerting, ctx *ctx.Context, dispatch *Dispatch, promClients *prom.PromClientMap) *Consumer {
|
||||
return &Consumer{
|
||||
alerting: alerting,
|
||||
ctx: ctx,
|
||||
dispatch: dispatch,
|
||||
alerting: alerting,
|
||||
ctx: ctx,
|
||||
dispatch: dispatch,
|
||||
promClients: promClients,
|
||||
}
|
||||
}
|
||||
|
||||
@@ -73,17 +80,19 @@ func (e *Consumer) consumeOne(event *models.AlertCurEvent) {
|
||||
event.RuleName = fmt.Sprintf("failed to parse rule name: %v", err)
|
||||
}
|
||||
|
||||
if err := event.ParseRule("rule_note"); err != nil {
|
||||
logger.Warningf("ruleid:%d failed to parse rule note: %v", event.RuleId, err)
|
||||
event.RuleNote = fmt.Sprintf("failed to parse rule note: %v", err)
|
||||
}
|
||||
|
||||
if err := event.ParseRule("annotations"); err != nil {
|
||||
logger.Warningf("ruleid:%d failed to parse annotations: %v", event.RuleId, err)
|
||||
event.Annotations = fmt.Sprintf("failed to parse annotations: %v", err)
|
||||
event.AnnotationsJSON["error"] = event.Annotations
|
||||
}
|
||||
|
||||
e.queryRecoveryVal(event)
|
||||
|
||||
if err := event.ParseRule("rule_note"); err != nil {
|
||||
logger.Warningf("ruleid:%d failed to parse rule note: %v", event.RuleId, err)
|
||||
event.RuleNote = fmt.Sprintf("failed to parse rule note: %v", err)
|
||||
}
|
||||
|
||||
e.persist(event)
|
||||
|
||||
if event.IsRecovered && event.NotifyRecovered == 0 {
|
||||
@@ -115,3 +124,68 @@ func (e *Consumer) persist(event *models.AlertCurEvent) {
|
||||
e.dispatch.Astats.CounterRuleEvalErrorTotal.WithLabelValues(fmt.Sprintf("%v", event.DatasourceId), "persist_event").Inc()
|
||||
}
|
||||
}
|
||||
|
||||
func (e *Consumer) queryRecoveryVal(event *models.AlertCurEvent) {
|
||||
if !event.IsRecovered {
|
||||
return
|
||||
}
|
||||
|
||||
// If the event is a recovery event, execute the recovery_promql query
|
||||
promql, ok := event.AnnotationsJSON["recovery_promql"]
|
||||
if !ok {
|
||||
return
|
||||
}
|
||||
|
||||
promql = strings.TrimSpace(promql)
|
||||
if promql == "" {
|
||||
logger.Warningf("rule_eval:%s promql is blank", getKey(event))
|
||||
return
|
||||
}
|
||||
|
||||
if e.promClients.IsNil(event.DatasourceId) {
|
||||
logger.Warningf("rule_eval:%s error reader client is nil", getKey(event))
|
||||
return
|
||||
}
|
||||
|
||||
readerClient := e.promClients.GetCli(event.DatasourceId)
|
||||
|
||||
var warnings promsdk.Warnings
|
||||
value, warnings, err := readerClient.Query(e.ctx.Ctx, promql, time.Now())
|
||||
if err != nil {
|
||||
logger.Errorf("rule_eval:%s promql:%s, error:%v", getKey(event), promql, err)
|
||||
event.AnnotationsJSON["recovery_promql_error"] = fmt.Sprintf("promql:%s error:%v", promql, err)
|
||||
|
||||
b, err := json.Marshal(event.AnnotationsJSON)
|
||||
if err != nil {
|
||||
event.AnnotationsJSON = make(map[string]string)
|
||||
event.AnnotationsJSON["error"] = fmt.Sprintf("failed to parse annotations: %v", err)
|
||||
} else {
|
||||
event.Annotations = string(b)
|
||||
}
|
||||
return
|
||||
}
|
||||
|
||||
if len(warnings) > 0 {
|
||||
logger.Errorf("rule_eval:%s promql:%s, warnings:%v", getKey(event), promql, warnings)
|
||||
}
|
||||
|
||||
anomalyPoints := common.ConvertAnomalyPoints(value)
|
||||
if len(anomalyPoints) == 0 {
|
||||
logger.Warningf("rule_eval:%s promql:%s, result is empty", getKey(event), promql)
|
||||
event.AnnotationsJSON["recovery_promql_error"] = fmt.Sprintf("promql:%s error:%s", promql, "result is empty")
|
||||
} else {
|
||||
event.AnnotationsJSON["recovery_value"] = fmt.Sprintf("%v", anomalyPoints[0].Value)
|
||||
}
|
||||
|
||||
b, err := json.Marshal(event.AnnotationsJSON)
|
||||
if err != nil {
|
||||
event.AnnotationsJSON = make(map[string]string)
|
||||
event.AnnotationsJSON["error"] = fmt.Sprintf("failed to parse annotations: %v", err)
|
||||
} else {
|
||||
event.Annotations = string(b)
|
||||
}
|
||||
}
|
||||
|
||||
func getKey(event *models.AlertCurEvent) string {
|
||||
return common.RuleKey(event.DatasourceId, event.RuleId)
|
||||
}
|
||||
|
||||
@@ -4,7 +4,9 @@ import (
|
||||
"bytes"
|
||||
"encoding/json"
|
||||
"html/template"
|
||||
"net/url"
|
||||
"strconv"
|
||||
"strings"
|
||||
"sync"
|
||||
"time"
|
||||
|
||||
@@ -26,10 +28,12 @@ type Dispatch struct {
|
||||
alertSubscribeCache *memsto.AlertSubscribeCacheType
|
||||
targetCache *memsto.TargetCacheType
|
||||
notifyConfigCache *memsto.NotifyConfigCacheType
|
||||
taskTplsCache *memsto.TaskTplCache
|
||||
|
||||
alerting aconf.Alerting
|
||||
|
||||
Senders map[string]sender.Sender
|
||||
CallBacks map[string]sender.CallBacker
|
||||
tpls map[string]*template.Template
|
||||
ExtraSenders map[string]sender.Sender
|
||||
BeforeSenderHook func(*models.AlertCurEvent) bool
|
||||
@@ -43,7 +47,7 @@ type Dispatch struct {
|
||||
// 创建一个 Notify 实例
|
||||
func NewDispatch(alertRuleCache *memsto.AlertRuleCacheType, userCache *memsto.UserCacheType, userGroupCache *memsto.UserGroupCacheType,
|
||||
alertSubscribeCache *memsto.AlertSubscribeCacheType, targetCache *memsto.TargetCacheType, notifyConfigCache *memsto.NotifyConfigCacheType,
|
||||
alerting aconf.Alerting, ctx *ctx.Context, astats *astats.Stats) *Dispatch {
|
||||
taskTplsCache *memsto.TaskTplCache, alerting aconf.Alerting, ctx *ctx.Context, astats *astats.Stats) *Dispatch {
|
||||
notify := &Dispatch{
|
||||
alertRuleCache: alertRuleCache,
|
||||
userCache: userCache,
|
||||
@@ -51,6 +55,7 @@ func NewDispatch(alertRuleCache *memsto.AlertRuleCacheType, userCache *memsto.Us
|
||||
alertSubscribeCache: alertSubscribeCache,
|
||||
targetCache: targetCache,
|
||||
notifyConfigCache: notifyConfigCache,
|
||||
taskTplsCache: taskTplsCache,
|
||||
|
||||
alerting: alerting,
|
||||
|
||||
@@ -95,6 +100,21 @@ func (e *Dispatch) relaodTpls() error {
|
||||
models.Mm: sender.NewSender(models.Mm, tmpTpls),
|
||||
models.Telegram: sender.NewSender(models.Telegram, tmpTpls),
|
||||
models.FeishuCard: sender.NewSender(models.FeishuCard, tmpTpls),
|
||||
models.Lark: sender.NewSender(models.Lark, tmpTpls),
|
||||
models.LarkCard: sender.NewSender(models.LarkCard, tmpTpls),
|
||||
}
|
||||
|
||||
// domain -> Callback()
|
||||
callbacks := map[string]sender.CallBacker{
|
||||
models.DingtalkDomain: sender.NewCallBacker(models.DingtalkDomain, e.targetCache, e.userCache, e.taskTplsCache, tmpTpls),
|
||||
models.WecomDomain: sender.NewCallBacker(models.WecomDomain, e.targetCache, e.userCache, e.taskTplsCache, tmpTpls),
|
||||
models.FeishuDomain: sender.NewCallBacker(models.FeishuDomain, e.targetCache, e.userCache, e.taskTplsCache, tmpTpls),
|
||||
models.TelegramDomain: sender.NewCallBacker(models.TelegramDomain, e.targetCache, e.userCache, e.taskTplsCache, tmpTpls),
|
||||
models.FeishuCardDomain: sender.NewCallBacker(models.FeishuCardDomain, e.targetCache, e.userCache, e.taskTplsCache, tmpTpls),
|
||||
models.IbexDomain: sender.NewCallBacker(models.IbexDomain, e.targetCache, e.userCache, e.taskTplsCache, tmpTpls),
|
||||
models.LarkDomain: sender.NewCallBacker(models.LarkDomain, e.targetCache, e.userCache, e.taskTplsCache, tmpTpls),
|
||||
models.DefaultDomain: sender.NewCallBacker(models.DefaultDomain, e.targetCache, e.userCache, e.taskTplsCache, tmpTpls),
|
||||
models.LarkCardDomain: sender.NewCallBacker(models.LarkCardDomain, e.targetCache, e.userCache, e.taskTplsCache, tmpTpls),
|
||||
}
|
||||
|
||||
e.RwLock.RLock()
|
||||
@@ -106,6 +126,7 @@ func (e *Dispatch) relaodTpls() error {
|
||||
e.RwLock.Lock()
|
||||
e.tpls = tmpTpls
|
||||
e.Senders = senders
|
||||
e.CallBacks = callbacks
|
||||
e.RwLock.Unlock()
|
||||
return nil
|
||||
}
|
||||
@@ -229,20 +250,78 @@ func (e *Dispatch) Send(rule *models.AlertRule, event *models.AlertCurEvent, not
|
||||
logger.Debugf("no sender for channel: %s", channel)
|
||||
continue
|
||||
}
|
||||
|
||||
var event *models.AlertCurEvent
|
||||
if len(msgCtx.Events) > 0 {
|
||||
event = msgCtx.Events[0]
|
||||
}
|
||||
|
||||
logger.Debugf("send to channel:%s event:%+v users:%+v", channel, event, msgCtx.Users)
|
||||
s.Send(msgCtx)
|
||||
}
|
||||
}
|
||||
|
||||
// handle event callbacks
|
||||
sender.SendCallbacks(e.ctx, notifyTarget.ToCallbackList(), event, e.targetCache, e.userCache, e.notifyConfigCache.GetIbex(), e.Astats)
|
||||
e.SendCallbacks(rule, notifyTarget, event)
|
||||
|
||||
// handle global webhooks
|
||||
sender.SendWebhooks(notifyTarget.ToWebhookList(), event, e.Astats)
|
||||
if e.alerting.WebhookBatchSend {
|
||||
sender.BatchSendWebhooks(notifyTarget.ToWebhookList(), event, e.Astats)
|
||||
} else {
|
||||
sender.SingleSendWebhooks(notifyTarget.ToWebhookList(), event, e.Astats)
|
||||
}
|
||||
|
||||
// handle plugin call
|
||||
go sender.MayPluginNotify(e.genNoticeBytes(event), e.notifyConfigCache.GetNotifyScript(), e.Astats)
|
||||
}
|
||||
|
||||
func (e *Dispatch) SendCallbacks(rule *models.AlertRule, notifyTarget *NotifyTarget, event *models.AlertCurEvent) {
|
||||
|
||||
uids := notifyTarget.ToUidList()
|
||||
urls := notifyTarget.ToCallbackList()
|
||||
for _, urlStr := range urls {
|
||||
if len(urlStr) == 0 {
|
||||
continue
|
||||
}
|
||||
|
||||
cbCtx := sender.BuildCallBackContext(e.ctx, urlStr, rule, []*models.AlertCurEvent{event}, uids, e.userCache, e.alerting.WebhookBatchSend, e.Astats)
|
||||
|
||||
if strings.HasPrefix(urlStr, "${ibex}") {
|
||||
e.CallBacks[models.IbexDomain].CallBack(cbCtx)
|
||||
continue
|
||||
}
|
||||
|
||||
if !(strings.HasPrefix(urlStr, "http://") || strings.HasPrefix(urlStr, "https://")) {
|
||||
cbCtx.CallBackURL = "http://" + urlStr
|
||||
}
|
||||
|
||||
parsedURL, err := url.Parse(urlStr)
|
||||
if err != nil {
|
||||
logger.Errorf("SendCallbacks: failed to url.Parse(urlStr=%s): %v", urlStr, err)
|
||||
continue
|
||||
}
|
||||
|
||||
// process feishu card
|
||||
if parsedURL.Host == models.FeishuDomain && parsedURL.Query().Get("card") == "1" {
|
||||
e.CallBacks[models.FeishuCardDomain].CallBack(cbCtx)
|
||||
continue
|
||||
}
|
||||
|
||||
// process lark card
|
||||
if parsedURL.Host == models.LarkDomain && parsedURL.Query().Get("card") == "1" {
|
||||
e.CallBacks[models.LarkCardDomain].CallBack(cbCtx)
|
||||
continue
|
||||
}
|
||||
|
||||
callBacker, ok := e.CallBacks[parsedURL.Host]
|
||||
if ok {
|
||||
callBacker.CallBack(cbCtx)
|
||||
} else {
|
||||
e.CallBacks[models.DefaultDomain].CallBack(cbCtx)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
type Notice struct {
|
||||
Event *models.AlertCurEvent `json:"event"`
|
||||
Tpls map[string]string `json:"tpls"`
|
||||
|
||||
@@ -79,11 +79,35 @@ func (s *NotifyTarget) ToCallbackList() []string {
|
||||
func (s *NotifyTarget) ToWebhookList() []*models.Webhook {
|
||||
webhooks := make([]*models.Webhook, 0, len(s.webhooks))
|
||||
for _, wh := range s.webhooks {
|
||||
if wh.Batch == 0 {
|
||||
wh.Batch = 1000
|
||||
}
|
||||
|
||||
if wh.Timeout == 0 {
|
||||
wh.Timeout = 10
|
||||
}
|
||||
|
||||
if wh.RetryCount == 0 {
|
||||
wh.RetryCount = 10
|
||||
}
|
||||
|
||||
if wh.RetryInterval == 0 {
|
||||
wh.RetryInterval = 10
|
||||
}
|
||||
|
||||
webhooks = append(webhooks, wh)
|
||||
}
|
||||
return webhooks
|
||||
}
|
||||
|
||||
func (s *NotifyTarget) ToUidList() []int64 {
|
||||
uids := make([]int64, len(s.userMap))
|
||||
for uid, _ := range s.userMap {
|
||||
uids = append(uids, uid)
|
||||
}
|
||||
return uids
|
||||
}
|
||||
|
||||
// Dispatch 抽象由告警事件到信息接收者的路由策略
|
||||
// rule: 告警规则
|
||||
// event: 告警事件
|
||||
|
||||
@@ -3,6 +3,7 @@ package eval
|
||||
import (
|
||||
"context"
|
||||
"fmt"
|
||||
"strconv"
|
||||
"time"
|
||||
|
||||
"github.com/ccfos/nightingale/v6/alert/aconf"
|
||||
@@ -25,11 +26,12 @@ type Scheduler struct {
|
||||
|
||||
aconf aconf.Alert
|
||||
|
||||
alertRuleCache *memsto.AlertRuleCacheType
|
||||
targetCache *memsto.TargetCacheType
|
||||
busiGroupCache *memsto.BusiGroupCacheType
|
||||
alertMuteCache *memsto.AlertMuteCacheType
|
||||
datasourceCache *memsto.DatasourceCacheType
|
||||
alertRuleCache *memsto.AlertRuleCacheType
|
||||
targetCache *memsto.TargetCacheType
|
||||
targetsOfAlertRuleCache *memsto.TargetsOfAlertRuleCacheType
|
||||
busiGroupCache *memsto.BusiGroupCacheType
|
||||
alertMuteCache *memsto.AlertMuteCacheType
|
||||
datasourceCache *memsto.DatasourceCacheType
|
||||
|
||||
promClients *prom.PromClientMap
|
||||
tdengineClients *tdengine.TdengineClientMap
|
||||
@@ -40,7 +42,8 @@ type Scheduler struct {
|
||||
stats *astats.Stats
|
||||
}
|
||||
|
||||
func NewScheduler(aconf aconf.Alert, externalProcessors *process.ExternalProcessorsType, arc *memsto.AlertRuleCacheType, targetCache *memsto.TargetCacheType,
|
||||
func NewScheduler(aconf aconf.Alert, externalProcessors *process.ExternalProcessorsType, arc *memsto.AlertRuleCacheType,
|
||||
targetCache *memsto.TargetCacheType, toarc *memsto.TargetsOfAlertRuleCacheType,
|
||||
busiGroupCache *memsto.BusiGroupCacheType, alertMuteCache *memsto.AlertMuteCacheType, datasourceCache *memsto.DatasourceCacheType,
|
||||
promClients *prom.PromClientMap, tdengineClients *tdengine.TdengineClientMap, naming *naming.Naming, ctx *ctx.Context, stats *astats.Stats) *Scheduler {
|
||||
scheduler := &Scheduler{
|
||||
@@ -49,11 +52,12 @@ func NewScheduler(aconf aconf.Alert, externalProcessors *process.ExternalProcess
|
||||
|
||||
ExternalProcessors: externalProcessors,
|
||||
|
||||
alertRuleCache: arc,
|
||||
targetCache: targetCache,
|
||||
busiGroupCache: busiGroupCache,
|
||||
alertMuteCache: alertMuteCache,
|
||||
datasourceCache: datasourceCache,
|
||||
alertRuleCache: arc,
|
||||
targetCache: targetCache,
|
||||
targetsOfAlertRuleCache: toarc,
|
||||
busiGroupCache: busiGroupCache,
|
||||
alertMuteCache: alertMuteCache,
|
||||
datasourceCache: datasourceCache,
|
||||
|
||||
promClients: promClients,
|
||||
tdengineClients: tdengineClients,
|
||||
@@ -95,7 +99,7 @@ func (s *Scheduler) syncAlertRules() {
|
||||
datasourceIds := s.promClients.Hit(rule.DatasourceIdsJson)
|
||||
datasourceIds = append(datasourceIds, s.tdengineClients.Hit(rule.DatasourceIdsJson)...)
|
||||
for _, dsId := range datasourceIds {
|
||||
if !naming.DatasourceHashRing.IsHit(dsId, fmt.Sprintf("%d", rule.Id), s.aconf.Heartbeat.Endpoint) {
|
||||
if !naming.DatasourceHashRing.IsHit(strconv.FormatInt(dsId, 10), fmt.Sprintf("%d", rule.Id), s.aconf.Heartbeat.Endpoint) {
|
||||
continue
|
||||
}
|
||||
ds := s.datasourceCache.GetById(dsId)
|
||||
@@ -113,17 +117,17 @@ func (s *Scheduler) syncAlertRules() {
|
||||
logger.Debugf("datasource %d status is %s", dsId, ds.Status)
|
||||
continue
|
||||
}
|
||||
processor := process.NewProcessor(rule, dsId, s.alertRuleCache, s.targetCache, s.busiGroupCache, s.alertMuteCache, s.datasourceCache, s.ctx, s.stats)
|
||||
processor := process.NewProcessor(s.aconf.Heartbeat.EngineName, rule, dsId, s.alertRuleCache, s.targetCache, s.targetsOfAlertRuleCache, s.busiGroupCache, s.alertMuteCache, s.datasourceCache, s.ctx, s.stats)
|
||||
|
||||
alertRule := NewAlertRuleWorker(rule, dsId, processor, s.promClients, s.tdengineClients, s.ctx)
|
||||
alertRuleWorkers[alertRule.Hash()] = alertRule
|
||||
}
|
||||
} else if rule.IsHostRule() && s.ctx.IsCenter {
|
||||
} else if rule.IsHostRule() {
|
||||
// all host rule will be processed by center instance
|
||||
if !naming.DatasourceHashRing.IsHit(naming.HostDatasource, fmt.Sprintf("%d", rule.Id), s.aconf.Heartbeat.Endpoint) {
|
||||
if !naming.DatasourceHashRing.IsHit(s.aconf.Heartbeat.EngineName, strconv.FormatInt(rule.Id, 10), s.aconf.Heartbeat.Endpoint) {
|
||||
continue
|
||||
}
|
||||
processor := process.NewProcessor(rule, 0, s.alertRuleCache, s.targetCache, s.busiGroupCache, s.alertMuteCache, s.datasourceCache, s.ctx, s.stats)
|
||||
processor := process.NewProcessor(s.aconf.Heartbeat.EngineName, rule, 0, s.alertRuleCache, s.targetCache, s.targetsOfAlertRuleCache, s.busiGroupCache, s.alertMuteCache, s.datasourceCache, s.ctx, s.stats)
|
||||
alertRule := NewAlertRuleWorker(rule, 0, processor, s.promClients, s.tdengineClients, s.ctx)
|
||||
alertRuleWorkers[alertRule.Hash()] = alertRule
|
||||
} else {
|
||||
@@ -140,7 +144,7 @@ func (s *Scheduler) syncAlertRules() {
|
||||
logger.Debugf("datasource %d status is %s", dsId, ds.Status)
|
||||
continue
|
||||
}
|
||||
processor := process.NewProcessor(rule, dsId, s.alertRuleCache, s.targetCache, s.busiGroupCache, s.alertMuteCache, s.datasourceCache, s.ctx, s.stats)
|
||||
processor := process.NewProcessor(s.aconf.Heartbeat.EngineName, rule, dsId, s.alertRuleCache, s.targetCache, s.targetsOfAlertRuleCache, s.busiGroupCache, s.alertMuteCache, s.datasourceCache, s.ctx, s.stats)
|
||||
externalRuleWorkers[processor.Key()] = processor
|
||||
}
|
||||
}
|
||||
|
||||
@@ -4,6 +4,8 @@ import (
|
||||
"context"
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"math"
|
||||
"sort"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
@@ -123,15 +125,44 @@ func (arw *AlertRuleWorker) Eval() {
|
||||
|
||||
if arw.processor == nil {
|
||||
logger.Warningf("rule_eval:%s processor is nil", arw.Key())
|
||||
arw.processor.Stats.CounterRuleEvalErrorTotal.WithLabelValues(fmt.Sprintf("%v", arw.processor.DatasourceId()), GET_PROCESSOR).Inc()
|
||||
return
|
||||
}
|
||||
|
||||
arw.processor.Handle(anomalyPoints, "inner", arw.inhibit)
|
||||
for _, point := range recoverPoints {
|
||||
str := fmt.Sprintf("%v", point.Value)
|
||||
arw.processor.RecoverSingle(process.Hash(cachedRule.Id, arw.processor.DatasourceId(), point), point.Timestamp, &str)
|
||||
if arw.inhibit {
|
||||
pointsMap := make(map[string]common.AnomalyPoint)
|
||||
for _, point := range recoverPoints {
|
||||
// 对于恢复的事件,合并处理
|
||||
tagHash := process.TagHash(point)
|
||||
|
||||
p, exists := pointsMap[tagHash]
|
||||
if !exists {
|
||||
pointsMap[tagHash] = point
|
||||
continue
|
||||
}
|
||||
|
||||
if p.Severity > point.Severity {
|
||||
hash := process.Hash(cachedRule.Id, arw.processor.DatasourceId(), p)
|
||||
arw.processor.DeleteProcessEvent(hash)
|
||||
models.AlertCurEventDelByHash(arw.ctx, hash)
|
||||
|
||||
pointsMap[tagHash] = point
|
||||
}
|
||||
}
|
||||
|
||||
now := time.Now().Unix()
|
||||
for _, point := range pointsMap {
|
||||
str := fmt.Sprintf("%v", point.Value)
|
||||
arw.processor.RecoverSingle(process.Hash(cachedRule.Id, arw.processor.DatasourceId(), point), now, &str)
|
||||
}
|
||||
} else {
|
||||
now := time.Now().Unix()
|
||||
for _, point := range recoverPoints {
|
||||
str := fmt.Sprintf("%v", point.Value)
|
||||
arw.processor.RecoverSingle(process.Hash(cachedRule.Id, arw.processor.DatasourceId(), point), now, &str)
|
||||
}
|
||||
}
|
||||
|
||||
arw.processor.Handle(anomalyPoints, "inner", arw.inhibit)
|
||||
}
|
||||
|
||||
func (arw *AlertRuleWorker) Stop() {
|
||||
@@ -191,7 +222,6 @@ func (arw *AlertRuleWorker) GetPromAnomalyPoint(ruleConfig string) []common.Anom
|
||||
logger.Errorf("rule_eval:%s promql:%s, warnings:%v", arw.Key(), promql, warnings)
|
||||
arw.processor.Stats.CounterQueryDataErrorTotal.WithLabelValues(fmt.Sprintf("%d", arw.datasourceId)).Inc()
|
||||
arw.processor.Stats.CounterRuleEvalErrorTotal.WithLabelValues(fmt.Sprintf("%v", arw.processor.DatasourceId()), QUERY_DATA).Inc()
|
||||
continue
|
||||
}
|
||||
|
||||
logger.Debugf("rule_eval:%s query:%+v, value:%v", arw.Key(), query, value)
|
||||
@@ -227,7 +257,7 @@ func (arw *AlertRuleWorker) GetTdengineAnomalyPoint(rule *models.AlertRule, dsId
|
||||
|
||||
arw.inhibit = ruleQuery.Inhibit
|
||||
if len(ruleQuery.Queries) > 0 {
|
||||
seriesStore := make(map[uint64]*models.DataResp)
|
||||
seriesStore := make(map[uint64]models.DataResp)
|
||||
seriesTagIndex := make(map[uint64][]uint64)
|
||||
|
||||
for _, query := range ruleQuery.Queries {
|
||||
@@ -251,69 +281,10 @@ func (arw *AlertRuleWorker) GetTdengineAnomalyPoint(rule *models.AlertRule, dsId
|
||||
|
||||
// 此条日志很重要,是告警判断的现场值
|
||||
logger.Debugf("rule_eval rid:%d req:%+v resp:%+v", rule.Id, query, series)
|
||||
for i := 0; i < len(series); i++ {
|
||||
serieHash := hash.GetHash(series[i].Metric, series[i].Ref)
|
||||
tagHash := hash.GetTagHash(series[i].Metric)
|
||||
seriesStore[serieHash] = series[i]
|
||||
|
||||
// 将曲线按照相同的 tag 分组
|
||||
if _, exists := seriesTagIndex[tagHash]; !exists {
|
||||
seriesTagIndex[tagHash] = make([]uint64, 0)
|
||||
}
|
||||
seriesTagIndex[tagHash] = append(seriesTagIndex[tagHash], serieHash)
|
||||
}
|
||||
MakeSeriesMap(series, seriesTagIndex, seriesStore)
|
||||
}
|
||||
|
||||
// 判断
|
||||
for _, trigger := range ruleQuery.Triggers {
|
||||
for _, seriesHash := range seriesTagIndex {
|
||||
m := make(map[string]float64)
|
||||
var ts int64
|
||||
var sample *models.DataResp
|
||||
var value float64
|
||||
for _, serieHash := range seriesHash {
|
||||
series, exists := seriesStore[serieHash]
|
||||
if !exists {
|
||||
logger.Warningf("rule_eval rid:%d series:%+v not found", rule.Id, series)
|
||||
continue
|
||||
}
|
||||
t, v, exists := series.Last()
|
||||
if !exists {
|
||||
logger.Warningf("rule_eval rid:%d series:%+v value not found", rule.Id, series)
|
||||
continue
|
||||
}
|
||||
|
||||
if !strings.Contains(trigger.Exp, "$"+series.Ref) {
|
||||
// 表达式中不包含该变量
|
||||
continue
|
||||
}
|
||||
|
||||
m["$"+series.Ref] = v
|
||||
m["$"+series.Ref+"."+series.MetricName()] = v
|
||||
ts = int64(t)
|
||||
sample = series
|
||||
value = v
|
||||
}
|
||||
isTriggered := parser.Calc(trigger.Exp, m)
|
||||
// 此条日志很重要,是告警判断的现场值
|
||||
logger.Debugf("rule_eval rid:%d trigger:%+v exp:%s res:%v m:%v", rule.Id, trigger, trigger.Exp, isTriggered, m)
|
||||
|
||||
point := common.AnomalyPoint{
|
||||
Key: sample.MetricName(),
|
||||
Labels: sample.Metric,
|
||||
Timestamp: int64(ts),
|
||||
Value: value,
|
||||
Severity: trigger.Severity,
|
||||
Triggered: isTriggered,
|
||||
}
|
||||
|
||||
if isTriggered {
|
||||
points = append(points, point)
|
||||
} else {
|
||||
recoverPoints = append(recoverPoints, point)
|
||||
}
|
||||
}
|
||||
}
|
||||
points, recoverPoints = GetAnomalyPoint(rule.Id, ruleQuery, seriesTagIndex, seriesStore)
|
||||
}
|
||||
|
||||
return points, recoverPoints
|
||||
@@ -343,17 +314,42 @@ func (arw *AlertRuleWorker) GetHostAnomalyPoint(ruleConfig string) []common.Anom
|
||||
arw.severity = trigger.Severity
|
||||
}
|
||||
|
||||
query := models.GetHostsQuery(rule.Queries)
|
||||
switch trigger.Type {
|
||||
case "target_miss":
|
||||
t := now - int64(trigger.Duration)
|
||||
targets, err := models.MissTargetGetsByFilter(arw.ctx, query, t)
|
||||
if err != nil {
|
||||
logger.Errorf("rule_eval:%s query:%v, error:%v", arw.Key(), query, err)
|
||||
|
||||
var idents, engineIdents, missEngineIdents []string
|
||||
var exists bool
|
||||
if arw.ctx.IsCenter {
|
||||
// 如果是中心节点, 将不再上报数据的主机 engineName 为空的机器,也加入到 targets 中
|
||||
missEngineIdents, exists = arw.processor.TargetsOfAlertRuleCache.Get("", arw.rule.Id)
|
||||
if !exists {
|
||||
logger.Debugf("rule_eval:%s targets not found engineName:%s", arw.Key(), arw.processor.EngineName)
|
||||
arw.processor.Stats.CounterRuleEvalErrorTotal.WithLabelValues(fmt.Sprintf("%v", arw.processor.DatasourceId()), QUERY_DATA).Inc()
|
||||
}
|
||||
}
|
||||
idents = append(idents, missEngineIdents...)
|
||||
|
||||
engineIdents, exists = arw.processor.TargetsOfAlertRuleCache.Get(arw.processor.EngineName, arw.rule.Id)
|
||||
if !exists {
|
||||
logger.Warningf("rule_eval:%s targets not found engineName:%s", arw.Key(), arw.processor.EngineName)
|
||||
arw.processor.Stats.CounterRuleEvalErrorTotal.WithLabelValues(fmt.Sprintf("%v", arw.processor.DatasourceId()), QUERY_DATA).Inc()
|
||||
arw.processor.Stats.CounterQueryDataErrorTotal.WithLabelValues(fmt.Sprintf("%d", arw.datasourceId)).Inc()
|
||||
}
|
||||
idents = append(idents, engineIdents...)
|
||||
|
||||
if len(idents) == 0 {
|
||||
continue
|
||||
}
|
||||
|
||||
var missTargets []string
|
||||
targetUpdateTimeMap := arw.processor.TargetCache.GetHostUpdateTime(idents)
|
||||
for ident, updateTime := range targetUpdateTimeMap {
|
||||
if updateTime < t {
|
||||
missTargets = append(missTargets, ident)
|
||||
}
|
||||
}
|
||||
logger.Debugf("rule_eval:%s missTargets:%v", arw.Key(), missTargets)
|
||||
targets := arw.processor.TargetCache.Gets(missTargets)
|
||||
for _, target := range targets {
|
||||
m := make(map[string]string)
|
||||
target.FillTagsMap()
|
||||
@@ -370,22 +366,43 @@ func (arw *AlertRuleWorker) GetHostAnomalyPoint(ruleConfig string) []common.Anom
|
||||
lst = append(lst, common.NewAnomalyPoint(trigger.Type, m, now, float64(now-target.UpdateAt), trigger.Severity))
|
||||
}
|
||||
case "offset":
|
||||
targets, err := models.TargetGetsByFilter(arw.ctx, query, 0, 0)
|
||||
if err != nil {
|
||||
logger.Errorf("rule_eval:%s query:%v, error:%v", arw.Key(), query, err)
|
||||
idents, exists := arw.processor.TargetsOfAlertRuleCache.Get(arw.processor.EngineName, arw.rule.Id)
|
||||
if !exists {
|
||||
logger.Warningf("rule_eval:%s targets not found", arw.Key())
|
||||
arw.processor.Stats.CounterRuleEvalErrorTotal.WithLabelValues(fmt.Sprintf("%v", arw.processor.DatasourceId()), QUERY_DATA).Inc()
|
||||
arw.processor.Stats.CounterQueryDataErrorTotal.WithLabelValues(fmt.Sprintf("%d", arw.datasourceId)).Inc()
|
||||
continue
|
||||
}
|
||||
var targetMap = make(map[string]*models.Target)
|
||||
|
||||
targets := arw.processor.TargetCache.Gets(idents)
|
||||
targetMap := make(map[string]*models.Target)
|
||||
for _, target := range targets {
|
||||
targetMap[target.Ident] = target
|
||||
}
|
||||
|
||||
hostOffsetMap := arw.processor.TargetCache.GetOffsetHost(targets, now, int64(trigger.Duration))
|
||||
for host, offset := range hostOffsetMap {
|
||||
offsetIdents := make(map[string]int64)
|
||||
targetsMeta := arw.processor.TargetCache.GetHostMetas(targets)
|
||||
for ident, meta := range targetsMeta {
|
||||
if meta.CpuNum <= 0 {
|
||||
// means this target is not collect by categraf, do not check offset
|
||||
continue
|
||||
}
|
||||
if target, exists := targetMap[ident]; exists {
|
||||
if now-target.UpdateAt > 120 {
|
||||
// means this target is not a active host, do not check offset
|
||||
continue
|
||||
}
|
||||
}
|
||||
|
||||
offset := meta.Offset
|
||||
if math.Abs(float64(offset)) > float64(trigger.Duration) {
|
||||
offsetIdents[ident] = offset
|
||||
}
|
||||
}
|
||||
|
||||
logger.Debugf("rule_eval:%s offsetIdents:%v", arw.Key(), offsetIdents)
|
||||
for host, offset := range offsetIdents {
|
||||
m := make(map[string]string)
|
||||
target, exists := targetMap[host]
|
||||
target, exists := arw.processor.TargetCache.Get(host)
|
||||
if exists {
|
||||
target.FillTagsMap()
|
||||
for k, v := range target.TagsMap {
|
||||
@@ -403,22 +420,22 @@ func (arw *AlertRuleWorker) GetHostAnomalyPoint(ruleConfig string) []common.Anom
|
||||
}
|
||||
case "pct_target_miss":
|
||||
t := now - int64(trigger.Duration)
|
||||
count, err := models.MissTargetCountByFilter(arw.ctx, query, t)
|
||||
if err != nil {
|
||||
logger.Errorf("rule_eval:%s query:%v, error:%v", arw.Key(), query, err)
|
||||
idents, exists := arw.processor.TargetsOfAlertRuleCache.Get(arw.processor.EngineName, arw.rule.Id)
|
||||
if !exists {
|
||||
logger.Warningf("rule_eval:%s targets not found", arw.Key())
|
||||
arw.processor.Stats.CounterRuleEvalErrorTotal.WithLabelValues(fmt.Sprintf("%v", arw.processor.DatasourceId()), QUERY_DATA).Inc()
|
||||
arw.processor.Stats.CounterQueryDataErrorTotal.WithLabelValues(fmt.Sprintf("%d", arw.datasourceId)).Inc()
|
||||
continue
|
||||
}
|
||||
|
||||
total, err := models.TargetCountByFilter(arw.ctx, query)
|
||||
if err != nil {
|
||||
logger.Errorf("rule_eval:%s query:%v, error:%v", arw.Key(), query, err)
|
||||
arw.processor.Stats.CounterRuleEvalErrorTotal.WithLabelValues(fmt.Sprintf("%v", arw.processor.DatasourceId()), QUERY_DATA).Inc()
|
||||
arw.processor.Stats.CounterQueryDataErrorTotal.WithLabelValues(fmt.Sprintf("%d", arw.datasourceId)).Inc()
|
||||
continue
|
||||
var missTargets []string
|
||||
targetUpdateTimeMap := arw.processor.TargetCache.GetHostUpdateTime(idents)
|
||||
for ident, updateTime := range targetUpdateTimeMap {
|
||||
if updateTime < t {
|
||||
missTargets = append(missTargets, ident)
|
||||
}
|
||||
}
|
||||
pct := float64(count) / float64(total) * 100
|
||||
logger.Debugf("rule_eval:%s missTargets:%v", arw.Key(), missTargets)
|
||||
pct := float64(len(missTargets)) / float64(len(idents)) * 100
|
||||
if pct >= float64(trigger.Percent) {
|
||||
lst = append(lst, common.NewAnomalyPoint(trigger.Type, nil, now, pct, trigger.Severity))
|
||||
}
|
||||
@@ -426,3 +443,92 @@ func (arw *AlertRuleWorker) GetHostAnomalyPoint(ruleConfig string) []common.Anom
|
||||
}
|
||||
return lst
|
||||
}
|
||||
|
||||
func GetAnomalyPoint(ruleId int64, ruleQuery models.RuleQuery, seriesTagIndex map[uint64][]uint64, seriesStore map[uint64]models.DataResp) ([]common.AnomalyPoint, []common.AnomalyPoint) {
|
||||
points := []common.AnomalyPoint{}
|
||||
recoverPoints := []common.AnomalyPoint{}
|
||||
|
||||
for _, trigger := range ruleQuery.Triggers {
|
||||
for _, seriesHash := range seriesTagIndex {
|
||||
sort.Slice(seriesHash, func(i, j int) bool {
|
||||
return seriesHash[i] < seriesHash[j]
|
||||
})
|
||||
|
||||
m := make(map[string]float64)
|
||||
var ts int64
|
||||
var sample models.DataResp
|
||||
var value float64
|
||||
for _, serieHash := range seriesHash {
|
||||
series, exists := seriesStore[serieHash]
|
||||
if !exists {
|
||||
logger.Warningf("rule_eval rid:%d series:%+v not found", ruleId, series)
|
||||
continue
|
||||
}
|
||||
t, v, exists := series.Last()
|
||||
if !exists {
|
||||
logger.Warningf("rule_eval rid:%d series:%+v value not found", ruleId, series)
|
||||
continue
|
||||
}
|
||||
|
||||
if !strings.Contains(trigger.Exp, "$"+series.Ref) {
|
||||
// 表达式中不包含该变量
|
||||
continue
|
||||
}
|
||||
|
||||
m["$"+series.Ref] = v
|
||||
m["$"+series.Ref+"."+series.MetricName()] = v
|
||||
ts = int64(t)
|
||||
sample = series
|
||||
value = v
|
||||
}
|
||||
isTriggered := parser.Calc(trigger.Exp, m)
|
||||
// 此条日志很重要,是告警判断的现场值
|
||||
logger.Infof("rule_eval rid:%d trigger:%+v exp:%s res:%v m:%v", ruleId, trigger, trigger.Exp, isTriggered, m)
|
||||
|
||||
var values string
|
||||
for k, v := range m {
|
||||
if !strings.Contains(k, ".") {
|
||||
continue
|
||||
}
|
||||
values += fmt.Sprintf("%s:%v ", k, v)
|
||||
}
|
||||
|
||||
point := common.AnomalyPoint{
|
||||
Key: sample.MetricName(),
|
||||
Labels: sample.Metric,
|
||||
Timestamp: int64(ts),
|
||||
Value: value,
|
||||
Values: values,
|
||||
Severity: trigger.Severity,
|
||||
Triggered: isTriggered,
|
||||
Query: fmt.Sprintf("query:%+v trigger:%+v", ruleQuery.Queries, trigger),
|
||||
}
|
||||
|
||||
if sample.Query != "" {
|
||||
point.Query = sample.Query
|
||||
}
|
||||
|
||||
if isTriggered {
|
||||
points = append(points, point)
|
||||
} else {
|
||||
recoverPoints = append(recoverPoints, point)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return points, recoverPoints
|
||||
}
|
||||
|
||||
func MakeSeriesMap(series []models.DataResp, seriesTagIndex map[uint64][]uint64, seriesStore map[uint64]models.DataResp) {
|
||||
for i := 0; i < len(series); i++ {
|
||||
serieHash := hash.GetHash(series[i].Metric, series[i].Ref)
|
||||
tagHash := hash.GetTagHash(series[i].Metric)
|
||||
seriesStore[serieHash] = series[i]
|
||||
|
||||
// 将曲线按照相同的 tag 分组
|
||||
if _, exists := seriesTagIndex[tagHash]; !exists {
|
||||
seriesTagIndex[tagHash] = make([]uint64, 0)
|
||||
}
|
||||
seriesTagIndex[tagHash] = append(seriesTagIndex[tagHash], serieHash)
|
||||
}
|
||||
}
|
||||
|
||||
@@ -12,28 +12,28 @@ import (
|
||||
"github.com/toolkits/pkg/logger"
|
||||
)
|
||||
|
||||
func IsMuted(rule *models.AlertRule, event *models.AlertCurEvent, targetCache *memsto.TargetCacheType, alertMuteCache *memsto.AlertMuteCacheType) bool {
|
||||
func IsMuted(rule *models.AlertRule, event *models.AlertCurEvent, targetCache *memsto.TargetCacheType, alertMuteCache *memsto.AlertMuteCacheType) (bool, string) {
|
||||
if rule.Disabled == 1 {
|
||||
return true
|
||||
return true, "rule disabled"
|
||||
}
|
||||
|
||||
if TimeSpanMuteStrategy(rule, event) {
|
||||
return true
|
||||
return true, "rule is not effective for period of time"
|
||||
}
|
||||
|
||||
if IdentNotExistsMuteStrategy(rule, event, targetCache) {
|
||||
return true
|
||||
return true, "ident not exists mute"
|
||||
}
|
||||
|
||||
if BgNotMatchMuteStrategy(rule, event, targetCache) {
|
||||
return true
|
||||
return true, "bg not match mute"
|
||||
}
|
||||
|
||||
if EventMuteStrategy(event, alertMuteCache) {
|
||||
return true
|
||||
return true, "match mute rule"
|
||||
}
|
||||
|
||||
return false
|
||||
return false, ""
|
||||
}
|
||||
|
||||
// TimeSpanMuteStrategy 根据规则配置的告警生效时间段过滤,如果产生的告警不在规则配置的告警生效时间段内,则不告警,即被mute
|
||||
@@ -147,7 +147,7 @@ func matchMute(event *models.AlertCurEvent, mute *models.AlertMute, clock ...int
|
||||
}
|
||||
|
||||
// 如果不是全局的,判断 匹配的 datasource id
|
||||
if !(len(mute.DatasourceIdsJson) != 0 && mute.DatasourceIdsJson[0] == 0) && event.DatasourceId != 0 {
|
||||
if len(mute.DatasourceIdsJson) != 0 && mute.DatasourceIdsJson[0] != 0 && event.DatasourceId != 0 {
|
||||
idm := make(map[int64]struct{}, len(mute.DatasourceIdsJson))
|
||||
for i := 0; i < len(mute.DatasourceIdsJson); i++ {
|
||||
idm[mute.DatasourceIdsJson[i]] = struct{}{}
|
||||
@@ -172,7 +172,7 @@ func matchMute(event *models.AlertCurEvent, mute *models.AlertMute, clock ...int
|
||||
|
||||
for i := 0; i < len(mute.PeriodicMutesJson); i++ {
|
||||
if strings.Contains(mute.PeriodicMutesJson[i].EnableDaysOfWeek, triggerWeek) {
|
||||
if mute.PeriodicMutesJson[i].EnableStime == mute.PeriodicMutesJson[i].EnableEtime {
|
||||
if mute.PeriodicMutesJson[i].EnableStime == mute.PeriodicMutesJson[i].EnableEtime || (mute.PeriodicMutesJson[i].EnableStime == "00:00" && mute.PeriodicMutesJson[i].EnableEtime == "23:59") {
|
||||
matchTime = true
|
||||
break
|
||||
} else if mute.PeriodicMutesJson[i].EnableStime < mute.PeriodicMutesJson[i].EnableEtime {
|
||||
@@ -209,5 +209,9 @@ func matchMute(event *models.AlertCurEvent, mute *models.AlertMute, clock ...int
|
||||
return false
|
||||
}
|
||||
|
||||
if mute.ITags == nil || len(mute.ITags) == 0 {
|
||||
return true
|
||||
}
|
||||
|
||||
return common.MatchTags(event.TagsMap, mute.ITags)
|
||||
}
|
||||
|
||||
@@ -12,12 +12,12 @@ const NodeReplicas = 500
|
||||
|
||||
type DatasourceHashRingType struct {
|
||||
sync.RWMutex
|
||||
Rings map[int64]*consistent.Consistent
|
||||
Rings map[string]*consistent.Consistent
|
||||
}
|
||||
|
||||
// for alert_rule sharding
|
||||
var HostDatasource int64 = 99999999
|
||||
var DatasourceHashRing = DatasourceHashRingType{Rings: make(map[int64]*consistent.Consistent)}
|
||||
var DatasourceHashRing = DatasourceHashRingType{Rings: make(map[string]*consistent.Consistent)}
|
||||
|
||||
func NewConsistentHashRing(replicas int32, nodes []string) *consistent.Consistent {
|
||||
ret := consistent.New()
|
||||
@@ -28,7 +28,7 @@ func NewConsistentHashRing(replicas int32, nodes []string) *consistent.Consisten
|
||||
return ret
|
||||
}
|
||||
|
||||
func RebuildConsistentHashRing(datasourceId int64, nodes []string) {
|
||||
func RebuildConsistentHashRing(datasourceId string, nodes []string) {
|
||||
r := consistent.New()
|
||||
r.NumberOfReplicas = NodeReplicas
|
||||
for i := 0; i < len(nodes); i++ {
|
||||
@@ -36,10 +36,10 @@ func RebuildConsistentHashRing(datasourceId int64, nodes []string) {
|
||||
}
|
||||
|
||||
DatasourceHashRing.Set(datasourceId, r)
|
||||
logger.Infof("hash ring %d rebuild %+v", datasourceId, r.Members())
|
||||
logger.Infof("hash ring %s rebuild %+v", datasourceId, r.Members())
|
||||
}
|
||||
|
||||
func (chr *DatasourceHashRingType) GetNode(datasourceId int64, pk string) (string, error) {
|
||||
func (chr *DatasourceHashRingType) GetNode(datasourceId string, pk string) (string, error) {
|
||||
chr.Lock()
|
||||
defer chr.Unlock()
|
||||
_, exists := chr.Rings[datasourceId]
|
||||
@@ -50,28 +50,34 @@ func (chr *DatasourceHashRingType) GetNode(datasourceId int64, pk string) (strin
|
||||
return chr.Rings[datasourceId].Get(pk)
|
||||
}
|
||||
|
||||
func (chr *DatasourceHashRingType) IsHit(datasourceId int64, pk string, currentNode string) bool {
|
||||
func (chr *DatasourceHashRingType) IsHit(datasourceId string, pk string, currentNode string) bool {
|
||||
node, err := chr.GetNode(datasourceId, pk)
|
||||
if err != nil {
|
||||
if !errors.Is(err, consistent.ErrEmptyCircle) {
|
||||
logger.Errorf("rule id:%s is not work, datasource id:%d failed to get node from hashring:%v", pk, datasourceId, err)
|
||||
logger.Errorf("rule id:%s is not work, datasource id:%s failed to get node from hashring:%v", pk, datasourceId, err)
|
||||
}
|
||||
return false
|
||||
}
|
||||
return node == currentNode
|
||||
}
|
||||
|
||||
func (chr *DatasourceHashRingType) Set(datasourceId int64, r *consistent.Consistent) {
|
||||
func (chr *DatasourceHashRingType) Set(datasourceId string, r *consistent.Consistent) {
|
||||
chr.Lock()
|
||||
defer chr.Unlock()
|
||||
chr.Rings[datasourceId] = r
|
||||
}
|
||||
|
||||
func (chr *DatasourceHashRingType) Clear() {
|
||||
func (chr *DatasourceHashRingType) Del(datasourceId string) {
|
||||
chr.Lock()
|
||||
defer chr.Unlock()
|
||||
delete(chr.Rings, datasourceId)
|
||||
}
|
||||
|
||||
func (chr *DatasourceHashRingType) Clear(engineName string) {
|
||||
chr.Lock()
|
||||
defer chr.Unlock()
|
||||
for id := range chr.Rings {
|
||||
if id == HostDatasource {
|
||||
if id == engineName {
|
||||
continue
|
||||
}
|
||||
delete(chr.Rings, id)
|
||||
|
||||
@@ -33,9 +33,11 @@ func NewNaming(ctx *ctx.Context, heartbeat aconf.HeartbeatConfig, alertStats *as
|
||||
|
||||
// local servers
|
||||
var localss map[int64]string
|
||||
var localHostServers map[string]string
|
||||
|
||||
func (n *Naming) Heartbeats() error {
|
||||
localss = make(map[int64]string)
|
||||
localHostServers = make(map[string]string)
|
||||
if err := n.heartbeat(); err != nil {
|
||||
fmt.Println("failed to heartbeat:", err)
|
||||
return err
|
||||
@@ -102,16 +104,15 @@ func (n *Naming) heartbeat() error {
|
||||
}
|
||||
|
||||
if len(datasourceIds) == 0 {
|
||||
DatasourceHashRing.Clear()
|
||||
DatasourceHashRing.Clear(n.heartbeatConfig.EngineName)
|
||||
for dsId := range localss {
|
||||
if dsId == HostDatasource {
|
||||
continue
|
||||
}
|
||||
delete(localss, dsId)
|
||||
}
|
||||
}
|
||||
|
||||
newDatasource := make(map[int64]struct{})
|
||||
for i := 0; i < len(datasourceIds); i++ {
|
||||
newDatasource[datasourceIds[i]] = struct{}{}
|
||||
servers, err := n.ActiveServers(datasourceIds[i])
|
||||
if err != nil {
|
||||
logger.Warningf("hearbeat %d get active server err:%v", datasourceIds[i], err)
|
||||
@@ -127,38 +128,42 @@ func (n *Naming) heartbeat() error {
|
||||
continue
|
||||
}
|
||||
|
||||
RebuildConsistentHashRing(datasourceIds[i], servers)
|
||||
RebuildConsistentHashRing(fmt.Sprintf("%d", datasourceIds[i]), servers)
|
||||
localss[datasourceIds[i]] = newss
|
||||
}
|
||||
|
||||
if n.ctx.IsCenter {
|
||||
// 如果是中心节点,还需要处理 host 类型的告警规则,host 类型告警规则,和数据源无关,想复用下数据源的 hash ring,想用一个虚假的数据源 id 来处理
|
||||
// if is center node, we need to handle host type alerting rules, host type alerting rules are not related to datasource, we want to reuse the hash ring of datasource, we want to use a fake datasource id to handle it
|
||||
err := models.AlertingEngineHeartbeatWithCluster(n.ctx, n.heartbeatConfig.Endpoint, n.heartbeatConfig.EngineName, HostDatasource)
|
||||
if err != nil {
|
||||
logger.Warningf("heartbeat with cluster %s err:%v", "", err)
|
||||
n.astats.CounterHeartbeatErrorTotal.WithLabelValues().Inc()
|
||||
for dsId := range localss {
|
||||
if _, exists := newDatasource[dsId]; !exists {
|
||||
delete(localss, dsId)
|
||||
DatasourceHashRing.Del(fmt.Sprintf("%d", dsId))
|
||||
}
|
||||
|
||||
servers, err := n.ActiveServers(HostDatasource)
|
||||
if err != nil {
|
||||
logger.Warningf("hearbeat %d get active server err:%v", HostDatasource, err)
|
||||
n.astats.CounterHeartbeatErrorTotal.WithLabelValues().Inc()
|
||||
return nil
|
||||
}
|
||||
|
||||
sort.Strings(servers)
|
||||
newss := strings.Join(servers, " ")
|
||||
|
||||
oldss, exists := localss[HostDatasource]
|
||||
if exists && oldss == newss {
|
||||
return nil
|
||||
}
|
||||
|
||||
RebuildConsistentHashRing(HostDatasource, servers)
|
||||
localss[HostDatasource] = newss
|
||||
}
|
||||
|
||||
// host 告警使用的是 hash ring
|
||||
err = models.AlertingEngineHeartbeatWithCluster(n.ctx, n.heartbeatConfig.Endpoint, n.heartbeatConfig.EngineName, HostDatasource)
|
||||
if err != nil {
|
||||
logger.Warningf("heartbeat with cluster %s err:%v", "", err)
|
||||
n.astats.CounterHeartbeatErrorTotal.WithLabelValues().Inc()
|
||||
}
|
||||
|
||||
servers, err := n.ActiveServersByEngineName()
|
||||
if err != nil {
|
||||
logger.Warningf("hearbeat %d get active server err:%v", HostDatasource, err)
|
||||
n.astats.CounterHeartbeatErrorTotal.WithLabelValues().Inc()
|
||||
return nil
|
||||
}
|
||||
|
||||
sort.Strings(servers)
|
||||
newss := strings.Join(servers, " ")
|
||||
|
||||
oldss, exists := localHostServers[n.heartbeatConfig.EngineName]
|
||||
if exists && oldss == newss {
|
||||
return nil
|
||||
}
|
||||
|
||||
RebuildConsistentHashRing(n.heartbeatConfig.EngineName, servers)
|
||||
localHostServers[n.heartbeatConfig.EngineName] = newss
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
|
||||
28
alert/naming/leader.go
Normal file
28
alert/naming/leader.go
Normal file
@@ -0,0 +1,28 @@
|
||||
package naming
|
||||
|
||||
import (
|
||||
"sort"
|
||||
|
||||
"github.com/toolkits/pkg/logger"
|
||||
)
|
||||
|
||||
func (n *Naming) IamLeader() bool {
|
||||
if !n.ctx.IsCenter {
|
||||
return false
|
||||
}
|
||||
|
||||
servers, err := n.ActiveServersByEngineName()
|
||||
if err != nil {
|
||||
logger.Errorf("failed to get active servers: %v", err)
|
||||
return false
|
||||
}
|
||||
|
||||
if len(servers) == 0 {
|
||||
logger.Errorf("active servers empty")
|
||||
return false
|
||||
}
|
||||
|
||||
sort.Strings(servers)
|
||||
|
||||
return n.heartbeatConfig.Endpoint == servers[0]
|
||||
}
|
||||
@@ -18,6 +18,9 @@ import (
|
||||
"github.com/ccfos/nightingale/v6/models"
|
||||
"github.com/ccfos/nightingale/v6/pkg/ctx"
|
||||
"github.com/ccfos/nightingale/v6/pkg/tplx"
|
||||
"github.com/ccfos/nightingale/v6/pushgw/writer"
|
||||
|
||||
"github.com/prometheus/prometheus/prompb"
|
||||
"github.com/toolkits/pkg/logger"
|
||||
"github.com/toolkits/pkg/str"
|
||||
)
|
||||
@@ -48,6 +51,7 @@ type HandleEventFunc func(event *models.AlertCurEvent)
|
||||
|
||||
type Processor struct {
|
||||
datasourceId int64
|
||||
EngineName string
|
||||
|
||||
rule *models.AlertRule
|
||||
fires *AlertCurEventMap
|
||||
@@ -60,11 +64,12 @@ type Processor struct {
|
||||
targetNote string
|
||||
groupName string
|
||||
|
||||
atertRuleCache *memsto.AlertRuleCacheType
|
||||
TargetCache *memsto.TargetCacheType
|
||||
BusiGroupCache *memsto.BusiGroupCacheType
|
||||
alertMuteCache *memsto.AlertMuteCacheType
|
||||
datasourceCache *memsto.DatasourceCacheType
|
||||
alertRuleCache *memsto.AlertRuleCacheType
|
||||
TargetCache *memsto.TargetCacheType
|
||||
TargetsOfAlertRuleCache *memsto.TargetsOfAlertRuleCacheType
|
||||
BusiGroupCache *memsto.BusiGroupCacheType
|
||||
alertMuteCache *memsto.AlertMuteCacheType
|
||||
datasourceCache *memsto.DatasourceCacheType
|
||||
|
||||
ctx *ctx.Context
|
||||
Stats *astats.Stats
|
||||
@@ -91,19 +96,22 @@ func (p *Processor) Hash() string {
|
||||
))
|
||||
}
|
||||
|
||||
func NewProcessor(rule *models.AlertRule, datasourceId int64, atertRuleCache *memsto.AlertRuleCacheType, targetCache *memsto.TargetCacheType,
|
||||
func NewProcessor(engineName string, rule *models.AlertRule, datasourceId int64, alertRuleCache *memsto.AlertRuleCacheType,
|
||||
targetCache *memsto.TargetCacheType, targetsOfAlertRuleCache *memsto.TargetsOfAlertRuleCacheType,
|
||||
busiGroupCache *memsto.BusiGroupCacheType, alertMuteCache *memsto.AlertMuteCacheType, datasourceCache *memsto.DatasourceCacheType, ctx *ctx.Context,
|
||||
stats *astats.Stats) *Processor {
|
||||
|
||||
p := &Processor{
|
||||
EngineName: engineName,
|
||||
datasourceId: datasourceId,
|
||||
rule: rule,
|
||||
|
||||
TargetCache: targetCache,
|
||||
BusiGroupCache: busiGroupCache,
|
||||
alertMuteCache: alertMuteCache,
|
||||
atertRuleCache: atertRuleCache,
|
||||
datasourceCache: datasourceCache,
|
||||
TargetCache: targetCache,
|
||||
TargetsOfAlertRuleCache: targetsOfAlertRuleCache,
|
||||
BusiGroupCache: busiGroupCache,
|
||||
alertMuteCache: alertMuteCache,
|
||||
alertRuleCache: alertRuleCache,
|
||||
datasourceCache: datasourceCache,
|
||||
|
||||
ctx: ctx,
|
||||
Stats: stats,
|
||||
@@ -122,7 +130,7 @@ func (p *Processor) Handle(anomalyPoints []common.AnomalyPoint, from string, inh
|
||||
// 这些信息的修改是不会引起worker restart的,但是确实会影响告警处理逻辑
|
||||
// 所以,这里直接从memsto.AlertRuleCache中获取并覆盖
|
||||
p.inhibit = inhibit
|
||||
cachedRule := p.atertRuleCache.Get(p.rule.Id)
|
||||
cachedRule := p.alertRuleCache.Get(p.rule.Id)
|
||||
if cachedRule == nil {
|
||||
logger.Errorf("rule not found %+v", anomalyPoints)
|
||||
p.Stats.CounterRuleEvalErrorTotal.WithLabelValues(fmt.Sprintf("%v", p.DatasourceId()), "handle_event").Inc()
|
||||
@@ -140,9 +148,10 @@ func (p *Processor) Handle(anomalyPoints []common.AnomalyPoint, from string, inh
|
||||
// 如果 event 被 mute 了,本质也是 fire 的状态,这里无论如何都添加到 alertingKeys 中,防止 fire 的事件自动恢复了
|
||||
hash := event.Hash
|
||||
alertingKeys[hash] = struct{}{}
|
||||
if mute.IsMuted(cachedRule, event, p.TargetCache, p.alertMuteCache) {
|
||||
isMuted, detail := mute.IsMuted(cachedRule, event, p.TargetCache, p.alertMuteCache)
|
||||
if isMuted {
|
||||
p.Stats.CounterMuteTotal.WithLabelValues(event.GroupName).Inc()
|
||||
logger.Debugf("rule_eval:%s event:%v is muted", p.Key(), event)
|
||||
logger.Debugf("rule_eval:%s event:%v is muted, detail:%s", p.Key(), event, detail)
|
||||
continue
|
||||
}
|
||||
|
||||
@@ -160,7 +169,7 @@ func (p *Processor) Handle(anomalyPoints []common.AnomalyPoint, from string, inh
|
||||
p.handleEvent(events)
|
||||
}
|
||||
|
||||
p.HandleRecover(alertingKeys, now)
|
||||
p.HandleRecover(alertingKeys, now, inhibit)
|
||||
}
|
||||
|
||||
func (p *Processor) BuildEvent(anomalyPoint common.AnomalyPoint, from string, now int64) *models.AlertCurEvent {
|
||||
@@ -173,9 +182,13 @@ func (p *Processor) BuildEvent(anomalyPoint common.AnomalyPoint, from string, no
|
||||
dsName = ds.Name
|
||||
}
|
||||
|
||||
bg := p.BusiGroupCache.GetByBusiGroupId(p.rule.GroupId)
|
||||
|
||||
event := p.rule.GenerateNewEvent(p.ctx)
|
||||
|
||||
bg := p.BusiGroupCache.GetByBusiGroupId(p.rule.GroupId)
|
||||
if bg != nil {
|
||||
event.GroupName = bg.Name
|
||||
}
|
||||
|
||||
event.TriggerTime = anomalyPoint.Timestamp
|
||||
event.TagsMap = p.tagsMap
|
||||
event.DatasourceId = p.datasourceId
|
||||
@@ -184,8 +197,8 @@ func (p *Processor) BuildEvent(anomalyPoint common.AnomalyPoint, from string, no
|
||||
event.TargetIdent = p.target
|
||||
event.TargetNote = p.targetNote
|
||||
event.TriggerValue = anomalyPoint.ReadableValue()
|
||||
event.TriggerValues = anomalyPoint.Values
|
||||
event.TagsJSON = p.tagsArr
|
||||
event.GroupName = bg.Name
|
||||
event.Tags = strings.Join(p.tagsArr, ",,")
|
||||
event.IsRecovered = false
|
||||
event.Callbacks = p.rule.Callbacks
|
||||
@@ -198,15 +211,68 @@ func (p *Processor) BuildEvent(anomalyPoint common.AnomalyPoint, from string, no
|
||||
event.ExtraConfig = p.rule.ExtraConfigJSON
|
||||
event.PromQl = anomalyPoint.Query
|
||||
|
||||
if event.TriggerValues != "" && strings.Count(event.TriggerValues, "$") > 1 {
|
||||
// TriggerValues 有多个变量,将多个变量都放到 TriggerValue 中
|
||||
event.TriggerValue = event.TriggerValues
|
||||
}
|
||||
|
||||
if from == "inner" {
|
||||
event.LastEvalTime = now
|
||||
} else {
|
||||
event.LastEvalTime = event.TriggerTime
|
||||
}
|
||||
|
||||
// 生成事件之后,立马进程 relabel 处理
|
||||
Relabel(p.rule, event)
|
||||
return event
|
||||
}
|
||||
|
||||
func (p *Processor) HandleRecover(alertingKeys map[string]struct{}, now int64) {
|
||||
func Relabel(rule *models.AlertRule, event *models.AlertCurEvent) {
|
||||
if rule == nil {
|
||||
return
|
||||
}
|
||||
|
||||
// need to keep the original label
|
||||
event.OriginalTags = event.Tags
|
||||
event.OriginalTagsJSON = make([]string, len(event.TagsJSON))
|
||||
|
||||
labels := make([]prompb.Label, len(event.TagsJSON))
|
||||
for i, tag := range event.TagsJSON {
|
||||
label := strings.Split(tag, "=")
|
||||
if len(label) != 2 {
|
||||
logger.Errorf("event%+v relabel: the label length is not 2:%v", event, label)
|
||||
continue
|
||||
}
|
||||
event.OriginalTagsJSON[i] = tag
|
||||
labels[i] = prompb.Label{Name: label[0], Value: label[1]}
|
||||
}
|
||||
|
||||
for i := 0; i < len(rule.EventRelabelConfig); i++ {
|
||||
if rule.EventRelabelConfig[i].Replacement == "" {
|
||||
rule.EventRelabelConfig[i].Replacement = "$1"
|
||||
}
|
||||
|
||||
if rule.EventRelabelConfig[i].Separator == "" {
|
||||
rule.EventRelabelConfig[i].Separator = ";"
|
||||
}
|
||||
|
||||
if rule.EventRelabelConfig[i].Regex == "" {
|
||||
rule.EventRelabelConfig[i].Regex = "(.*)"
|
||||
}
|
||||
}
|
||||
|
||||
// relabel process
|
||||
relabels := writer.Process(labels, rule.EventRelabelConfig...)
|
||||
event.TagsJSON = make([]string, len(relabels))
|
||||
event.TagsMap = make(map[string]string, len(relabels))
|
||||
for i, label := range relabels {
|
||||
event.TagsJSON[i] = fmt.Sprintf("%s=%s", label.Name, label.Value)
|
||||
event.TagsMap[label.Name] = label.Value
|
||||
}
|
||||
event.Tags = strings.Join(event.TagsJSON, ",,")
|
||||
}
|
||||
|
||||
func (p *Processor) HandleRecover(alertingKeys map[string]struct{}, now int64, inhibit bool) {
|
||||
for _, hash := range p.pendings.Keys() {
|
||||
if _, has := alertingKeys[hash]; has {
|
||||
continue
|
||||
@@ -214,19 +280,64 @@ func (p *Processor) HandleRecover(alertingKeys map[string]struct{}, now int64) {
|
||||
p.pendings.Delete(hash)
|
||||
}
|
||||
|
||||
hashArr := make([]string, 0, len(alertingKeys))
|
||||
for hash := range p.fires.GetAll() {
|
||||
if _, has := alertingKeys[hash]; has {
|
||||
continue
|
||||
}
|
||||
p.RecoverSingle(hash, now, nil)
|
||||
|
||||
hashArr = append(hashArr, hash)
|
||||
}
|
||||
p.HandleRecoverEvent(hashArr, now, inhibit)
|
||||
|
||||
}
|
||||
|
||||
func (p *Processor) RecoverSingle(hash string, now int64, value *string) {
|
||||
func (p *Processor) HandleRecoverEvent(hashArr []string, now int64, inhibit bool) {
|
||||
cachedRule := p.rule
|
||||
if cachedRule == nil {
|
||||
return
|
||||
}
|
||||
|
||||
if !inhibit {
|
||||
for _, hash := range hashArr {
|
||||
p.RecoverSingle(hash, now, nil)
|
||||
}
|
||||
return
|
||||
}
|
||||
|
||||
eventMap := make(map[string]models.AlertCurEvent)
|
||||
for _, hash := range hashArr {
|
||||
event, has := p.fires.Get(hash)
|
||||
if !has {
|
||||
continue
|
||||
}
|
||||
|
||||
e, exists := eventMap[event.Tags]
|
||||
if !exists {
|
||||
eventMap[event.Tags] = *event
|
||||
continue
|
||||
}
|
||||
|
||||
if e.Severity > event.Severity {
|
||||
// hash 对应的恢复事件的被抑制了,把之前的事件删除
|
||||
p.fires.Delete(e.Hash)
|
||||
p.pendings.Delete(e.Hash)
|
||||
models.AlertCurEventDelByHash(p.ctx, e.Hash)
|
||||
eventMap[event.Tags] = *event
|
||||
}
|
||||
}
|
||||
|
||||
for _, event := range eventMap {
|
||||
p.RecoverSingle(event.Hash, now, nil)
|
||||
}
|
||||
}
|
||||
|
||||
func (p *Processor) RecoverSingle(hash string, now int64, value *string, values ...string) {
|
||||
cachedRule := p.rule
|
||||
if cachedRule == nil {
|
||||
return
|
||||
}
|
||||
|
||||
event, has := p.fires.Get(hash)
|
||||
if !has {
|
||||
return
|
||||
@@ -238,6 +349,9 @@ func (p *Processor) RecoverSingle(hash string, now int64, value *string) {
|
||||
}
|
||||
if value != nil {
|
||||
event.TriggerValue = *value
|
||||
if len(values) > 0 {
|
||||
event.TriggerValues = values[0]
|
||||
}
|
||||
}
|
||||
|
||||
// 没查到触发阈值的vector,姑且就认为这个vector的值恢复了
|
||||
@@ -374,6 +488,14 @@ func (p *Processor) RecoverAlertCurEventFromDb() {
|
||||
|
||||
fireMap := make(map[string]*models.AlertCurEvent)
|
||||
for _, event := range curEvents {
|
||||
if event.Cate == models.HOST {
|
||||
target, exists := p.TargetCache.Get(event.TargetIdent)
|
||||
if exists && target.EngineName != p.EngineName && !(p.ctx.IsCenter && target.EngineName == "") {
|
||||
// 如果是 host rule,且 target 的 engineName 不是当前的 engineName 或者是中心机房 target EngineName 为空,就跳过
|
||||
continue
|
||||
}
|
||||
}
|
||||
|
||||
event.DB2Mem()
|
||||
fireMap[event.Hash] = event
|
||||
}
|
||||
@@ -451,6 +573,11 @@ func (p *Processor) mayHandleGroup() {
|
||||
}
|
||||
}
|
||||
|
||||
func (p *Processor) DeleteProcessEvent(hash string) {
|
||||
p.fires.Delete(hash)
|
||||
p.pendings.Delete(hash)
|
||||
}
|
||||
|
||||
func labelMapToArr(m map[string]string) []string {
|
||||
numLabels := len(m)
|
||||
|
||||
|
||||
@@ -10,6 +10,7 @@ import (
|
||||
"github.com/ccfos/nightingale/v6/models"
|
||||
"github.com/ccfos/nightingale/v6/prom"
|
||||
"github.com/ccfos/nightingale/v6/pushgw/writer"
|
||||
"github.com/robfig/cron/v3"
|
||||
|
||||
"github.com/toolkits/pkg/logger"
|
||||
"github.com/toolkits/pkg/str"
|
||||
@@ -19,19 +20,35 @@ type RecordRuleContext struct {
|
||||
datasourceId int64
|
||||
quit chan struct{}
|
||||
|
||||
scheduler *cron.Cron
|
||||
rule *models.RecordingRule
|
||||
promClients *prom.PromClientMap
|
||||
stats *astats.Stats
|
||||
}
|
||||
|
||||
func NewRecordRuleContext(rule *models.RecordingRule, datasourceId int64, promClients *prom.PromClientMap, writers *writer.WritersType, stats *astats.Stats) *RecordRuleContext {
|
||||
return &RecordRuleContext{
|
||||
rrc := &RecordRuleContext{
|
||||
datasourceId: datasourceId,
|
||||
quit: make(chan struct{}),
|
||||
rule: rule,
|
||||
promClients: promClients,
|
||||
stats: stats,
|
||||
}
|
||||
|
||||
if rule.CronPattern == "" && rule.PromEvalInterval != 0 {
|
||||
rule.CronPattern = fmt.Sprintf("@every %ds", rule.PromEvalInterval)
|
||||
}
|
||||
|
||||
rrc.scheduler = cron.New(cron.WithSeconds())
|
||||
_, err := rrc.scheduler.AddFunc(rule.CronPattern, func() {
|
||||
rrc.Eval()
|
||||
})
|
||||
|
||||
if err != nil {
|
||||
logger.Errorf("add cron pattern error: %v", err)
|
||||
}
|
||||
|
||||
return rrc
|
||||
}
|
||||
|
||||
func (rrc *RecordRuleContext) Key() string {
|
||||
@@ -39,9 +56,9 @@ func (rrc *RecordRuleContext) Key() string {
|
||||
}
|
||||
|
||||
func (rrc *RecordRuleContext) Hash() string {
|
||||
return str.MD5(fmt.Sprintf("%d_%d_%s_%d",
|
||||
return str.MD5(fmt.Sprintf("%d_%s_%s_%d",
|
||||
rrc.rule.Id,
|
||||
rrc.rule.PromEvalInterval,
|
||||
rrc.rule.CronPattern,
|
||||
rrc.rule.PromQl,
|
||||
rrc.datasourceId,
|
||||
))
|
||||
@@ -51,23 +68,7 @@ func (rrc *RecordRuleContext) Prepare() {}
|
||||
|
||||
func (rrc *RecordRuleContext) Start() {
|
||||
logger.Infof("eval:%s started", rrc.Key())
|
||||
interval := rrc.rule.PromEvalInterval
|
||||
if interval <= 0 {
|
||||
interval = 10
|
||||
}
|
||||
|
||||
ticker := time.NewTicker(time.Duration(interval) * time.Second)
|
||||
go func() {
|
||||
defer ticker.Stop()
|
||||
for {
|
||||
select {
|
||||
case <-rrc.quit:
|
||||
return
|
||||
case <-ticker.C:
|
||||
rrc.Eval()
|
||||
}
|
||||
}
|
||||
}()
|
||||
rrc.scheduler.Start()
|
||||
}
|
||||
|
||||
func (rrc *RecordRuleContext) Eval() {
|
||||
@@ -109,5 +110,8 @@ func (rrc *RecordRuleContext) Eval() {
|
||||
|
||||
func (rrc *RecordRuleContext) Stop() {
|
||||
logger.Infof("%s stopped", rrc.Key())
|
||||
|
||||
c := rrc.scheduler.Stop()
|
||||
<-c.Done()
|
||||
close(rrc.quit)
|
||||
}
|
||||
|
||||
@@ -3,6 +3,7 @@ package record
|
||||
import (
|
||||
"context"
|
||||
"fmt"
|
||||
"strconv"
|
||||
"time"
|
||||
|
||||
"github.com/ccfos/nightingale/v6/alert/aconf"
|
||||
@@ -68,7 +69,7 @@ func (s *Scheduler) syncRecordRules() {
|
||||
|
||||
datasourceIds := s.promClients.Hit(rule.DatasourceIdsJson)
|
||||
for _, dsId := range datasourceIds {
|
||||
if !naming.DatasourceHashRing.IsHit(dsId, fmt.Sprintf("%d", rule.Id), s.aconf.Heartbeat.Endpoint) {
|
||||
if !naming.DatasourceHashRing.IsHit(strconv.FormatInt(dsId, 10), fmt.Sprintf("%d", rule.Id), s.aconf.Heartbeat.Endpoint) {
|
||||
continue
|
||||
}
|
||||
|
||||
|
||||
@@ -2,6 +2,7 @@ package router
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"strconv"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
@@ -102,7 +103,7 @@ func (rt *Router) makeEvent(c *gin.Context) {
|
||||
ginx.BindJSON(c, &events)
|
||||
//now := time.Now().Unix()
|
||||
for i := 0; i < len(events); i++ {
|
||||
node, err := naming.DatasourceHashRing.GetNode(events[i].DatasourceId, fmt.Sprintf("%d", events[i].RuleId))
|
||||
node, err := naming.DatasourceHashRing.GetNode(strconv.FormatInt(events[i].DatasourceId, 10), fmt.Sprintf("%d", events[i].RuleId))
|
||||
if err != nil {
|
||||
logger.Warningf("event:%+v get node err:%v", events[i], err)
|
||||
ginx.Bomb(200, "event node not exists")
|
||||
|
||||
@@ -1,64 +1,156 @@
|
||||
package sender
|
||||
|
||||
import (
|
||||
"encoding/json"
|
||||
"strconv"
|
||||
"html/template"
|
||||
"net/url"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
"github.com/ccfos/nightingale/v6/alert/aconf"
|
||||
"github.com/ccfos/nightingale/v6/alert/astats"
|
||||
"github.com/ccfos/nightingale/v6/memsto"
|
||||
"github.com/ccfos/nightingale/v6/models"
|
||||
"github.com/ccfos/nightingale/v6/pkg/ctx"
|
||||
"github.com/ccfos/nightingale/v6/pkg/ibex"
|
||||
"github.com/ccfos/nightingale/v6/pkg/poster"
|
||||
|
||||
"github.com/toolkits/pkg/logger"
|
||||
)
|
||||
|
||||
func SendCallbacks(ctx *ctx.Context, urls []string, event *models.AlertCurEvent, targetCache *memsto.TargetCacheType, userCache *memsto.UserCacheType,
|
||||
ibexConf aconf.Ibex, stats *astats.Stats) {
|
||||
for _, url := range urls {
|
||||
if url == "" {
|
||||
continue
|
||||
}
|
||||
type (
|
||||
// CallBacker 进行回调的接口
|
||||
CallBacker interface {
|
||||
CallBack(ctx CallBackContext)
|
||||
}
|
||||
|
||||
if strings.HasPrefix(url, "${ibex}") {
|
||||
if !event.IsRecovered {
|
||||
handleIbex(ctx, url, event, targetCache, userCache, ibexConf)
|
||||
}
|
||||
continue
|
||||
}
|
||||
// CallBackContext 回调时所需的上下文
|
||||
CallBackContext struct {
|
||||
Ctx *ctx.Context
|
||||
CallBackURL string
|
||||
Users []*models.User
|
||||
Rule *models.AlertRule
|
||||
Events []*models.AlertCurEvent
|
||||
Stats *astats.Stats
|
||||
BatchSend bool
|
||||
}
|
||||
|
||||
if !(strings.HasPrefix(url, "http://") || strings.HasPrefix(url, "https://")) {
|
||||
url = "http://" + url
|
||||
}
|
||||
DefaultCallBacker struct{}
|
||||
)
|
||||
|
||||
stats.AlertNotifyTotal.WithLabelValues("rule_callback").Inc()
|
||||
resp, code, err := poster.PostJSON(url, 5*time.Second, event, 3)
|
||||
if err != nil {
|
||||
logger.Errorf("event_callback_fail(rule_id=%d url=%s), resp: %s, err: %v, code: %d", event.RuleId, url, string(resp), err, code)
|
||||
stats.AlertNotifyErrorTotal.WithLabelValues("rule_callback").Inc()
|
||||
} else {
|
||||
logger.Infof("event_callback_succ(rule_id=%d url=%s), resp: %s, code: %d", event.RuleId, url, string(resp), code)
|
||||
}
|
||||
func BuildCallBackContext(ctx *ctx.Context, callBackURL string, rule *models.AlertRule, events []*models.AlertCurEvent,
|
||||
uids []int64, userCache *memsto.UserCacheType, batchSend bool, stats *astats.Stats) CallBackContext {
|
||||
users := userCache.GetByUserIds(uids)
|
||||
|
||||
newCallBackUrl, _ := events[0].ParseURL(callBackURL)
|
||||
return CallBackContext{
|
||||
Ctx: ctx,
|
||||
CallBackURL: newCallBackUrl,
|
||||
Rule: rule,
|
||||
Events: events,
|
||||
Users: users,
|
||||
BatchSend: batchSend,
|
||||
Stats: stats,
|
||||
}
|
||||
}
|
||||
|
||||
type TaskForm struct {
|
||||
Title string `json:"title"`
|
||||
Account string `json:"account"`
|
||||
Batch int `json:"batch"`
|
||||
Tolerance int `json:"tolerance"`
|
||||
Timeout int `json:"timeout"`
|
||||
Pause string `json:"pause"`
|
||||
Script string `json:"script"`
|
||||
Args string `json:"args"`
|
||||
Stdin string `json:"stdin"`
|
||||
Action string `json:"action"`
|
||||
Creator string `json:"creator"`
|
||||
Hosts []string `json:"hosts"`
|
||||
func ExtractAtsParams(rawURL string) []string {
|
||||
ans := make([]string, 0, 1)
|
||||
parsedURL, err := url.Parse(rawURL)
|
||||
if err != nil {
|
||||
logger.Errorf("ExtractAtsParams(url=%s), err: %v", rawURL, err)
|
||||
return ans
|
||||
}
|
||||
|
||||
queryParams := parsedURL.Query()
|
||||
atParam := queryParams.Get("ats")
|
||||
if atParam == "" {
|
||||
return ans
|
||||
}
|
||||
|
||||
// Split the atParam by comma and return the result as a slice
|
||||
return strings.Split(atParam, ",")
|
||||
}
|
||||
|
||||
func NewCallBacker(
|
||||
key string,
|
||||
targetCache *memsto.TargetCacheType,
|
||||
userCache *memsto.UserCacheType,
|
||||
taskTplCache *memsto.TaskTplCache,
|
||||
tpls map[string]*template.Template,
|
||||
) CallBacker {
|
||||
|
||||
switch key {
|
||||
case models.IbexDomain: // Distribute to Ibex
|
||||
return &IbexCallBacker{
|
||||
targetCache: targetCache,
|
||||
userCache: userCache,
|
||||
taskTplCache: taskTplCache,
|
||||
}
|
||||
case models.DefaultDomain: // default callback
|
||||
return &DefaultCallBacker{}
|
||||
case models.DingtalkDomain:
|
||||
return &DingtalkSender{tpl: tpls[models.Dingtalk]}
|
||||
case models.WecomDomain:
|
||||
return &WecomSender{tpl: tpls[models.Wecom]}
|
||||
case models.FeishuDomain:
|
||||
return &FeishuSender{tpl: tpls[models.Feishu]}
|
||||
case models.FeishuCardDomain:
|
||||
return &FeishuCardSender{tpl: tpls[models.FeishuCard]}
|
||||
//case models.Mm:
|
||||
// return &MmSender{tpl: tpls[models.Mm]}
|
||||
case models.TelegramDomain:
|
||||
return &TelegramSender{tpl: tpls[models.Telegram]}
|
||||
case models.LarkDomain:
|
||||
return &LarkSender{tpl: tpls[models.Lark]}
|
||||
case models.LarkCardDomain:
|
||||
return &LarkCardSender{tpl: tpls[models.LarkCard]}
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func (c *DefaultCallBacker) CallBack(ctx CallBackContext) {
|
||||
if len(ctx.CallBackURL) == 0 || len(ctx.Events) == 0 {
|
||||
return
|
||||
}
|
||||
|
||||
event := ctx.Events[0]
|
||||
|
||||
if ctx.BatchSend {
|
||||
webhookConf := &models.Webhook{
|
||||
Type: models.RuleCallback,
|
||||
Enable: true,
|
||||
Url: ctx.CallBackURL,
|
||||
Timeout: 5,
|
||||
RetryCount: 3,
|
||||
RetryInterval: 10,
|
||||
Batch: 1000,
|
||||
}
|
||||
|
||||
PushCallbackEvent(webhookConf, event, ctx.Stats)
|
||||
return
|
||||
}
|
||||
|
||||
ctx.Stats.AlertNotifyTotal.WithLabelValues("rule_callback").Inc()
|
||||
resp, code, err := poster.PostJSON(ctx.CallBackURL, 5*time.Second, event, 3)
|
||||
if err != nil {
|
||||
logger.Errorf("event_callback_fail(rule_id=%d url=%s), event:%+v, resp: %s, err: %v, code: %d",
|
||||
event.RuleId, ctx.CallBackURL, event, string(resp), err, code)
|
||||
ctx.Stats.AlertNotifyErrorTotal.WithLabelValues("rule_callback").Inc()
|
||||
} else {
|
||||
logger.Infof("event_callback_succ(rule_id=%d url=%s), event:%+v, resp: %s, code: %d",
|
||||
event.RuleId, ctx.CallBackURL, event, string(resp), code)
|
||||
}
|
||||
}
|
||||
|
||||
func doSend(url string, body interface{}, channel string, stats *astats.Stats) {
|
||||
stats.AlertNotifyTotal.WithLabelValues(channel).Inc()
|
||||
|
||||
res, code, err := poster.PostJSON(url, time.Second*5, body, 3)
|
||||
if err != nil {
|
||||
logger.Errorf("%s_sender: result=fail url=%s code=%d error=%v req:%v response=%s", channel, url, code, err, body, string(res))
|
||||
stats.AlertNotifyErrorTotal.WithLabelValues(channel).Inc()
|
||||
} else {
|
||||
logger.Infof("%s_sender: result=succ url=%s code=%d req:%v response=%s", channel, url, code, body, string(res))
|
||||
}
|
||||
}
|
||||
|
||||
type TaskCreateReply struct {
|
||||
@@ -66,157 +158,26 @@ type TaskCreateReply struct {
|
||||
Dat int64 `json:"dat"` // task.id
|
||||
}
|
||||
|
||||
func handleIbex(ctx *ctx.Context, url string, event *models.AlertCurEvent, targetCache *memsto.TargetCacheType, userCache *memsto.UserCacheType, ibexConf aconf.Ibex) {
|
||||
arr := strings.Split(url, "/")
|
||||
func PushCallbackEvent(webhook *models.Webhook, event *models.AlertCurEvent, stats *astats.Stats) {
|
||||
CallbackEventQueueLock.RLock()
|
||||
queue := CallbackEventQueue[webhook.Url]
|
||||
CallbackEventQueueLock.RUnlock()
|
||||
|
||||
var idstr string
|
||||
var host string
|
||||
|
||||
if len(arr) > 1 {
|
||||
idstr = arr[1]
|
||||
}
|
||||
|
||||
if len(arr) > 2 {
|
||||
host = arr[2]
|
||||
}
|
||||
|
||||
id, err := strconv.ParseInt(idstr, 10, 64)
|
||||
if err != nil {
|
||||
logger.Errorf("event_callback_ibex: failed to parse url: %s", url)
|
||||
return
|
||||
}
|
||||
|
||||
if host == "" {
|
||||
// 用户在callback url中没有传入host,就从event中解析
|
||||
host = event.TargetIdent
|
||||
}
|
||||
|
||||
if host == "" {
|
||||
logger.Error("event_callback_ibex: failed to get host")
|
||||
return
|
||||
}
|
||||
|
||||
tpl, err := models.TaskTplGetById(ctx, id)
|
||||
if err != nil {
|
||||
logger.Errorf("event_callback_ibex: failed to get tpl: %v", err)
|
||||
return
|
||||
}
|
||||
|
||||
if tpl == nil {
|
||||
logger.Errorf("event_callback_ibex: no such tpl(%d)", id)
|
||||
return
|
||||
}
|
||||
|
||||
// check perm
|
||||
// tpl.GroupId - host - account 三元组校验权限
|
||||
can, err := canDoIbex(ctx, tpl.UpdateBy, tpl, host, targetCache, userCache)
|
||||
if err != nil {
|
||||
logger.Errorf("event_callback_ibex: check perm fail: %v", err)
|
||||
return
|
||||
}
|
||||
|
||||
if !can {
|
||||
logger.Errorf("event_callback_ibex: user(%s) no permission", tpl.UpdateBy)
|
||||
return
|
||||
}
|
||||
|
||||
tagsMap := make(map[string]string)
|
||||
for i := 0; i < len(event.TagsJSON); i++ {
|
||||
pair := strings.TrimSpace(event.TagsJSON[i])
|
||||
if pair == "" {
|
||||
continue
|
||||
if queue == nil {
|
||||
queue = &WebhookQueue{
|
||||
list: NewSafeListLimited(QueueMaxSize),
|
||||
closeCh: make(chan struct{}),
|
||||
}
|
||||
|
||||
arr := strings.Split(pair, "=")
|
||||
if len(arr) != 2 {
|
||||
continue
|
||||
}
|
||||
CallbackEventQueueLock.Lock()
|
||||
CallbackEventQueue[webhook.Url] = queue
|
||||
CallbackEventQueueLock.Unlock()
|
||||
|
||||
tagsMap[arr[0]] = arr[1]
|
||||
}
|
||||
// 附加告警级别 告警触发值标签
|
||||
tagsMap["alert_severity"] = strconv.Itoa(event.Severity)
|
||||
tagsMap["alert_trigger_value"] = event.TriggerValue
|
||||
|
||||
tags, err := json.Marshal(tagsMap)
|
||||
if err != nil {
|
||||
logger.Errorf("event_callback_ibex: failed to marshal tags to json: %v", tagsMap)
|
||||
return
|
||||
StartConsumer(queue, webhook.Batch, webhook, stats)
|
||||
}
|
||||
|
||||
// call ibex
|
||||
in := TaskForm{
|
||||
Title: tpl.Title + " FH: " + host,
|
||||
Account: tpl.Account,
|
||||
Batch: tpl.Batch,
|
||||
Tolerance: tpl.Tolerance,
|
||||
Timeout: tpl.Timeout,
|
||||
Pause: tpl.Pause,
|
||||
Script: tpl.Script,
|
||||
Args: tpl.Args,
|
||||
Stdin: string(tags),
|
||||
Action: "start",
|
||||
Creator: tpl.UpdateBy,
|
||||
Hosts: []string{host},
|
||||
}
|
||||
|
||||
var res TaskCreateReply
|
||||
err = ibex.New(
|
||||
ibexConf.Address,
|
||||
ibexConf.BasicAuthUser,
|
||||
ibexConf.BasicAuthPass,
|
||||
ibexConf.Timeout,
|
||||
).
|
||||
Path("/ibex/v1/tasks").
|
||||
In(in).
|
||||
Out(&res).
|
||||
POST()
|
||||
|
||||
if err != nil {
|
||||
logger.Errorf("event_callback_ibex: call ibex fail: %v", err)
|
||||
return
|
||||
}
|
||||
|
||||
if res.Err != "" {
|
||||
logger.Errorf("event_callback_ibex: call ibex response error: %v", res.Err)
|
||||
return
|
||||
}
|
||||
|
||||
// write db
|
||||
record := models.TaskRecord{
|
||||
Id: res.Dat,
|
||||
EventId: event.Id,
|
||||
GroupId: tpl.GroupId,
|
||||
IbexAddress: ibexConf.Address,
|
||||
IbexAuthUser: ibexConf.BasicAuthUser,
|
||||
IbexAuthPass: ibexConf.BasicAuthPass,
|
||||
Title: in.Title,
|
||||
Account: in.Account,
|
||||
Batch: in.Batch,
|
||||
Tolerance: in.Tolerance,
|
||||
Timeout: in.Timeout,
|
||||
Pause: in.Pause,
|
||||
Script: in.Script,
|
||||
Args: in.Args,
|
||||
CreateAt: time.Now().Unix(),
|
||||
CreateBy: in.Creator,
|
||||
}
|
||||
|
||||
if err = record.Add(ctx); err != nil {
|
||||
logger.Errorf("event_callback_ibex: persist task_record fail: %v", err)
|
||||
succ := queue.list.PushFront(event)
|
||||
if !succ {
|
||||
logger.Warningf("Write channel(%s) full, current channel size: %d event:%v", webhook.Url, queue.list.Len(), event)
|
||||
}
|
||||
}
|
||||
|
||||
func canDoIbex(ctx *ctx.Context, username string, tpl *models.TaskTpl, host string, targetCache *memsto.TargetCacheType, userCache *memsto.UserCacheType) (bool, error) {
|
||||
user := userCache.GetByUsername(username)
|
||||
if user != nil && user.IsAdmin() {
|
||||
return true, nil
|
||||
}
|
||||
|
||||
target, has := targetCache.Get(host)
|
||||
if !has {
|
||||
return false, nil
|
||||
}
|
||||
|
||||
return target.GroupId == tpl.GroupId, nil
|
||||
}
|
||||
|
||||
@@ -1,15 +1,9 @@
|
||||
package sender
|
||||
|
||||
import (
|
||||
"github.com/ccfos/nightingale/v6/models"
|
||||
"html/template"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
"github.com/ccfos/nightingale/v6/alert/astats"
|
||||
"github.com/ccfos/nightingale/v6/models"
|
||||
"github.com/ccfos/nightingale/v6/pkg/poster"
|
||||
|
||||
"github.com/toolkits/pkg/logger"
|
||||
)
|
||||
|
||||
type dingtalkMarkdown struct {
|
||||
@@ -28,6 +22,10 @@ type dingtalk struct {
|
||||
At dingtalkAt `json:"at"`
|
||||
}
|
||||
|
||||
var (
|
||||
_ CallBacker = (*DingtalkSender)(nil)
|
||||
)
|
||||
|
||||
type DingtalkSender struct {
|
||||
tpl *template.Template
|
||||
}
|
||||
@@ -72,6 +70,37 @@ func (ds *DingtalkSender) Send(ctx MessageContext) {
|
||||
}
|
||||
}
|
||||
|
||||
func (ds *DingtalkSender) CallBack(ctx CallBackContext) {
|
||||
if len(ctx.Events) == 0 || len(ctx.CallBackURL) == 0 {
|
||||
return
|
||||
}
|
||||
|
||||
body := dingtalk{
|
||||
Msgtype: "markdown",
|
||||
Markdown: dingtalkMarkdown{
|
||||
Title: ctx.Events[0].RuleName,
|
||||
},
|
||||
}
|
||||
|
||||
ats := ExtractAtsParams(ctx.CallBackURL)
|
||||
message := BuildTplMessage(models.Dingtalk, ds.tpl, ctx.Events)
|
||||
|
||||
if len(ats) > 0 {
|
||||
body.Markdown.Text = message + "\n@" + strings.Join(ats, "@")
|
||||
body.At = dingtalkAt{
|
||||
AtMobiles: ats,
|
||||
IsAtAll: false,
|
||||
}
|
||||
} else {
|
||||
// NoAt in url
|
||||
body.Markdown.Text = message
|
||||
}
|
||||
|
||||
doSend(ctx.CallBackURL, body, models.Dingtalk, ctx.Stats)
|
||||
|
||||
ctx.Stats.AlertNotifyTotal.WithLabelValues("rule_callback").Inc()
|
||||
}
|
||||
|
||||
// extract urls and ats from Users
|
||||
func (ds *DingtalkSender) extract(users []*models.User) ([]string, []string) {
|
||||
urls := make([]string, 0, len(users))
|
||||
@@ -91,15 +120,3 @@ func (ds *DingtalkSender) extract(users []*models.User) ([]string, []string) {
|
||||
}
|
||||
return urls, ats
|
||||
}
|
||||
|
||||
func doSend(url string, body interface{}, channel string, stats *astats.Stats) {
|
||||
stats.AlertNotifyTotal.WithLabelValues(channel).Inc()
|
||||
|
||||
res, code, err := poster.PostJSON(url, time.Second*5, body, 3)
|
||||
if err != nil {
|
||||
logger.Errorf("%s_sender: result=fail url=%s code=%d error=%v response=%s", channel, url, code, err, string(res))
|
||||
stats.AlertNotifyErrorTotal.WithLabelValues(channel).Inc()
|
||||
} else {
|
||||
logger.Infof("%s_sender: result=succ url=%s code=%d response=%s", channel, url, code, string(res))
|
||||
}
|
||||
}
|
||||
|
||||
@@ -7,6 +7,7 @@ import (
|
||||
"time"
|
||||
|
||||
"github.com/ccfos/nightingale/v6/alert/aconf"
|
||||
"github.com/ccfos/nightingale/v6/memsto"
|
||||
"github.com/ccfos/nightingale/v6/models"
|
||||
|
||||
"github.com/toolkits/pkg/logger"
|
||||
@@ -85,12 +86,18 @@ func (es *EmailSender) WriteEmail(subject, content string, tos []string) {
|
||||
|
||||
func dialSmtp(d *gomail.Dialer) gomail.SendCloser {
|
||||
for {
|
||||
if s, err := d.Dial(); err != nil {
|
||||
logger.Errorf("email_sender: failed to dial smtp: %s", err)
|
||||
select {
|
||||
case <-mailQuit:
|
||||
// Note that Sendcloser is not obtained below,
|
||||
// and the outgoing signal (with configuration changes) exits the current dial
|
||||
return nil
|
||||
default:
|
||||
if s, err := d.Dial(); err != nil {
|
||||
logger.Errorf("email_sender: failed to dial smtp: %s", err)
|
||||
} else {
|
||||
return s
|
||||
}
|
||||
time.Sleep(time.Second)
|
||||
continue
|
||||
} else {
|
||||
return s
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -98,14 +105,31 @@ func dialSmtp(d *gomail.Dialer) gomail.SendCloser {
|
||||
var mailQuit = make(chan struct{})
|
||||
|
||||
func RestartEmailSender(smtp aconf.SMTPConfig) {
|
||||
close(mailQuit)
|
||||
mailQuit = make(chan struct{})
|
||||
// Notify internal start exit
|
||||
mailQuit <- struct{}{}
|
||||
startEmailSender(smtp)
|
||||
}
|
||||
|
||||
func InitEmailSender(smtp aconf.SMTPConfig) {
|
||||
var smtpConfig aconf.SMTPConfig
|
||||
|
||||
func InitEmailSender(ncc *memsto.NotifyConfigCacheType) {
|
||||
mailch = make(chan *gomail.Message, 100000)
|
||||
startEmailSender(smtp)
|
||||
go updateSmtp(ncc)
|
||||
smtpConfig = ncc.GetSMTP()
|
||||
startEmailSender(smtpConfig)
|
||||
}
|
||||
|
||||
func updateSmtp(ncc *memsto.NotifyConfigCacheType) {
|
||||
for {
|
||||
time.Sleep(1 * time.Minute)
|
||||
smtp := ncc.GetSMTP()
|
||||
if smtpConfig.Host != smtp.Host || smtpConfig.Batch != smtp.Batch || smtpConfig.From != smtp.From ||
|
||||
smtpConfig.Pass != smtp.Pass || smtpConfig.User != smtp.User || smtpConfig.Port != smtp.Port ||
|
||||
smtpConfig.InsecureSkipVerify != smtp.InsecureSkipVerify { //diff
|
||||
smtpConfig = smtp
|
||||
RestartEmailSender(smtp)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func startEmailSender(smtp aconf.SMTPConfig) {
|
||||
@@ -135,6 +159,12 @@ func startEmailSender(smtp aconf.SMTPConfig) {
|
||||
|
||||
if !open {
|
||||
s = dialSmtp(d)
|
||||
if s == nil {
|
||||
// Indicates that the dialing failed and exited the current goroutine directly,
|
||||
// but put the Message back in the mailch
|
||||
mailch <- m
|
||||
return
|
||||
}
|
||||
open = true
|
||||
}
|
||||
if err := gomail.Send(s, m); err != nil {
|
||||
@@ -146,6 +176,12 @@ func startEmailSender(smtp aconf.SMTPConfig) {
|
||||
}
|
||||
|
||||
s = dialSmtp(d)
|
||||
if s == nil {
|
||||
// Indicates that the dialing failed and exited the current goroutine directly,
|
||||
// but put the Message back in the mailch
|
||||
mailch <- m
|
||||
return
|
||||
}
|
||||
open = true
|
||||
|
||||
if err := gomail.Send(s, m); err != nil {
|
||||
|
||||
@@ -1,6 +1,7 @@
|
||||
package sender
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"html/template"
|
||||
"strings"
|
||||
|
||||
@@ -22,10 +23,41 @@ type feishu struct {
|
||||
At feishuAt `json:"at"`
|
||||
}
|
||||
|
||||
var (
|
||||
_ CallBacker = (*FeishuSender)(nil)
|
||||
)
|
||||
|
||||
type FeishuSender struct {
|
||||
tpl *template.Template
|
||||
}
|
||||
|
||||
func (fs *FeishuSender) CallBack(ctx CallBackContext) {
|
||||
if len(ctx.Events) == 0 || len(ctx.CallBackURL) == 0 {
|
||||
return
|
||||
}
|
||||
|
||||
ats := ExtractAtsParams(ctx.CallBackURL)
|
||||
message := BuildTplMessage(models.Feishu, fs.tpl, ctx.Events)
|
||||
|
||||
if len(ats) > 0 {
|
||||
atTags := ""
|
||||
for _, at := range ats {
|
||||
atTags += fmt.Sprintf("<at user_id=\"%s\"></at> ", at)
|
||||
}
|
||||
message = atTags + message
|
||||
}
|
||||
|
||||
body := feishu{
|
||||
Msgtype: "text",
|
||||
Content: feishuContent{
|
||||
Text: message,
|
||||
},
|
||||
}
|
||||
|
||||
doSend(ctx.CallBackURL, body, models.Feishu, ctx.Stats)
|
||||
ctx.Stats.AlertNotifyTotal.WithLabelValues("rule_callback").Inc()
|
||||
}
|
||||
|
||||
func (fs *FeishuSender) Send(ctx MessageContext) {
|
||||
if len(ctx.Users) == 0 || len(ctx.Events) == 0 {
|
||||
return
|
||||
|
||||
@@ -3,6 +3,7 @@ package sender
|
||||
import (
|
||||
"fmt"
|
||||
"html/template"
|
||||
"net/url"
|
||||
"strings"
|
||||
|
||||
"github.com/ccfos/nightingale/v6/models"
|
||||
@@ -91,6 +92,51 @@ var (
|
||||
}
|
||||
)
|
||||
|
||||
func (fs *FeishuCardSender) CallBack(ctx CallBackContext) {
|
||||
if len(ctx.Events) == 0 || len(ctx.CallBackURL) == 0 {
|
||||
return
|
||||
}
|
||||
|
||||
ats := ExtractAtsParams(ctx.CallBackURL)
|
||||
message := BuildTplMessage(models.FeishuCard, fs.tpl, ctx.Events)
|
||||
|
||||
if len(ats) > 0 {
|
||||
atTags := ""
|
||||
for _, at := range ats {
|
||||
if strings.Contains(at, "@") {
|
||||
atTags += fmt.Sprintf("<at email=\"%s\" ></at>", at)
|
||||
} else {
|
||||
atTags += fmt.Sprintf("<at id=\"%s\" ></at>", at)
|
||||
}
|
||||
}
|
||||
message = atTags + message
|
||||
}
|
||||
|
||||
color := "red"
|
||||
lowerUnicode := strings.ToLower(message)
|
||||
if strings.Count(lowerUnicode, Recovered) > 0 && strings.Count(lowerUnicode, Triggered) > 0 {
|
||||
color = "orange"
|
||||
} else if strings.Count(lowerUnicode, Recovered) > 0 {
|
||||
color = "green"
|
||||
}
|
||||
|
||||
SendTitle := fmt.Sprintf("🔔 %s", ctx.Events[0].RuleName)
|
||||
body.Card.Header.Title.Content = SendTitle
|
||||
body.Card.Header.Template = color
|
||||
body.Card.Elements[0].Text.Content = message
|
||||
body.Card.Elements[2].Elements[0].Content = SendTitle
|
||||
|
||||
// This is to be compatible with the feishucard interface, if with query string parameters, the request will fail
|
||||
// Remove query parameters from the URL,
|
||||
parsedURL, err := url.Parse(ctx.CallBackURL)
|
||||
if err != nil {
|
||||
return
|
||||
}
|
||||
parsedURL.RawQuery = ""
|
||||
|
||||
doSend(parsedURL.String(), body, models.FeishuCard, ctx.Stats)
|
||||
}
|
||||
|
||||
func (fs *FeishuCardSender) Send(ctx MessageContext) {
|
||||
if len(ctx.Users) == 0 || len(ctx.Events) == 0 {
|
||||
return
|
||||
|
||||
265
alert/sender/ibex.go
Normal file
265
alert/sender/ibex.go
Normal file
@@ -0,0 +1,265 @@
|
||||
// @Author: Ciusyan 6/5/24
|
||||
|
||||
package sender
|
||||
|
||||
import (
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"strconv"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
"github.com/ccfos/nightingale/v6/memsto"
|
||||
"github.com/ccfos/nightingale/v6/models"
|
||||
"github.com/ccfos/nightingale/v6/pkg/ctx"
|
||||
imodels "github.com/flashcatcloud/ibex/src/models"
|
||||
"github.com/flashcatcloud/ibex/src/storage"
|
||||
|
||||
"github.com/toolkits/pkg/logger"
|
||||
)
|
||||
|
||||
var (
|
||||
_ CallBacker = (*IbexCallBacker)(nil)
|
||||
)
|
||||
|
||||
type IbexCallBacker struct {
|
||||
targetCache *memsto.TargetCacheType
|
||||
userCache *memsto.UserCacheType
|
||||
taskTplCache *memsto.TaskTplCache
|
||||
}
|
||||
|
||||
func (c *IbexCallBacker) CallBack(ctx CallBackContext) {
|
||||
if len(ctx.CallBackURL) == 0 || len(ctx.Events) == 0 {
|
||||
return
|
||||
}
|
||||
|
||||
event := ctx.Events[0]
|
||||
|
||||
if event.IsRecovered {
|
||||
return
|
||||
}
|
||||
|
||||
c.handleIbex(ctx.Ctx, ctx.CallBackURL, event)
|
||||
}
|
||||
|
||||
func (c *IbexCallBacker) handleIbex(ctx *ctx.Context, url string, event *models.AlertCurEvent) {
|
||||
if imodels.DB() == nil && ctx.IsCenter {
|
||||
logger.Warning("event_callback_ibex: db is nil")
|
||||
return
|
||||
}
|
||||
|
||||
arr := strings.Split(url, "/")
|
||||
|
||||
var idstr string
|
||||
var host string
|
||||
|
||||
if len(arr) > 1 {
|
||||
idstr = arr[1]
|
||||
}
|
||||
|
||||
if len(arr) > 2 {
|
||||
host = arr[2]
|
||||
}
|
||||
|
||||
id, err := strconv.ParseInt(idstr, 10, 64)
|
||||
if err != nil {
|
||||
logger.Errorf("event_callback_ibex: failed to parse url: %s", url)
|
||||
return
|
||||
}
|
||||
|
||||
if host == "" {
|
||||
// 用户在callback url中没有传入host,就从event中解析
|
||||
host = event.TargetIdent
|
||||
}
|
||||
|
||||
if host == "" {
|
||||
logger.Error("event_callback_ibex: failed to get host")
|
||||
return
|
||||
}
|
||||
|
||||
tpl := c.taskTplCache.Get(id)
|
||||
if tpl == nil {
|
||||
logger.Errorf("event_callback_ibex: no such tpl(%d)", id)
|
||||
return
|
||||
}
|
||||
|
||||
// check perm
|
||||
// tpl.GroupId - host - account 三元组校验权限
|
||||
can, err := canDoIbex(tpl.UpdateBy, tpl, host, c.targetCache, c.userCache)
|
||||
if err != nil {
|
||||
logger.Errorf("event_callback_ibex: check perm fail: %v", err)
|
||||
return
|
||||
}
|
||||
|
||||
if !can {
|
||||
logger.Errorf("event_callback_ibex: user(%s) no permission", tpl.UpdateBy)
|
||||
return
|
||||
}
|
||||
|
||||
tagsMap := make(map[string]string)
|
||||
for i := 0; i < len(event.TagsJSON); i++ {
|
||||
pair := strings.TrimSpace(event.TagsJSON[i])
|
||||
if pair == "" {
|
||||
continue
|
||||
}
|
||||
|
||||
arr := strings.Split(pair, "=")
|
||||
if len(arr) != 2 {
|
||||
continue
|
||||
}
|
||||
|
||||
tagsMap[arr[0]] = arr[1]
|
||||
}
|
||||
// 附加告警级别 告警触发值标签
|
||||
tagsMap["alert_severity"] = strconv.Itoa(event.Severity)
|
||||
tagsMap["alert_trigger_value"] = event.TriggerValue
|
||||
|
||||
tags, err := json.Marshal(tagsMap)
|
||||
if err != nil {
|
||||
logger.Errorf("event_callback_ibex: failed to marshal tags to json: %v", tagsMap)
|
||||
return
|
||||
}
|
||||
|
||||
// call ibex
|
||||
in := models.TaskForm{
|
||||
Title: tpl.Title + " FH: " + host,
|
||||
Account: tpl.Account,
|
||||
Batch: tpl.Batch,
|
||||
Tolerance: tpl.Tolerance,
|
||||
Timeout: tpl.Timeout,
|
||||
Pause: tpl.Pause,
|
||||
Script: tpl.Script,
|
||||
Args: tpl.Args,
|
||||
Stdin: string(tags),
|
||||
Action: "start",
|
||||
Creator: tpl.UpdateBy,
|
||||
Hosts: []string{host},
|
||||
AlertTriggered: true,
|
||||
}
|
||||
|
||||
id, err = TaskAdd(in, tpl.UpdateBy, ctx.IsCenter)
|
||||
if err != nil {
|
||||
logger.Errorf("event_callback_ibex: call ibex fail: %v", err)
|
||||
return
|
||||
}
|
||||
|
||||
// write db
|
||||
record := models.TaskRecord{
|
||||
Id: id,
|
||||
EventId: event.Id,
|
||||
GroupId: tpl.GroupId,
|
||||
Title: in.Title,
|
||||
Account: in.Account,
|
||||
Batch: in.Batch,
|
||||
Tolerance: in.Tolerance,
|
||||
Timeout: in.Timeout,
|
||||
Pause: in.Pause,
|
||||
Script: in.Script,
|
||||
Args: in.Args,
|
||||
CreateAt: time.Now().Unix(),
|
||||
CreateBy: in.Creator,
|
||||
}
|
||||
|
||||
if err = record.Add(ctx); err != nil {
|
||||
logger.Errorf("event_callback_ibex: persist task_record fail: %v", err)
|
||||
}
|
||||
}
|
||||
|
||||
func canDoIbex(username string, tpl *models.TaskTpl, host string, targetCache *memsto.TargetCacheType, userCache *memsto.UserCacheType) (bool, error) {
|
||||
user := userCache.GetByUsername(username)
|
||||
if user != nil && user.IsAdmin() {
|
||||
return true, nil
|
||||
}
|
||||
|
||||
target, has := targetCache.Get(host)
|
||||
if !has {
|
||||
return false, nil
|
||||
}
|
||||
|
||||
return target.GroupId == tpl.GroupId, nil
|
||||
}
|
||||
|
||||
func TaskAdd(f models.TaskForm, authUser string, isCenter bool) (int64, error) {
|
||||
hosts := cleanHosts(f.Hosts)
|
||||
if len(hosts) == 0 {
|
||||
return 0, fmt.Errorf("arg(hosts) empty")
|
||||
}
|
||||
|
||||
taskMeta := &imodels.TaskMeta{
|
||||
Title: f.Title,
|
||||
Account: f.Account,
|
||||
Batch: f.Batch,
|
||||
Tolerance: f.Tolerance,
|
||||
Timeout: f.Timeout,
|
||||
Pause: f.Pause,
|
||||
Script: f.Script,
|
||||
Args: f.Args,
|
||||
Stdin: f.Stdin,
|
||||
Creator: f.Creator,
|
||||
}
|
||||
|
||||
err := taskMeta.CleanFields()
|
||||
if err != nil {
|
||||
return 0, err
|
||||
}
|
||||
|
||||
taskMeta.HandleFH(hosts[0])
|
||||
|
||||
// 任务类型分为"告警规则触发"和"n9e center用户下发"两种;
|
||||
// 边缘机房"告警规则触发"的任务不需要规划,并且它可能是失联的,无法使用db资源,所以放入redis缓存中,直接下发给agentd执行
|
||||
if !isCenter && f.AlertTriggered {
|
||||
if err := taskMeta.Create(); err != nil {
|
||||
// 当网络不连通时,生成唯一的id,防止边缘机房中不同任务的id相同;
|
||||
// 方法是,redis自增id去防止同一个机房的不同n9e edge生成的id相同;
|
||||
// 但没法防止不同边缘机房生成同样的id,所以,生成id的数据不会上报存入数据库,只用于闭环执行。
|
||||
taskMeta.Id, err = storage.IdGet()
|
||||
if err != nil {
|
||||
return 0, err
|
||||
}
|
||||
}
|
||||
|
||||
taskHost := imodels.TaskHost{
|
||||
Id: taskMeta.Id,
|
||||
Host: hosts[0],
|
||||
Status: "running",
|
||||
}
|
||||
if err = taskHost.Create(); err != nil {
|
||||
logger.Warningf("task_add_fail: authUser=%s title=%s err=%s", authUser, taskMeta.Title, err.Error())
|
||||
}
|
||||
|
||||
// 缓存任务元信息和待下发的任务
|
||||
err = taskMeta.Cache(hosts[0])
|
||||
if err != nil {
|
||||
return 0, err
|
||||
}
|
||||
|
||||
} else {
|
||||
// 如果是中心机房,还是保持之前的逻辑
|
||||
err = taskMeta.Save(hosts, f.Action)
|
||||
if err != nil {
|
||||
return 0, err
|
||||
}
|
||||
}
|
||||
|
||||
logger.Infof("task_add_succ: authUser=%s title=%s", authUser, taskMeta.Title)
|
||||
return taskMeta.Id, nil
|
||||
}
|
||||
|
||||
func cleanHosts(formHosts []string) []string {
|
||||
cnt := len(formHosts)
|
||||
arr := make([]string, 0, cnt)
|
||||
for i := 0; i < cnt; i++ {
|
||||
item := strings.TrimSpace(formHosts[i])
|
||||
if item == "" {
|
||||
continue
|
||||
}
|
||||
|
||||
if strings.HasPrefix(item, "#") {
|
||||
continue
|
||||
}
|
||||
|
||||
arr = append(arr, item)
|
||||
}
|
||||
|
||||
return arr
|
||||
}
|
||||
64
alert/sender/lark.go
Normal file
64
alert/sender/lark.go
Normal file
@@ -0,0 +1,64 @@
|
||||
package sender
|
||||
|
||||
import (
|
||||
"html/template"
|
||||
"strings"
|
||||
|
||||
"github.com/ccfos/nightingale/v6/models"
|
||||
)
|
||||
|
||||
var (
|
||||
_ CallBacker = (*LarkSender)(nil)
|
||||
)
|
||||
|
||||
type LarkSender struct {
|
||||
tpl *template.Template
|
||||
}
|
||||
|
||||
func (lk *LarkSender) CallBack(ctx CallBackContext) {
|
||||
if len(ctx.Events) == 0 || len(ctx.CallBackURL) == 0 {
|
||||
return
|
||||
}
|
||||
|
||||
body := feishu{
|
||||
Msgtype: "text",
|
||||
Content: feishuContent{
|
||||
Text: BuildTplMessage(models.Lark, lk.tpl, ctx.Events),
|
||||
},
|
||||
}
|
||||
|
||||
doSend(ctx.CallBackURL, body, models.Lark, ctx.Stats)
|
||||
ctx.Stats.AlertNotifyTotal.WithLabelValues("rule_callback").Inc()
|
||||
}
|
||||
|
||||
func (lk *LarkSender) Send(ctx MessageContext) {
|
||||
if len(ctx.Users) == 0 || len(ctx.Events) == 0 {
|
||||
return
|
||||
}
|
||||
urls := lk.extract(ctx.Users)
|
||||
message := BuildTplMessage(models.Lark, lk.tpl, ctx.Events)
|
||||
for _, url := range urls {
|
||||
body := feishu{
|
||||
Msgtype: "text",
|
||||
Content: feishuContent{
|
||||
Text: message,
|
||||
},
|
||||
}
|
||||
doSend(url, body, models.Lark, ctx.Stats)
|
||||
}
|
||||
}
|
||||
|
||||
func (lk *LarkSender) extract(users []*models.User) []string {
|
||||
urls := make([]string, 0, len(users))
|
||||
|
||||
for _, user := range users {
|
||||
if token, has := user.ExtractToken(models.Lark); has {
|
||||
url := token
|
||||
if !strings.HasPrefix(token, "https://") && !strings.HasPrefix(token, "http://") {
|
||||
url = "https://open.larksuite.com/open-apis/bot/v2/hook/" + token
|
||||
}
|
||||
urls = append(urls, url)
|
||||
}
|
||||
}
|
||||
return urls
|
||||
}
|
||||
98
alert/sender/larkcard.go
Normal file
98
alert/sender/larkcard.go
Normal file
@@ -0,0 +1,98 @@
|
||||
package sender
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"html/template"
|
||||
"net/url"
|
||||
"strings"
|
||||
|
||||
"github.com/ccfos/nightingale/v6/models"
|
||||
)
|
||||
|
||||
type LarkCardSender struct {
|
||||
tpl *template.Template
|
||||
}
|
||||
|
||||
func (fs *LarkCardSender) CallBack(ctx CallBackContext) {
|
||||
if len(ctx.Events) == 0 || len(ctx.CallBackURL) == 0 {
|
||||
return
|
||||
}
|
||||
|
||||
ats := ExtractAtsParams(ctx.CallBackURL)
|
||||
message := BuildTplMessage(models.LarkCard, fs.tpl, ctx.Events)
|
||||
|
||||
if len(ats) > 0 {
|
||||
atTags := ""
|
||||
for _, at := range ats {
|
||||
if strings.Contains(at, "@") {
|
||||
atTags += fmt.Sprintf("<at email=\"%s\" ></at>", at)
|
||||
} else {
|
||||
atTags += fmt.Sprintf("<at id=\"%s\" ></at>", at)
|
||||
}
|
||||
}
|
||||
message = atTags + message
|
||||
}
|
||||
|
||||
color := "red"
|
||||
lowerUnicode := strings.ToLower(message)
|
||||
if strings.Count(lowerUnicode, Recovered) > 0 && strings.Count(lowerUnicode, Triggered) > 0 {
|
||||
color = "orange"
|
||||
} else if strings.Count(lowerUnicode, Recovered) > 0 {
|
||||
color = "green"
|
||||
}
|
||||
|
||||
SendTitle := fmt.Sprintf("🔔 %s", ctx.Events[0].RuleName)
|
||||
body.Card.Header.Title.Content = SendTitle
|
||||
body.Card.Header.Template = color
|
||||
body.Card.Elements[0].Text.Content = message
|
||||
body.Card.Elements[2].Elements[0].Content = SendTitle
|
||||
|
||||
// This is to be compatible with the Larkcard interface, if with query string parameters, the request will fail
|
||||
// Remove query parameters from the URL,
|
||||
parsedURL, err := url.Parse(ctx.CallBackURL)
|
||||
if err != nil {
|
||||
return
|
||||
}
|
||||
parsedURL.RawQuery = ""
|
||||
|
||||
doSend(parsedURL.String(), body, models.LarkCard, ctx.Stats)
|
||||
}
|
||||
|
||||
func (fs *LarkCardSender) Send(ctx MessageContext) {
|
||||
if len(ctx.Users) == 0 || len(ctx.Events) == 0 {
|
||||
return
|
||||
}
|
||||
urls, _ := fs.extract(ctx.Users)
|
||||
message := BuildTplMessage(models.LarkCard, fs.tpl, ctx.Events)
|
||||
color := "red"
|
||||
lowerUnicode := strings.ToLower(message)
|
||||
if strings.Count(lowerUnicode, Recovered) > 0 && strings.Count(lowerUnicode, Triggered) > 0 {
|
||||
color = "orange"
|
||||
} else if strings.Count(lowerUnicode, Recovered) > 0 {
|
||||
color = "green"
|
||||
}
|
||||
|
||||
SendTitle := fmt.Sprintf("🔔 %s", ctx.Events[0].RuleName)
|
||||
body.Card.Header.Title.Content = SendTitle
|
||||
body.Card.Header.Template = color
|
||||
body.Card.Elements[0].Text.Content = message
|
||||
body.Card.Elements[2].Elements[0].Content = SendTitle
|
||||
for _, url := range urls {
|
||||
doSend(url, body, models.LarkCard, ctx.Stats)
|
||||
}
|
||||
}
|
||||
|
||||
func (fs *LarkCardSender) extract(users []*models.User) ([]string, []string) {
|
||||
urls := make([]string, 0, len(users))
|
||||
ats := make([]string, 0)
|
||||
for i := range users {
|
||||
if token, has := users[i].ExtractToken(models.Lark); has {
|
||||
url := token
|
||||
if !strings.HasPrefix(token, "https://") && !strings.HasPrefix(token, "http://") {
|
||||
url = "https://open.larksuite.com/open-apis/bot/v2/hook/" + strings.TrimSpace(token)
|
||||
}
|
||||
urls = append(urls, url)
|
||||
}
|
||||
}
|
||||
return urls, ats
|
||||
}
|
||||
@@ -45,6 +45,21 @@ func (ms *MmSender) Send(ctx MessageContext) {
|
||||
})
|
||||
}
|
||||
|
||||
func (ms *MmSender) CallBack(ctx CallBackContext) {
|
||||
if len(ctx.Events) == 0 || len(ctx.CallBackURL) == 0 {
|
||||
return
|
||||
}
|
||||
message := BuildTplMessage(models.Mm, ms.tpl, ctx.Events)
|
||||
|
||||
SendMM(MatterMostMessage{
|
||||
Text: message,
|
||||
Tokens: []string{ctx.CallBackURL},
|
||||
Stats: ctx.Stats,
|
||||
})
|
||||
|
||||
ctx.Stats.AlertNotifyTotal.WithLabelValues("rule_callback").Inc()
|
||||
}
|
||||
|
||||
func (ms *MmSender) extract(users []*models.User) []string {
|
||||
tokens := make([]string, 0, len(users))
|
||||
for _, user := range users {
|
||||
|
||||
@@ -41,6 +41,10 @@ func NewSender(key string, tpls map[string]*template.Template, smtp ...aconf.SMT
|
||||
return &MmSender{tpl: tpls[models.Mm]}
|
||||
case models.Telegram:
|
||||
return &TelegramSender{tpl: tpls[models.Telegram]}
|
||||
case models.Lark:
|
||||
return &LarkSender{tpl: tpls[models.Lark]}
|
||||
case models.LarkCard:
|
||||
return &LarkCardSender{tpl: tpls[models.LarkCard]}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
@@ -21,10 +21,29 @@ type telegram struct {
|
||||
Text string `json:"text"`
|
||||
}
|
||||
|
||||
var (
|
||||
_ CallBacker = (*TelegramSender)(nil)
|
||||
)
|
||||
|
||||
type TelegramSender struct {
|
||||
tpl *template.Template
|
||||
}
|
||||
|
||||
func (ts *TelegramSender) CallBack(ctx CallBackContext) {
|
||||
if len(ctx.Events) == 0 || len(ctx.CallBackURL) == 0 {
|
||||
return
|
||||
}
|
||||
|
||||
message := BuildTplMessage(models.Telegram, ts.tpl, ctx.Events)
|
||||
SendTelegram(TelegramMessage{
|
||||
Text: message,
|
||||
Tokens: []string{ctx.CallBackURL},
|
||||
Stats: ctx.Stats,
|
||||
})
|
||||
|
||||
ctx.Stats.AlertNotifyTotal.WithLabelValues("rule_callback").Inc()
|
||||
}
|
||||
|
||||
func (ts *TelegramSender) Send(ctx MessageContext) {
|
||||
if len(ctx.Users) == 0 || len(ctx.Events) == 0 {
|
||||
return
|
||||
|
||||
@@ -2,9 +2,11 @@ package sender
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"crypto/tls"
|
||||
"encoding/json"
|
||||
"io"
|
||||
"net/http"
|
||||
"sync"
|
||||
"time"
|
||||
|
||||
"github.com/ccfos/nightingale/v6/alert/astats"
|
||||
@@ -13,59 +15,159 @@ import (
|
||||
"github.com/toolkits/pkg/logger"
|
||||
)
|
||||
|
||||
func SendWebhooks(webhooks []*models.Webhook, event *models.AlertCurEvent, stats *astats.Stats) {
|
||||
for _, conf := range webhooks {
|
||||
if conf.Url == "" || !conf.Enable {
|
||||
continue
|
||||
}
|
||||
bs, err := json.Marshal(event)
|
||||
if err != nil {
|
||||
continue
|
||||
}
|
||||
func sendWebhook(webhook *models.Webhook, event interface{}, stats *astats.Stats) bool {
|
||||
channel := "webhook"
|
||||
if webhook.Type == models.RuleCallback {
|
||||
channel = "callback"
|
||||
}
|
||||
|
||||
bf := bytes.NewBuffer(bs)
|
||||
conf := webhook
|
||||
if conf.Url == "" || !conf.Enable {
|
||||
return false
|
||||
}
|
||||
bs, err := json.Marshal(event)
|
||||
if err != nil {
|
||||
logger.Errorf("%s alertingWebhook failed to marshal event:%+v err:%v", channel, event, err)
|
||||
return false
|
||||
}
|
||||
|
||||
req, err := http.NewRequest("POST", conf.Url, bf)
|
||||
if err != nil {
|
||||
logger.Warning("alertingWebhook failed to new request", err)
|
||||
continue
|
||||
}
|
||||
bf := bytes.NewBuffer(bs)
|
||||
|
||||
req.Header.Set("Content-Type", "application/json")
|
||||
if conf.BasicAuthUser != "" && conf.BasicAuthPass != "" {
|
||||
req.SetBasicAuth(conf.BasicAuthUser, conf.BasicAuthPass)
|
||||
}
|
||||
req, err := http.NewRequest("POST", conf.Url, bf)
|
||||
if err != nil {
|
||||
logger.Warningf("%s alertingWebhook failed to new reques event:%s err:%v", channel, string(bs), err)
|
||||
return true
|
||||
}
|
||||
|
||||
if len(conf.Headers) > 0 && len(conf.Headers)%2 == 0 {
|
||||
for i := 0; i < len(conf.Headers); i += 2 {
|
||||
if conf.Headers[i] == "host" {
|
||||
req.Host = conf.Headers[i+1]
|
||||
continue
|
||||
}
|
||||
req.Header.Set(conf.Headers[i], conf.Headers[i+1])
|
||||
req.Header.Set("Content-Type", "application/json")
|
||||
if conf.BasicAuthUser != "" && conf.BasicAuthPass != "" {
|
||||
req.SetBasicAuth(conf.BasicAuthUser, conf.BasicAuthPass)
|
||||
}
|
||||
|
||||
if len(conf.Headers) > 0 && len(conf.Headers)%2 == 0 {
|
||||
for i := 0; i < len(conf.Headers); i += 2 {
|
||||
if conf.Headers[i] == "host" || conf.Headers[i] == "Host" {
|
||||
req.Host = conf.Headers[i+1]
|
||||
continue
|
||||
}
|
||||
req.Header.Set(conf.Headers[i], conf.Headers[i+1])
|
||||
}
|
||||
}
|
||||
insecureSkipVerify := false
|
||||
if webhook != nil {
|
||||
insecureSkipVerify = webhook.SkipVerify
|
||||
}
|
||||
client := http.Client{
|
||||
Timeout: time.Duration(conf.Timeout) * time.Second,
|
||||
Transport: &http.Transport{
|
||||
TLSClientConfig: &tls.Config{InsecureSkipVerify: insecureSkipVerify},
|
||||
},
|
||||
}
|
||||
|
||||
// todo add skip verify
|
||||
client := http.Client{
|
||||
Timeout: time.Duration(conf.Timeout) * time.Second,
|
||||
stats.AlertNotifyTotal.WithLabelValues(channel).Inc()
|
||||
var resp *http.Response
|
||||
resp, err = client.Do(req)
|
||||
if err != nil {
|
||||
stats.AlertNotifyErrorTotal.WithLabelValues(channel).Inc()
|
||||
logger.Errorf("event_%s_fail, event:%s, url: [%s], error: [%s]", channel, string(bs), conf.Url, err)
|
||||
return true
|
||||
}
|
||||
|
||||
var body []byte
|
||||
if resp.Body != nil {
|
||||
defer resp.Body.Close()
|
||||
body, _ = io.ReadAll(resp.Body)
|
||||
}
|
||||
|
||||
if resp.StatusCode == 429 {
|
||||
logger.Errorf("event_%s_fail, url: %s, response code: %d, body: %s event:%s", channel, conf.Url, resp.StatusCode, string(body), string(bs))
|
||||
return true
|
||||
}
|
||||
|
||||
logger.Debugf("event_%s_succ, url: %s, response code: %d, body: %s event:%s", channel, conf.Url, resp.StatusCode, string(body), string(bs))
|
||||
return false
|
||||
}
|
||||
|
||||
func SingleSendWebhooks(webhooks []*models.Webhook, event *models.AlertCurEvent, stats *astats.Stats) {
|
||||
for _, conf := range webhooks {
|
||||
retryCount := 0
|
||||
for retryCount < 3 {
|
||||
needRetry := sendWebhook(conf, event, stats)
|
||||
if !needRetry {
|
||||
break
|
||||
}
|
||||
retryCount++
|
||||
time.Sleep(time.Minute * 1 * time.Duration(retryCount))
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func BatchSendWebhooks(webhooks []*models.Webhook, event *models.AlertCurEvent, stats *astats.Stats) {
|
||||
for _, conf := range webhooks {
|
||||
logger.Infof("push event:%+v to queue:%v", event, conf)
|
||||
PushEvent(conf, event, stats)
|
||||
}
|
||||
}
|
||||
|
||||
var EventQueue = make(map[string]*WebhookQueue)
|
||||
var CallbackEventQueue = make(map[string]*WebhookQueue)
|
||||
var CallbackEventQueueLock sync.RWMutex
|
||||
var EventQueueLock sync.RWMutex
|
||||
|
||||
const QueueMaxSize = 100000
|
||||
|
||||
type WebhookQueue struct {
|
||||
list *SafeListLimited
|
||||
closeCh chan struct{}
|
||||
}
|
||||
|
||||
func PushEvent(webhook *models.Webhook, event *models.AlertCurEvent, stats *astats.Stats) {
|
||||
EventQueueLock.RLock()
|
||||
queue := EventQueue[webhook.Url]
|
||||
EventQueueLock.RUnlock()
|
||||
|
||||
if queue == nil {
|
||||
queue = &WebhookQueue{
|
||||
list: NewSafeListLimited(QueueMaxSize),
|
||||
closeCh: make(chan struct{}),
|
||||
}
|
||||
|
||||
EventQueueLock.Lock()
|
||||
EventQueue[webhook.Url] = queue
|
||||
EventQueueLock.Unlock()
|
||||
|
||||
StartConsumer(queue, webhook.Batch, webhook, stats)
|
||||
}
|
||||
|
||||
succ := queue.list.PushFront(event)
|
||||
if !succ {
|
||||
stats.AlertNotifyErrorTotal.WithLabelValues("push_event_queue").Inc()
|
||||
logger.Warningf("Write channel(%s) full, current channel size: %d event:%v", webhook.Url, queue.list.Len(), event)
|
||||
}
|
||||
}
|
||||
|
||||
func StartConsumer(queue *WebhookQueue, popSize int, webhook *models.Webhook, stats *astats.Stats) {
|
||||
for {
|
||||
select {
|
||||
case <-queue.closeCh:
|
||||
logger.Infof("event queue:%v closed", queue)
|
||||
return
|
||||
default:
|
||||
events := queue.list.PopBack(popSize)
|
||||
if len(events) == 0 {
|
||||
time.Sleep(time.Millisecond * 400)
|
||||
continue
|
||||
}
|
||||
|
||||
retryCount := 0
|
||||
for retryCount < webhook.RetryCount {
|
||||
needRetry := sendWebhook(webhook, events, stats)
|
||||
if !needRetry {
|
||||
break
|
||||
}
|
||||
retryCount++
|
||||
time.Sleep(time.Second * time.Duration(webhook.RetryInterval) * time.Duration(retryCount))
|
||||
}
|
||||
}
|
||||
|
||||
stats.AlertNotifyTotal.WithLabelValues("webhook").Inc()
|
||||
var resp *http.Response
|
||||
resp, err = client.Do(req)
|
||||
if err != nil {
|
||||
stats.AlertNotifyErrorTotal.WithLabelValues("webhook").Inc()
|
||||
logger.Errorf("event_webhook_fail, ruleId: [%d], eventId: [%d], url: [%s], error: [%s]", event.RuleId, event.Id, conf.Url, err)
|
||||
continue
|
||||
}
|
||||
|
||||
var body []byte
|
||||
if resp.Body != nil {
|
||||
defer resp.Body.Close()
|
||||
body, _ = io.ReadAll(resp.Body)
|
||||
}
|
||||
|
||||
logger.Debugf("event_webhook_succ, url: %s, response code: %d, body: %s event:%+v", conf.Url, resp.StatusCode, string(body), event)
|
||||
}
|
||||
}
|
||||
|
||||
111
alert/sender/webhook_queue.go
Normal file
111
alert/sender/webhook_queue.go
Normal file
@@ -0,0 +1,111 @@
|
||||
package sender
|
||||
|
||||
import (
|
||||
"container/list"
|
||||
"sync"
|
||||
|
||||
"github.com/ccfos/nightingale/v6/models"
|
||||
)
|
||||
|
||||
type SafeList struct {
|
||||
sync.RWMutex
|
||||
L *list.List
|
||||
}
|
||||
|
||||
func NewSafeList() *SafeList {
|
||||
return &SafeList{L: list.New()}
|
||||
}
|
||||
|
||||
func (sl *SafeList) PushFront(v interface{}) *list.Element {
|
||||
sl.Lock()
|
||||
e := sl.L.PushFront(v)
|
||||
sl.Unlock()
|
||||
return e
|
||||
}
|
||||
|
||||
func (sl *SafeList) PushFrontBatch(vs []interface{}) {
|
||||
sl.Lock()
|
||||
for _, item := range vs {
|
||||
sl.L.PushFront(item)
|
||||
}
|
||||
sl.Unlock()
|
||||
}
|
||||
|
||||
func (sl *SafeList) PopBack(max int) []*models.AlertCurEvent {
|
||||
sl.Lock()
|
||||
|
||||
count := sl.L.Len()
|
||||
if count == 0 {
|
||||
sl.Unlock()
|
||||
return []*models.AlertCurEvent{}
|
||||
}
|
||||
|
||||
if count > max {
|
||||
count = max
|
||||
}
|
||||
|
||||
items := make([]*models.AlertCurEvent, 0, count)
|
||||
for i := 0; i < count; i++ {
|
||||
item := sl.L.Remove(sl.L.Back())
|
||||
sample, ok := item.(*models.AlertCurEvent)
|
||||
if ok {
|
||||
items = append(items, sample)
|
||||
}
|
||||
}
|
||||
|
||||
sl.Unlock()
|
||||
return items
|
||||
}
|
||||
|
||||
func (sl *SafeList) RemoveAll() {
|
||||
sl.Lock()
|
||||
sl.L.Init()
|
||||
sl.Unlock()
|
||||
}
|
||||
|
||||
func (sl *SafeList) Len() int {
|
||||
sl.RLock()
|
||||
size := sl.L.Len()
|
||||
sl.RUnlock()
|
||||
return size
|
||||
}
|
||||
|
||||
// SafeList with Limited Size
|
||||
type SafeListLimited struct {
|
||||
maxSize int
|
||||
SL *SafeList
|
||||
}
|
||||
|
||||
func NewSafeListLimited(maxSize int) *SafeListLimited {
|
||||
return &SafeListLimited{SL: NewSafeList(), maxSize: maxSize}
|
||||
}
|
||||
|
||||
func (sll *SafeListLimited) PopBack(max int) []*models.AlertCurEvent {
|
||||
return sll.SL.PopBack(max)
|
||||
}
|
||||
|
||||
func (sll *SafeListLimited) PushFront(v interface{}) bool {
|
||||
if sll.SL.Len() >= sll.maxSize {
|
||||
return false
|
||||
}
|
||||
|
||||
sll.SL.PushFront(v)
|
||||
return true
|
||||
}
|
||||
|
||||
func (sll *SafeListLimited) PushFrontBatch(vs []interface{}) bool {
|
||||
if sll.SL.Len() >= sll.maxSize {
|
||||
return false
|
||||
}
|
||||
|
||||
sll.SL.PushFrontBatch(vs)
|
||||
return true
|
||||
}
|
||||
|
||||
func (sll *SafeListLimited) RemoveAll() {
|
||||
sll.SL.RemoveAll()
|
||||
}
|
||||
|
||||
func (sll *SafeListLimited) Len() int {
|
||||
return sll.SL.Len()
|
||||
}
|
||||
@@ -16,10 +16,31 @@ type wecom struct {
|
||||
Markdown wecomMarkdown `json:"markdown"`
|
||||
}
|
||||
|
||||
var (
|
||||
_ CallBacker = (*WecomSender)(nil)
|
||||
)
|
||||
|
||||
type WecomSender struct {
|
||||
tpl *template.Template
|
||||
}
|
||||
|
||||
func (ws *WecomSender) CallBack(ctx CallBackContext) {
|
||||
if len(ctx.Events) == 0 || len(ctx.CallBackURL) == 0 {
|
||||
return
|
||||
}
|
||||
|
||||
message := BuildTplMessage(models.Wecom, ws.tpl, ctx.Events)
|
||||
body := wecom{
|
||||
Msgtype: "markdown",
|
||||
Markdown: wecomMarkdown{
|
||||
Content: message,
|
||||
},
|
||||
}
|
||||
|
||||
doSend(ctx.CallBackURL, body, models.Wecom, ctx.Stats)
|
||||
ctx.Stats.AlertNotifyTotal.WithLabelValues("rule_callback").Inc()
|
||||
}
|
||||
|
||||
func (ws *WecomSender) Send(ctx MessageContext) {
|
||||
if len(ctx.Users) == 0 || len(ctx.Events) == 0 {
|
||||
return
|
||||
|
||||
@@ -1,5 +1,7 @@
|
||||
package cconf
|
||||
|
||||
import "time"
|
||||
|
||||
type Center struct {
|
||||
Plugins []Plugin
|
||||
MetricsYamlFile string
|
||||
@@ -9,6 +11,8 @@ type Center struct {
|
||||
MetricDesc MetricDescType
|
||||
AnonymousAccess AnonymousAccess
|
||||
UseFileAssets bool
|
||||
FlashDuty FlashDuty
|
||||
EventHistoryGroupView bool
|
||||
}
|
||||
|
||||
type Plugin struct {
|
||||
@@ -18,6 +22,12 @@ type Plugin struct {
|
||||
TypeName string `json:"plugin_type_name"`
|
||||
}
|
||||
|
||||
type FlashDuty struct {
|
||||
Api string
|
||||
Headers map[string]string
|
||||
Timeout time.Duration
|
||||
}
|
||||
|
||||
type AnonymousAccess struct {
|
||||
PromQuerier bool
|
||||
AlertDetail bool
|
||||
|
||||
@@ -18,20 +18,28 @@ var MetricDesc MetricDescType
|
||||
// GetMetricDesc , if metric is not registered, empty string will be returned
|
||||
func GetMetricDesc(lang, metric string) string {
|
||||
var m map[string]string
|
||||
if lang == "zh" {
|
||||
m = MetricDesc.Zh
|
||||
} else {
|
||||
|
||||
switch lang {
|
||||
case "en":
|
||||
m = MetricDesc.En
|
||||
default:
|
||||
m = MetricDesc.Zh
|
||||
}
|
||||
|
||||
if m != nil {
|
||||
if desc, has := m[metric]; has {
|
||||
if desc, ok := m[metric]; ok {
|
||||
return desc
|
||||
}
|
||||
}
|
||||
|
||||
return MetricDesc.CommonDesc[metric]
|
||||
}
|
||||
if MetricDesc.CommonDesc != nil {
|
||||
if desc, ok := MetricDesc.CommonDesc[metric]; ok {
|
||||
return desc
|
||||
}
|
||||
}
|
||||
|
||||
return ""
|
||||
}
|
||||
func LoadMetricsYaml(configDir, metricsYamlFile string) error {
|
||||
fp := metricsYamlFile
|
||||
if fp == "" {
|
||||
|
||||
@@ -28,6 +28,14 @@ func LoadOpsYaml(configDir string, opsYamlFile string) error {
|
||||
if !file.IsExist(fp) {
|
||||
return nil
|
||||
}
|
||||
|
||||
hash, _ := file.MD5(fp)
|
||||
if hash == "2f91a9ed265cf2024e266dc1d538ee77" {
|
||||
// ops.yaml 是老的默认文件,删除
|
||||
file.Remove(fp)
|
||||
return nil
|
||||
}
|
||||
|
||||
return file.ReadYaml(fp, &Operations)
|
||||
}
|
||||
|
||||
@@ -68,7 +76,9 @@ ops:
|
||||
- "/dashboards/add"
|
||||
- "/dashboards/put"
|
||||
- "/dashboards/del"
|
||||
- "/dashboards-built-in"
|
||||
- "/embedded-dashboards/put"
|
||||
- "/embedded-dashboards"
|
||||
- "/public-dashboards"
|
||||
|
||||
- name: alert
|
||||
cname: 告警规则
|
||||
@@ -77,7 +87,7 @@ ops:
|
||||
- "/alert-rules/add"
|
||||
- "/alert-rules/put"
|
||||
- "/alert-rules/del"
|
||||
- "/alert-rules-built-in"
|
||||
|
||||
- name: alert-mutes
|
||||
cname: 告警静默管理
|
||||
ops:
|
||||
@@ -164,6 +174,22 @@ ops:
|
||||
- "/busi-groups/put"
|
||||
- "/busi-groups/del"
|
||||
|
||||
- name: builtin-metrics
|
||||
cname: 指标视图
|
||||
ops:
|
||||
- "/metrics-built-in"
|
||||
- "/builtin-metrics/add"
|
||||
- "/builtin-metrics/put"
|
||||
- "/builtin-metrics/del"
|
||||
|
||||
- name: built-in-components
|
||||
cname: 模版中心
|
||||
ops:
|
||||
- "/built-in-components"
|
||||
- "/built-in-components/add"
|
||||
- "/built-in-components/put"
|
||||
- "/built-in-components/del"
|
||||
|
||||
- name: system
|
||||
cname: 系统信息
|
||||
ops:
|
||||
|
||||
@@ -7,10 +7,13 @@ import (
|
||||
"github.com/ccfos/nightingale/v6/alert"
|
||||
"github.com/ccfos/nightingale/v6/alert/astats"
|
||||
"github.com/ccfos/nightingale/v6/alert/process"
|
||||
alertrt "github.com/ccfos/nightingale/v6/alert/router"
|
||||
"github.com/ccfos/nightingale/v6/center/cconf"
|
||||
"github.com/ccfos/nightingale/v6/center/cconf/rsa"
|
||||
"github.com/ccfos/nightingale/v6/center/cstats"
|
||||
"github.com/ccfos/nightingale/v6/center/integration"
|
||||
"github.com/ccfos/nightingale/v6/center/metas"
|
||||
centerrt "github.com/ccfos/nightingale/v6/center/router"
|
||||
"github.com/ccfos/nightingale/v6/center/sso"
|
||||
"github.com/ccfos/nightingale/v6/conf"
|
||||
"github.com/ccfos/nightingale/v6/dumper"
|
||||
@@ -18,19 +21,19 @@ import (
|
||||
"github.com/ccfos/nightingale/v6/models"
|
||||
"github.com/ccfos/nightingale/v6/models/migrate"
|
||||
"github.com/ccfos/nightingale/v6/pkg/ctx"
|
||||
"github.com/ccfos/nightingale/v6/pkg/flashduty"
|
||||
"github.com/ccfos/nightingale/v6/pkg/httpx"
|
||||
"github.com/ccfos/nightingale/v6/pkg/i18nx"
|
||||
"github.com/ccfos/nightingale/v6/pkg/logx"
|
||||
"github.com/ccfos/nightingale/v6/pkg/version"
|
||||
"github.com/ccfos/nightingale/v6/prom"
|
||||
"github.com/ccfos/nightingale/v6/pushgw/idents"
|
||||
pushgwrt "github.com/ccfos/nightingale/v6/pushgw/router"
|
||||
"github.com/ccfos/nightingale/v6/pushgw/writer"
|
||||
"github.com/ccfos/nightingale/v6/storage"
|
||||
"github.com/ccfos/nightingale/v6/tdengine"
|
||||
|
||||
alertrt "github.com/ccfos/nightingale/v6/alert/router"
|
||||
centerrt "github.com/ccfos/nightingale/v6/center/router"
|
||||
pushgwrt "github.com/ccfos/nightingale/v6/pushgw/router"
|
||||
"github.com/flashcatcloud/ibex/src/cmd/ibex"
|
||||
)
|
||||
|
||||
func Initialize(configDir string, cryptoKey string) (func(), error) {
|
||||
@@ -51,6 +54,7 @@ func Initialize(configDir string, cryptoKey string) (func(), error) {
|
||||
|
||||
i18nx.Init(configDir)
|
||||
cstats.Init()
|
||||
flashduty.Init(config.Center.FlashDuty)
|
||||
|
||||
db, err := storage.New(config.DB)
|
||||
if err != nil {
|
||||
@@ -60,11 +64,14 @@ func Initialize(configDir string, cryptoKey string) (func(), error) {
|
||||
migrate.Migrate(db)
|
||||
models.InitRoot(ctx)
|
||||
|
||||
config.HTTP.JWTAuth.SigningKey = models.InitJWTSigningKey(ctx)
|
||||
|
||||
err = rsa.InitRSAConfig(ctx, &config.HTTP.RSA)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
integration.Init(ctx, config.Center.BuiltinIntegrationsDir)
|
||||
var redis storage.Redis
|
||||
redis, err = storage.NewRedis(config.Redis)
|
||||
if err != nil {
|
||||
@@ -72,13 +79,11 @@ func Initialize(configDir string, cryptoKey string) (func(), error) {
|
||||
}
|
||||
|
||||
metas := metas.New(redis)
|
||||
idents := idents.New(ctx)
|
||||
idents := idents.New(ctx, redis)
|
||||
|
||||
syncStats := memsto.NewSyncStats()
|
||||
alertStats := astats.NewSyncStats()
|
||||
|
||||
sso := sso.Init(config.Center, ctx)
|
||||
|
||||
configCache := memsto.NewConfigCache(ctx, syncStats, config.HTTP.RSA.RSAPrivateKey, config.HTTP.RSA.RSAPassWord)
|
||||
busiGroupCache := memsto.NewBusiGroupCache(ctx, syncStats)
|
||||
targetCache := memsto.NewTargetCache(ctx, syncStats, redis)
|
||||
@@ -88,21 +93,23 @@ func Initialize(configDir string, cryptoKey string) (func(), error) {
|
||||
notifyConfigCache := memsto.NewNotifyConfigCache(ctx, configCache)
|
||||
userCache := memsto.NewUserCache(ctx, syncStats)
|
||||
userGroupCache := memsto.NewUserGroupCache(ctx, syncStats)
|
||||
taskTplCache := memsto.NewTaskTplCache(ctx)
|
||||
|
||||
sso := sso.Init(config.Center, ctx, configCache)
|
||||
promClients := prom.NewPromClient(ctx)
|
||||
tdengineClients := tdengine.NewTdengineClient(ctx, config.Alert.Heartbeat)
|
||||
|
||||
externalProcessors := process.NewExternalProcessors()
|
||||
alert.Start(config.Alert, config.Pushgw, syncStats, alertStats, externalProcessors, targetCache, busiGroupCache, alertMuteCache, alertRuleCache, notifyConfigCache, dsCache, ctx, promClients, tdengineClients, userCache, userGroupCache)
|
||||
alert.Start(config.Alert, config.Pushgw, syncStats, alertStats, externalProcessors, targetCache, busiGroupCache, alertMuteCache, alertRuleCache, notifyConfigCache, taskTplCache, dsCache, ctx, promClients, tdengineClients, userCache, userGroupCache)
|
||||
|
||||
writers := writer.NewWriters(config.Pushgw)
|
||||
|
||||
go version.GetGithubVersion()
|
||||
|
||||
alertrtRouter := alertrt.New(config.HTTP, config.Alert, alertMuteCache, targetCache, busiGroupCache, alertStats, ctx, externalProcessors)
|
||||
centerRouter := centerrt.New(config.HTTP, config.Center, cconf.Operations, dsCache, notifyConfigCache, promClients, tdengineClients,
|
||||
centerRouter := centerrt.New(config.HTTP, config.Center, config.Alert, config.Ibex, cconf.Operations, dsCache, notifyConfigCache, promClients, tdengineClients,
|
||||
redis, sso, ctx, metas, idents, targetCache, userCache, userGroupCache)
|
||||
pushgwRouter := pushgwrt.New(config.HTTP, config.Pushgw, targetCache, busiGroupCache, idents, writers, ctx)
|
||||
pushgwRouter := pushgwrt.New(config.HTTP, config.Pushgw, config.Alert, targetCache, busiGroupCache, idents, metas, writers, ctx)
|
||||
|
||||
r := httpx.GinEngine(config.Global.RunMode, config.HTTP)
|
||||
|
||||
@@ -111,6 +118,11 @@ func Initialize(configDir string, cryptoKey string) (func(), error) {
|
||||
pushgwRouter.Config(r)
|
||||
dumper.ConfigRouter(r)
|
||||
|
||||
if config.Ibex.Enable {
|
||||
migrate.MigrateIbexTables(db)
|
||||
ibex.ServerStart(true, db, redis, config.HTTP.APIForService.BasicAuth, config.Alert.Heartbeat, &config.CenterApi, r, centerRouter, config.Ibex, config.HTTP.Port)
|
||||
}
|
||||
|
||||
httpClean := httpx.Init(config.HTTP, r)
|
||||
|
||||
return func() {
|
||||
|
||||
365
center/integration/init.go
Normal file
365
center/integration/init.go
Normal file
@@ -0,0 +1,365 @@
|
||||
package integration
|
||||
|
||||
import (
|
||||
"encoding/json"
|
||||
"path"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
"github.com/ccfos/nightingale/v6/models"
|
||||
"github.com/ccfos/nightingale/v6/pkg/ctx"
|
||||
"github.com/toolkits/pkg/file"
|
||||
"github.com/toolkits/pkg/logger"
|
||||
"github.com/toolkits/pkg/runner"
|
||||
)
|
||||
|
||||
const SYSTEM = "system"
|
||||
|
||||
func Init(ctx *ctx.Context, builtinIntegrationsDir string) {
|
||||
fp := builtinIntegrationsDir
|
||||
if fp == "" {
|
||||
fp = path.Join(runner.Cwd, "integrations")
|
||||
}
|
||||
|
||||
// var fileList []string
|
||||
dirList, err := file.DirsUnder(fp)
|
||||
if err != nil {
|
||||
logger.Warning("read builtin component dir fail ", err)
|
||||
return
|
||||
}
|
||||
|
||||
for _, dir := range dirList {
|
||||
// components icon
|
||||
componentDir := fp + "/" + dir
|
||||
component := models.BuiltinComponent{
|
||||
Ident: dir,
|
||||
}
|
||||
|
||||
// get logo name
|
||||
// /api/n9e/integrations/icon/AliYun/aliyun.png
|
||||
files, err := file.FilesUnder(componentDir + "/icon")
|
||||
if err == nil && len(files) > 0 {
|
||||
component.Logo = "/api/n9e/integrations/icon/" + component.Ident + "/" + files[0]
|
||||
} else if err != nil {
|
||||
logger.Warningf("read builtin component icon dir fail %s %v", component.Ident, err)
|
||||
}
|
||||
|
||||
// get description
|
||||
files, err = file.FilesUnder(componentDir + "/markdown")
|
||||
if err == nil && len(files) > 0 {
|
||||
var readmeFile string
|
||||
for _, file := range files {
|
||||
if strings.HasSuffix(strings.ToLower(file), "md") {
|
||||
readmeFile = componentDir + "/markdown/" + file
|
||||
break
|
||||
}
|
||||
}
|
||||
if readmeFile != "" {
|
||||
component.Readme, _ = file.ReadString(readmeFile)
|
||||
}
|
||||
} else if err != nil {
|
||||
logger.Warningf("read builtin component markdown dir fail %s %v", component.Ident, err)
|
||||
}
|
||||
|
||||
exists, _ := models.BuiltinComponentExists(ctx, &component)
|
||||
if !exists {
|
||||
err = component.Add(ctx, SYSTEM)
|
||||
if err != nil {
|
||||
logger.Warning("add builtin component fail ", component, err)
|
||||
continue
|
||||
}
|
||||
} else {
|
||||
old, err := models.BuiltinComponentGet(ctx, "ident = ?", component.Ident)
|
||||
if err != nil {
|
||||
logger.Warning("get builtin component fail ", component, err)
|
||||
continue
|
||||
}
|
||||
|
||||
if old == nil {
|
||||
logger.Warning("get builtin component nil ", component)
|
||||
continue
|
||||
}
|
||||
|
||||
if old.UpdatedBy == SYSTEM {
|
||||
now := time.Now().Unix()
|
||||
old.CreatedAt = now
|
||||
old.UpdatedAt = now
|
||||
old.Readme = component.Readme
|
||||
old.UpdatedBy = SYSTEM
|
||||
|
||||
err = models.DB(ctx).Model(old).Select("*").Updates(old).Error
|
||||
if err != nil {
|
||||
logger.Warning("update builtin component fail ", old, err)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// delete uuid is emtpy
|
||||
err = models.DB(ctx).Exec("delete from builtin_payloads where uuid = 0 and type != 'collect' and (updated_by = 'system' or updated_by = '')").Error
|
||||
if err != nil {
|
||||
logger.Warning("delete builtin payloads fail ", err)
|
||||
}
|
||||
|
||||
// delete builtin metrics uuid is emtpy
|
||||
err = models.DB(ctx).Exec("delete from builtin_metrics where uuid = 0 and (updated_by = 'system' or updated_by = '')").Error
|
||||
if err != nil {
|
||||
logger.Warning("delete builtin metrics fail ", err)
|
||||
}
|
||||
|
||||
// alerts
|
||||
files, err = file.FilesUnder(componentDir + "/alerts")
|
||||
if err == nil && len(files) > 0 {
|
||||
for _, f := range files {
|
||||
fp := componentDir + "/alerts/" + f
|
||||
bs, err := file.ReadBytes(fp)
|
||||
if err != nil {
|
||||
logger.Warning("read builtin component alerts file fail ", f, err)
|
||||
continue
|
||||
}
|
||||
|
||||
alerts := []models.AlertRule{}
|
||||
err = json.Unmarshal(bs, &alerts)
|
||||
if err != nil {
|
||||
logger.Warning("parse builtin component alerts file fail ", f, err)
|
||||
continue
|
||||
}
|
||||
|
||||
newAlerts := []models.AlertRule{}
|
||||
writeAlertFileFlag := false
|
||||
for _, alert := range alerts {
|
||||
if alert.UUID == 0 {
|
||||
writeAlertFileFlag = true
|
||||
alert.UUID = time.Now().UnixNano()
|
||||
}
|
||||
|
||||
newAlerts = append(newAlerts, alert)
|
||||
content, err := json.Marshal(alert)
|
||||
if err != nil {
|
||||
logger.Warning("marshal builtin alert fail ", alert, err)
|
||||
continue
|
||||
}
|
||||
|
||||
cate := strings.Replace(f, ".json", "", -1)
|
||||
builtinAlert := models.BuiltinPayload{
|
||||
Component: component.Ident,
|
||||
Type: "alert",
|
||||
Cate: cate,
|
||||
Name: alert.Name,
|
||||
Tags: alert.AppendTags,
|
||||
Content: string(content),
|
||||
UUID: alert.UUID,
|
||||
}
|
||||
|
||||
old, err := models.BuiltinPayloadGet(ctx, "uuid = ?", alert.UUID)
|
||||
if err != nil {
|
||||
logger.Warning("get builtin alert fail ", builtinAlert, err)
|
||||
continue
|
||||
}
|
||||
|
||||
if old == nil {
|
||||
err := builtinAlert.Add(ctx, SYSTEM)
|
||||
if err != nil {
|
||||
logger.Warning("add builtin alert fail ", builtinAlert, err)
|
||||
}
|
||||
continue
|
||||
}
|
||||
|
||||
if old.UpdatedBy == SYSTEM {
|
||||
old.Content = string(content)
|
||||
old.Name = alert.Name
|
||||
old.Tags = alert.AppendTags
|
||||
err = models.DB(ctx).Model(old).Select("*").Updates(old).Error
|
||||
if err != nil {
|
||||
logger.Warningf("update builtin alert:%+v fail %v", builtinAlert, err)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if writeAlertFileFlag {
|
||||
bs, err = json.MarshalIndent(newAlerts, "", " ")
|
||||
if err != nil {
|
||||
logger.Warning("marshal builtin alerts fail ", newAlerts, err)
|
||||
continue
|
||||
}
|
||||
|
||||
_, err = file.WriteBytes(fp, bs)
|
||||
if err != nil {
|
||||
logger.Warning("write builtin alerts file fail ", f, err)
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
// dashboards
|
||||
files, err = file.FilesUnder(componentDir + "/dashboards")
|
||||
if err == nil && len(files) > 0 {
|
||||
for _, f := range files {
|
||||
fp := componentDir + "/dashboards/" + f
|
||||
bs, err := file.ReadBytes(fp)
|
||||
if err != nil {
|
||||
logger.Warning("read builtin component dashboards file fail ", f, err)
|
||||
continue
|
||||
}
|
||||
|
||||
dashboard := BuiltinBoard{}
|
||||
err = json.Unmarshal(bs, &dashboard)
|
||||
if err != nil {
|
||||
logger.Warning("parse builtin component dashboards file fail ", f, err)
|
||||
continue
|
||||
}
|
||||
|
||||
if dashboard.UUID == 0 {
|
||||
dashboard.UUID = time.Now().UnixNano()
|
||||
// 补全文件中的 uuid
|
||||
bs, err = json.MarshalIndent(dashboard, "", " ")
|
||||
if err != nil {
|
||||
logger.Warning("marshal builtin dashboard fail ", dashboard, err)
|
||||
continue
|
||||
}
|
||||
|
||||
_, err = file.WriteBytes(fp, bs)
|
||||
if err != nil {
|
||||
logger.Warning("write builtin dashboard file fail ", f, err)
|
||||
}
|
||||
}
|
||||
|
||||
content, err := json.Marshal(dashboard)
|
||||
if err != nil {
|
||||
logger.Warning("marshal builtin dashboard fail ", dashboard, err)
|
||||
continue
|
||||
}
|
||||
|
||||
builtinDashboard := models.BuiltinPayload{
|
||||
Component: component.Ident,
|
||||
Type: "dashboard",
|
||||
Cate: "",
|
||||
Name: dashboard.Name,
|
||||
Tags: dashboard.Tags,
|
||||
Content: string(content),
|
||||
UUID: dashboard.UUID,
|
||||
}
|
||||
|
||||
old, err := models.BuiltinPayloadGet(ctx, "uuid = ?", dashboard.UUID)
|
||||
if err != nil {
|
||||
logger.Warning("get builtin alert fail ", builtinDashboard, err)
|
||||
continue
|
||||
}
|
||||
|
||||
if old == nil {
|
||||
err := builtinDashboard.Add(ctx, SYSTEM)
|
||||
if err != nil {
|
||||
logger.Warning("add builtin alert fail ", builtinDashboard, err)
|
||||
}
|
||||
continue
|
||||
}
|
||||
|
||||
if old.UpdatedBy == SYSTEM {
|
||||
old.Content = string(content)
|
||||
old.Name = dashboard.Name
|
||||
old.Tags = dashboard.Tags
|
||||
err = models.DB(ctx).Model(old).Select("*").Updates(old).Error
|
||||
if err != nil {
|
||||
logger.Warningf("update builtin alert:%+v fail %v", builtinDashboard, err)
|
||||
}
|
||||
}
|
||||
}
|
||||
} else if err != nil {
|
||||
logger.Warningf("read builtin component dash dir fail %s %v", component.Ident, err)
|
||||
}
|
||||
|
||||
// metrics
|
||||
files, err = file.FilesUnder(componentDir + "/metrics")
|
||||
if err == nil && len(files) > 0 {
|
||||
for _, f := range files {
|
||||
fp := componentDir + "/metrics/" + f
|
||||
bs, err := file.ReadBytes(fp)
|
||||
if err != nil {
|
||||
logger.Warning("read builtin component metrics file fail", f, err)
|
||||
continue
|
||||
}
|
||||
|
||||
metrics := []models.BuiltinMetric{}
|
||||
newMetrics := []models.BuiltinMetric{}
|
||||
err = json.Unmarshal(bs, &metrics)
|
||||
if err != nil {
|
||||
logger.Warning("parse builtin component metrics file fail", f, err)
|
||||
continue
|
||||
}
|
||||
|
||||
writeMetricFileFlag := false
|
||||
for _, metric := range metrics {
|
||||
if metric.UUID == 0 {
|
||||
writeMetricFileFlag = true
|
||||
metric.UUID = time.Now().UnixNano()
|
||||
}
|
||||
newMetrics = append(newMetrics, metric)
|
||||
|
||||
old, err := models.BuiltinMetricGet(ctx, "uuid = ?", metric.UUID)
|
||||
if err != nil {
|
||||
logger.Warning("get builtin metrics fail ", metric, err)
|
||||
continue
|
||||
}
|
||||
|
||||
if old == nil {
|
||||
err := metric.Add(ctx, SYSTEM)
|
||||
if err != nil {
|
||||
logger.Warning("add builtin metrics fail ", metric, err)
|
||||
}
|
||||
continue
|
||||
}
|
||||
|
||||
if old.UpdatedBy == SYSTEM {
|
||||
old.Collector = metric.Collector
|
||||
old.Typ = metric.Typ
|
||||
old.Name = metric.Name
|
||||
old.Unit = metric.Unit
|
||||
old.Note = metric.Note
|
||||
old.Lang = metric.Lang
|
||||
old.Expression = metric.Expression
|
||||
|
||||
err = models.DB(ctx).Model(old).Select("*").Updates(old).Error
|
||||
if err != nil {
|
||||
logger.Warningf("update builtin metric:%+v fail %v", metric, err)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if writeMetricFileFlag {
|
||||
bs, err = json.MarshalIndent(newMetrics, "", " ")
|
||||
if err != nil {
|
||||
logger.Warning("marshal builtin metrics fail ", newMetrics, err)
|
||||
continue
|
||||
}
|
||||
|
||||
_, err = file.WriteBytes(fp, bs)
|
||||
if err != nil {
|
||||
logger.Warning("write builtin metrics file fail ", f, err)
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
} else if err != nil {
|
||||
logger.Warningf("read builtin component metrics dir fail %s %v", component.Ident, err)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
type BuiltinBoard struct {
|
||||
Id int64 `json:"id" gorm:"primaryKey"`
|
||||
GroupId int64 `json:"group_id"`
|
||||
Name string `json:"name"`
|
||||
Ident string `json:"ident"`
|
||||
Tags string `json:"tags"`
|
||||
CreateAt int64 `json:"create_at"`
|
||||
CreateBy string `json:"create_by"`
|
||||
UpdateAt int64 `json:"update_at"`
|
||||
UpdateBy string `json:"update_by"`
|
||||
Configs interface{} `json:"configs" gorm:"-"`
|
||||
Public int `json:"public"` // 0: false, 1: true
|
||||
PublicCate int `json:"public_cate"` // 0: anonymous, 1: login, 2: busi
|
||||
Bgids []int64 `json:"bgids" gorm:"-"`
|
||||
BuiltIn int `json:"built_in"` // 0: false, 1: true
|
||||
Hide int `json:"hide"` // 0: false, 1: true
|
||||
UUID int64 `json:"uuid"`
|
||||
}
|
||||
@@ -91,6 +91,11 @@ func (s *Set) updateMeta(items map[string]models.HostMeta) {
|
||||
}
|
||||
|
||||
func (s *Set) updateTargets(m map[string]models.HostMeta) error {
|
||||
if s.redis == nil {
|
||||
logger.Warningf("redis is nil")
|
||||
return nil
|
||||
}
|
||||
|
||||
count := int64(len(m))
|
||||
if count == 0 {
|
||||
return nil
|
||||
|
||||
@@ -8,10 +8,12 @@ import (
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
"github.com/ccfos/nightingale/v6/alert/aconf"
|
||||
"github.com/ccfos/nightingale/v6/center/cconf"
|
||||
"github.com/ccfos/nightingale/v6/center/cstats"
|
||||
"github.com/ccfos/nightingale/v6/center/metas"
|
||||
"github.com/ccfos/nightingale/v6/center/sso"
|
||||
"github.com/ccfos/nightingale/v6/conf"
|
||||
_ "github.com/ccfos/nightingale/v6/front/statik"
|
||||
"github.com/ccfos/nightingale/v6/memsto"
|
||||
"github.com/ccfos/nightingale/v6/pkg/aop"
|
||||
@@ -33,6 +35,8 @@ import (
|
||||
type Router struct {
|
||||
HTTP httpx.Config
|
||||
Center cconf.Center
|
||||
Ibex conf.Ibex
|
||||
Alert aconf.Alert
|
||||
Operations cconf.Operation
|
||||
DatasourceCache *memsto.DatasourceCacheType
|
||||
NotifyConfigCache *memsto.NotifyConfigCacheType
|
||||
@@ -46,12 +50,15 @@ type Router struct {
|
||||
UserCache *memsto.UserCacheType
|
||||
UserGroupCache *memsto.UserGroupCacheType
|
||||
Ctx *ctx.Context
|
||||
HeartbeatHook HeartbeatHookFunc
|
||||
}
|
||||
|
||||
func New(httpConfig httpx.Config, center cconf.Center, operations cconf.Operation, ds *memsto.DatasourceCacheType, ncc *memsto.NotifyConfigCacheType, pc *prom.PromClientMap, tdendgineClients *tdengine.TdengineClientMap, redis storage.Redis, sso *sso.SsoClient, ctx *ctx.Context, metaSet *metas.Set, idents *idents.Set, tc *memsto.TargetCacheType, uc *memsto.UserCacheType, ugc *memsto.UserGroupCacheType) *Router {
|
||||
func New(httpConfig httpx.Config, center cconf.Center, alert aconf.Alert, ibex conf.Ibex, operations cconf.Operation, ds *memsto.DatasourceCacheType, ncc *memsto.NotifyConfigCacheType, pc *prom.PromClientMap, tdendgineClients *tdengine.TdengineClientMap, redis storage.Redis, sso *sso.SsoClient, ctx *ctx.Context, metaSet *metas.Set, idents *idents.Set, tc *memsto.TargetCacheType, uc *memsto.UserCacheType, ugc *memsto.UserGroupCacheType) *Router {
|
||||
return &Router{
|
||||
HTTP: httpConfig,
|
||||
Center: center,
|
||||
Alert: alert,
|
||||
Ibex: ibex,
|
||||
Operations: operations,
|
||||
DatasourceCache: ds,
|
||||
NotifyConfigCache: ncc,
|
||||
@@ -65,6 +72,7 @@ func New(httpConfig httpx.Config, center cconf.Center, operations cconf.Operatio
|
||||
UserCache: uc,
|
||||
UserGroupCache: ugc,
|
||||
Ctx: ctx,
|
||||
HeartbeatHook: func(ident string) map[string]interface{} { return nil },
|
||||
}
|
||||
}
|
||||
|
||||
@@ -88,15 +96,17 @@ func languageDetector(i18NHeaderKey string) gin.HandlerFunc {
|
||||
if headerKey != "" {
|
||||
lang := c.GetHeader(headerKey)
|
||||
if lang != "" {
|
||||
if strings.HasPrefix(lang, "zh") {
|
||||
c.Request.Header.Set("X-Language", "zh")
|
||||
if strings.HasPrefix(lang, "zh_HK") {
|
||||
c.Request.Header.Set("X-Language", "zh_HK")
|
||||
} else if strings.HasPrefix(lang, "zh") {
|
||||
c.Request.Header.Set("X-Language", "zh_CN")
|
||||
} else if strings.HasPrefix(lang, "en") {
|
||||
c.Request.Header.Set("X-Language", "en")
|
||||
} else {
|
||||
c.Request.Header.Set("X-Language", lang)
|
||||
}
|
||||
} else {
|
||||
c.Request.Header.Set("X-Language", "en")
|
||||
c.Request.Header.Set("X-Language", "zh_CN")
|
||||
}
|
||||
}
|
||||
c.Next()
|
||||
@@ -109,7 +119,7 @@ func (rt *Router) configNoRoute(r *gin.Engine, fs *http.FileSystem) {
|
||||
suffix := arr[len(arr)-1]
|
||||
|
||||
switch suffix {
|
||||
case "png", "jpeg", "jpg", "svg", "ico", "gif", "css", "js", "html", "htm", "gz", "zip", "map", "ttf":
|
||||
case "png", "jpeg", "jpg", "svg", "ico", "gif", "css", "js", "html", "htm", "gz", "zip", "map", "ttf", "md":
|
||||
if !rt.Center.UseFileAssets {
|
||||
c.FileFromFS(c.Request.URL.Path, *fs)
|
||||
} else {
|
||||
@@ -187,7 +197,7 @@ func (rt *Router) Config(r *gin.Engine) {
|
||||
|
||||
pages.GET("/sql-template", rt.QuerySqlTemplate)
|
||||
pages.POST("/auth/login", rt.jwtMock(), rt.loginPost)
|
||||
pages.POST("/auth/logout", rt.jwtMock(), rt.auth(), rt.logoutPost)
|
||||
pages.POST("/auth/logout", rt.jwtMock(), rt.auth(), rt.user(), rt.logoutPost)
|
||||
pages.POST("/auth/refresh", rt.jwtMock(), rt.refreshPost)
|
||||
pages.POST("/auth/captcha", rt.jwtMock(), rt.generateCaptcha)
|
||||
pages.POST("/auth/captcha-verify", rt.jwtMock(), rt.captchaVerify)
|
||||
@@ -226,6 +236,20 @@ func (rt *Router) Config(r *gin.Engine) {
|
||||
pages.POST("/metric-views", rt.auth(), rt.user(), rt.metricViewAdd)
|
||||
pages.PUT("/metric-views", rt.auth(), rt.user(), rt.metricViewPut)
|
||||
|
||||
pages.GET("/builtin-metric-filters", rt.auth(), rt.user(), rt.metricFilterGets)
|
||||
pages.DELETE("/builtin-metric-filters", rt.auth(), rt.user(), rt.metricFilterDel)
|
||||
pages.POST("/builtin-metric-filters", rt.auth(), rt.user(), rt.metricFilterAdd)
|
||||
pages.PUT("/builtin-metric-filters", rt.auth(), rt.user(), rt.metricFilterPut)
|
||||
pages.POST("/builtin-metric-promql", rt.auth(), rt.user(), rt.getMetricPromql)
|
||||
|
||||
pages.POST("/builtin-metrics", rt.auth(), rt.user(), rt.perm("/builtin-metrics/add"), rt.builtinMetricsAdd)
|
||||
pages.PUT("/builtin-metrics", rt.auth(), rt.user(), rt.perm("/builtin-metrics/put"), rt.builtinMetricsPut)
|
||||
pages.DELETE("/builtin-metrics", rt.auth(), rt.user(), rt.perm("/builtin-metrics/del"), rt.builtinMetricsDel)
|
||||
pages.GET("/builtin-metrics", rt.auth(), rt.user(), rt.builtinMetricsGets)
|
||||
pages.GET("/builtin-metrics/types", rt.auth(), rt.user(), rt.builtinMetricsTypes)
|
||||
pages.GET("/builtin-metrics/types/default", rt.auth(), rt.user(), rt.builtinMetricsDefaultTypes)
|
||||
pages.GET("/builtin-metrics/collectors", rt.auth(), rt.user(), rt.builtinMetricsCollectors)
|
||||
|
||||
pages.GET("/user-groups", rt.auth(), rt.user(), rt.userGroupGets)
|
||||
pages.POST("/user-groups", rt.auth(), rt.user(), rt.perm("/user-groups/add"), rt.userGroupAdd)
|
||||
pages.GET("/user-group/:id", rt.auth(), rt.user(), rt.userGroupGet)
|
||||
@@ -257,18 +281,21 @@ func (rt *Router) Config(r *gin.Engine) {
|
||||
pages.POST("/builtin-cate-favorite", rt.auth(), rt.user(), rt.builtinCateFavoriteAdd)
|
||||
pages.DELETE("/builtin-cate-favorite/:name", rt.auth(), rt.user(), rt.builtinCateFavoriteDel)
|
||||
|
||||
pages.GET("/builtin-boards", rt.builtinBoardGets)
|
||||
pages.GET("/builtin-board/:name", rt.builtinBoardGet)
|
||||
pages.GET("/dashboards/builtin/list", rt.builtinBoardGets)
|
||||
pages.GET("/builtin-boards-cates", rt.auth(), rt.user(), rt.builtinBoardCateGets)
|
||||
pages.POST("/builtin-boards-detail", rt.auth(), rt.user(), rt.builtinBoardDetailGets)
|
||||
pages.GET("/integrations/icon/:cate/:name", rt.builtinIcon)
|
||||
pages.GET("/integrations/makedown/:cate", rt.builtinMarkdown)
|
||||
|
||||
// pages.GET("/builtin-boards", rt.builtinBoardGets)
|
||||
// pages.GET("/builtin-board/:name", rt.builtinBoardGet)
|
||||
// pages.GET("/dashboards/builtin/list", rt.builtinBoardGets)
|
||||
// pages.GET("/builtin-boards-cates", rt.auth(), rt.user(), rt.builtinBoardCateGets)
|
||||
// pages.POST("/builtin-boards-detail", rt.auth(), rt.user(), rt.builtinBoardDetailGets)
|
||||
// pages.GET("/integrations/makedown/:cate", rt.builtinMarkdown)
|
||||
|
||||
pages.GET("/busi-groups/public-boards", rt.auth(), rt.user(), rt.perm("/dashboards"), rt.publicBoardGets)
|
||||
pages.GET("/busi-groups/boards", rt.auth(), rt.user(), rt.perm("/dashboards"), rt.boardGetsByGids)
|
||||
pages.GET("/busi-group/:id/boards", rt.auth(), rt.user(), rt.perm("/dashboards"), rt.bgro(), rt.boardGets)
|
||||
pages.POST("/busi-group/:id/boards", rt.auth(), rt.user(), rt.perm("/dashboards/add"), rt.bgrw(), rt.boardAdd)
|
||||
pages.POST("/busi-group/:id/board/:bid/clone", rt.auth(), rt.user(), rt.perm("/dashboards/add"), rt.bgrw(), rt.boardClone)
|
||||
pages.POST("/busi-groups/boards/clones", rt.auth(), rt.user(), rt.perm("/dashboards/add"), rt.boardBatchClone)
|
||||
|
||||
pages.GET("/board/:bid", rt.boardGet)
|
||||
pages.GET("/board/:bid/pure", rt.boardPureGet)
|
||||
@@ -280,8 +307,9 @@ func (rt *Router) Config(r *gin.Engine) {
|
||||
pages.GET("/share-charts", rt.chartShareGets)
|
||||
pages.POST("/share-charts", rt.auth(), rt.chartShareAdd)
|
||||
|
||||
pages.GET("/alert-rules/builtin/alerts-cates", rt.auth(), rt.user(), rt.builtinAlertCateGets)
|
||||
pages.GET("/alert-rules/builtin/list", rt.auth(), rt.user(), rt.builtinAlertRules)
|
||||
// pages.GET("/alert-rules/builtin/alerts-cates", rt.auth(), rt.user(), rt.builtinAlertCateGets)
|
||||
// pages.GET("/alert-rules/builtin/list", rt.auth(), rt.user(), rt.builtinAlertRules)
|
||||
pages.GET("/alert-rules/callbacks", rt.auth(), rt.user(), rt.alertRuleCallbacks)
|
||||
|
||||
pages.GET("/busi-groups/alert-rules", rt.auth(), rt.user(), rt.perm("/alert-rules"), rt.alertRuleGetsByGids)
|
||||
pages.GET("/busi-group/:id/alert-rules", rt.auth(), rt.user(), rt.perm("/alert-rules"), rt.alertRuleGets)
|
||||
@@ -291,7 +319,9 @@ func (rt *Router) Config(r *gin.Engine) {
|
||||
pages.PUT("/busi-group/:id/alert-rules/fields", rt.auth(), rt.user(), rt.perm("/alert-rules/put"), rt.bgrw(), rt.alertRulePutFields)
|
||||
pages.PUT("/busi-group/:id/alert-rule/:arid", rt.auth(), rt.user(), rt.perm("/alert-rules/put"), rt.alertRulePutByFE)
|
||||
pages.GET("/alert-rule/:arid", rt.auth(), rt.user(), rt.perm("/alert-rules"), rt.alertRuleGet)
|
||||
pages.GET("/alert-rule/:arid/pure", rt.auth(), rt.user(), rt.perm("/alert-rules"), rt.alertRulePureGet)
|
||||
pages.PUT("/busi-group/alert-rule/validate", rt.auth(), rt.user(), rt.perm("/alert-rules/put"), rt.alertRuleValidation)
|
||||
pages.POST("/relabel-test", rt.auth(), rt.user(), rt.relabelTest)
|
||||
|
||||
pages.GET("/busi-groups/recording-rules", rt.auth(), rt.user(), rt.perm("/recording-rules"), rt.recordingRuleGetsByGids)
|
||||
pages.GET("/busi-group/:id/recording-rules", rt.auth(), rt.user(), rt.perm("/recording-rules"), rt.recordingRuleGets)
|
||||
@@ -307,6 +337,7 @@ func (rt *Router) Config(r *gin.Engine) {
|
||||
pages.POST("/busi-group/:id/alert-mutes", rt.auth(), rt.user(), rt.perm("/alert-mutes/add"), rt.bgrw(), rt.alertMuteAdd)
|
||||
pages.DELETE("/busi-group/:id/alert-mutes", rt.auth(), rt.user(), rt.perm("/alert-mutes/del"), rt.bgrw(), rt.alertMuteDel)
|
||||
pages.PUT("/busi-group/:id/alert-mute/:amid", rt.auth(), rt.user(), rt.perm("/alert-mutes/put"), rt.alertMutePutByFE)
|
||||
pages.GET("/busi-group/:id/alert-mute/:amid", rt.auth(), rt.user(), rt.perm("/alert-mutes"), rt.alertMuteGet)
|
||||
pages.PUT("/busi-group/:id/alert-mutes/fields", rt.auth(), rt.user(), rt.perm("/alert-mutes/put"), rt.bgrw(), rt.alertMutePutFields)
|
||||
|
||||
pages.GET("/busi-groups/alert-subscribes", rt.auth(), rt.user(), rt.perm("/alert-subscribes"), rt.alertSubscribeGetsByGids)
|
||||
@@ -320,15 +351,15 @@ func (rt *Router) Config(r *gin.Engine) {
|
||||
pages.GET("/alert-cur-event/:eid", rt.alertCurEventGet)
|
||||
pages.GET("/alert-his-event/:eid", rt.alertHisEventGet)
|
||||
} else {
|
||||
pages.GET("/alert-cur-event/:eid", rt.auth(), rt.alertCurEventGet)
|
||||
pages.GET("/alert-his-event/:eid", rt.auth(), rt.alertHisEventGet)
|
||||
pages.GET("/alert-cur-event/:eid", rt.auth(), rt.user(), rt.alertCurEventGet)
|
||||
pages.GET("/alert-his-event/:eid", rt.auth(), rt.user(), rt.alertHisEventGet)
|
||||
}
|
||||
|
||||
// card logic
|
||||
pages.GET("/alert-cur-events/list", rt.auth(), rt.alertCurEventsList)
|
||||
pages.GET("/alert-cur-events/card", rt.auth(), rt.alertCurEventsCard)
|
||||
pages.GET("/alert-cur-events/list", rt.auth(), rt.user(), rt.alertCurEventsList)
|
||||
pages.GET("/alert-cur-events/card", rt.auth(), rt.user(), rt.alertCurEventsCard)
|
||||
pages.POST("/alert-cur-events/card/details", rt.auth(), rt.alertCurEventsCardDetails)
|
||||
pages.GET("/alert-his-events/list", rt.auth(), rt.alertHisEventsList)
|
||||
pages.GET("/alert-his-events/list", rt.auth(), rt.user(), rt.alertHisEventsList)
|
||||
pages.DELETE("/alert-cur-events", rt.auth(), rt.user(), rt.perm("/alert-cur-events/del"), rt.alertCurEventDel)
|
||||
pages.GET("/alert-cur-events/stats", rt.auth(), rt.alertCurEventsStatistics)
|
||||
|
||||
@@ -349,11 +380,9 @@ func (rt *Router) Config(r *gin.Engine) {
|
||||
pages.GET("/busi-groups/tasks", rt.auth(), rt.user(), rt.perm("/job-tasks"), rt.taskGetsByGids)
|
||||
pages.GET("/busi-group/:id/tasks", rt.auth(), rt.user(), rt.perm("/job-tasks"), rt.bgro(), rt.taskGets)
|
||||
pages.POST("/busi-group/:id/tasks", rt.auth(), rt.user(), rt.perm("/job-tasks/add"), rt.bgrw(), rt.taskAdd)
|
||||
pages.GET("/busi-group/:id/task/*url", rt.auth(), rt.user(), rt.perm("/job-tasks"), rt.taskProxy)
|
||||
pages.PUT("/busi-group/:id/task/*url", rt.auth(), rt.user(), rt.perm("/job-tasks/put"), rt.bgrw(), rt.taskProxy)
|
||||
|
||||
pages.GET("/servers", rt.auth(), rt.admin(), rt.serversGet)
|
||||
pages.GET("/server-clusters", rt.auth(), rt.admin(), rt.serverClustersGet)
|
||||
pages.GET("/servers", rt.auth(), rt.user(), rt.serversGet)
|
||||
pages.GET("/server-clusters", rt.auth(), rt.user(), rt.serverClustersGet)
|
||||
|
||||
pages.POST("/datasource/list", rt.auth(), rt.user(), rt.datasourceList)
|
||||
pages.POST("/datasource/plugin/list", rt.auth(), rt.pluginList)
|
||||
@@ -371,7 +400,7 @@ func (rt *Router) Config(r *gin.Engine) {
|
||||
pages.PUT("/role/:id/ops", rt.auth(), rt.admin(), rt.roleBindOperation)
|
||||
pages.GET("/operation", rt.operations)
|
||||
|
||||
pages.GET("/notify-tpls", rt.auth(), rt.user(), rt.perm("/help/notification-tpls"), rt.notifyTplGets)
|
||||
pages.GET("/notify-tpls", rt.auth(), rt.user(), rt.notifyTplGets)
|
||||
pages.PUT("/notify-tpl/content", rt.auth(), rt.user(), rt.notifyTplUpdateContent)
|
||||
pages.PUT("/notify-tpl", rt.auth(), rt.user(), rt.notifyTplUpdate)
|
||||
pages.POST("/notify-tpl", rt.auth(), rt.user(), rt.notifyTplAdd)
|
||||
@@ -403,6 +432,9 @@ func (rt *Router) Config(r *gin.Engine) {
|
||||
pages.PUT("/es-index-pattern", rt.auth(), rt.admin(), rt.esIndexPatternPut)
|
||||
pages.DELETE("/es-index-pattern", rt.auth(), rt.admin(), rt.esIndexPatternDel)
|
||||
|
||||
pages.GET("/embedded-dashboards", rt.auth(), rt.user(), rt.perm("/embedded-dashboards"), rt.embeddedDashboardsGet)
|
||||
pages.PUT("/embedded-dashboards", rt.auth(), rt.user(), rt.perm("/embedded-dashboards/put"), rt.embeddedDashboardsPut)
|
||||
|
||||
pages.GET("/user-variable-configs", rt.auth(), rt.user(), rt.perm("/help/variable-configs"), rt.userVariableConfigGets)
|
||||
pages.POST("/user-variable-config", rt.auth(), rt.user(), rt.perm("/help/variable-configs"), rt.userVariableConfigAdd)
|
||||
pages.PUT("/user-variable-config/:id", rt.auth(), rt.user(), rt.perm("/help/variable-configs"), rt.userVariableConfigPut)
|
||||
@@ -415,6 +447,17 @@ func (rt *Router) Config(r *gin.Engine) {
|
||||
// for admin api
|
||||
pages.GET("/user/busi-groups", rt.auth(), rt.admin(), rt.userBusiGroupsGets)
|
||||
|
||||
pages.GET("/builtin-components", rt.auth(), rt.user(), rt.builtinComponentsGets)
|
||||
pages.POST("/builtin-components", rt.auth(), rt.user(), rt.perm("/built-in-components/add"), rt.builtinComponentsAdd)
|
||||
pages.PUT("/builtin-components", rt.auth(), rt.user(), rt.perm("/built-in-components/put"), rt.builtinComponentsPut)
|
||||
pages.DELETE("/builtin-components", rt.auth(), rt.user(), rt.perm("/built-in-components/del"), rt.builtinComponentsDel)
|
||||
|
||||
pages.GET("/builtin-payloads", rt.auth(), rt.user(), rt.builtinPayloadsGets)
|
||||
pages.GET("/builtin-payloads/cates", rt.auth(), rt.user(), rt.builtinPayloadcatesGet)
|
||||
pages.POST("/builtin-payloads", rt.auth(), rt.user(), rt.perm("/built-in-components/add"), rt.builtinPayloadsAdd)
|
||||
pages.GET("/builtin-payload/:id", rt.auth(), rt.user(), rt.perm("/built-in-components"), rt.builtinPayloadGet)
|
||||
pages.PUT("/builtin-payloads", rt.auth(), rt.user(), rt.perm("/built-in-components/put"), rt.builtinPayloadsPut)
|
||||
pages.DELETE("/builtin-payloads", rt.auth(), rt.user(), rt.perm("/built-in-components/del"), rt.builtinPayloadsDel)
|
||||
}
|
||||
|
||||
r.GET("/api/n9e/versions", func(c *gin.Context) {
|
||||
@@ -448,10 +491,14 @@ func (rt *Router) Config(r *gin.Engine) {
|
||||
service.GET("/user-group-members", rt.userGroupMemberGetsByService)
|
||||
|
||||
service.GET("/targets", rt.targetGetsByService)
|
||||
service.GET("/target/extra-meta", rt.targetExtendInfoByIdent)
|
||||
service.POST("/target/list", rt.targetGetsByHostFilter)
|
||||
service.DELETE("/targets", rt.targetDelByService)
|
||||
service.GET("/targets/tags", rt.targetGetTags)
|
||||
service.POST("/targets/tags", rt.targetBindTagsByService)
|
||||
service.DELETE("/targets/tags", rt.targetUnbindTagsByService)
|
||||
service.PUT("/targets/note", rt.targetUpdateNoteByService)
|
||||
service.PUT("/targets/bgid", rt.targetUpdateBgidByService)
|
||||
|
||||
service.POST("/alert-rules", rt.alertRuleAddByService)
|
||||
service.POST("/alert-rule-add", rt.alertRuleAddOneByService)
|
||||
@@ -481,6 +528,8 @@ func (rt *Router) Config(r *gin.Engine) {
|
||||
service.GET("/alert-his-event/:eid", rt.alertHisEventGet)
|
||||
|
||||
service.GET("/task-tpl/:tid", rt.taskTplGetByService)
|
||||
service.GET("/task-tpls", rt.taskTplGetsByService)
|
||||
service.GET("/task-tpl/statistics", rt.taskTplStatistics)
|
||||
|
||||
service.GET("/config/:id", rt.configGet)
|
||||
service.GET("/configs", rt.configsGet)
|
||||
@@ -499,6 +548,9 @@ func (rt *Router) Config(r *gin.Engine) {
|
||||
service.POST("/task-record-add", rt.taskRecordAdd)
|
||||
|
||||
service.GET("/user-variable/decrypt", rt.userVariableGetDecryptByService)
|
||||
|
||||
service.GET("/targets-of-alert-rule", rt.targetsOfAlertRule)
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -43,7 +43,6 @@ func (rt *Router) alertCurEventsCard(c *gin.Context) {
|
||||
stime, etime := getTimeRange(c)
|
||||
severity := ginx.QueryInt(c, "severity", -1)
|
||||
query := ginx.QueryStr(c, "query", "")
|
||||
busiGroupId := ginx.QueryInt64(c, "bgid", 0)
|
||||
dsIds := queryDatasourceIds(c)
|
||||
rules := parseAggrRules(c)
|
||||
|
||||
@@ -62,8 +61,11 @@ func (rt *Router) alertCurEventsCard(c *gin.Context) {
|
||||
cates = strings.Split(cate, ",")
|
||||
}
|
||||
|
||||
bgids, err := GetBusinessGroupIds(c, rt.Ctx, rt.Center.EventHistoryGroupView)
|
||||
ginx.Dangerous(err)
|
||||
|
||||
// 最多获取50000个,获取太多也没啥意义
|
||||
list, err := models.AlertCurEventGets(rt.Ctx, prods, busiGroupId, stime, etime, severity, dsIds, cates, query, 50000, 0)
|
||||
list, err := models.AlertCurEventGets(rt.Ctx, prods, bgids, stime, etime, severity, dsIds, cates, query, 50000, 0)
|
||||
ginx.Dangerous(err)
|
||||
|
||||
cardmap := make(map[string]*AlertCard)
|
||||
@@ -142,7 +144,6 @@ func (rt *Router) alertCurEventsList(c *gin.Context) {
|
||||
severity := ginx.QueryInt(c, "severity", -1)
|
||||
query := ginx.QueryStr(c, "query", "")
|
||||
limit := ginx.QueryInt(c, "limit", 20)
|
||||
busiGroupId := ginx.QueryInt64(c, "bgid", 0)
|
||||
dsIds := queryDatasourceIds(c)
|
||||
|
||||
prod := ginx.QueryStr(c, "prods", "")
|
||||
@@ -161,10 +162,13 @@ func (rt *Router) alertCurEventsList(c *gin.Context) {
|
||||
cates = strings.Split(cate, ",")
|
||||
}
|
||||
|
||||
total, err := models.AlertCurEventTotal(rt.Ctx, prods, busiGroupId, stime, etime, severity, dsIds, cates, query)
|
||||
bgids, err := GetBusinessGroupIds(c, rt.Ctx, rt.Center.EventHistoryGroupView)
|
||||
ginx.Dangerous(err)
|
||||
|
||||
list, err := models.AlertCurEventGets(rt.Ctx, prods, busiGroupId, stime, etime, severity, dsIds, cates, query, limit, ginx.Offset(c, limit))
|
||||
total, err := models.AlertCurEventTotal(rt.Ctx, prods, bgids, stime, etime, severity, dsIds, cates, query)
|
||||
ginx.Dangerous(err)
|
||||
|
||||
list, err := models.AlertCurEventGets(rt.Ctx, prods, bgids, stime, etime, severity, dsIds, cates, query, limit, ginx.Offset(c, limit))
|
||||
ginx.Dangerous(err)
|
||||
|
||||
cache := make(map[int64]*models.UserGroup)
|
||||
@@ -214,6 +218,10 @@ func (rt *Router) alertCurEventGet(c *gin.Context) {
|
||||
ginx.Bomb(404, "No such active event")
|
||||
}
|
||||
|
||||
if !rt.Center.AnonymousAccess.AlertDetail && rt.Center.EventHistoryGroupView {
|
||||
rt.bgroCheck(c, event.GroupId)
|
||||
}
|
||||
|
||||
ginx.NewRender(c).Data(event, nil)
|
||||
}
|
||||
|
||||
|
||||
@@ -1,13 +1,16 @@
|
||||
package router
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
"github.com/ccfos/nightingale/v6/models"
|
||||
"github.com/ccfos/nightingale/v6/pkg/ctx"
|
||||
|
||||
"github.com/gin-gonic/gin"
|
||||
"github.com/toolkits/pkg/ginx"
|
||||
"golang.org/x/exp/slices"
|
||||
)
|
||||
|
||||
func getTimeRange(c *gin.Context) (stime, etime int64) {
|
||||
@@ -33,7 +36,6 @@ func (rt *Router) alertHisEventsList(c *gin.Context) {
|
||||
recovered := ginx.QueryInt(c, "is_recovered", -1)
|
||||
query := ginx.QueryStr(c, "query", "")
|
||||
limit := ginx.QueryInt(c, "limit", 20)
|
||||
busiGroupId := ginx.QueryInt64(c, "bgid", 0)
|
||||
dsIds := queryDatasourceIds(c)
|
||||
|
||||
prod := ginx.QueryStr(c, "prods", "")
|
||||
@@ -52,10 +54,13 @@ func (rt *Router) alertHisEventsList(c *gin.Context) {
|
||||
cates = strings.Split(cate, ",")
|
||||
}
|
||||
|
||||
total, err := models.AlertHisEventTotal(rt.Ctx, prods, busiGroupId, stime, etime, severity, recovered, dsIds, cates, query)
|
||||
bgids, err := GetBusinessGroupIds(c, rt.Ctx, rt.Center.EventHistoryGroupView)
|
||||
ginx.Dangerous(err)
|
||||
|
||||
list, err := models.AlertHisEventGets(rt.Ctx, prods, busiGroupId, stime, etime, severity, recovered, dsIds, cates, query, limit, ginx.Offset(c, limit))
|
||||
total, err := models.AlertHisEventTotal(rt.Ctx, prods, bgids, stime, etime, severity, recovered, dsIds, cates, query)
|
||||
ginx.Dangerous(err)
|
||||
|
||||
list, err := models.AlertHisEventGets(rt.Ctx, prods, bgids, stime, etime, severity, recovered, dsIds, cates, query, limit, ginx.Offset(c, limit))
|
||||
ginx.Dangerous(err)
|
||||
|
||||
cache := make(map[int64]*models.UserGroup)
|
||||
@@ -78,5 +83,50 @@ func (rt *Router) alertHisEventGet(c *gin.Context) {
|
||||
ginx.Bomb(404, "No such alert event")
|
||||
}
|
||||
|
||||
if !rt.Center.AnonymousAccess.AlertDetail && rt.Center.EventHistoryGroupView {
|
||||
rt.bgroCheck(c, event.GroupId)
|
||||
}
|
||||
|
||||
ginx.NewRender(c).Data(event, err)
|
||||
}
|
||||
|
||||
func GetBusinessGroupIds(c *gin.Context, ctx *ctx.Context, eventHistoryGroupView bool) ([]int64, error) {
|
||||
bgid := ginx.QueryInt64(c, "bgid", 0)
|
||||
var bgids []int64
|
||||
|
||||
if !eventHistoryGroupView || strings.HasPrefix(c.Request.URL.Path, "/v1") {
|
||||
if bgid > 0 {
|
||||
return []int64{bgid}, nil
|
||||
}
|
||||
return bgids, nil
|
||||
}
|
||||
|
||||
user := c.MustGet("user").(*models.User)
|
||||
if user.IsAdmin() {
|
||||
if bgid > 0 {
|
||||
return []int64{bgid}, nil
|
||||
}
|
||||
return bgids, nil
|
||||
}
|
||||
|
||||
bussGroupIds, err := models.MyBusiGroupIds(ctx, user.Id)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
if len(bussGroupIds) == 0 {
|
||||
// 如果没查到用户属于任何业务组,需要返回一个0,否则会导致查询到全部告警历史
|
||||
return []int64{0}, nil
|
||||
}
|
||||
|
||||
if bgid > 0 && !slices.Contains(bussGroupIds, bgid) {
|
||||
return nil, fmt.Errorf("business group ID not allowed")
|
||||
}
|
||||
|
||||
if bgid > 0 {
|
||||
// Pass filter parameters, priority to use
|
||||
return []int64{bgid}, nil
|
||||
}
|
||||
|
||||
return bussGroupIds, nil
|
||||
}
|
||||
|
||||
@@ -1,14 +1,18 @@
|
||||
package router
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"net/http"
|
||||
"strconv"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
"github.com/ccfos/nightingale/v6/models"
|
||||
"github.com/ccfos/nightingale/v6/pushgw/pconf"
|
||||
"github.com/ccfos/nightingale/v6/pushgw/writer"
|
||||
|
||||
"github.com/gin-gonic/gin"
|
||||
"github.com/prometheus/prometheus/prompb"
|
||||
"github.com/toolkits/pkg/ginx"
|
||||
"github.com/toolkits/pkg/i18n"
|
||||
"github.com/toolkits/pkg/str"
|
||||
@@ -29,13 +33,23 @@ func (rt *Router) alertRuleGets(c *gin.Context) {
|
||||
}
|
||||
|
||||
func (rt *Router) alertRuleGetsByGids(c *gin.Context) {
|
||||
gids := str.IdsInt64(ginx.QueryStr(c, "gids"), ",")
|
||||
if len(gids) == 0 {
|
||||
ginx.NewRender(c, http.StatusBadRequest).Message("arg(gids) is empty")
|
||||
return
|
||||
}
|
||||
for _, gid := range gids {
|
||||
rt.bgroCheck(c, gid)
|
||||
gids := str.IdsInt64(ginx.QueryStr(c, "gids", ""), ",")
|
||||
if len(gids) > 0 {
|
||||
for _, gid := range gids {
|
||||
rt.bgroCheck(c, gid)
|
||||
}
|
||||
} else {
|
||||
me := c.MustGet("user").(*models.User)
|
||||
if !me.IsAdmin() {
|
||||
var err error
|
||||
gids, err = models.MyBusiGroupIds(rt.Ctx, me.Id)
|
||||
ginx.Dangerous(err)
|
||||
|
||||
if len(gids) == 0 {
|
||||
ginx.NewRender(c).Data([]int{}, nil)
|
||||
return
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
ars, err := models.AlertRuleGetsByBGIds(rt.Ctx, gids)
|
||||
@@ -306,6 +320,20 @@ func (rt *Router) alertRuleGet(c *gin.Context) {
|
||||
ginx.NewRender(c).Data(ar, err)
|
||||
}
|
||||
|
||||
func (rt *Router) alertRulePureGet(c *gin.Context) {
|
||||
arid := ginx.UrlParamInt64(c, "arid")
|
||||
|
||||
ar, err := models.AlertRuleGetById(rt.Ctx, arid)
|
||||
ginx.Dangerous(err)
|
||||
|
||||
if ar == nil {
|
||||
ginx.NewRender(c, http.StatusNotFound).Message("No such AlertRule")
|
||||
return
|
||||
}
|
||||
|
||||
ginx.NewRender(c).Data(ar, err)
|
||||
}
|
||||
|
||||
// pre validation before save rule
|
||||
func (rt *Router) alertRuleValidation(c *gin.Context) {
|
||||
var f models.AlertRule //new
|
||||
@@ -356,3 +384,72 @@ func (rt *Router) alertRuleValidation(c *gin.Context) {
|
||||
|
||||
ginx.NewRender(c).Message("")
|
||||
}
|
||||
|
||||
func (rt *Router) alertRuleCallbacks(c *gin.Context) {
|
||||
user := c.MustGet("user").(*models.User)
|
||||
bussGroupIds, err := models.MyBusiGroupIds(rt.Ctx, user.Id)
|
||||
ginx.Dangerous(err)
|
||||
|
||||
ars, err := models.AlertRuleGetsByBGIds(rt.Ctx, bussGroupIds)
|
||||
ginx.Dangerous(err)
|
||||
|
||||
var callbacks []string
|
||||
callbackFilter := make(map[string]struct{})
|
||||
for i := range ars {
|
||||
for _, callback := range ars[i].CallbacksJSON {
|
||||
if _, ok := callbackFilter[callback]; !ok {
|
||||
callbackFilter[callback] = struct{}{}
|
||||
callbacks = append(callbacks, callback)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
ginx.NewRender(c).Data(callbacks, nil)
|
||||
}
|
||||
|
||||
type alertRuleTestForm struct {
|
||||
Configs []*pconf.RelabelConfig `json:"configs"`
|
||||
Tags []string `json:"tags"`
|
||||
}
|
||||
|
||||
func (rt *Router) relabelTest(c *gin.Context) {
|
||||
var f alertRuleTestForm
|
||||
ginx.BindJSON(c, &f)
|
||||
|
||||
if len(f.Tags) == 0 || len(f.Configs) == 0 {
|
||||
ginx.Bomb(http.StatusBadRequest, "relabel config is empty")
|
||||
}
|
||||
|
||||
labels := make([]prompb.Label, len(f.Tags))
|
||||
for i, tag := range f.Tags {
|
||||
label := strings.Split(tag, "=")
|
||||
if len(label) != 2 {
|
||||
ginx.Bomb(http.StatusBadRequest, "tag:%s format error", tag)
|
||||
}
|
||||
|
||||
labels[i] = prompb.Label{Name: label[0], Value: label[1]}
|
||||
}
|
||||
|
||||
for i := 0; i < len(f.Configs); i++ {
|
||||
if f.Configs[i].Replacement == "" {
|
||||
f.Configs[i].Replacement = "$1"
|
||||
}
|
||||
|
||||
if f.Configs[i].Separator == "" {
|
||||
f.Configs[i].Separator = ";"
|
||||
}
|
||||
|
||||
if f.Configs[i].Regex == "" {
|
||||
f.Configs[i].Regex = "(.*)"
|
||||
}
|
||||
}
|
||||
|
||||
relabels := writer.Process(labels, f.Configs...)
|
||||
|
||||
var tags []string
|
||||
for _, label := range relabels {
|
||||
tags = append(tags, fmt.Sprintf("%s=%s", label.Name, label.Value))
|
||||
}
|
||||
|
||||
ginx.NewRender(c).Data(tags, nil)
|
||||
}
|
||||
|
||||
@@ -22,7 +22,7 @@ func (rt *Router) alertSubscribeGets(c *gin.Context) {
|
||||
|
||||
for i := 0; i < len(lst); i++ {
|
||||
ginx.Dangerous(lst[i].FillUserGroups(rt.Ctx, ugcache))
|
||||
ginx.Dangerous(lst[i].FillRuleName(rt.Ctx, rulecache))
|
||||
ginx.Dangerous(lst[i].FillRuleNames(rt.Ctx, rulecache))
|
||||
ginx.Dangerous(lst[i].FillDatasourceIds(rt.Ctx))
|
||||
ginx.Dangerous(lst[i].DB2FE())
|
||||
}
|
||||
@@ -31,14 +31,23 @@ func (rt *Router) alertSubscribeGets(c *gin.Context) {
|
||||
}
|
||||
|
||||
func (rt *Router) alertSubscribeGetsByGids(c *gin.Context) {
|
||||
gids := str.IdsInt64(ginx.QueryStr(c, "gids"), ",")
|
||||
if len(gids) == 0 {
|
||||
ginx.NewRender(c, http.StatusBadRequest).Message("arg(gids) is empty")
|
||||
return
|
||||
}
|
||||
gids := str.IdsInt64(ginx.QueryStr(c, "gids", ""), ",")
|
||||
if len(gids) > 0 {
|
||||
for _, gid := range gids {
|
||||
rt.bgroCheck(c, gid)
|
||||
}
|
||||
} else {
|
||||
me := c.MustGet("user").(*models.User)
|
||||
if !me.IsAdmin() {
|
||||
var err error
|
||||
gids, err = models.MyBusiGroupIds(rt.Ctx, me.Id)
|
||||
ginx.Dangerous(err)
|
||||
|
||||
for _, gid := range gids {
|
||||
rt.bgroCheck(c, gid)
|
||||
if len(gids) == 0 {
|
||||
ginx.NewRender(c).Data([]int{}, nil)
|
||||
return
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
lst, err := models.AlertSubscribeGetsByBGIds(rt.Ctx, gids)
|
||||
@@ -49,7 +58,7 @@ func (rt *Router) alertSubscribeGetsByGids(c *gin.Context) {
|
||||
|
||||
for i := 0; i < len(lst); i++ {
|
||||
ginx.Dangerous(lst[i].FillUserGroups(rt.Ctx, ugcache))
|
||||
ginx.Dangerous(lst[i].FillRuleName(rt.Ctx, rulecache))
|
||||
ginx.Dangerous(lst[i].FillRuleNames(rt.Ctx, rulecache))
|
||||
ginx.Dangerous(lst[i].FillDatasourceIds(rt.Ctx))
|
||||
ginx.Dangerous(lst[i].DB2FE())
|
||||
}
|
||||
@@ -72,7 +81,7 @@ func (rt *Router) alertSubscribeGet(c *gin.Context) {
|
||||
ginx.Dangerous(sub.FillUserGroups(rt.Ctx, ugcache))
|
||||
|
||||
rulecache := make(map[int64]string)
|
||||
ginx.Dangerous(sub.FillRuleName(rt.Ctx, rulecache))
|
||||
ginx.Dangerous(sub.FillRuleNames(rt.Ctx, rulecache))
|
||||
ginx.Dangerous(sub.FillDatasourceIds(rt.Ctx))
|
||||
ginx.Dangerous(sub.DB2FE())
|
||||
|
||||
@@ -104,6 +113,9 @@ func (rt *Router) alertSubscribePut(c *gin.Context) {
|
||||
for i := 0; i < len(fs); i++ {
|
||||
fs[i].UpdateBy = username
|
||||
fs[i].UpdateAt = timestamp
|
||||
//After adding the function of batch subscription alert rules, rule_ids is used instead of rule_id.
|
||||
//When the subscription rules are updated, set rule_id=0 to prevent the wrong subscription caused by the old rule_id.
|
||||
fs[i].RuleId = 0
|
||||
ginx.Dangerous(fs[i].Update(
|
||||
rt.Ctx,
|
||||
"name",
|
||||
@@ -113,6 +125,7 @@ func (rt *Router) alertSubscribePut(c *gin.Context) {
|
||||
"datasource_ids",
|
||||
"cluster",
|
||||
"rule_id",
|
||||
"rule_ids",
|
||||
"tags",
|
||||
"redefine_severity",
|
||||
"new_severity",
|
||||
|
||||
@@ -1,23 +1,26 @@
|
||||
package router
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"net/http"
|
||||
"time"
|
||||
|
||||
"github.com/ccfos/nightingale/v6/models"
|
||||
|
||||
"github.com/gin-gonic/gin"
|
||||
"github.com/google/uuid"
|
||||
"github.com/toolkits/pkg/ginx"
|
||||
"github.com/toolkits/pkg/i18n"
|
||||
"github.com/toolkits/pkg/str"
|
||||
)
|
||||
|
||||
type boardForm struct {
|
||||
Name string `json:"name"`
|
||||
Ident string `json:"ident"`
|
||||
Tags string `json:"tags"`
|
||||
Configs string `json:"configs"`
|
||||
Public int `json:"public"`
|
||||
Name string `json:"name"`
|
||||
Ident string `json:"ident"`
|
||||
Tags string `json:"tags"`
|
||||
Configs string `json:"configs"`
|
||||
Public int `json:"public"`
|
||||
PublicCate int `json:"public_cate"`
|
||||
Bgids []int64 `json:"bgids"`
|
||||
}
|
||||
|
||||
func (rt *Router) boardAdd(c *gin.Context) {
|
||||
@@ -66,6 +69,28 @@ func (rt *Router) boardGet(c *gin.Context) {
|
||||
}
|
||||
}
|
||||
|
||||
if board.PublicCate == models.PublicLogin {
|
||||
rt.auth()(c)
|
||||
} else if board.PublicCate == models.PublicBusi {
|
||||
rt.auth()(c)
|
||||
rt.user()(c)
|
||||
|
||||
me := c.MustGet("user").(*models.User)
|
||||
if !me.IsAdmin() {
|
||||
bgids, err := models.MyBusiGroupIds(rt.Ctx, me.Id)
|
||||
ginx.Dangerous(err)
|
||||
if len(bgids) == 0 {
|
||||
ginx.Bomb(http.StatusForbidden, "forbidden")
|
||||
}
|
||||
|
||||
ok, err := models.BoardBusigroupCheck(rt.Ctx, board.Id, bgids)
|
||||
ginx.Dangerous(err)
|
||||
if !ok {
|
||||
ginx.Bomb(http.StatusForbidden, "forbidden")
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
ginx.NewRender(c).Data(board, nil)
|
||||
}
|
||||
|
||||
@@ -193,10 +218,20 @@ func (rt *Router) boardPutPublic(c *gin.Context) {
|
||||
}
|
||||
|
||||
bo.Public = f.Public
|
||||
bo.PublicCate = f.PublicCate
|
||||
|
||||
if bo.PublicCate == models.PublicBusi {
|
||||
err := models.BoardBusigroupUpdate(rt.Ctx, bo.Id, f.Bgids)
|
||||
ginx.Dangerous(err)
|
||||
} else {
|
||||
err := models.BoardBusigroupDelByBoardId(rt.Ctx, bo.Id)
|
||||
ginx.Dangerous(err)
|
||||
}
|
||||
|
||||
bo.UpdateBy = me.Username
|
||||
bo.UpdateAt = time.Now().Unix()
|
||||
|
||||
err := bo.Update(rt.Ctx, "public", "update_by", "update_at")
|
||||
err := bo.Update(rt.Ctx, "public", "public_cate", "update_by", "update_at")
|
||||
ginx.NewRender(c).Data(bo, err)
|
||||
}
|
||||
|
||||
@@ -208,15 +243,56 @@ func (rt *Router) boardGets(c *gin.Context) {
|
||||
ginx.NewRender(c).Data(boards, err)
|
||||
}
|
||||
|
||||
func (rt *Router) publicBoardGets(c *gin.Context) {
|
||||
me := c.MustGet("user").(*models.User)
|
||||
|
||||
bgids, err := models.MyBusiGroupIds(rt.Ctx, me.Id)
|
||||
ginx.Dangerous(err)
|
||||
|
||||
boardIds, err := models.BoardIdsByBusiGroupIds(rt.Ctx, bgids)
|
||||
ginx.Dangerous(err)
|
||||
|
||||
boards, err := models.BoardGets(rt.Ctx, "", "public=1 and (public_cate in (?) or id in (?))", []int64{0, 1}, boardIds)
|
||||
ginx.NewRender(c).Data(boards, err)
|
||||
}
|
||||
|
||||
func (rt *Router) boardGetsByGids(c *gin.Context) {
|
||||
gids := str.IdsInt64(ginx.QueryStr(c, "gids"), ",")
|
||||
gids := str.IdsInt64(ginx.QueryStr(c, "gids", ""), ",")
|
||||
query := ginx.QueryStr(c, "query", "")
|
||||
|
||||
for _, gid := range gids {
|
||||
rt.bgroCheck(c, gid)
|
||||
if len(gids) > 0 {
|
||||
for _, gid := range gids {
|
||||
rt.bgroCheck(c, gid)
|
||||
}
|
||||
} else {
|
||||
me := c.MustGet("user").(*models.User)
|
||||
if !me.IsAdmin() {
|
||||
var err error
|
||||
gids, err = models.MyBusiGroupIds(rt.Ctx, me.Id)
|
||||
ginx.Dangerous(err)
|
||||
|
||||
if len(gids) == 0 {
|
||||
ginx.NewRender(c).Data([]int{}, nil)
|
||||
return
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
boardBusigroups, err := models.BoardBusigroupGets(rt.Ctx)
|
||||
ginx.Dangerous(err)
|
||||
m := make(map[int64][]int64)
|
||||
for _, boardBusigroup := range boardBusigroups {
|
||||
m[boardBusigroup.BoardId] = append(m[boardBusigroup.BoardId], boardBusigroup.BusiGroupId)
|
||||
}
|
||||
|
||||
boards, err := models.BoardGetsByBGIds(rt.Ctx, gids, query)
|
||||
ginx.Dangerous(err)
|
||||
for i := 0; i < len(boards); i++ {
|
||||
if ids, ok := m[boards[i].Id]; ok {
|
||||
boards[i].Bgids = ids
|
||||
}
|
||||
}
|
||||
|
||||
ginx.NewRender(c).Data(boards, err)
|
||||
}
|
||||
|
||||
@@ -224,17 +300,7 @@ func (rt *Router) boardClone(c *gin.Context) {
|
||||
me := c.MustGet("user").(*models.User)
|
||||
bo := rt.Board(ginx.UrlParamInt64(c, "bid"))
|
||||
|
||||
newBoard := &models.Board{
|
||||
Name: bo.Name + " Cloned",
|
||||
Tags: bo.Tags,
|
||||
GroupId: bo.GroupId,
|
||||
CreateBy: me.Username,
|
||||
UpdateBy: me.Username,
|
||||
}
|
||||
|
||||
if bo.Ident != "" {
|
||||
newBoard.Ident = uuid.NewString()
|
||||
}
|
||||
newBoard := bo.Clone(me.Username, bo.GroupId, " Cloned")
|
||||
|
||||
ginx.Dangerous(newBoard.Add(rt.Ctx))
|
||||
|
||||
@@ -248,3 +314,39 @@ func (rt *Router) boardClone(c *gin.Context) {
|
||||
|
||||
ginx.NewRender(c).Message(nil)
|
||||
}
|
||||
|
||||
type boardsForm struct {
|
||||
BoardIds []int64 `json:"board_ids"`
|
||||
Bgids []int64 `json:"bgids"`
|
||||
}
|
||||
|
||||
func (rt *Router) boardBatchClone(c *gin.Context) {
|
||||
me := c.MustGet("user").(*models.User)
|
||||
var f boardsForm
|
||||
ginx.BindJSON(c, &f)
|
||||
|
||||
for _, bgid := range f.Bgids {
|
||||
rt.bgrwCheck(c, bgid)
|
||||
}
|
||||
|
||||
reterr := make(map[string]string, len(f.BoardIds))
|
||||
lang := c.GetHeader("X-Language")
|
||||
|
||||
for _, bgid := range f.Bgids {
|
||||
for _, bid := range f.BoardIds {
|
||||
bo := rt.Board(bid)
|
||||
newBoard := bo.Clone(me.Username, bgid, "")
|
||||
payload, err := models.BoardPayloadGet(rt.Ctx, bo.Id)
|
||||
if err != nil {
|
||||
reterr[fmt.Sprintf("%s-%d", newBoard.Name, bgid)] = i18n.Sprintf(lang, err.Error())
|
||||
continue
|
||||
}
|
||||
|
||||
if err = newBoard.AtomicAdd(rt.Ctx, payload); err != nil {
|
||||
reterr[fmt.Sprintf("%s-%d", newBoard.Name, bgid)] = i18n.Sprintf(lang, err.Error())
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
ginx.NewRender(c).Data(reterr, nil)
|
||||
}
|
||||
|
||||
66
center/router/router_builtin_componet.go
Normal file
66
center/router/router_builtin_componet.go
Normal file
@@ -0,0 +1,66 @@
|
||||
package router
|
||||
|
||||
import (
|
||||
"net/http"
|
||||
|
||||
"github.com/ccfos/nightingale/v6/models"
|
||||
"github.com/gin-gonic/gin"
|
||||
"github.com/toolkits/pkg/ginx"
|
||||
)
|
||||
|
||||
func (rt *Router) builtinComponentsAdd(c *gin.Context) {
|
||||
var lst []models.BuiltinComponent
|
||||
ginx.BindJSON(c, &lst)
|
||||
|
||||
username := Username(c)
|
||||
|
||||
count := len(lst)
|
||||
if count == 0 {
|
||||
ginx.Bomb(http.StatusBadRequest, "input json is empty")
|
||||
}
|
||||
|
||||
reterr := make(map[string]string)
|
||||
for i := 0; i < count; i++ {
|
||||
if err := lst[i].Add(rt.Ctx, username); err != nil {
|
||||
reterr[lst[i].Ident] = err.Error()
|
||||
}
|
||||
}
|
||||
|
||||
ginx.NewRender(c).Data(reterr, nil)
|
||||
}
|
||||
|
||||
func (rt *Router) builtinComponentsGets(c *gin.Context) {
|
||||
query := ginx.QueryStr(c, "query", "")
|
||||
|
||||
bc, err := models.BuiltinComponentGets(rt.Ctx, query)
|
||||
ginx.Dangerous(err)
|
||||
|
||||
ginx.NewRender(c).Data(bc, nil)
|
||||
}
|
||||
|
||||
func (rt *Router) builtinComponentsPut(c *gin.Context) {
|
||||
var req models.BuiltinComponent
|
||||
ginx.BindJSON(c, &req)
|
||||
|
||||
bc, err := models.BuiltinComponentGet(rt.Ctx, "id = ?", req.ID)
|
||||
ginx.Dangerous(err)
|
||||
|
||||
if bc == nil {
|
||||
ginx.NewRender(c, http.StatusNotFound).Message("No such builtin component")
|
||||
return
|
||||
}
|
||||
|
||||
username := Username(c)
|
||||
req.UpdatedBy = username
|
||||
|
||||
ginx.NewRender(c).Message(bc.Update(rt.Ctx, req))
|
||||
}
|
||||
|
||||
func (rt *Router) builtinComponentsDel(c *gin.Context) {
|
||||
var req idsForm
|
||||
ginx.BindJSON(c, &req)
|
||||
|
||||
req.Verify()
|
||||
|
||||
ginx.NewRender(c).Message(models.BuiltinComponentDels(rt.Ctx, req.Ids))
|
||||
}
|
||||
120
center/router/router_builtin_metric_filter.go
Normal file
120
center/router/router_builtin_metric_filter.go
Normal file
@@ -0,0 +1,120 @@
|
||||
package router
|
||||
|
||||
import (
|
||||
"github.com/ccfos/nightingale/v6/models"
|
||||
"github.com/ccfos/nightingale/v6/pkg/prom"
|
||||
"github.com/gin-gonic/gin"
|
||||
"github.com/toolkits/pkg/ginx"
|
||||
)
|
||||
|
||||
func (rt *Router) metricFilterGets(c *gin.Context) {
|
||||
lst, err := models.MetricFilterGets(rt.Ctx, "")
|
||||
ginx.Dangerous(err)
|
||||
me := c.MustGet("user").(*models.User)
|
||||
|
||||
gids, err := models.MyGroupIds(rt.Ctx, me.Id)
|
||||
ginx.Dangerous(err)
|
||||
arr := make([]models.MetricFilter, 0)
|
||||
|
||||
for _, f := range lst {
|
||||
if me.Username == f.CreateBy {
|
||||
arr = append(arr, f)
|
||||
continue
|
||||
}
|
||||
|
||||
if HasPerm(gids, f.GroupsPerm, false) {
|
||||
arr = append(arr, f)
|
||||
}
|
||||
}
|
||||
|
||||
ginx.NewRender(c).Data(arr, err)
|
||||
}
|
||||
|
||||
func (rt *Router) metricFilterAdd(c *gin.Context) {
|
||||
var f models.MetricFilter
|
||||
ginx.BindJSON(c, &f)
|
||||
me := c.MustGet("user").(*models.User)
|
||||
|
||||
f.CreateBy = me.Username
|
||||
f.UpdateBy = me.Username
|
||||
ginx.Dangerous(f.Add(rt.Ctx))
|
||||
ginx.NewRender(c).Data(f, nil)
|
||||
}
|
||||
|
||||
func (rt *Router) metricFilterDel(c *gin.Context) {
|
||||
var f idsForm
|
||||
ginx.BindJSON(c, &f)
|
||||
f.Verify()
|
||||
|
||||
me := c.MustGet("user").(*models.User)
|
||||
|
||||
for _, id := range f.Ids {
|
||||
old, err := models.MetricFilterGet(rt.Ctx, id)
|
||||
ginx.Dangerous(err)
|
||||
|
||||
if me.Username != old.CreateBy {
|
||||
gids, err := models.MyGroupIds(rt.Ctx, me.Id)
|
||||
ginx.Dangerous(err)
|
||||
|
||||
if !HasPerm(gids, old.GroupsPerm, true) {
|
||||
ginx.NewRender(c).Message("no permission")
|
||||
return
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
ginx.NewRender(c).Message(models.MetricFilterDel(rt.Ctx, f.Ids))
|
||||
}
|
||||
|
||||
func (rt *Router) metricFilterPut(c *gin.Context) {
|
||||
var f models.MetricFilter
|
||||
ginx.BindJSON(c, &f)
|
||||
|
||||
me := c.MustGet("user").(*models.User)
|
||||
old, err := models.MetricFilterGet(rt.Ctx, f.ID)
|
||||
ginx.Dangerous(err)
|
||||
|
||||
if me.Username != old.CreateBy {
|
||||
gids, err := models.MyGroupIds(rt.Ctx, me.Id)
|
||||
ginx.Dangerous(err)
|
||||
|
||||
if !HasPerm(gids, old.GroupsPerm, true) {
|
||||
ginx.NewRender(c).Message("no permission")
|
||||
return
|
||||
}
|
||||
}
|
||||
|
||||
f.UpdateBy = me.Username
|
||||
ginx.NewRender(c).Message(f.Update(rt.Ctx))
|
||||
}
|
||||
|
||||
type metricPromqlReq struct {
|
||||
LabelFilter string `json:"label_filter"`
|
||||
Promql string `json:"promql"`
|
||||
}
|
||||
|
||||
func (rt *Router) getMetricPromql(c *gin.Context) {
|
||||
var req metricPromqlReq
|
||||
ginx.BindJSON(c, &req)
|
||||
|
||||
promql := prom.AddLabelToPromQL(req.LabelFilter, req.Promql)
|
||||
ginx.NewRender(c).Data(promql, nil)
|
||||
}
|
||||
|
||||
func HasPerm(gids []int64, gps []models.GroupPerm, checkWrite bool) bool {
|
||||
gmap := make(map[int64]struct{})
|
||||
for _, gp := range gps {
|
||||
if checkWrite && !gp.Write {
|
||||
continue
|
||||
}
|
||||
gmap[gp.Gid] = struct{}{}
|
||||
}
|
||||
|
||||
for _, gid := range gids {
|
||||
if _, ok := gmap[gid]; ok {
|
||||
return true
|
||||
}
|
||||
}
|
||||
|
||||
return false
|
||||
}
|
||||
116
center/router/router_builtin_metrics.go
Normal file
116
center/router/router_builtin_metrics.go
Normal file
@@ -0,0 +1,116 @@
|
||||
package router
|
||||
|
||||
import (
|
||||
"net/http"
|
||||
"time"
|
||||
|
||||
"github.com/ccfos/nightingale/v6/models"
|
||||
|
||||
"github.com/gin-gonic/gin"
|
||||
"github.com/toolkits/pkg/ginx"
|
||||
"github.com/toolkits/pkg/i18n"
|
||||
)
|
||||
|
||||
// single or import
|
||||
func (rt *Router) builtinMetricsAdd(c *gin.Context) {
|
||||
var lst []models.BuiltinMetric
|
||||
ginx.BindJSON(c, &lst)
|
||||
username := Username(c)
|
||||
count := len(lst)
|
||||
if count == 0 {
|
||||
ginx.Bomb(http.StatusBadRequest, "input json is empty")
|
||||
}
|
||||
|
||||
lang := c.GetHeader("X-Language")
|
||||
if lang == "" {
|
||||
lang = "zh_CN"
|
||||
}
|
||||
|
||||
reterr := make(map[string]string)
|
||||
for i := 0; i < count; i++ {
|
||||
lst[i].Lang = lang
|
||||
lst[i].UUID = time.Now().UnixNano()
|
||||
if err := lst[i].Add(rt.Ctx, username); err != nil {
|
||||
reterr[lst[i].Name] = i18n.Sprintf(c.GetHeader("X-Language"), err.Error())
|
||||
}
|
||||
}
|
||||
ginx.NewRender(c).Data(reterr, nil)
|
||||
}
|
||||
|
||||
func (rt *Router) builtinMetricsGets(c *gin.Context) {
|
||||
collector := ginx.QueryStr(c, "collector", "")
|
||||
typ := ginx.QueryStr(c, "typ", "")
|
||||
query := ginx.QueryStr(c, "query", "")
|
||||
limit := ginx.QueryInt(c, "limit", 20)
|
||||
lang := c.GetHeader("X-Language")
|
||||
unit := ginx.QueryStr(c, "unit", "")
|
||||
if lang == "" {
|
||||
lang = "zh_CN"
|
||||
}
|
||||
|
||||
bm, err := models.BuiltinMetricGets(rt.Ctx, lang, collector, typ, query, unit, limit, ginx.Offset(c, limit))
|
||||
ginx.Dangerous(err)
|
||||
|
||||
total, err := models.BuiltinMetricCount(rt.Ctx, lang, collector, typ, query, unit)
|
||||
ginx.Dangerous(err)
|
||||
ginx.NewRender(c).Data(gin.H{
|
||||
"list": bm,
|
||||
"total": total,
|
||||
}, nil)
|
||||
}
|
||||
|
||||
func (rt *Router) builtinMetricsPut(c *gin.Context) {
|
||||
var req models.BuiltinMetric
|
||||
ginx.BindJSON(c, &req)
|
||||
|
||||
bm, err := models.BuiltinMetricGet(rt.Ctx, "id = ?", req.ID)
|
||||
ginx.Dangerous(err)
|
||||
if bm == nil {
|
||||
ginx.NewRender(c, http.StatusNotFound).Message("No such builtin metric")
|
||||
return
|
||||
}
|
||||
username := Username(c)
|
||||
|
||||
req.UpdatedBy = username
|
||||
ginx.NewRender(c).Message(bm.Update(rt.Ctx, req))
|
||||
}
|
||||
|
||||
func (rt *Router) builtinMetricsDel(c *gin.Context) {
|
||||
var req idsForm
|
||||
ginx.BindJSON(c, &req)
|
||||
req.Verify()
|
||||
|
||||
ginx.NewRender(c).Message(models.BuiltinMetricDels(rt.Ctx, req.Ids))
|
||||
}
|
||||
|
||||
func (rt *Router) builtinMetricsDefaultTypes(c *gin.Context) {
|
||||
lst := []string{
|
||||
"Linux",
|
||||
"cAdvisor",
|
||||
"Ping",
|
||||
"MySQL",
|
||||
"Redis",
|
||||
"Kafka",
|
||||
"Elasticsearch",
|
||||
"PostgreSQL",
|
||||
"MongoDB",
|
||||
"Memcached",
|
||||
}
|
||||
ginx.NewRender(c).Data(lst, nil)
|
||||
}
|
||||
|
||||
func (rt *Router) builtinMetricsTypes(c *gin.Context) {
|
||||
collector := ginx.QueryStr(c, "collector", "")
|
||||
query := ginx.QueryStr(c, "query", "")
|
||||
lang := c.GetHeader("X-Language")
|
||||
|
||||
ginx.NewRender(c).Data(models.BuiltinMetricTypes(rt.Ctx, lang, collector, query))
|
||||
}
|
||||
|
||||
func (rt *Router) builtinMetricsCollectors(c *gin.Context) {
|
||||
typ := ginx.QueryStr(c, "typ", "")
|
||||
query := ginx.QueryStr(c, "query", "")
|
||||
lang := c.GetHeader("X-Language")
|
||||
|
||||
ginx.NewRender(c).Data(models.BuiltinMetricCollectors(rt.Ctx, lang, typ, query))
|
||||
}
|
||||
247
center/router/router_builtin_payload.go
Normal file
247
center/router/router_builtin_payload.go
Normal file
@@ -0,0 +1,247 @@
|
||||
package router
|
||||
|
||||
import (
|
||||
"encoding/json"
|
||||
"net/http"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
"github.com/ccfos/nightingale/v6/models"
|
||||
"github.com/gin-gonic/gin"
|
||||
"github.com/toolkits/pkg/ginx"
|
||||
"github.com/toolkits/pkg/i18n"
|
||||
)
|
||||
|
||||
type Board struct {
|
||||
Name string `json:"name"`
|
||||
Tags string `json:"tags"`
|
||||
Configs interface{} `json:"configs"`
|
||||
UUID int64 `json:"uuid"`
|
||||
}
|
||||
|
||||
func (rt *Router) builtinPayloadsAdd(c *gin.Context) {
|
||||
var lst []models.BuiltinPayload
|
||||
ginx.BindJSON(c, &lst)
|
||||
|
||||
username := Username(c)
|
||||
|
||||
count := len(lst)
|
||||
if count == 0 {
|
||||
ginx.Bomb(http.StatusBadRequest, "input json is empty")
|
||||
}
|
||||
|
||||
reterr := make(map[string]string)
|
||||
for i := 0; i < count; i++ {
|
||||
if lst[i].Type == "alert" {
|
||||
if strings.HasPrefix(strings.TrimSpace(lst[i].Content), "[") {
|
||||
// 处理多个告警规则模板的情况
|
||||
alertRules := []models.AlertRule{}
|
||||
if err := json.Unmarshal([]byte(lst[i].Content), &alertRules); err != nil {
|
||||
reterr[lst[i].Name] = err.Error()
|
||||
}
|
||||
|
||||
for _, rule := range alertRules {
|
||||
if rule.UUID == 0 {
|
||||
rule.UUID = time.Now().UnixNano()
|
||||
}
|
||||
|
||||
contentBytes, err := json.Marshal(rule)
|
||||
if err != nil {
|
||||
reterr[rule.Name] = err.Error()
|
||||
continue
|
||||
}
|
||||
|
||||
bp := models.BuiltinPayload{
|
||||
Type: lst[i].Type,
|
||||
Component: lst[i].Component,
|
||||
Cate: lst[i].Cate,
|
||||
Name: rule.Name,
|
||||
Tags: rule.AppendTags,
|
||||
UUID: rule.UUID,
|
||||
Content: string(contentBytes),
|
||||
CreatedBy: username,
|
||||
UpdatedBy: username,
|
||||
}
|
||||
|
||||
if err := bp.Add(rt.Ctx, username); err != nil {
|
||||
reterr[bp.Name] = i18n.Sprintf(c.GetHeader("X-Language"), err.Error())
|
||||
}
|
||||
}
|
||||
continue
|
||||
}
|
||||
|
||||
alertRule := models.AlertRule{}
|
||||
if err := json.Unmarshal([]byte(lst[i].Content), &alertRule); err != nil {
|
||||
reterr[lst[i].Name] = err.Error()
|
||||
continue
|
||||
}
|
||||
|
||||
if alertRule.UUID == 0 {
|
||||
alertRule.UUID = time.Now().UnixNano()
|
||||
}
|
||||
|
||||
bp := models.BuiltinPayload{
|
||||
Type: lst[i].Type,
|
||||
Component: lst[i].Component,
|
||||
Cate: lst[i].Cate,
|
||||
Name: alertRule.Name,
|
||||
Tags: alertRule.AppendTags,
|
||||
UUID: alertRule.UUID,
|
||||
Content: lst[i].Content,
|
||||
CreatedBy: username,
|
||||
UpdatedBy: username,
|
||||
}
|
||||
|
||||
if err := bp.Add(rt.Ctx, username); err != nil {
|
||||
reterr[bp.Name] = i18n.Sprintf(c.GetHeader("X-Language"), err.Error())
|
||||
}
|
||||
} else if lst[i].Type == "dashboard" {
|
||||
if strings.HasPrefix(strings.TrimSpace(lst[i].Content), "[") {
|
||||
// 处理多个告警规则模板的情况
|
||||
dashboards := []Board{}
|
||||
if err := json.Unmarshal([]byte(lst[i].Content), &dashboards); err != nil {
|
||||
reterr[lst[i].Name] = err.Error()
|
||||
}
|
||||
|
||||
for _, dashboard := range dashboards {
|
||||
if dashboard.UUID == 0 {
|
||||
dashboard.UUID = time.Now().UnixNano()
|
||||
}
|
||||
|
||||
contentBytes, err := json.Marshal(dashboard)
|
||||
if err != nil {
|
||||
reterr[dashboard.Name] = err.Error()
|
||||
continue
|
||||
}
|
||||
|
||||
bp := models.BuiltinPayload{
|
||||
Type: lst[i].Type,
|
||||
Component: lst[i].Component,
|
||||
Cate: lst[i].Cate,
|
||||
Name: dashboard.Name,
|
||||
Tags: dashboard.Tags,
|
||||
UUID: dashboard.UUID,
|
||||
Content: string(contentBytes),
|
||||
CreatedBy: username,
|
||||
UpdatedBy: username,
|
||||
}
|
||||
|
||||
if err := bp.Add(rt.Ctx, username); err != nil {
|
||||
reterr[bp.Name] = i18n.Sprintf(c.GetHeader("X-Language"), err.Error())
|
||||
}
|
||||
}
|
||||
continue
|
||||
}
|
||||
|
||||
dashboard := Board{}
|
||||
if err := json.Unmarshal([]byte(lst[i].Content), &dashboard); err != nil {
|
||||
reterr[lst[i].Name] = i18n.Sprintf(c.GetHeader("X-Language"), err.Error())
|
||||
continue
|
||||
}
|
||||
|
||||
if dashboard.UUID == 0 {
|
||||
dashboard.UUID = time.Now().UnixNano()
|
||||
}
|
||||
|
||||
bp := models.BuiltinPayload{
|
||||
Type: lst[i].Type,
|
||||
Component: lst[i].Component,
|
||||
Cate: lst[i].Cate,
|
||||
Name: dashboard.Name,
|
||||
Tags: dashboard.Tags,
|
||||
UUID: dashboard.UUID,
|
||||
Content: lst[i].Content,
|
||||
CreatedBy: username,
|
||||
UpdatedBy: username,
|
||||
}
|
||||
|
||||
if err := bp.Add(rt.Ctx, username); err != nil {
|
||||
reterr[bp.Name] = i18n.Sprintf(c.GetHeader("X-Language"), err.Error())
|
||||
}
|
||||
} else {
|
||||
if err := lst[i].Add(rt.Ctx, username); err != nil {
|
||||
reterr[lst[i].Name] = i18n.Sprintf(c.GetHeader("X-Language"), err.Error())
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
ginx.NewRender(c).Data(reterr, nil)
|
||||
}
|
||||
|
||||
func (rt *Router) builtinPayloadsGets(c *gin.Context) {
|
||||
typ := ginx.QueryStr(c, "type", "")
|
||||
component := ginx.QueryStr(c, "component", "")
|
||||
cate := ginx.QueryStr(c, "cate", "")
|
||||
query := ginx.QueryStr(c, "query", "")
|
||||
|
||||
lst, err := models.BuiltinPayloadGets(rt.Ctx, typ, component, cate, query)
|
||||
ginx.NewRender(c).Data(lst, err)
|
||||
}
|
||||
|
||||
func (rt *Router) builtinPayloadcatesGet(c *gin.Context) {
|
||||
typ := ginx.QueryStr(c, "type", "")
|
||||
component := ginx.QueryStr(c, "component", "")
|
||||
|
||||
cates, err := models.BuiltinPayloadCates(rt.Ctx, typ, component)
|
||||
ginx.NewRender(c).Data(cates, err)
|
||||
}
|
||||
|
||||
func (rt *Router) builtinPayloadGet(c *gin.Context) {
|
||||
id := ginx.UrlParamInt64(c, "id")
|
||||
|
||||
bp, err := models.BuiltinPayloadGet(rt.Ctx, "id = ?", id)
|
||||
if err != nil {
|
||||
ginx.Bomb(http.StatusInternalServerError, err.Error())
|
||||
}
|
||||
if bp == nil {
|
||||
ginx.Bomb(http.StatusNotFound, "builtin payload not found")
|
||||
}
|
||||
|
||||
ginx.NewRender(c).Data(bp, nil)
|
||||
}
|
||||
|
||||
func (rt *Router) builtinPayloadsPut(c *gin.Context) {
|
||||
var req models.BuiltinPayload
|
||||
ginx.BindJSON(c, &req)
|
||||
|
||||
bp, err := models.BuiltinPayloadGet(rt.Ctx, "id = ?", req.ID)
|
||||
ginx.Dangerous(err)
|
||||
|
||||
if bp == nil {
|
||||
ginx.NewRender(c, http.StatusNotFound).Message("No such builtin payload")
|
||||
return
|
||||
}
|
||||
|
||||
if req.Type == "alert" {
|
||||
alertRule := models.AlertRule{}
|
||||
if err := json.Unmarshal([]byte(req.Content), &alertRule); err != nil {
|
||||
ginx.Bomb(http.StatusBadRequest, err.Error())
|
||||
}
|
||||
|
||||
req.Name = alertRule.Name
|
||||
req.Tags = alertRule.AppendTags
|
||||
} else if req.Type == "dashboard" {
|
||||
dashboard := Board{}
|
||||
if err := json.Unmarshal([]byte(req.Content), &dashboard); err != nil {
|
||||
ginx.Bomb(http.StatusBadRequest, err.Error())
|
||||
}
|
||||
|
||||
req.Name = dashboard.Name
|
||||
req.Tags = dashboard.Tags
|
||||
}
|
||||
|
||||
username := Username(c)
|
||||
req.UpdatedBy = username
|
||||
|
||||
ginx.NewRender(c).Message(bp.Update(rt.Ctx, req))
|
||||
}
|
||||
|
||||
func (rt *Router) builtinPayloadsDel(c *gin.Context) {
|
||||
var req idsForm
|
||||
ginx.BindJSON(c, &req)
|
||||
|
||||
req.Verify()
|
||||
|
||||
ginx.NewRender(c).Message(models.BuiltinPayloadDels(rt.Ctx, req.Ids))
|
||||
}
|
||||
@@ -1,13 +1,16 @@
|
||||
package router
|
||||
|
||||
import (
|
||||
"github.com/ccfos/nightingale/v6/models"
|
||||
"time"
|
||||
|
||||
"github.com/ccfos/nightingale/v6/models"
|
||||
|
||||
"github.com/gin-gonic/gin"
|
||||
"github.com/toolkits/pkg/ginx"
|
||||
)
|
||||
|
||||
const EMBEDDEDDASHBOARD = "embedded-dashboards"
|
||||
|
||||
func (rt *Router) configsGet(c *gin.Context) {
|
||||
prefix := ginx.QueryStr(c, "prefix", "")
|
||||
limit := ginx.QueryInt(c, "limit", 10)
|
||||
@@ -33,6 +36,18 @@ func (rt *Router) configPutByKey(c *gin.Context) {
|
||||
ginx.NewRender(c).Message(models.ConfigsSetWithUname(rt.Ctx, f.Ckey, f.Cval, username))
|
||||
}
|
||||
|
||||
func (rt *Router) embeddedDashboardsGet(c *gin.Context) {
|
||||
config, err := models.ConfigsGet(rt.Ctx, EMBEDDEDDASHBOARD)
|
||||
ginx.NewRender(c).Data(config, err)
|
||||
}
|
||||
|
||||
func (rt *Router) embeddedDashboardsPut(c *gin.Context) {
|
||||
var f models.Configs
|
||||
ginx.BindJSON(c, &f)
|
||||
username := c.MustGet("username").(string)
|
||||
ginx.NewRender(c).Message(models.ConfigsSetWithUname(rt.Ctx, EMBEDDEDDASHBOARD, f.Cval, username))
|
||||
}
|
||||
|
||||
func (rt *Router) configsDel(c *gin.Context) {
|
||||
var f idsForm
|
||||
ginx.BindJSON(c, &f)
|
||||
|
||||
@@ -55,12 +55,19 @@ func (rt *Router) datasourceBriefs(c *gin.Context) {
|
||||
list, err := models.GetDatasourcesGetsBy(rt.Ctx, "", "", "", "")
|
||||
ginx.Dangerous(err)
|
||||
|
||||
for i := range list {
|
||||
dss = append(dss, &models.Datasource{
|
||||
Id: list[i].Id,
|
||||
Name: list[i].Name,
|
||||
PluginType: list[i].PluginType,
|
||||
})
|
||||
for _, item := range list {
|
||||
item.AuthJson.BasicAuthPassword = ""
|
||||
if item.PluginType != models.PROMETHEUS {
|
||||
item.SettingsJson = nil
|
||||
} else {
|
||||
for k, v := range item.SettingsJson {
|
||||
if strings.HasPrefix(k, "prometheus.") {
|
||||
item.SettingsJson[strings.TrimPrefix(k, "prometheus.")] = v
|
||||
delete(item.SettingsJson, k)
|
||||
}
|
||||
}
|
||||
}
|
||||
dss = append(dss, item)
|
||||
}
|
||||
|
||||
if !rt.Center.AnonymousAccess.PromQuerier {
|
||||
|
||||
@@ -1,17 +1,14 @@
|
||||
package router
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"net/http"
|
||||
"strconv"
|
||||
"strings"
|
||||
|
||||
"github.com/ccfos/nightingale/v6/alert/aconf"
|
||||
"github.com/ccfos/nightingale/v6/models"
|
||||
"github.com/ccfos/nightingale/v6/pkg/ctx"
|
||||
"github.com/ccfos/nightingale/v6/pkg/ibex"
|
||||
"github.com/gin-gonic/gin"
|
||||
|
||||
"github.com/gin-gonic/gin"
|
||||
"github.com/toolkits/pkg/ginx"
|
||||
)
|
||||
|
||||
@@ -69,7 +66,8 @@ func queryDatasourceIds(c *gin.Context) []int64 {
|
||||
}
|
||||
|
||||
type idsForm struct {
|
||||
Ids []int64 `json:"ids"`
|
||||
Ids []int64 `json:"ids"`
|
||||
IsSyncToFlashDuty bool `json:"is_sync_to_flashduty"`
|
||||
}
|
||||
|
||||
func (f idsForm) Verify() {
|
||||
@@ -134,31 +132,6 @@ type TaskCreateReply struct {
|
||||
Dat int64 `json:"dat"` // task.id
|
||||
}
|
||||
|
||||
// return task.id, error
|
||||
func TaskCreate(v interface{}, ibexc aconf.Ibex) (int64, error) {
|
||||
var res TaskCreateReply
|
||||
err := ibex.New(
|
||||
ibexc.Address,
|
||||
ibexc.BasicAuthUser,
|
||||
ibexc.BasicAuthPass,
|
||||
ibexc.Timeout,
|
||||
).
|
||||
Path("/ibex/v1/tasks").
|
||||
In(v).
|
||||
Out(&res).
|
||||
POST()
|
||||
|
||||
if err != nil {
|
||||
return 0, err
|
||||
}
|
||||
|
||||
if res.Err != "" {
|
||||
return 0, fmt.Errorf("response.err: %v", res.Err)
|
||||
}
|
||||
|
||||
return res.Dat, nil
|
||||
}
|
||||
|
||||
func Username(c *gin.Context) string {
|
||||
username := c.GetString(gin.AuthUserKey)
|
||||
if username == "" {
|
||||
|
||||
@@ -3,19 +3,33 @@ package router
|
||||
import (
|
||||
"compress/gzip"
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"io/ioutil"
|
||||
"sort"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
"github.com/ccfos/nightingale/v6/center/metas"
|
||||
"github.com/ccfos/nightingale/v6/memsto"
|
||||
"github.com/ccfos/nightingale/v6/models"
|
||||
"github.com/ccfos/nightingale/v6/pkg/ctx"
|
||||
"github.com/ccfos/nightingale/v6/pushgw/idents"
|
||||
|
||||
"github.com/gin-gonic/gin"
|
||||
"github.com/toolkits/pkg/ginx"
|
||||
"github.com/toolkits/pkg/logger"
|
||||
)
|
||||
|
||||
type HeartbeatHookFunc func(ident string) map[string]interface{}
|
||||
|
||||
func (rt *Router) heartbeat(c *gin.Context) {
|
||||
req, err := HandleHeartbeat(c, rt.Ctx, rt.Alert.Heartbeat.EngineName, rt.MetaSet, rt.IdentSet, rt.TargetCache)
|
||||
ginx.Dangerous(err)
|
||||
|
||||
m := rt.HeartbeatHook(req.Hostname)
|
||||
ginx.NewRender(c).Data(m, err)
|
||||
}
|
||||
|
||||
func HandleHeartbeat(c *gin.Context, ctx *ctx.Context, engineName string, metaSet *metas.Set, identSet *idents.Set, targetCache *memsto.TargetCacheType) (models.HostMeta, error) {
|
||||
var bs []byte
|
||||
var err error
|
||||
var r *gzip.Reader
|
||||
@@ -24,7 +38,7 @@ func (rt *Router) heartbeat(c *gin.Context) {
|
||||
r, err = gzip.NewReader(c.Request.Body)
|
||||
if err != nil {
|
||||
c.String(400, err.Error())
|
||||
return
|
||||
return req, err
|
||||
}
|
||||
defer r.Close()
|
||||
bs, err = ioutil.ReadAll(r)
|
||||
@@ -32,11 +46,19 @@ func (rt *Router) heartbeat(c *gin.Context) {
|
||||
} else {
|
||||
defer c.Request.Body.Close()
|
||||
bs, err = ioutil.ReadAll(c.Request.Body)
|
||||
ginx.Dangerous(err)
|
||||
if err != nil {
|
||||
return req, err
|
||||
}
|
||||
}
|
||||
|
||||
err = json.Unmarshal(bs, &req)
|
||||
ginx.Dangerous(err)
|
||||
if err != nil {
|
||||
return req, err
|
||||
}
|
||||
|
||||
if req.Hostname == "" {
|
||||
return req, fmt.Errorf("hostname is required", 400)
|
||||
}
|
||||
|
||||
// maybe from pushgw
|
||||
if req.Offset == 0 {
|
||||
@@ -47,44 +69,66 @@ func (rt *Router) heartbeat(c *gin.Context) {
|
||||
req.RemoteAddr = c.ClientIP()
|
||||
}
|
||||
|
||||
rt.MetaSet.Set(req.Hostname, req)
|
||||
if req.EngineName == "" {
|
||||
req.EngineName = engineName
|
||||
}
|
||||
|
||||
metaSet.Set(req.Hostname, req)
|
||||
var items = make(map[string]struct{})
|
||||
items[req.Hostname] = struct{}{}
|
||||
rt.IdentSet.MSet(items)
|
||||
identSet.MSet(items)
|
||||
|
||||
if target, has := rt.TargetCache.Get(req.Hostname); has && target != nil {
|
||||
if target, has := targetCache.Get(req.Hostname); has && target != nil {
|
||||
gid := ginx.QueryInt64(c, "gid", 0)
|
||||
hostIp := strings.TrimSpace(req.HostIp)
|
||||
|
||||
filed := make(map[string]interface{})
|
||||
field := make(map[string]interface{})
|
||||
if gid != 0 && gid != target.GroupId {
|
||||
filed["group_id"] = gid
|
||||
field["group_id"] = gid
|
||||
}
|
||||
|
||||
if hostIp != "" && hostIp != target.HostIp {
|
||||
filed["host_ip"] = hostIp
|
||||
field["host_ip"] = hostIp
|
||||
}
|
||||
|
||||
if len(req.GlobalLabels) > 0 {
|
||||
tagsMap := target.GetTagsMap()
|
||||
tagNeedUpdate := false
|
||||
for k, v := range req.GlobalLabels {
|
||||
if v == "" {
|
||||
continue
|
||||
}
|
||||
|
||||
if tagv, ok := tagsMap[k]; !ok || tagv != v {
|
||||
tagNeedUpdate = true
|
||||
tagsMap[k] = v
|
||||
}
|
||||
}
|
||||
|
||||
if tagNeedUpdate {
|
||||
lst := []string{}
|
||||
for k, v := range req.GlobalLabels {
|
||||
for k, v := range tagsMap {
|
||||
lst = append(lst, k+"="+v)
|
||||
}
|
||||
sort.Strings(lst)
|
||||
labels := strings.Join(lst, " ")
|
||||
if target.Tags != labels {
|
||||
filed["tags"] = labels
|
||||
}
|
||||
labels := strings.Join(lst, " ") + " "
|
||||
field["tags"] = labels
|
||||
}
|
||||
|
||||
if len(filed) > 0 {
|
||||
err := target.UpdateFieldsMap(rt.Ctx, filed)
|
||||
if req.EngineName != "" && req.EngineName != target.EngineName {
|
||||
field["engine_name"] = req.EngineName
|
||||
}
|
||||
|
||||
if req.AgentVersion != "" && req.AgentVersion != target.AgentVersion {
|
||||
field["agent_version"] = req.AgentVersion
|
||||
}
|
||||
|
||||
if len(field) > 0 {
|
||||
err := target.UpdateFieldsMap(ctx, field)
|
||||
if err != nil {
|
||||
logger.Errorf("update target fields failed, err: %v", err)
|
||||
}
|
||||
}
|
||||
logger.Debugf("heartbeat field:%+v target: %v", filed, *target)
|
||||
logger.Debugf("heartbeat field:%+v target: %v", field, *target)
|
||||
}
|
||||
|
||||
ginx.NewRender(c).Message(err)
|
||||
return req, nil
|
||||
}
|
||||
|
||||
@@ -6,7 +6,6 @@ import (
|
||||
"net/http"
|
||||
"strconv"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
"github.com/ccfos/nightingale/v6/models"
|
||||
"github.com/ccfos/nightingale/v6/pkg/cas"
|
||||
@@ -14,10 +13,10 @@ import (
|
||||
"github.com/ccfos/nightingale/v6/pkg/oauth2x"
|
||||
"github.com/ccfos/nightingale/v6/pkg/oidcx"
|
||||
"github.com/ccfos/nightingale/v6/pkg/secu"
|
||||
"github.com/pelletier/go-toml/v2"
|
||||
|
||||
"github.com/dgrijalva/jwt-go"
|
||||
"github.com/gin-gonic/gin"
|
||||
"github.com/pelletier/go-toml/v2"
|
||||
"github.com/toolkits/pkg/ginx"
|
||||
"github.com/toolkits/pkg/logger"
|
||||
)
|
||||
@@ -51,22 +50,26 @@ func (rt *Router) loginPost(c *gin.Context) {
|
||||
}
|
||||
authPassWord = decPassWord
|
||||
}
|
||||
user, err := models.PassLogin(rt.Ctx, f.Username, authPassWord)
|
||||
if err != nil {
|
||||
// pass validate fail, try ldap
|
||||
if rt.Sso.LDAP.Enable {
|
||||
roles := strings.Join(rt.Sso.LDAP.DefaultRoles, " ")
|
||||
user, err = models.LdapLogin(rt.Ctx, f.Username, authPassWord, roles, rt.Sso.LDAP)
|
||||
if err != nil {
|
||||
logger.Debugf("ldap login failed: %v username: %s", err, f.Username)
|
||||
ginx.NewRender(c).Message(err)
|
||||
|
||||
var user *models.User
|
||||
var err error
|
||||
lc := rt.Sso.LDAP.Copy()
|
||||
if lc.Enable {
|
||||
user, err = ldapx.LdapLogin(rt.Ctx, f.Username, authPassWord, lc.DefaultRoles, lc.DefaultTeams, lc)
|
||||
if err != nil {
|
||||
logger.Debugf("ldap login failed: %v username: %s", err, f.Username)
|
||||
var errLoginInN9e error
|
||||
// to use n9e as the minimum guarantee for login
|
||||
if user, errLoginInN9e = models.PassLogin(rt.Ctx, rt.Redis, f.Username, authPassWord); errLoginInN9e != nil {
|
||||
ginx.NewRender(c).Message("ldap login failed: %v; n9e login failed: %v", err, errLoginInN9e)
|
||||
return
|
||||
}
|
||||
user.RolesLst = strings.Fields(user.Roles)
|
||||
} else {
|
||||
ginx.NewRender(c).Message(err)
|
||||
return
|
||||
user.RolesLst = strings.Fields(user.Roles)
|
||||
}
|
||||
} else {
|
||||
user, err = models.PassLogin(rt.Ctx, rt.Redis, f.Username, authPassWord)
|
||||
ginx.Dangerous(err)
|
||||
}
|
||||
|
||||
if user == nil {
|
||||
@@ -89,7 +92,7 @@ func (rt *Router) loginPost(c *gin.Context) {
|
||||
}
|
||||
|
||||
func (rt *Router) logoutPost(c *gin.Context) {
|
||||
logger.Infof("username:%s login from:%s", c.GetString("username"), c.ClientIP())
|
||||
logger.Infof("username:%s logout from:%s", c.GetString("username"), c.ClientIP())
|
||||
metadata, err := rt.extractTokenMetadata(c.Request)
|
||||
if err != nil {
|
||||
ginx.NewRender(c, http.StatusBadRequest).Message("failed to parse jwt token")
|
||||
@@ -102,7 +105,18 @@ func (rt *Router) logoutPost(c *gin.Context) {
|
||||
return
|
||||
}
|
||||
|
||||
ginx.NewRender(c).Message("")
|
||||
var logoutAddr string
|
||||
user := c.MustGet("user").(*models.User)
|
||||
switch user.Belong {
|
||||
case "oidc":
|
||||
logoutAddr = rt.Sso.OIDC.GetSsoLogoutAddr()
|
||||
case "cas":
|
||||
logoutAddr = rt.Sso.CAS.GetSsoLogoutAddr()
|
||||
case "oauth2":
|
||||
logoutAddr = rt.Sso.OAuth2.GetSsoLogoutAddr()
|
||||
}
|
||||
|
||||
ginx.NewRender(c).Data(logoutAddr, nil)
|
||||
}
|
||||
|
||||
type refreshForm struct {
|
||||
@@ -240,41 +254,23 @@ func (rt *Router) loginCallback(c *gin.Context) {
|
||||
|
||||
if user != nil {
|
||||
if rt.Sso.OIDC.CoverAttributes {
|
||||
if ret.Nickname != "" {
|
||||
user.Nickname = ret.Nickname
|
||||
}
|
||||
|
||||
if ret.Email != "" {
|
||||
user.Email = ret.Email
|
||||
}
|
||||
|
||||
if ret.Phone != "" {
|
||||
user.Phone = ret.Phone
|
||||
}
|
||||
|
||||
user.UpdateAt = time.Now().Unix()
|
||||
user.Update(rt.Ctx, "email", "nickname", "phone", "update_at")
|
||||
updatedFields := user.UpdateSsoFields("oidc", ret.Nickname, ret.Phone, ret.Email)
|
||||
ginx.Dangerous(user.Update(rt.Ctx, "update_at", updatedFields...))
|
||||
}
|
||||
} else {
|
||||
now := time.Now().Unix()
|
||||
user = &models.User{
|
||||
Username: ret.Username,
|
||||
Password: "******",
|
||||
Nickname: ret.Nickname,
|
||||
Phone: ret.Phone,
|
||||
Email: ret.Email,
|
||||
Portrait: "",
|
||||
Roles: strings.Join(rt.Sso.OIDC.DefaultRoles, " "),
|
||||
RolesLst: rt.Sso.OIDC.DefaultRoles,
|
||||
Contacts: []byte("{}"),
|
||||
CreateAt: now,
|
||||
UpdateAt: now,
|
||||
CreateBy: "oidc",
|
||||
UpdateBy: "oidc",
|
||||
}
|
||||
|
||||
user = new(models.User)
|
||||
user.FullSsoFields("oidc", ret.Username, ret.Nickname, ret.Phone, ret.Email, rt.Sso.OIDC.DefaultRoles)
|
||||
// create user from oidc
|
||||
ginx.Dangerous(user.Add(rt.Ctx))
|
||||
|
||||
if len(rt.Sso.OIDC.DefaultTeams) > 0 {
|
||||
for _, gid := range rt.Sso.OIDC.DefaultTeams {
|
||||
err = models.UserGroupMemberAdd(rt.Ctx, gid, user.Id)
|
||||
if err != nil {
|
||||
logger.Errorf("user:%v UserGroupMemberAdd: %s", user, err)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// set user login state
|
||||
@@ -350,38 +346,12 @@ func (rt *Router) loginCallbackCas(c *gin.Context) {
|
||||
ginx.Dangerous(err)
|
||||
if user != nil {
|
||||
if rt.Sso.CAS.CoverAttributes {
|
||||
if ret.Nickname != "" {
|
||||
user.Nickname = ret.Nickname
|
||||
}
|
||||
|
||||
if ret.Email != "" {
|
||||
user.Email = ret.Email
|
||||
}
|
||||
|
||||
if ret.Phone != "" {
|
||||
user.Phone = ret.Phone
|
||||
}
|
||||
|
||||
user.UpdateAt = time.Now().Unix()
|
||||
ginx.Dangerous(user.Update(rt.Ctx, "email", "nickname", "phone", "update_at"))
|
||||
updatedFields := user.UpdateSsoFields("cas", ret.Nickname, ret.Phone, ret.Email)
|
||||
ginx.Dangerous(user.Update(rt.Ctx, "update_at", updatedFields...))
|
||||
}
|
||||
} else {
|
||||
now := time.Now().Unix()
|
||||
user = &models.User{
|
||||
Username: ret.Username,
|
||||
Password: "******",
|
||||
Nickname: ret.Nickname,
|
||||
Portrait: "",
|
||||
Roles: strings.Join(rt.Sso.CAS.DefaultRoles, " "),
|
||||
RolesLst: rt.Sso.CAS.DefaultRoles,
|
||||
Contacts: []byte("{}"),
|
||||
Phone: ret.Phone,
|
||||
Email: ret.Email,
|
||||
CreateAt: now,
|
||||
UpdateAt: now,
|
||||
CreateBy: "CAS",
|
||||
UpdateBy: "CAS",
|
||||
}
|
||||
user = new(models.User)
|
||||
user.FullSsoFields("cas", ret.Username, ret.Nickname, ret.Phone, ret.Email, rt.Sso.CAS.DefaultRoles)
|
||||
// create user from cas
|
||||
ginx.Dangerous(user.Add(rt.Ctx))
|
||||
}
|
||||
@@ -452,39 +422,12 @@ func (rt *Router) loginCallbackOAuth(c *gin.Context) {
|
||||
|
||||
if user != nil {
|
||||
if rt.Sso.OAuth2.CoverAttributes {
|
||||
if ret.Nickname != "" {
|
||||
user.Nickname = ret.Nickname
|
||||
}
|
||||
|
||||
if ret.Email != "" {
|
||||
user.Email = ret.Email
|
||||
}
|
||||
|
||||
if ret.Phone != "" {
|
||||
user.Phone = ret.Phone
|
||||
}
|
||||
|
||||
user.UpdateAt = time.Now().Unix()
|
||||
user.Update(rt.Ctx, "email", "nickname", "phone", "update_at")
|
||||
updatedFields := user.UpdateSsoFields("oauth2", ret.Nickname, ret.Phone, ret.Email)
|
||||
ginx.Dangerous(user.Update(rt.Ctx, "update_at", updatedFields...))
|
||||
}
|
||||
} else {
|
||||
now := time.Now().Unix()
|
||||
user = &models.User{
|
||||
Username: ret.Username,
|
||||
Password: "******",
|
||||
Nickname: ret.Nickname,
|
||||
Phone: ret.Phone,
|
||||
Email: ret.Email,
|
||||
Portrait: "",
|
||||
Roles: strings.Join(rt.Sso.OAuth2.DefaultRoles, " "),
|
||||
RolesLst: rt.Sso.OAuth2.DefaultRoles,
|
||||
Contacts: []byte("{}"),
|
||||
CreateAt: now,
|
||||
UpdateAt: now,
|
||||
CreateBy: "oauth2",
|
||||
UpdateBy: "oauth2",
|
||||
}
|
||||
|
||||
user = new(models.User)
|
||||
user.FullSsoFields("oauth2", ret.Username, ret.Nickname, ret.Phone, ret.Email, rt.Sso.OAuth2.DefaultRoles)
|
||||
// create user from oidc
|
||||
ginx.Dangerous(user.Add(rt.Ctx))
|
||||
}
|
||||
|
||||
@@ -22,14 +22,23 @@ func (rt *Router) alertMuteGetsByBG(c *gin.Context) {
|
||||
}
|
||||
|
||||
func (rt *Router) alertMuteGetsByGids(c *gin.Context) {
|
||||
gids := str.IdsInt64(ginx.QueryStr(c, "gids"), ",")
|
||||
if len(gids) == 0 {
|
||||
ginx.NewRender(c, http.StatusBadRequest).Message("arg(gids) is empty")
|
||||
return
|
||||
}
|
||||
gids := str.IdsInt64(ginx.QueryStr(c, "gids", ""), ",")
|
||||
if len(gids) > 0 {
|
||||
for _, gid := range gids {
|
||||
rt.bgroCheck(c, gid)
|
||||
}
|
||||
} else {
|
||||
me := c.MustGet("user").(*models.User)
|
||||
if !me.IsAdmin() {
|
||||
var err error
|
||||
gids, err = models.MyBusiGroupIds(rt.Ctx, me.Id)
|
||||
ginx.Dangerous(err)
|
||||
|
||||
for _, gid := range gids {
|
||||
rt.bgroCheck(c, gid)
|
||||
if len(gids) == 0 {
|
||||
ginx.NewRender(c).Data([]int{}, nil)
|
||||
return
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
lst, err := models.AlertMuteGetsByBGIds(rt.Ctx, gids)
|
||||
@@ -86,7 +95,8 @@ func (rt *Router) alertMuteAddByService(c *gin.Context) {
|
||||
var f models.AlertMute
|
||||
ginx.BindJSON(c, &f)
|
||||
|
||||
ginx.NewRender(c).Message(f.Add(rt.Ctx))
|
||||
err := f.Add(rt.Ctx)
|
||||
ginx.NewRender(c).Data(f.Id, err)
|
||||
}
|
||||
|
||||
func (rt *Router) alertMuteDel(c *gin.Context) {
|
||||
@@ -97,6 +107,14 @@ func (rt *Router) alertMuteDel(c *gin.Context) {
|
||||
ginx.NewRender(c).Message(models.AlertMuteDel(rt.Ctx, f.Ids))
|
||||
}
|
||||
|
||||
// alertMuteGet returns the alert mute by ID
|
||||
func (rt *Router) alertMuteGet(c *gin.Context) {
|
||||
amid := ginx.UrlParamInt64(c, "amid")
|
||||
am, err := models.AlertMuteGetById(rt.Ctx, amid)
|
||||
am.DB2FE()
|
||||
ginx.NewRender(c).Data(am, err)
|
||||
}
|
||||
|
||||
func (rt *Router) alertMutePutByFE(c *gin.Context) {
|
||||
var f models.AlertMute
|
||||
ginx.BindJSON(c, &f)
|
||||
|
||||
@@ -92,6 +92,10 @@ func (rt *Router) jwtAuth() gin.HandlerFunc {
|
||||
}
|
||||
}
|
||||
|
||||
func (rt *Router) Auth() gin.HandlerFunc {
|
||||
return rt.auth()
|
||||
}
|
||||
|
||||
func (rt *Router) auth() gin.HandlerFunc {
|
||||
if rt.HTTP.ProxyAuth.Enable {
|
||||
return rt.proxyAuth()
|
||||
@@ -120,6 +124,10 @@ func (rt *Router) jwtMock() gin.HandlerFunc {
|
||||
}
|
||||
}
|
||||
|
||||
func (rt *Router) User() gin.HandlerFunc {
|
||||
return rt.user()
|
||||
}
|
||||
|
||||
func (rt *Router) user() gin.HandlerFunc {
|
||||
return func(c *gin.Context) {
|
||||
userid := c.MustGet("userid").(int64)
|
||||
@@ -135,6 +143,8 @@ func (rt *Router) user() gin.HandlerFunc {
|
||||
|
||||
c.Set("user", user)
|
||||
c.Set("isadmin", user.IsAdmin())
|
||||
// Update user.LastActiveTime
|
||||
rt.UserCache.SetLastActiveTime(user.Id, time.Now().Unix())
|
||||
c.Next()
|
||||
}
|
||||
}
|
||||
@@ -174,6 +184,10 @@ func (rt *Router) bgro() gin.HandlerFunc {
|
||||
}
|
||||
|
||||
// bgrw 逐步要被干掉,不安全
|
||||
func (rt *Router) Bgrw() gin.HandlerFunc {
|
||||
return rt.bgrw()
|
||||
}
|
||||
|
||||
func (rt *Router) bgrw() gin.HandlerFunc {
|
||||
return func(c *gin.Context) {
|
||||
me := c.MustGet("user").(*models.User)
|
||||
@@ -233,6 +247,10 @@ func (rt *Router) bgroCheck(c *gin.Context, bgid int64) {
|
||||
c.Set("busi_group", bg)
|
||||
}
|
||||
|
||||
func (rt *Router) Perm(operation string) gin.HandlerFunc {
|
||||
return rt.perm(operation)
|
||||
}
|
||||
|
||||
func (rt *Router) perm(operation string) gin.HandlerFunc {
|
||||
return func(c *gin.Context) {
|
||||
me := c.MustGet("user").(*models.User)
|
||||
|
||||
@@ -90,7 +90,8 @@ func (rt *Router) notifyChannelPuts(c *gin.Context) {
|
||||
var notifyChannels []models.NotifyChannel
|
||||
ginx.BindJSON(c, ¬ifyChannels)
|
||||
|
||||
channels := []string{models.Dingtalk, models.Wecom, models.Feishu, models.Mm, models.Telegram, models.Email}
|
||||
channels := []string{models.Dingtalk, models.Wecom, models.Feishu, models.Mm, models.Telegram,
|
||||
models.Email, models.Lark, models.LarkCard}
|
||||
|
||||
m := make(map[string]struct{})
|
||||
for _, v := range notifyChannels {
|
||||
@@ -126,7 +127,8 @@ func (rt *Router) notifyContactPuts(c *gin.Context) {
|
||||
var notifyContacts []models.NotifyContact
|
||||
ginx.BindJSON(c, ¬ifyContacts)
|
||||
|
||||
keys := []string{models.DingtalkKey, models.WecomKey, models.FeishuKey, models.MmKey, models.TelegramKey}
|
||||
keys := []string{models.DingtalkKey, models.WecomKey, models.FeishuKey, models.MmKey,
|
||||
models.TelegramKey, models.LarkKey}
|
||||
|
||||
m := make(map[string]struct{})
|
||||
for _, v := range notifyContacts {
|
||||
@@ -169,10 +171,6 @@ func (rt *Router) notifyConfigPut(c *gin.Context) {
|
||||
var smtp aconf.SMTPConfig
|
||||
err := toml.Unmarshal([]byte(text), &smtp)
|
||||
ginx.Dangerous(err)
|
||||
case models.IBEX:
|
||||
var ibex aconf.Ibex
|
||||
err := toml.Unmarshal([]byte(f.Cval), &ibex)
|
||||
ginx.Dangerous(err)
|
||||
default:
|
||||
ginx.Bomb(200, "key %s can not modify", f.Ckey)
|
||||
}
|
||||
|
||||
@@ -21,14 +21,23 @@ func (rt *Router) recordingRuleGets(c *gin.Context) {
|
||||
}
|
||||
|
||||
func (rt *Router) recordingRuleGetsByGids(c *gin.Context) {
|
||||
gids := str.IdsInt64(ginx.QueryStr(c, "gids"), ",")
|
||||
if len(gids) == 0 {
|
||||
ginx.NewRender(c, http.StatusBadRequest).Message("arg(gids) is empty")
|
||||
return
|
||||
}
|
||||
gids := str.IdsInt64(ginx.QueryStr(c, "gids", ""), ",")
|
||||
if len(gids) > 0 {
|
||||
for _, gid := range gids {
|
||||
rt.bgroCheck(c, gid)
|
||||
}
|
||||
} else {
|
||||
me := c.MustGet("user").(*models.User)
|
||||
if !me.IsAdmin() {
|
||||
var err error
|
||||
gids, err = models.MyBusiGroupIds(rt.Ctx, me.Id)
|
||||
ginx.Dangerous(err)
|
||||
|
||||
for _, gid := range gids {
|
||||
rt.bgroCheck(c, gid)
|
||||
if len(gids) == 0 {
|
||||
ginx.NewRender(c).Data([]int{}, nil)
|
||||
return
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
ars, err := models.RecordingRuleGetsByBGIds(rt.Ctx, gids)
|
||||
|
||||
@@ -2,10 +2,13 @@ package router
|
||||
|
||||
import (
|
||||
"github.com/ccfos/nightingale/v6/models"
|
||||
"github.com/ccfos/nightingale/v6/pkg/flashduty"
|
||||
"github.com/ccfos/nightingale/v6/pkg/ormx"
|
||||
"github.com/ccfos/nightingale/v6/pkg/secu"
|
||||
|
||||
"github.com/gin-gonic/gin"
|
||||
"github.com/toolkits/pkg/ginx"
|
||||
"github.com/toolkits/pkg/logger"
|
||||
)
|
||||
|
||||
func (rt *Router) selfProfileGet(c *gin.Context) {
|
||||
@@ -29,6 +32,11 @@ func (rt *Router) selfProfilePut(c *gin.Context) {
|
||||
ginx.BindJSON(c, &f)
|
||||
|
||||
user := c.MustGet("user").(*models.User)
|
||||
oldInfo := models.User{
|
||||
Username: user.Username,
|
||||
Phone: user.Phone,
|
||||
Email: user.Email,
|
||||
}
|
||||
user.Nickname = f.Nickname
|
||||
user.Phone = f.Phone
|
||||
user.Email = f.Email
|
||||
@@ -36,6 +44,10 @@ func (rt *Router) selfProfilePut(c *gin.Context) {
|
||||
user.Contacts = f.Contacts
|
||||
user.UpdateBy = user.Username
|
||||
|
||||
if flashduty.NeedSyncUser(rt.Ctx) {
|
||||
flashduty.UpdateUser(rt.Ctx, oldInfo, f.Email, f.Phone)
|
||||
}
|
||||
|
||||
ginx.NewRender(c).Message(user.UpdateAllFields(rt.Ctx))
|
||||
}
|
||||
|
||||
@@ -48,5 +60,25 @@ func (rt *Router) selfPasswordPut(c *gin.Context) {
|
||||
var f selfPasswordForm
|
||||
ginx.BindJSON(c, &f)
|
||||
user := c.MustGet("user").(*models.User)
|
||||
ginx.NewRender(c).Message(user.ChangePassword(rt.Ctx, f.OldPass, f.NewPass))
|
||||
|
||||
newPassWord := f.NewPass
|
||||
oldPassWord := f.OldPass
|
||||
if rt.HTTP.RSA.OpenRSA {
|
||||
var err error
|
||||
newPassWord, err = secu.Decrypt(f.NewPass, rt.HTTP.RSA.RSAPrivateKey, rt.HTTP.RSA.RSAPassWord)
|
||||
if err != nil {
|
||||
logger.Errorf("RSA Decrypt failed: %v username: %s", err, user.Username)
|
||||
ginx.NewRender(c).Message(err)
|
||||
return
|
||||
}
|
||||
|
||||
oldPassWord, err = secu.Decrypt(f.OldPass, rt.HTTP.RSA.RSAPrivateKey, rt.HTTP.RSA.RSAPassWord)
|
||||
if err != nil {
|
||||
logger.Errorf("RSA Decrypt failed: %v username: %s", err, user.Username)
|
||||
ginx.NewRender(c).Message(err)
|
||||
return
|
||||
}
|
||||
}
|
||||
|
||||
ginx.NewRender(c).Message(user.ChangePassword(rt.Ctx, oldPassWord, newPassWord))
|
||||
}
|
||||
|
||||
@@ -27,7 +27,7 @@ func (rt *Router) serverHeartbeat(c *gin.Context) {
|
||||
}
|
||||
|
||||
func (rt *Router) serversActive(c *gin.Context) {
|
||||
datasourceId := ginx.QueryInt64(c, "dsid")
|
||||
datasourceId := ginx.QueryInt64(c, "dsid", 0)
|
||||
engineName := ginx.QueryStr(c, "engine_name", "")
|
||||
if engineName != "" {
|
||||
servers, err := models.AlertingEngineGetsInstances(rt.Ctx, "engine_cluster = ? and clock > ?", engineName, time.Now().Unix()-30)
|
||||
@@ -35,6 +35,10 @@ func (rt *Router) serversActive(c *gin.Context) {
|
||||
return
|
||||
}
|
||||
|
||||
if datasourceId == 0 {
|
||||
ginx.NewRender(c).Message("dsid is required")
|
||||
return
|
||||
}
|
||||
servers, err := models.AlertingEngineGetsInstances(rt.Ctx, "datasource_id = ? and clock > ?", datasourceId, time.Now().Unix()-30)
|
||||
ginx.NewRender(c).Data(servers, err)
|
||||
}
|
||||
|
||||
@@ -49,26 +49,33 @@ func (rt *Router) targetGets(c *gin.Context) {
|
||||
downtime := ginx.QueryInt64(c, "downtime", 0)
|
||||
dsIds := queryDatasourceIds(c)
|
||||
|
||||
order := ginx.QueryStr(c, "order", "ident")
|
||||
desc := ginx.QueryBool(c, "desc", false)
|
||||
|
||||
var err error
|
||||
if len(bgids) == 0 {
|
||||
user := c.MustGet("user").(*models.User)
|
||||
if !user.IsAdmin() {
|
||||
// 如果是非 admin 用户,全部对象的情况,找到用户有权限的业务组
|
||||
userGroupIds, err := models.MyGroupIds(rt.Ctx, user.Id)
|
||||
ginx.Dangerous(err)
|
||||
|
||||
bgids, err = models.BusiGroupIds(rt.Ctx, userGroupIds)
|
||||
var err error
|
||||
bgids, err = models.MyBusiGroupIds(rt.Ctx, user.Id)
|
||||
ginx.Dangerous(err)
|
||||
|
||||
// 将未分配业务组的对象也加入到列表中
|
||||
bgids = append(bgids, 0)
|
||||
}
|
||||
}
|
||||
|
||||
total, err := models.TargetTotal(rt.Ctx, bgids, dsIds, query, downtime)
|
||||
options := []models.BuildTargetWhereOption{
|
||||
models.BuildTargetWhereWithBgids(bgids),
|
||||
models.BuildTargetWhereWithDsIds(dsIds),
|
||||
models.BuildTargetWhereWithQuery(query),
|
||||
models.BuildTargetWhereWithDowntime(downtime),
|
||||
}
|
||||
total, err := models.TargetTotal(rt.Ctx, options...)
|
||||
ginx.Dangerous(err)
|
||||
|
||||
list, err := models.TargetGets(rt.Ctx, bgids, dsIds, query, downtime, limit, ginx.Offset(c, limit))
|
||||
list, err := models.TargetGets(rt.Ctx, limit,
|
||||
ginx.Offset(c, limit), order, desc, options...)
|
||||
ginx.Dangerous(err)
|
||||
|
||||
if err == nil {
|
||||
@@ -155,173 +162,242 @@ func (rt *Router) targetGetTags(c *gin.Context) {
|
||||
}
|
||||
|
||||
type targetTagsForm struct {
|
||||
Idents []string `json:"idents" binding:"required"`
|
||||
Tags []string `json:"tags" binding:"required"`
|
||||
Idents []string `json:"idents" binding:"required_without=HostIps"`
|
||||
HostIps []string `json:"host_ips" binding:"required_without=Idents"`
|
||||
Tags []string `json:"tags" binding:"required"`
|
||||
}
|
||||
|
||||
func (rt *Router) targetBindTagsByFE(c *gin.Context) {
|
||||
var f targetTagsForm
|
||||
var err error
|
||||
var failedResults = make(map[string]string)
|
||||
ginx.BindJSON(c, &f)
|
||||
|
||||
if len(f.Idents) == 0 {
|
||||
ginx.Bomb(http.StatusBadRequest, "idents empty")
|
||||
if len(f.Idents) == 0 && len(f.HostIps) == 0 {
|
||||
ginx.Bomb(http.StatusBadRequest, "idents or host_ips must be provided")
|
||||
}
|
||||
// Acquire idents by idents and hostIps
|
||||
failedResults, f.Idents, err = models.TargetsGetIdentsByIdentsAndHostIps(rt.Ctx, f.Idents, f.HostIps)
|
||||
if err != nil {
|
||||
ginx.Bomb(http.StatusBadRequest, err.Error())
|
||||
}
|
||||
|
||||
rt.checkTargetPerm(c, f.Idents)
|
||||
|
||||
ginx.NewRender(c).Message(rt.targetBindTags(f))
|
||||
ginx.NewRender(c).Data(rt.targetBindTags(f, failedResults))
|
||||
}
|
||||
|
||||
func (rt *Router) targetBindTagsByService(c *gin.Context) {
|
||||
var f targetTagsForm
|
||||
var err error
|
||||
var failedResults = make(map[string]string)
|
||||
ginx.BindJSON(c, &f)
|
||||
|
||||
if len(f.Idents) == 0 {
|
||||
ginx.Bomb(http.StatusBadRequest, "idents empty")
|
||||
if len(f.Idents) == 0 && len(f.HostIps) == 0 {
|
||||
ginx.Bomb(http.StatusBadRequest, "idents or host_ips must be provided")
|
||||
}
|
||||
// Acquire idents by idents and hostIps
|
||||
failedResults, f.Idents, err = models.TargetsGetIdentsByIdentsAndHostIps(rt.Ctx, f.Idents, f.HostIps)
|
||||
if err != nil {
|
||||
ginx.Bomb(http.StatusBadRequest, err.Error())
|
||||
}
|
||||
|
||||
ginx.NewRender(c).Message(rt.targetBindTags(f))
|
||||
ginx.NewRender(c).Data(rt.targetBindTags(f, failedResults))
|
||||
}
|
||||
|
||||
func (rt *Router) targetBindTags(f targetTagsForm) error {
|
||||
for i := 0; i < len(f.Tags); i++ {
|
||||
arr := strings.Split(f.Tags[i], "=")
|
||||
func (rt *Router) targetBindTags(f targetTagsForm, failedIdents map[string]string) (map[string]string, error) {
|
||||
// 1. Check tags
|
||||
if err := rt.validateTags(f.Tags); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
// 2. Acquire targets by idents
|
||||
targets, err := models.TargetsGetByIdents(rt.Ctx, f.Idents)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
// 3. Add tags to targets
|
||||
for _, target := range targets {
|
||||
if err = rt.addTagsToTarget(target, f.Tags); err != nil {
|
||||
failedIdents[target.Ident] = err.Error()
|
||||
}
|
||||
}
|
||||
|
||||
return failedIdents, nil
|
||||
}
|
||||
|
||||
func (rt *Router) validateTags(tags []string) error {
|
||||
for _, tag := range tags {
|
||||
arr := strings.Split(tag, "=")
|
||||
if len(arr) != 2 {
|
||||
return fmt.Errorf("invalid tag(%s)", f.Tags[i])
|
||||
return fmt.Errorf("invalid tag format: %s (expected format: key=value)", tag)
|
||||
}
|
||||
|
||||
if strings.TrimSpace(arr[0]) == "" || strings.TrimSpace(arr[1]) == "" {
|
||||
return fmt.Errorf("invalid tag(%s)", f.Tags[i])
|
||||
key, value := strings.TrimSpace(arr[0]), strings.TrimSpace(arr[1])
|
||||
if key == "" {
|
||||
return fmt.Errorf("invalid tag: key is empty in tag %s", tag)
|
||||
}
|
||||
if value == "" {
|
||||
return fmt.Errorf("invalid tag: value is empty in tag %s", tag)
|
||||
}
|
||||
|
||||
if strings.IndexByte(arr[0], '.') != -1 {
|
||||
return fmt.Errorf("invalid tagkey(%s): cannot contains . ", arr[0])
|
||||
if strings.Contains(key, ".") {
|
||||
return fmt.Errorf("invalid tag key: %s (key cannot contain '.')", key)
|
||||
}
|
||||
|
||||
if strings.IndexByte(arr[0], '-') != -1 {
|
||||
return fmt.Errorf("invalid tagkey(%s): cannot contains -", arr[0])
|
||||
if strings.Contains(key, "-") {
|
||||
return fmt.Errorf("invalid tag key: %s (key cannot contain '-')", key)
|
||||
}
|
||||
|
||||
if !model.LabelNameRE.MatchString(arr[0]) {
|
||||
return fmt.Errorf("invalid tagkey(%s)", arr[0])
|
||||
if !model.LabelNameRE.MatchString(key) {
|
||||
return fmt.Errorf("invalid tag key: %s "+
|
||||
"(key must start with a letter or underscore, followed by letters, digits, or underscores)", key)
|
||||
}
|
||||
}
|
||||
|
||||
for i := 0; i < len(f.Idents); i++ {
|
||||
target, err := models.TargetGetByIdent(rt.Ctx, f.Idents[i])
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
if target == nil {
|
||||
continue
|
||||
}
|
||||
|
||||
// 不能有同key的标签,否则附到时序数据上会产生覆盖,让人困惑
|
||||
for j := 0; j < len(f.Tags); j++ {
|
||||
tagkey := strings.Split(f.Tags[j], "=")[0]
|
||||
tagkeyPrefix := tagkey + "="
|
||||
if strings.HasPrefix(target.Tags, tagkeyPrefix) {
|
||||
return fmt.Errorf("duplicate tagkey(%s)", tagkey)
|
||||
}
|
||||
}
|
||||
|
||||
err = target.AddTags(rt.Ctx, f.Tags)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func (rt *Router) addTagsToTarget(target *models.Target, tags []string) error {
|
||||
for _, tag := range tags {
|
||||
tagKey := strings.Split(tag, "=")[0]
|
||||
if strings.Contains(target.Tags, tagKey+"=") {
|
||||
return fmt.Errorf("duplicate tagkey(%s)", tagKey)
|
||||
}
|
||||
}
|
||||
|
||||
return target.AddTags(rt.Ctx, tags)
|
||||
}
|
||||
|
||||
func (rt *Router) targetUnbindTagsByFE(c *gin.Context) {
|
||||
var f targetTagsForm
|
||||
var err error
|
||||
var failedResults = make(map[string]string)
|
||||
ginx.BindJSON(c, &f)
|
||||
|
||||
if len(f.Idents) == 0 {
|
||||
ginx.Bomb(http.StatusBadRequest, "idents empty")
|
||||
if len(f.Idents) == 0 && len(f.HostIps) == 0 {
|
||||
ginx.Bomb(http.StatusBadRequest, "idents or host_ips must be provided")
|
||||
}
|
||||
// Acquire idents by idents and hostIps
|
||||
failedResults, f.Idents, err = models.TargetsGetIdentsByIdentsAndHostIps(rt.Ctx, f.Idents, f.HostIps)
|
||||
if err != nil {
|
||||
ginx.Bomb(http.StatusBadRequest, err.Error())
|
||||
}
|
||||
|
||||
rt.checkTargetPerm(c, f.Idents)
|
||||
|
||||
ginx.NewRender(c).Message(rt.targetUnbindTags(f))
|
||||
ginx.NewRender(c).Data(rt.targetUnbindTags(f, failedResults))
|
||||
}
|
||||
|
||||
func (rt *Router) targetUnbindTagsByService(c *gin.Context) {
|
||||
var f targetTagsForm
|
||||
var err error
|
||||
var failedResults = make(map[string]string)
|
||||
ginx.BindJSON(c, &f)
|
||||
|
||||
if len(f.Idents) == 0 {
|
||||
ginx.Bomb(http.StatusBadRequest, "idents empty")
|
||||
if len(f.Idents) == 0 && len(f.HostIps) == 0 {
|
||||
ginx.Bomb(http.StatusBadRequest, "idents or host_ips must be provided")
|
||||
}
|
||||
// Acquire idents by idents and hostIps
|
||||
failedResults, f.Idents, err = models.TargetsGetIdentsByIdentsAndHostIps(rt.Ctx, f.Idents, f.HostIps)
|
||||
if err != nil {
|
||||
ginx.Bomb(http.StatusBadRequest, err.Error())
|
||||
}
|
||||
|
||||
ginx.NewRender(c).Message(rt.targetUnbindTags(f))
|
||||
ginx.NewRender(c).Data(rt.targetUnbindTags(f, failedResults))
|
||||
}
|
||||
|
||||
func (rt *Router) targetUnbindTags(f targetTagsForm) error {
|
||||
for i := 0; i < len(f.Idents); i++ {
|
||||
target, err := models.TargetGetByIdent(rt.Ctx, f.Idents[i])
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
if target == nil {
|
||||
continue
|
||||
}
|
||||
func (rt *Router) targetUnbindTags(f targetTagsForm, failedIdents map[string]string) (map[string]string, error) {
|
||||
// 1. Acquire targets by idents
|
||||
targets, err := models.TargetsGetByIdents(rt.Ctx, f.Idents)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
// 2. Remove tags from targets
|
||||
for _, target := range targets {
|
||||
err = target.DelTags(rt.Ctx, f.Tags)
|
||||
if err != nil {
|
||||
return err
|
||||
failedIdents[target.Ident] = err.Error()
|
||||
continue
|
||||
}
|
||||
}
|
||||
return nil
|
||||
|
||||
return failedIdents, nil
|
||||
}
|
||||
|
||||
type targetNoteForm struct {
|
||||
Idents []string `json:"idents" binding:"required"`
|
||||
Note string `json:"note"`
|
||||
Idents []string `json:"idents" binding:"required_without=HostIps"`
|
||||
HostIps []string `json:"host_ips" binding:"required_without=Idents"`
|
||||
Note string `json:"note"`
|
||||
}
|
||||
|
||||
func (rt *Router) targetUpdateNote(c *gin.Context) {
|
||||
var f targetNoteForm
|
||||
var err error
|
||||
var failedResults = make(map[string]string)
|
||||
ginx.BindJSON(c, &f)
|
||||
|
||||
if len(f.Idents) == 0 {
|
||||
ginx.Bomb(http.StatusBadRequest, "idents empty")
|
||||
if len(f.Idents) == 0 && len(f.HostIps) == 0 {
|
||||
ginx.Bomb(http.StatusBadRequest, "idents or host_ips must be provided")
|
||||
}
|
||||
|
||||
// Acquire idents by idents and hostIps
|
||||
failedResults, f.Idents, err = models.TargetsGetIdentsByIdentsAndHostIps(rt.Ctx, f.Idents, f.HostIps)
|
||||
if err != nil {
|
||||
ginx.Bomb(http.StatusBadRequest, err.Error())
|
||||
}
|
||||
|
||||
rt.checkTargetPerm(c, f.Idents)
|
||||
|
||||
ginx.NewRender(c).Message(models.TargetUpdateNote(rt.Ctx, f.Idents, f.Note))
|
||||
ginx.NewRender(c).Data(failedResults, models.TargetUpdateNote(rt.Ctx, f.Idents, f.Note))
|
||||
}
|
||||
|
||||
func (rt *Router) targetUpdateNoteByService(c *gin.Context) {
|
||||
var f targetNoteForm
|
||||
var err error
|
||||
var failedResults = make(map[string]string)
|
||||
ginx.BindJSON(c, &f)
|
||||
|
||||
if len(f.Idents) == 0 {
|
||||
ginx.Bomb(http.StatusBadRequest, "idents empty")
|
||||
if len(f.Idents) == 0 && len(f.HostIps) == 0 {
|
||||
ginx.Bomb(http.StatusBadRequest, "idents or host_ips must be provided")
|
||||
}
|
||||
|
||||
ginx.NewRender(c).Message(models.TargetUpdateNote(rt.Ctx, f.Idents, f.Note))
|
||||
// Acquire idents by idents and hostIps
|
||||
failedResults, f.Idents, err = models.TargetsGetIdentsByIdentsAndHostIps(rt.Ctx, f.Idents, f.HostIps)
|
||||
if err != nil {
|
||||
ginx.Bomb(http.StatusBadRequest, err.Error())
|
||||
}
|
||||
|
||||
ginx.NewRender(c).Data(failedResults, models.TargetUpdateNote(rt.Ctx, f.Idents, f.Note))
|
||||
}
|
||||
|
||||
type targetBgidForm struct {
|
||||
Idents []string `json:"idents" binding:"required"`
|
||||
Bgid int64 `json:"bgid"`
|
||||
Idents []string `json:"idents" binding:"required_without=HostIps"`
|
||||
HostIps []string `json:"host_ips" binding:"required_without=Idents"`
|
||||
Bgid int64 `json:"bgid"`
|
||||
}
|
||||
|
||||
func (rt *Router) targetUpdateBgid(c *gin.Context) {
|
||||
var f targetBgidForm
|
||||
var err error
|
||||
var failedResults = make(map[string]string)
|
||||
ginx.BindJSON(c, &f)
|
||||
|
||||
if len(f.Idents) == 0 {
|
||||
ginx.Bomb(http.StatusBadRequest, "idents empty")
|
||||
if len(f.Idents) == 0 && len(f.HostIps) == 0 {
|
||||
ginx.Bomb(http.StatusBadRequest, "idents or host_ips must be provided")
|
||||
}
|
||||
|
||||
// Acquire idents by idents and hostIps
|
||||
failedResults, f.Idents, err = models.TargetsGetIdentsByIdentsAndHostIps(rt.Ctx, f.Idents, f.HostIps)
|
||||
if err != nil {
|
||||
ginx.Bomb(http.StatusBadRequest, err.Error())
|
||||
}
|
||||
|
||||
user := c.MustGet("user").(*models.User)
|
||||
if user.IsAdmin() {
|
||||
ginx.NewRender(c).Message(models.TargetUpdateBgid(rt.Ctx, f.Idents, f.Bgid, false))
|
||||
ginx.NewRender(c).Data(failedResults, models.TargetUpdateBgid(rt.Ctx, f.Idents, f.Bgid, false))
|
||||
return
|
||||
}
|
||||
|
||||
@@ -363,24 +439,69 @@ func (rt *Router) targetUpdateBgid(c *gin.Context) {
|
||||
ginx.Bomb(http.StatusBadRequest, "invalid bgid")
|
||||
}
|
||||
|
||||
ginx.NewRender(c).Message(models.TargetUpdateBgid(rt.Ctx, f.Idents, f.Bgid, false))
|
||||
ginx.NewRender(c).Data(failedResults, models.TargetUpdateBgid(rt.Ctx, f.Idents, f.Bgid, false))
|
||||
}
|
||||
|
||||
func (rt *Router) targetUpdateBgidByService(c *gin.Context) {
|
||||
var f targetBgidForm
|
||||
var err error
|
||||
var failedResults = make(map[string]string)
|
||||
ginx.BindJSON(c, &f)
|
||||
|
||||
if len(f.Idents) == 0 && len(f.HostIps) == 0 {
|
||||
ginx.Bomb(http.StatusBadRequest, "idents or host_ips must be provided")
|
||||
}
|
||||
|
||||
// Acquire idents by idents and hostIps
|
||||
failedResults, f.Idents, err = models.TargetsGetIdentsByIdentsAndHostIps(rt.Ctx, f.Idents, f.HostIps)
|
||||
if err != nil {
|
||||
ginx.Bomb(http.StatusBadRequest, err.Error())
|
||||
}
|
||||
|
||||
ginx.NewRender(c).Data(failedResults, models.TargetUpdateBgid(rt.Ctx, f.Idents, f.Bgid, false))
|
||||
}
|
||||
|
||||
type identsForm struct {
|
||||
Idents []string `json:"idents" binding:"required"`
|
||||
Idents []string `json:"idents" binding:"required_without=HostIps"`
|
||||
HostIps []string `json:"host_ips" binding:"required_without=Idents"`
|
||||
}
|
||||
|
||||
func (rt *Router) targetDel(c *gin.Context) {
|
||||
var f identsForm
|
||||
var err error
|
||||
var failedResults = make(map[string]string)
|
||||
ginx.BindJSON(c, &f)
|
||||
|
||||
if len(f.Idents) == 0 {
|
||||
ginx.Bomb(http.StatusBadRequest, "idents empty")
|
||||
if len(f.Idents) == 0 && len(f.HostIps) == 0 {
|
||||
ginx.Bomb(http.StatusBadRequest, "idents or host_ips must be provided")
|
||||
}
|
||||
|
||||
rt.checkTargetPerm(c, f.Idents)
|
||||
// Acquire idents by idents and hostIps
|
||||
failedResults, f.Idents, err = models.TargetsGetIdentsByIdentsAndHostIps(rt.Ctx, f.Idents, f.HostIps)
|
||||
if err != nil {
|
||||
ginx.Bomb(http.StatusBadRequest, err.Error())
|
||||
}
|
||||
|
||||
ginx.NewRender(c).Message(models.TargetDel(rt.Ctx, f.Idents))
|
||||
ginx.NewRender(c).Data(failedResults, models.TargetDel(rt.Ctx, f.Idents))
|
||||
}
|
||||
|
||||
func (rt *Router) targetDelByService(c *gin.Context) {
|
||||
var f identsForm
|
||||
var err error
|
||||
var failedResults = make(map[string]string)
|
||||
ginx.BindJSON(c, &f)
|
||||
|
||||
if len(f.Idents) == 0 && len(f.HostIps) == 0 {
|
||||
ginx.Bomb(http.StatusBadRequest, "idents or host_ips must be provided")
|
||||
}
|
||||
|
||||
// Acquire idents by idents and hostIps
|
||||
failedResults, f.Idents, err = models.TargetsGetIdentsByIdentsAndHostIps(rt.Ctx, f.Idents, f.HostIps)
|
||||
if err != nil {
|
||||
ginx.Bomb(http.StatusBadRequest, err.Error())
|
||||
}
|
||||
|
||||
ginx.NewRender(c).Data(failedResults, models.TargetDel(rt.Ctx, f.Idents))
|
||||
}
|
||||
|
||||
func (rt *Router) checkTargetPerm(c *gin.Context, idents []string) {
|
||||
@@ -392,3 +513,21 @@ func (rt *Router) checkTargetPerm(c *gin.Context, idents []string) {
|
||||
ginx.Bomb(http.StatusForbidden, "No permission to operate the targets: %s", strings.Join(nopri, ", "))
|
||||
}
|
||||
}
|
||||
|
||||
func (rt *Router) targetsOfAlertRule(c *gin.Context) {
|
||||
engineName := ginx.QueryStr(c, "engine_name", "")
|
||||
m, err := models.GetTargetsOfHostAlertRule(rt.Ctx, engineName)
|
||||
ret := make(map[string]map[int64][]string)
|
||||
for en, v := range m {
|
||||
if en != engineName {
|
||||
continue
|
||||
}
|
||||
|
||||
ret[en] = make(map[int64][]string)
|
||||
for rid, idents := range v {
|
||||
ret[en][rid] = idents
|
||||
}
|
||||
}
|
||||
|
||||
ginx.NewRender(c).Data(ret, err)
|
||||
}
|
||||
|
||||
@@ -1,17 +1,14 @@
|
||||
package router
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"net/http"
|
||||
"net/http/httputil"
|
||||
"net/url"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
"github.com/ccfos/nightingale/v6/alert/sender"
|
||||
"github.com/ccfos/nightingale/v6/models"
|
||||
|
||||
"github.com/gin-gonic/gin"
|
||||
"github.com/toolkits/pkg/ginx"
|
||||
"github.com/toolkits/pkg/i18n"
|
||||
"github.com/toolkits/pkg/str"
|
||||
)
|
||||
|
||||
@@ -43,14 +40,23 @@ func (rt *Router) taskGets(c *gin.Context) {
|
||||
}
|
||||
|
||||
func (rt *Router) taskGetsByGids(c *gin.Context) {
|
||||
gids := str.IdsInt64(ginx.QueryStr(c, "gids"), ",")
|
||||
if len(gids) == 0 {
|
||||
ginx.NewRender(c, http.StatusBadRequest).Message("arg(gids) is empty")
|
||||
return
|
||||
}
|
||||
gids := str.IdsInt64(ginx.QueryStr(c, "gids", ""), ",")
|
||||
if len(gids) > 0 {
|
||||
for _, gid := range gids {
|
||||
rt.bgroCheck(c, gid)
|
||||
}
|
||||
} else {
|
||||
me := c.MustGet("user").(*models.User)
|
||||
if !me.IsAdmin() {
|
||||
var err error
|
||||
gids, err = models.MyBusiGroupIds(rt.Ctx, me.Id)
|
||||
ginx.Dangerous(err)
|
||||
|
||||
for _, gid := range gids {
|
||||
rt.bgroCheck(c, gid)
|
||||
if len(gids) == 0 {
|
||||
ginx.NewRender(c).Data([]int{}, nil)
|
||||
return
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
mine := ginx.QueryBool(c, "mine", false)
|
||||
@@ -92,71 +98,6 @@ type taskForm struct {
|
||||
Hosts []string `json:"hosts" binding:"required"`
|
||||
}
|
||||
|
||||
func (f *taskForm) Verify() error {
|
||||
if f.Batch < 0 {
|
||||
return fmt.Errorf("arg(batch) should be nonnegative")
|
||||
}
|
||||
|
||||
if f.Tolerance < 0 {
|
||||
return fmt.Errorf("arg(tolerance) should be nonnegative")
|
||||
}
|
||||
|
||||
if f.Timeout < 0 {
|
||||
return fmt.Errorf("arg(timeout) should be nonnegative")
|
||||
}
|
||||
|
||||
if f.Timeout > 3600*24 {
|
||||
return fmt.Errorf("arg(timeout) longer than one day")
|
||||
}
|
||||
|
||||
if f.Timeout == 0 {
|
||||
f.Timeout = 30
|
||||
}
|
||||
|
||||
f.Pause = strings.Replace(f.Pause, ",", ",", -1)
|
||||
f.Pause = strings.Replace(f.Pause, " ", "", -1)
|
||||
f.Args = strings.Replace(f.Args, ",", ",", -1)
|
||||
|
||||
if f.Title == "" {
|
||||
return fmt.Errorf("arg(title) is required")
|
||||
}
|
||||
|
||||
if str.Dangerous(f.Title) {
|
||||
return fmt.Errorf("arg(title) is dangerous")
|
||||
}
|
||||
|
||||
if f.Script == "" {
|
||||
return fmt.Errorf("arg(script) is required")
|
||||
}
|
||||
f.Script = strings.Replace(f.Script, "\r\n", "\n", -1)
|
||||
|
||||
if str.Dangerous(f.Args) {
|
||||
return fmt.Errorf("arg(args) is dangerous")
|
||||
}
|
||||
|
||||
if str.Dangerous(f.Pause) {
|
||||
return fmt.Errorf("arg(pause) is dangerous")
|
||||
}
|
||||
|
||||
if len(f.Hosts) == 0 {
|
||||
return fmt.Errorf("arg(hosts) empty")
|
||||
}
|
||||
|
||||
if f.Action != "start" && f.Action != "pause" {
|
||||
return fmt.Errorf("arg(action) invalid")
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func (f *taskForm) HandleFH(fh string) {
|
||||
i := strings.Index(f.Title, " FH: ")
|
||||
if i > 0 {
|
||||
f.Title = f.Title[:i]
|
||||
}
|
||||
f.Title = f.Title + " FH: " + fh
|
||||
}
|
||||
|
||||
func (rt *Router) taskRecordAdd(c *gin.Context) {
|
||||
var f *models.TaskRecord
|
||||
ginx.BindJSON(c, &f)
|
||||
@@ -164,7 +105,12 @@ func (rt *Router) taskRecordAdd(c *gin.Context) {
|
||||
}
|
||||
|
||||
func (rt *Router) taskAdd(c *gin.Context) {
|
||||
var f taskForm
|
||||
if !rt.Ibex.Enable {
|
||||
ginx.Bomb(400, i18n.Sprintf(c.GetHeader("X-Language"), "This functionality has not been enabled. Please contact the system administrator to activate it."))
|
||||
return
|
||||
}
|
||||
|
||||
var f models.TaskForm
|
||||
ginx.BindJSON(c, &f)
|
||||
|
||||
bgid := ginx.UrlParamInt64(c, "id")
|
||||
@@ -180,7 +126,7 @@ func (rt *Router) taskAdd(c *gin.Context) {
|
||||
rt.checkTargetPerm(c, f.Hosts)
|
||||
|
||||
// call ibex
|
||||
taskId, err := TaskCreate(f, rt.NotifyConfigCache.GetIbex())
|
||||
taskId, err := sender.TaskAdd(f, user.Username, rt.Ctx.IsCenter)
|
||||
ginx.Dangerous(err)
|
||||
|
||||
if taskId <= 0 {
|
||||
@@ -189,65 +135,20 @@ func (rt *Router) taskAdd(c *gin.Context) {
|
||||
|
||||
// write db
|
||||
record := models.TaskRecord{
|
||||
Id: taskId,
|
||||
GroupId: bgid,
|
||||
IbexAddress: rt.NotifyConfigCache.GetIbex().Address,
|
||||
IbexAuthUser: rt.NotifyConfigCache.GetIbex().BasicAuthUser,
|
||||
IbexAuthPass: rt.NotifyConfigCache.GetIbex().BasicAuthPass,
|
||||
Title: f.Title,
|
||||
Account: f.Account,
|
||||
Batch: f.Batch,
|
||||
Tolerance: f.Tolerance,
|
||||
Timeout: f.Timeout,
|
||||
Pause: f.Pause,
|
||||
Script: f.Script,
|
||||
Args: f.Args,
|
||||
CreateAt: time.Now().Unix(),
|
||||
CreateBy: f.Creator,
|
||||
Id: taskId,
|
||||
GroupId: bgid,
|
||||
Title: f.Title,
|
||||
Account: f.Account,
|
||||
Batch: f.Batch,
|
||||
Tolerance: f.Tolerance,
|
||||
Timeout: f.Timeout,
|
||||
Pause: f.Pause,
|
||||
Script: f.Script,
|
||||
Args: f.Args,
|
||||
CreateAt: time.Now().Unix(),
|
||||
CreateBy: f.Creator,
|
||||
}
|
||||
|
||||
err = record.Add(rt.Ctx)
|
||||
ginx.NewRender(c).Data(taskId, err)
|
||||
}
|
||||
|
||||
func (rt *Router) taskProxy(c *gin.Context) {
|
||||
target, err := url.Parse(rt.NotifyConfigCache.GetIbex().Address)
|
||||
if err != nil {
|
||||
ginx.NewRender(c).Message("invalid ibex address: %s", rt.NotifyConfigCache.GetIbex().Address)
|
||||
return
|
||||
}
|
||||
|
||||
director := func(req *http.Request) {
|
||||
req.URL.Scheme = target.Scheme
|
||||
req.URL.Host = target.Host
|
||||
|
||||
// fe request e.g. /api/n9e/busi-group/:id/task/*url
|
||||
index := strings.Index(req.URL.Path, "/task/")
|
||||
if index == -1 {
|
||||
panic("url path invalid")
|
||||
}
|
||||
|
||||
req.URL.Path = "/ibex/v1" + req.URL.Path[index:]
|
||||
|
||||
if target.RawQuery == "" || req.URL.RawQuery == "" {
|
||||
req.URL.RawQuery = target.RawQuery + req.URL.RawQuery
|
||||
} else {
|
||||
req.URL.RawQuery = target.RawQuery + "&" + req.URL.RawQuery
|
||||
}
|
||||
|
||||
if rt.NotifyConfigCache.GetIbex().BasicAuthUser != "" {
|
||||
req.SetBasicAuth(rt.NotifyConfigCache.GetIbex().BasicAuthUser, rt.NotifyConfigCache.GetIbex().BasicAuthPass)
|
||||
}
|
||||
}
|
||||
|
||||
errFunc := func(w http.ResponseWriter, r *http.Request, err error) {
|
||||
ginx.NewRender(c, http.StatusBadGateway).Message(err)
|
||||
}
|
||||
|
||||
proxy := &httputil.ReverseProxy{
|
||||
Director: director,
|
||||
ErrorHandler: errFunc,
|
||||
}
|
||||
|
||||
proxy.ServeHTTP(c.Writer, c.Request)
|
||||
}
|
||||
|
||||
@@ -10,6 +10,7 @@ import (
|
||||
|
||||
"github.com/gin-gonic/gin"
|
||||
"github.com/toolkits/pkg/ginx"
|
||||
"github.com/toolkits/pkg/i18n"
|
||||
"github.com/toolkits/pkg/str"
|
||||
)
|
||||
|
||||
@@ -34,14 +35,23 @@ func (rt *Router) taskTplGetsByGids(c *gin.Context) {
|
||||
query := ginx.QueryStr(c, "query", "")
|
||||
limit := ginx.QueryInt(c, "limit", 20)
|
||||
|
||||
gids := str.IdsInt64(ginx.QueryStr(c, "gids"), ",")
|
||||
if len(gids) == 0 {
|
||||
ginx.NewRender(c, http.StatusBadRequest).Message("arg(gids) is empty")
|
||||
return
|
||||
}
|
||||
gids := str.IdsInt64(ginx.QueryStr(c, "gids", ""), ",")
|
||||
if len(gids) > 0 {
|
||||
for _, gid := range gids {
|
||||
rt.bgroCheck(c, gid)
|
||||
}
|
||||
} else {
|
||||
me := c.MustGet("user").(*models.User)
|
||||
if !me.IsAdmin() {
|
||||
var err error
|
||||
gids, err = models.MyBusiGroupIds(rt.Ctx, me.Id)
|
||||
ginx.Dangerous(err)
|
||||
|
||||
for _, gid := range gids {
|
||||
rt.bgroCheck(c, gid)
|
||||
if len(gids) == 0 {
|
||||
ginx.NewRender(c).Data([]int{}, nil)
|
||||
return
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
total, err := models.TaskTplTotal(rt.Ctx, gids, query)
|
||||
@@ -87,6 +97,14 @@ func (rt *Router) taskTplGetByService(c *gin.Context) {
|
||||
ginx.NewRender(c).Data(tpl, err)
|
||||
}
|
||||
|
||||
func (rt *Router) taskTplGetsByService(c *gin.Context) {
|
||||
ginx.NewRender(c).Data(models.TaskTplGetAll(rt.Ctx))
|
||||
}
|
||||
|
||||
func (rt *Router) taskTplStatistics(c *gin.Context) {
|
||||
ginx.NewRender(c).Data(models.TaskTplStatistics(rt.Ctx))
|
||||
}
|
||||
|
||||
type taskTplForm struct {
|
||||
Title string `json:"title" binding:"required"`
|
||||
Batch int `json:"batch"`
|
||||
@@ -101,6 +119,11 @@ type taskTplForm struct {
|
||||
}
|
||||
|
||||
func (rt *Router) taskTplAdd(c *gin.Context) {
|
||||
if !rt.Ibex.Enable {
|
||||
ginx.Bomb(400, i18n.Sprintf(c.GetHeader("X-Language"), "This functionality has not been enabled. Please contact the system administrator to activate it."))
|
||||
return
|
||||
}
|
||||
|
||||
var f taskTplForm
|
||||
ginx.BindJSON(c, &f)
|
||||
|
||||
|
||||
@@ -78,7 +78,7 @@ func (rt *Router) QueryData(c *gin.Context) {
|
||||
var f models.QueryParam
|
||||
ginx.BindJSON(c, &f)
|
||||
|
||||
var resp []*models.DataResp
|
||||
var resp []models.DataResp
|
||||
var err error
|
||||
tdClient := rt.TdendgineClients.GetCli(f.DatasourceId)
|
||||
for _, q := range f.Querys {
|
||||
|
||||
@@ -5,10 +5,13 @@ import (
|
||||
"strings"
|
||||
|
||||
"github.com/ccfos/nightingale/v6/models"
|
||||
"github.com/ccfos/nightingale/v6/pkg/flashduty"
|
||||
"github.com/ccfos/nightingale/v6/pkg/ormx"
|
||||
"github.com/ccfos/nightingale/v6/pkg/secu"
|
||||
|
||||
"github.com/gin-gonic/gin"
|
||||
"github.com/toolkits/pkg/ginx"
|
||||
"github.com/toolkits/pkg/logger"
|
||||
)
|
||||
|
||||
func (rt *Router) userBusiGroupsGets(c *gin.Context) {
|
||||
@@ -39,13 +42,17 @@ func (rt *Router) userFindAll(c *gin.Context) {
|
||||
}
|
||||
|
||||
func (rt *Router) userGets(c *gin.Context) {
|
||||
stime, etime := getTimeRange(c)
|
||||
limit := ginx.QueryInt(c, "limit", 20)
|
||||
query := ginx.QueryStr(c, "query", "")
|
||||
order := ginx.QueryStr(c, "order", "username")
|
||||
desc := ginx.QueryBool(c, "desc", false)
|
||||
|
||||
total, err := models.UserTotal(rt.Ctx, query)
|
||||
rt.UserCache.UpdateUsersLastActiveTime()
|
||||
total, err := models.UserTotal(rt.Ctx, query, stime, etime)
|
||||
ginx.Dangerous(err)
|
||||
|
||||
list, err := models.UserGets(rt.Ctx, query, limit, ginx.Offset(c, limit))
|
||||
list, err := models.UserGets(rt.Ctx, query, limit, ginx.Offset(c, limit), stime, etime, order, desc)
|
||||
ginx.Dangerous(err)
|
||||
|
||||
user := c.MustGet("user").(*models.User)
|
||||
@@ -72,7 +79,18 @@ func (rt *Router) userAddPost(c *gin.Context) {
|
||||
var f userAddForm
|
||||
ginx.BindJSON(c, &f)
|
||||
|
||||
password, err := models.CryptoPass(rt.Ctx, f.Password)
|
||||
authPassWord := f.Password
|
||||
if rt.HTTP.RSA.OpenRSA {
|
||||
decPassWord, err := secu.Decrypt(f.Password, rt.HTTP.RSA.RSAPrivateKey, rt.HTTP.RSA.RSAPassWord)
|
||||
if err != nil {
|
||||
logger.Errorf("RSA Decrypt failed: %v username: %s", err, f.Username)
|
||||
ginx.NewRender(c).Message(err)
|
||||
return
|
||||
}
|
||||
authPassWord = decPassWord
|
||||
}
|
||||
|
||||
password, err := models.CryptoPass(rt.Ctx, authPassWord)
|
||||
ginx.Dangerous(err)
|
||||
|
||||
if len(f.Roles) == 0 {
|
||||
@@ -94,6 +112,7 @@ func (rt *Router) userAddPost(c *gin.Context) {
|
||||
UpdateBy: username,
|
||||
}
|
||||
|
||||
ginx.Dangerous(u.Verify())
|
||||
ginx.NewRender(c).Message(u.Add(rt.Ctx))
|
||||
}
|
||||
|
||||
@@ -143,6 +162,11 @@ func (rt *Router) userProfilePut(c *gin.Context) {
|
||||
}
|
||||
|
||||
target := User(rt.Ctx, ginx.UrlParamInt64(c, "id"))
|
||||
oldInfo := models.User{
|
||||
Username: target.Username,
|
||||
Phone: target.Phone,
|
||||
Email: target.Email,
|
||||
}
|
||||
target.Nickname = f.Nickname
|
||||
target.Phone = f.Phone
|
||||
target.Email = f.Email
|
||||
@@ -150,6 +174,10 @@ func (rt *Router) userProfilePut(c *gin.Context) {
|
||||
target.Contacts = f.Contacts
|
||||
target.UpdateBy = c.MustGet("username").(string)
|
||||
|
||||
if flashduty.NeedSyncUser(rt.Ctx) {
|
||||
flashduty.UpdateUser(rt.Ctx, oldInfo, f.Email, f.Phone)
|
||||
}
|
||||
|
||||
ginx.NewRender(c).Message(target.UpdateAllFields(rt.Ctx))
|
||||
}
|
||||
|
||||
@@ -163,7 +191,18 @@ func (rt *Router) userPasswordPut(c *gin.Context) {
|
||||
|
||||
target := User(rt.Ctx, ginx.UrlParamInt64(c, "id"))
|
||||
|
||||
cryptoPass, err := models.CryptoPass(rt.Ctx, f.Password)
|
||||
authPassWord := f.Password
|
||||
if rt.HTTP.RSA.OpenRSA {
|
||||
decPassWord, err := secu.Decrypt(f.Password, rt.HTTP.RSA.RSAPrivateKey, rt.HTTP.RSA.RSAPassWord)
|
||||
if err != nil {
|
||||
logger.Errorf("RSA Decrypt failed: %v username: %s", err, target.Username)
|
||||
ginx.NewRender(c).Message(err)
|
||||
return
|
||||
}
|
||||
authPassWord = decPassWord
|
||||
}
|
||||
|
||||
cryptoPass, err := models.CryptoPass(rt.Ctx, authPassWord)
|
||||
ginx.Dangerous(err)
|
||||
|
||||
ginx.NewRender(c).Message(target.UpdatePassword(rt.Ctx, cryptoPass, c.MustGet("username").(string)))
|
||||
|
||||
@@ -5,10 +5,12 @@ import (
|
||||
"time"
|
||||
|
||||
"github.com/ccfos/nightingale/v6/models"
|
||||
"github.com/ccfos/nightingale/v6/pkg/flashduty"
|
||||
|
||||
"github.com/gin-gonic/gin"
|
||||
"github.com/toolkits/pkg/ginx"
|
||||
"github.com/toolkits/pkg/logger"
|
||||
"github.com/toolkits/pkg/str"
|
||||
)
|
||||
|
||||
func (rt *Router) checkBusiGroupPerm(c *gin.Context) {
|
||||
@@ -30,8 +32,36 @@ func (rt *Router) userGroupGets(c *gin.Context) {
|
||||
}
|
||||
|
||||
func (rt *Router) userGroupGetsByService(c *gin.Context) {
|
||||
lst, err := models.UserGroupGetAll(rt.Ctx)
|
||||
ginx.NewRender(c).Data(lst, err)
|
||||
ids := str.IdsInt64(ginx.QueryStr(c, "ids", ""))
|
||||
|
||||
if len(ids) == 0 {
|
||||
lst, err := models.UserGroupGetAll(rt.Ctx)
|
||||
ginx.Dangerous(err)
|
||||
for i := 0; i < len(lst); i++ {
|
||||
ids, err := models.MemberIds(rt.Ctx, lst[i].Id)
|
||||
ginx.Dangerous(err)
|
||||
|
||||
lst[i].Users, err = models.UserGetsByIds(rt.Ctx, ids)
|
||||
ginx.Dangerous(err)
|
||||
}
|
||||
ginx.NewRender(c).Data(lst, err)
|
||||
return
|
||||
}
|
||||
|
||||
lst := make([]models.UserGroup, 0)
|
||||
for _, id := range ids {
|
||||
ug := UserGroup(rt.Ctx, id)
|
||||
|
||||
ids, err := models.MemberIds(rt.Ctx, ug.Id)
|
||||
ginx.Dangerous(err)
|
||||
|
||||
ug.Users, err = models.UserGetsByIds(rt.Ctx, ids)
|
||||
ginx.Dangerous(err)
|
||||
|
||||
lst = append(lst, *ug)
|
||||
}
|
||||
|
||||
ginx.NewRender(c).Data(lst, nil)
|
||||
}
|
||||
|
||||
// user group member get by service
|
||||
@@ -41,8 +71,9 @@ func (rt *Router) userGroupMemberGetsByService(c *gin.Context) {
|
||||
}
|
||||
|
||||
type userGroupForm struct {
|
||||
Name string `json:"name" binding:"required"`
|
||||
Note string `json:"note"`
|
||||
Name string `json:"name" binding:"required"`
|
||||
Note string `json:"note"`
|
||||
IsSyncToFlashDuty bool `json:"is_sync_to_flashduty"`
|
||||
}
|
||||
|
||||
func (rt *Router) userGroupAdd(c *gin.Context) {
|
||||
@@ -59,12 +90,19 @@ func (rt *Router) userGroupAdd(c *gin.Context) {
|
||||
}
|
||||
|
||||
err := ug.Add(rt.Ctx)
|
||||
if err == nil {
|
||||
// Even failure is not a big deal
|
||||
models.UserGroupMemberAdd(rt.Ctx, ug.Id, me.Id)
|
||||
}
|
||||
ginx.Dangerous(err)
|
||||
|
||||
// Even failure is not a big deal
|
||||
models.UserGroupMemberAdd(rt.Ctx, ug.Id, me.Id)
|
||||
|
||||
if f.IsSyncToFlashDuty || flashduty.NeedSyncTeam(rt.Ctx) {
|
||||
ugs, err := flashduty.NewUserGroupSyncer(rt.Ctx, &ug)
|
||||
ginx.Dangerous(err)
|
||||
err = ugs.SyncUGAdd()
|
||||
ginx.Dangerous(err)
|
||||
}
|
||||
ginx.NewRender(c).Data(ug.Id, err)
|
||||
|
||||
}
|
||||
|
||||
func (rt *Router) userGroupPut(c *gin.Context) {
|
||||
@@ -73,6 +111,7 @@ func (rt *Router) userGroupPut(c *gin.Context) {
|
||||
|
||||
me := c.MustGet("user").(*models.User)
|
||||
ug := c.MustGet("user_group").(*models.UserGroup)
|
||||
oldUGName := ug.Name
|
||||
|
||||
if ug.Name != f.Name {
|
||||
// name changed, check duplication
|
||||
@@ -88,8 +127,14 @@ func (rt *Router) userGroupPut(c *gin.Context) {
|
||||
ug.Note = f.Note
|
||||
ug.UpdateBy = me.Username
|
||||
ug.UpdateAt = time.Now().Unix()
|
||||
|
||||
if f.IsSyncToFlashDuty || flashduty.NeedSyncTeam(rt.Ctx) {
|
||||
ugs, err := flashduty.NewUserGroupSyncer(rt.Ctx, ug)
|
||||
ginx.Dangerous(err)
|
||||
err = ugs.SyncUGPut(oldUGName)
|
||||
ginx.Dangerous(err)
|
||||
}
|
||||
ginx.NewRender(c).Message(ug.Update(rt.Ctx, "Name", "Note", "UpdateAt", "UpdateBy"))
|
||||
|
||||
}
|
||||
|
||||
// Return all members, front-end search and paging
|
||||
@@ -109,8 +154,16 @@ func (rt *Router) userGroupGet(c *gin.Context) {
|
||||
}
|
||||
|
||||
func (rt *Router) userGroupDel(c *gin.Context) {
|
||||
isSyncToFlashDuty := ginx.QueryBool(c, "is_sync_to_flashduty", false)
|
||||
ug := c.MustGet("user_group").(*models.UserGroup)
|
||||
if isSyncToFlashDuty || flashduty.NeedSyncTeam(rt.Ctx) {
|
||||
ugs, err := flashduty.NewUserGroupSyncer(rt.Ctx, ug)
|
||||
ginx.Dangerous(err)
|
||||
err = ugs.SyncUGDel(ug.Name)
|
||||
ginx.Dangerous(err)
|
||||
}
|
||||
ginx.NewRender(c).Message(ug.Del(rt.Ctx))
|
||||
|
||||
}
|
||||
|
||||
func (rt *Router) userGroupMemberAdd(c *gin.Context) {
|
||||
@@ -122,13 +175,21 @@ func (rt *Router) userGroupMemberAdd(c *gin.Context) {
|
||||
ug := c.MustGet("user_group").(*models.UserGroup)
|
||||
|
||||
err := ug.AddMembers(rt.Ctx, f.Ids)
|
||||
ginx.Dangerous(err)
|
||||
if err == nil {
|
||||
ug.UpdateAt = time.Now().Unix()
|
||||
ug.UpdateBy = me.Username
|
||||
ug.Update(rt.Ctx, "UpdateAt", "UpdateBy")
|
||||
}
|
||||
|
||||
if f.IsSyncToFlashDuty || flashduty.NeedSyncTeam(rt.Ctx) {
|
||||
ugs, err := flashduty.NewUserGroupSyncer(rt.Ctx, ug)
|
||||
ginx.Dangerous(err)
|
||||
err = ugs.SyncMembersAdd()
|
||||
ginx.Dangerous(err)
|
||||
}
|
||||
ginx.NewRender(c).Message(err)
|
||||
|
||||
}
|
||||
|
||||
func (rt *Router) userGroupMemberDel(c *gin.Context) {
|
||||
@@ -145,6 +206,12 @@ func (rt *Router) userGroupMemberDel(c *gin.Context) {
|
||||
ug.UpdateBy = me.Username
|
||||
ug.Update(rt.Ctx, "UpdateAt", "UpdateBy")
|
||||
}
|
||||
if f.IsSyncToFlashDuty || flashduty.NeedSyncTeam(rt.Ctx) {
|
||||
ugs, err := flashduty.NewUserGroupSyncer(rt.Ctx, ug)
|
||||
ginx.Dangerous(err)
|
||||
err = ugs.SyncMembersDel()
|
||||
ginx.Dangerous(err)
|
||||
}
|
||||
|
||||
ginx.NewRender(c).Message(err)
|
||||
}
|
||||
|
||||
@@ -2,24 +2,30 @@ package sso
|
||||
|
||||
import (
|
||||
"log"
|
||||
"time"
|
||||
|
||||
"github.com/BurntSushi/toml"
|
||||
"github.com/ccfos/nightingale/v6/center/cconf"
|
||||
"github.com/ccfos/nightingale/v6/memsto"
|
||||
"github.com/ccfos/nightingale/v6/models"
|
||||
"github.com/ccfos/nightingale/v6/pkg/cas"
|
||||
"github.com/ccfos/nightingale/v6/pkg/ctx"
|
||||
"github.com/ccfos/nightingale/v6/pkg/ldapx"
|
||||
"github.com/ccfos/nightingale/v6/pkg/oauth2x"
|
||||
"github.com/ccfos/nightingale/v6/pkg/oidcx"
|
||||
"github.com/ccfos/nightingale/v6/pkg/tplx"
|
||||
|
||||
"github.com/BurntSushi/toml"
|
||||
"github.com/toolkits/pkg/logger"
|
||||
)
|
||||
|
||||
type SsoClient struct {
|
||||
OIDC *oidcx.SsoClient
|
||||
LDAP *ldapx.SsoClient
|
||||
CAS *cas.SsoClient
|
||||
OAuth2 *oauth2x.SsoClient
|
||||
OIDC *oidcx.SsoClient
|
||||
LDAP *ldapx.SsoClient
|
||||
CAS *cas.SsoClient
|
||||
OAuth2 *oauth2x.SsoClient
|
||||
LastUpdateTime int64
|
||||
configCache *memsto.ConfigCache
|
||||
configLastUpdateTime int64
|
||||
}
|
||||
|
||||
const LDAP = `
|
||||
@@ -29,15 +35,21 @@ Port = 389
|
||||
BaseDn = 'dc=example,dc=org'
|
||||
BindUser = 'cn=manager,dc=example,dc=org'
|
||||
BindPass = '*******'
|
||||
SyncAddUsers = false
|
||||
SyncDelUsers = false
|
||||
# unit: s
|
||||
SyncInterval = 86400
|
||||
# openldap format e.g. (&(uid=%s))
|
||||
# AD format e.g. (&(sAMAccountName=%s))
|
||||
AuthFilter = '(&(uid=%s))'
|
||||
UserFilter = '(&(uid=*))'
|
||||
CoverAttributes = true
|
||||
TLS = false
|
||||
StartTLS = true
|
||||
DefaultRoles = ['Standard']
|
||||
|
||||
[Attributes]
|
||||
Username = 'uid'
|
||||
Nickname = 'cn'
|
||||
Phone = 'mobile'
|
||||
Email = 'mail'
|
||||
@@ -46,8 +58,9 @@ Email = 'mail'
|
||||
const OAuth2 = `
|
||||
Enable = false
|
||||
DisplayName = 'OAuth2登录'
|
||||
RedirectURL = 'http://127.0.0.1:18000/callback/oauth'
|
||||
RedirectURL = 'http://n9e.com/callback/oauth'
|
||||
SsoAddr = 'https://sso.example.com/oauth2/authorize'
|
||||
SsoLogoutAddr = 'https://sso.example.com/oauth2/authorize/session/end'
|
||||
TokenAddr = 'https://sso.example.com/oauth2/token'
|
||||
UserInfoAddr = 'https://api.example.com/api/v1/user/info'
|
||||
TranTokenMethod = 'header'
|
||||
@@ -60,7 +73,7 @@ UserinfoPrefix = 'data'
|
||||
Scopes = ['profile', 'email', 'phone']
|
||||
|
||||
[Attributes]
|
||||
Username = 'username'
|
||||
Username = 'sub'
|
||||
Nickname = 'nickname'
|
||||
Phone = 'phone_number'
|
||||
Email = 'email'
|
||||
@@ -68,35 +81,41 @@ Email = 'email'
|
||||
|
||||
const CAS = `
|
||||
Enable = false
|
||||
SsoAddr = 'https://cas.example.com/cas/'
|
||||
# LoginPath = ''
|
||||
RedirectURL = 'http://127.0.0.1:18000/callback/cas'
|
||||
DisplayName = 'CAS登录'
|
||||
CoverAttributes = false
|
||||
RedirectURL = 'http://n9e.com/callback/cas'
|
||||
SsoAddr = 'https://cas.example.com/cas/'
|
||||
SsoLogoutAddr = 'https://cas.example.com/cas/session/end'
|
||||
# LoginPath = ''
|
||||
CoverAttributes = true
|
||||
DefaultRoles = ['Standard']
|
||||
|
||||
[Attributes]
|
||||
Username = 'sub'
|
||||
Nickname = 'nickname'
|
||||
Phone = 'phone_number'
|
||||
Email = 'email'
|
||||
`
|
||||
|
||||
const OIDC = `
|
||||
Enable = false
|
||||
DisplayName = 'OIDC登录'
|
||||
RedirectURL = 'http://n9e.com/callback'
|
||||
SsoAddr = 'http://sso.example.org'
|
||||
SsoLogoutAddr = 'http://sso.example.org/session/end'
|
||||
ClientId = ''
|
||||
ClientSecret = ''
|
||||
CoverAttributes = true
|
||||
DefaultRoles = ['Standard']
|
||||
Scopes = ['openid', 'profile', 'email', 'phone']
|
||||
|
||||
[Attributes]
|
||||
Username = 'sub'
|
||||
Nickname = 'nickname'
|
||||
Phone = 'phone_number'
|
||||
Email = 'email'
|
||||
`
|
||||
|
||||
func Init(center cconf.Center, ctx *ctx.Context) *SsoClient {
|
||||
func Init(center cconf.Center, ctx *ctx.Context, configCache *memsto.ConfigCache) *SsoClient {
|
||||
ssoClient := new(SsoClient)
|
||||
m := make(map[string]string)
|
||||
m["LDAP"] = LDAP
|
||||
@@ -125,6 +144,11 @@ func Init(center cconf.Center, ctx *ctx.Context) *SsoClient {
|
||||
log.Fatalln(err)
|
||||
}
|
||||
}
|
||||
if configCache == nil {
|
||||
logger.Error("configCache is nil, sso initialization failed")
|
||||
}
|
||||
ssoClient.configCache = configCache
|
||||
userVariableMap := configCache.Get()
|
||||
|
||||
configs, err := models.SsoConfigGets(ctx)
|
||||
if err != nil {
|
||||
@@ -132,6 +156,7 @@ func Init(center cconf.Center, ctx *ctx.Context) *SsoClient {
|
||||
}
|
||||
|
||||
for _, cfg := range configs {
|
||||
cfg.Content = tplx.ReplaceTemplateUseText(cfg.Name, cfg.Content, userVariableMap)
|
||||
switch cfg.Name {
|
||||
case "LDAP":
|
||||
var config ldapx.Config
|
||||
@@ -169,5 +194,84 @@ func Init(center cconf.Center, ctx *ctx.Context) *SsoClient {
|
||||
ssoClient.OAuth2 = oauth2x.New(config)
|
||||
}
|
||||
}
|
||||
|
||||
go ssoClient.SyncSsoUsers(ctx)
|
||||
go ssoClient.Reload(ctx)
|
||||
return ssoClient
|
||||
}
|
||||
|
||||
// 定期更新sso配置
|
||||
func (s *SsoClient) reload(ctx *ctx.Context) error {
|
||||
lastUpdateTime, err := models.SsoConfigLastUpdateTime(ctx)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
lastCacheUpdateTime := s.configCache.GetLastUpdateTime()
|
||||
if lastUpdateTime == s.LastUpdateTime && lastCacheUpdateTime == s.configLastUpdateTime {
|
||||
return nil
|
||||
}
|
||||
|
||||
configs, err := models.SsoConfigGets(ctx)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
userVariableMap := s.configCache.Get()
|
||||
for _, cfg := range configs {
|
||||
cfg.Content = tplx.ReplaceTemplateUseText(cfg.Name, cfg.Content, userVariableMap)
|
||||
switch cfg.Name {
|
||||
case "LDAP":
|
||||
var config ldapx.Config
|
||||
err := toml.Unmarshal([]byte(cfg.Content), &config)
|
||||
if err != nil {
|
||||
logger.Warning("reload ldap failed", err)
|
||||
continue
|
||||
}
|
||||
s.LDAP.Reload(config)
|
||||
case "OIDC":
|
||||
var config oidcx.Config
|
||||
err := toml.Unmarshal([]byte(cfg.Content), &config)
|
||||
if err != nil {
|
||||
logger.Warning("reload oidc failed:", err)
|
||||
continue
|
||||
}
|
||||
|
||||
logger.Info("reload oidc..")
|
||||
err = s.OIDC.Reload(config)
|
||||
if err != nil {
|
||||
logger.Error("reload oidc failed:", err)
|
||||
continue
|
||||
}
|
||||
case "CAS":
|
||||
var config cas.Config
|
||||
err := toml.Unmarshal([]byte(cfg.Content), &config)
|
||||
if err != nil {
|
||||
logger.Warning("reload cas failed:", err)
|
||||
continue
|
||||
}
|
||||
s.CAS.Reload(config)
|
||||
case "OAuth2":
|
||||
var config oauth2x.Config
|
||||
err := toml.Unmarshal([]byte(cfg.Content), &config)
|
||||
if err != nil {
|
||||
logger.Warning("reload oauth2 failed:", err)
|
||||
continue
|
||||
}
|
||||
s.OAuth2.Reload(config)
|
||||
}
|
||||
}
|
||||
|
||||
s.LastUpdateTime = lastUpdateTime
|
||||
s.configLastUpdateTime = lastCacheUpdateTime
|
||||
return nil
|
||||
}
|
||||
|
||||
func (s *SsoClient) Reload(ctx *ctx.Context) {
|
||||
duration := time.Duration(9000) * time.Millisecond
|
||||
for {
|
||||
time.Sleep(duration)
|
||||
if err := s.reload(ctx); err != nil {
|
||||
logger.Warning("reload sso client err:", err)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
37
center/sso/sync.go
Normal file
37
center/sso/sync.go
Normal file
@@ -0,0 +1,37 @@
|
||||
package sso
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
|
||||
"github.com/ccfos/nightingale/v6/pkg/ctx"
|
||||
"github.com/toolkits/pkg/logger"
|
||||
)
|
||||
|
||||
func (s *SsoClient) SyncSsoUsers(ctx *ctx.Context) {
|
||||
if err := s.LDAP.SyncAddAndDelUsers(ctx); err != nil {
|
||||
fmt.Println("failed to sync the addition and deletion of ldap users:", err)
|
||||
}
|
||||
|
||||
if err := s.LDAP.SyncDelUsers(ctx); err != nil {
|
||||
fmt.Println("failed to sync deletion of ldap users:", err)
|
||||
}
|
||||
|
||||
go s.loopSyncSsoUsers(ctx)
|
||||
}
|
||||
|
||||
func (s *SsoClient) loopSyncSsoUsers(ctx *ctx.Context) {
|
||||
for {
|
||||
select {
|
||||
case <-s.LDAP.Ticker.C:
|
||||
lc := s.LDAP.Copy()
|
||||
|
||||
if err := lc.SyncAddAndDelUsers(ctx); err != nil {
|
||||
logger.Warningf("failed to sync the addition and deletion of ldap users: %v", err)
|
||||
}
|
||||
|
||||
if err := lc.SyncDelUsers(ctx); err != nil {
|
||||
logger.Warningf("failed to sync deletion of ldap users: %v", err)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -8,6 +8,8 @@ import (
|
||||
"github.com/ccfos/nightingale/v6/alert"
|
||||
"github.com/ccfos/nightingale/v6/alert/astats"
|
||||
"github.com/ccfos/nightingale/v6/alert/process"
|
||||
alertrt "github.com/ccfos/nightingale/v6/alert/router"
|
||||
"github.com/ccfos/nightingale/v6/center/metas"
|
||||
"github.com/ccfos/nightingale/v6/conf"
|
||||
"github.com/ccfos/nightingale/v6/dumper"
|
||||
"github.com/ccfos/nightingale/v6/memsto"
|
||||
@@ -16,11 +18,12 @@ import (
|
||||
"github.com/ccfos/nightingale/v6/pkg/logx"
|
||||
"github.com/ccfos/nightingale/v6/prom"
|
||||
"github.com/ccfos/nightingale/v6/pushgw/idents"
|
||||
pushgwrt "github.com/ccfos/nightingale/v6/pushgw/router"
|
||||
"github.com/ccfos/nightingale/v6/pushgw/writer"
|
||||
"github.com/ccfos/nightingale/v6/storage"
|
||||
"github.com/ccfos/nightingale/v6/tdengine"
|
||||
|
||||
alertrt "github.com/ccfos/nightingale/v6/alert/router"
|
||||
pushgwrt "github.com/ccfos/nightingale/v6/pushgw/router"
|
||||
"github.com/flashcatcloud/ibex/src/cmd/ibex"
|
||||
)
|
||||
|
||||
func Initialize(configDir string, cryptoKey string) (func(), error) {
|
||||
@@ -39,13 +42,20 @@ func Initialize(configDir string, cryptoKey string) (func(), error) {
|
||||
}
|
||||
ctx := ctx.NewContext(context.Background(), nil, false, config.CenterApi)
|
||||
|
||||
var redis storage.Redis
|
||||
redis, err = storage.NewRedis(config.Redis)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
syncStats := memsto.NewSyncStats()
|
||||
|
||||
targetCache := memsto.NewTargetCache(ctx, syncStats, nil)
|
||||
targetCache := memsto.NewTargetCache(ctx, syncStats, redis)
|
||||
busiGroupCache := memsto.NewBusiGroupCache(ctx, syncStats)
|
||||
idents := idents.New(ctx)
|
||||
idents := idents.New(ctx, redis)
|
||||
metas := metas.New(redis)
|
||||
writers := writer.NewWriters(config.Pushgw)
|
||||
pushgwRouter := pushgwrt.New(config.HTTP, config.Pushgw, targetCache, busiGroupCache, idents, writers, ctx)
|
||||
pushgwRouter := pushgwrt.New(config.HTTP, config.Pushgw, config.Alert, targetCache, busiGroupCache, idents, metas, writers, ctx)
|
||||
r := httpx.GinEngine(config.Global.RunMode, config.HTTP)
|
||||
pushgwRouter.Config(r)
|
||||
|
||||
@@ -58,17 +68,22 @@ func Initialize(configDir string, cryptoKey string) (func(), error) {
|
||||
notifyConfigCache := memsto.NewNotifyConfigCache(ctx, configCache)
|
||||
userCache := memsto.NewUserCache(ctx, syncStats)
|
||||
userGroupCache := memsto.NewUserGroupCache(ctx, syncStats)
|
||||
taskTplsCache := memsto.NewTaskTplCache(ctx)
|
||||
|
||||
promClients := prom.NewPromClient(ctx)
|
||||
tdengineClients := tdengine.NewTdengineClient(ctx, config.Alert.Heartbeat)
|
||||
externalProcessors := process.NewExternalProcessors()
|
||||
|
||||
alert.Start(config.Alert, config.Pushgw, syncStats, alertStats, externalProcessors, targetCache, busiGroupCache, alertMuteCache,
|
||||
alertRuleCache, notifyConfigCache, dsCache, ctx, promClients, tdengineClients, userCache, userGroupCache)
|
||||
alertRuleCache, notifyConfigCache, taskTplsCache, dsCache, ctx, promClients, tdengineClients, userCache, userGroupCache)
|
||||
|
||||
alertrtRouter := alertrt.New(config.HTTP, config.Alert, alertMuteCache, targetCache, busiGroupCache, alertStats, ctx, externalProcessors)
|
||||
|
||||
alertrtRouter.Config(r)
|
||||
|
||||
if config.Ibex.Enable {
|
||||
ibex.ServerStart(false, nil, redis, config.HTTP.APIForService.BasicAuth, config.Alert.Heartbeat, &config.CenterApi, r, nil, config.Ibex, config.HTTP.Port)
|
||||
}
|
||||
}
|
||||
|
||||
dumper.ConfigRouter(r)
|
||||
|
||||
12
conf/conf.go
12
conf/conf.go
@@ -27,6 +27,7 @@ type ConfigType struct {
|
||||
Pushgw pconf.Pushgw
|
||||
Alert aconf.Alert
|
||||
Center cconf.Center
|
||||
Ibex Ibex
|
||||
}
|
||||
|
||||
type CenterApi struct {
|
||||
@@ -40,6 +41,17 @@ type GlobalConfig struct {
|
||||
RunMode string
|
||||
}
|
||||
|
||||
type Ibex struct {
|
||||
Enable bool
|
||||
RPCListen string
|
||||
Output Output
|
||||
}
|
||||
|
||||
type Output struct {
|
||||
ComeFrom string
|
||||
AgtdPort int
|
||||
}
|
||||
|
||||
func InitConfig(configDir, cryptoKey string) (*ConfigType, error) {
|
||||
var config = new(ConfigType)
|
||||
|
||||
|
||||
@@ -1,12 +0,0 @@
|
||||
FROM python:3-slim
|
||||
#FROM ubuntu:21.04
|
||||
|
||||
WORKDIR /app
|
||||
ADD n9e /app
|
||||
ADD http://download.flashcat.cloud/wait /wait
|
||||
RUN chmod +x /wait
|
||||
RUN chmod +x n9e
|
||||
|
||||
EXPOSE 17000
|
||||
|
||||
CMD ["/app/n9e", "-h"]
|
||||
@@ -3,7 +3,7 @@ FROM --platform=$TARGETPLATFORM python:3-slim
|
||||
|
||||
WORKDIR /app
|
||||
ADD n9e /app/
|
||||
ADD etc /app/
|
||||
ADD etc /app/etc/
|
||||
ADD integrations /app/integrations/
|
||||
RUN pip install requests
|
||||
|
||||
|
||||
@@ -3,7 +3,7 @@ FROM --platform=$TARGETPLATFORM python:3-slim
|
||||
|
||||
WORKDIR /app
|
||||
ADD n9e /app/
|
||||
ADD etc /app/
|
||||
ADD etc /app/etc/
|
||||
ADD integrations /app/integrations/
|
||||
|
||||
EXPOSE 17000
|
||||
|
||||
@@ -69,27 +69,6 @@ services:
|
||||
command:
|
||||
- "--loggerTimezone=Asia/Shanghai"
|
||||
|
||||
ibex:
|
||||
image: flashcatcloud/ibex:v1.2.0
|
||||
container_name: ibex
|
||||
hostname: ibex
|
||||
restart: always
|
||||
environment:
|
||||
GIN_MODE: release
|
||||
TZ: Asia/Shanghai
|
||||
WAIT_HOSTS: mysql:3306
|
||||
volumes:
|
||||
- ./etc-ibex:/app/etc
|
||||
networks:
|
||||
- nightingale
|
||||
ports:
|
||||
- "10090:10090"
|
||||
- "20090:20090"
|
||||
depends_on:
|
||||
- mysql
|
||||
command: >
|
||||
sh -c "/app/ibex server"
|
||||
|
||||
nightingale:
|
||||
image: flashcatcloud/nightingale:latest
|
||||
container_name: nightingale
|
||||
@@ -105,6 +84,7 @@ services:
|
||||
- nightingale
|
||||
ports:
|
||||
- "17000:17000"
|
||||
- "20090:20090"
|
||||
depends_on:
|
||||
- mysql
|
||||
- redis
|
||||
@@ -122,7 +102,7 @@ services:
|
||||
HOST_PROC: /hostfs/proc
|
||||
HOST_SYS: /hostfs/sys
|
||||
HOST_MOUNT_PREFIX: /hostfs
|
||||
WAIT_HOSTS: nightingale:17000, ibex:20090
|
||||
WAIT_HOSTS: nightingale:17000, nightingale:20090
|
||||
volumes:
|
||||
- ./etc-categraf:/etc/categraf/conf
|
||||
- /:/hostfs
|
||||
|
||||
@@ -78,6 +78,6 @@ enable = true
|
||||
## ibex flush interval
|
||||
interval = "1000ms"
|
||||
## n9e ibex server rpc address
|
||||
servers = ["ibex:20090"]
|
||||
servers = ["nightingale:20090"]
|
||||
## temp script dir
|
||||
meta_dir = "./meta"
|
||||
|
||||
@@ -1,97 +0,0 @@
|
||||
# debug, release
|
||||
RunMode = "release"
|
||||
|
||||
[Log]
|
||||
# log write dir
|
||||
Dir = "logs-server"
|
||||
# log level: DEBUG INFO WARNING ERROR
|
||||
Level = "DEBUG"
|
||||
# stdout, stderr, file
|
||||
Output = "stdout"
|
||||
# # rotate by time
|
||||
# KeepHours: 4
|
||||
# # rotate by size
|
||||
# RotateNum = 3
|
||||
# # unit: MB
|
||||
# RotateSize = 256
|
||||
|
||||
[HTTP]
|
||||
Enable = true
|
||||
# http listening address
|
||||
Host = "0.0.0.0"
|
||||
# http listening port
|
||||
Port = 10090
|
||||
# https cert file path
|
||||
CertFile = ""
|
||||
# https key file path
|
||||
KeyFile = ""
|
||||
# whether print access log
|
||||
PrintAccessLog = true
|
||||
# whether enable pprof
|
||||
PProf = false
|
||||
# http graceful shutdown timeout, unit: s
|
||||
ShutdownTimeout = 30
|
||||
# max content length: 64M
|
||||
MaxContentLength = 67108864
|
||||
# http server read timeout, unit: s
|
||||
ReadTimeout = 20
|
||||
# http server write timeout, unit: s
|
||||
WriteTimeout = 40
|
||||
# http server idle timeout, unit: s
|
||||
IdleTimeout = 120
|
||||
|
||||
[BasicAuth]
|
||||
# using when call apis
|
||||
ibex = "ibex"
|
||||
|
||||
[RPC]
|
||||
Listen = "0.0.0.0:20090"
|
||||
|
||||
[Heartbeat]
|
||||
# auto detect if blank
|
||||
IP = ""
|
||||
# unit: ms
|
||||
Interval = 1000
|
||||
|
||||
[Output]
|
||||
# database | remote
|
||||
ComeFrom = "database"
|
||||
AgtdPort = 2090
|
||||
|
||||
[Gorm]
|
||||
# enable debug mode or not
|
||||
Debug = false
|
||||
# mysql postgres
|
||||
DBType = "mysql"
|
||||
# unit: s
|
||||
MaxLifetime = 7200
|
||||
# max open connections
|
||||
MaxOpenConns = 150
|
||||
# max idle connections
|
||||
MaxIdleConns = 50
|
||||
# table prefix
|
||||
TablePrefix = ""
|
||||
|
||||
[MySQL]
|
||||
# mysql address host:port
|
||||
Address = "mysql:3306"
|
||||
# mysql username
|
||||
User = "root"
|
||||
# mysql password
|
||||
Password = "1234"
|
||||
# database name
|
||||
DBName = "ibex"
|
||||
# connection params
|
||||
Parameters = "charset=utf8mb4&parseTime=True&loc=Local&allowNativePasswords=true"
|
||||
|
||||
[Postgres]
|
||||
# pg address host:port
|
||||
Address = "postgres:5432"
|
||||
# pg user
|
||||
User = "root"
|
||||
# pg password
|
||||
Password = "1234"
|
||||
# database name
|
||||
DBName = "ibex"
|
||||
# ssl mode
|
||||
SSLMode = "disable"
|
||||
@@ -55,8 +55,6 @@ Enable = true
|
||||
user001 = "ccc26da7b9aba533cbb263a36c07dcc5"
|
||||
|
||||
[HTTP.JWTAuth]
|
||||
# signing key
|
||||
SigningKey = "5b94a0fd640fe2765af826acfe42d151"
|
||||
# unit: min
|
||||
AccessExpired = 1500
|
||||
# unit: min
|
||||
@@ -96,8 +94,6 @@ MaxLifetime = 7200
|
||||
MaxOpenConns = 150
|
||||
# max idle connections
|
||||
MaxIdleConns = 50
|
||||
# table prefix
|
||||
TablePrefix = ""
|
||||
# enable auto migrate or not
|
||||
# EnableAutoMigrate = false
|
||||
|
||||
@@ -140,7 +136,7 @@ AlertDetail = false
|
||||
LabelRewrite = true
|
||||
# # default busigroup key name
|
||||
# BusiGroupLabelKey = "busigroup"
|
||||
# ForceUseServerTS = false
|
||||
ForceUseServerTS = true
|
||||
|
||||
# [Pushgw.DebugSample]
|
||||
# ident = "xx"
|
||||
@@ -181,3 +177,7 @@ MaxIdleConnsPerHost = 100
|
||||
# Regex = "([^:]+)(?::\\d+)?"
|
||||
# Replacement = "$1:80"
|
||||
# TargetLabel = "__address__"
|
||||
|
||||
[Ibex]
|
||||
Enable = true
|
||||
RPCListen = "0.0.0.0:20090"
|
||||
143
docker/compose-host-network-metric-log/docker-compose.yaml
Normal file
143
docker/compose-host-network-metric-log/docker-compose.yaml
Normal file
@@ -0,0 +1,143 @@
|
||||
version: "3.7"
|
||||
|
||||
services:
|
||||
mysql:
|
||||
image: "mysql:8"
|
||||
container_name: mysql
|
||||
hostname: mysql
|
||||
restart: always
|
||||
environment:
|
||||
TZ: Asia/Shanghai
|
||||
MYSQL_ROOT_PASSWORD: 1234
|
||||
volumes:
|
||||
- ./mysqldata:/var/lib/mysql/
|
||||
- ../initsql:/docker-entrypoint-initdb.d/
|
||||
- ./etc-mysql/my.cnf:/etc/my.cnf
|
||||
network_mode: host
|
||||
|
||||
redis:
|
||||
image: "redis:6.2"
|
||||
container_name: redis
|
||||
hostname: redis
|
||||
restart: always
|
||||
environment:
|
||||
TZ: Asia/Shanghai
|
||||
network_mode: host
|
||||
|
||||
prometheus:
|
||||
image: prom/prometheus
|
||||
container_name: prometheus
|
||||
hostname: prometheus
|
||||
restart: always
|
||||
environment:
|
||||
TZ: Asia/Shanghai
|
||||
volumes:
|
||||
- ./etc-prometheus:/etc/prometheus
|
||||
network_mode: host
|
||||
command:
|
||||
- "--config.file=/etc/prometheus/prometheus.yml"
|
||||
- "--storage.tsdb.path=/prometheus"
|
||||
- "--web.console.libraries=/usr/share/prometheus/console_libraries"
|
||||
- "--web.console.templates=/usr/share/prometheus/consoles"
|
||||
- "--enable-feature=remote-write-receiver"
|
||||
- "--query.lookback-delta=2m"
|
||||
|
||||
n9e:
|
||||
image: flashcatcloud/nightingale:latest
|
||||
container_name: n9e
|
||||
hostname: n9e
|
||||
restart: always
|
||||
environment:
|
||||
GIN_MODE: release
|
||||
TZ: Asia/Shanghai
|
||||
WAIT_HOSTS: 127.0.0.1:3306, 127.0.0.1:6379
|
||||
volumes:
|
||||
- ./etc-nightingale:/app/etc
|
||||
- ./n9e-logs:/app/logs
|
||||
network_mode: host
|
||||
depends_on:
|
||||
- mysql
|
||||
- redis
|
||||
- prometheus
|
||||
command: >
|
||||
sh -c "/app/n9e"
|
||||
|
||||
categraf:
|
||||
image: "flashcatcloud/categraf:latest"
|
||||
container_name: "categraf"
|
||||
hostname: "categraf01"
|
||||
restart: always
|
||||
environment:
|
||||
TZ: Asia/Shanghai
|
||||
HOST_PROC: /hostfs/proc
|
||||
HOST_SYS: /hostfs/sys
|
||||
HOST_MOUNT_PREFIX: /hostfs
|
||||
WAIT_HOSTS: 127.0.0.1:17000, 127.0.0.1:20090, 127.0.0.1:9092
|
||||
volumes:
|
||||
- ./etc-categraf:/etc/categraf/conf
|
||||
- ./n9e-logs:/logs
|
||||
- /:/hostfs
|
||||
network_mode: host
|
||||
depends_on:
|
||||
- n9e
|
||||
- kafka
|
||||
|
||||
zookeeper:
|
||||
image: bitnami/zookeeper:3.9
|
||||
container_name: "zookeeper"
|
||||
restart: always
|
||||
environment:
|
||||
- TZ=Asia/Shanghai
|
||||
- ALLOW_ANONYMOUS_LOGIN=yes
|
||||
network_mode: host
|
||||
depends_on:
|
||||
- n9e
|
||||
|
||||
kafka:
|
||||
image: bitnami/kafka:3.4
|
||||
container_name: "kafka"
|
||||
restart: always
|
||||
environment:
|
||||
TZ: Asia/Shanghai
|
||||
KAFKA_ADVERTISED_LISTENERS: PLAINTEXT://127.0.0.1:9092
|
||||
KAFKA_LISTENERS: PLAINTEXT://0.0.0.0:9092
|
||||
KAFKA_ZOOKEEPER_CONNECT: 127.0.0.1:2181
|
||||
KAFKA_CFG_MESSAGE_MAX_BYTES: 2000000
|
||||
network_mode: host
|
||||
depends_on:
|
||||
- zookeeper
|
||||
|
||||
elasticsearch:
|
||||
image: docker.elastic.co/elasticsearch/elasticsearch:7.10.1
|
||||
container_name: "elasticsearch"
|
||||
restart: always
|
||||
environment:
|
||||
- TZ=Asia/Shanghai
|
||||
- discovery.type=single-node
|
||||
network_mode: host
|
||||
depends_on:
|
||||
- kafka
|
||||
|
||||
logstash:
|
||||
image: docker.elastic.co/logstash/logstash:8.11.3
|
||||
container_name: "logstash"
|
||||
restart: always
|
||||
environment:
|
||||
- TZ=Asia/Shanghai
|
||||
- LS_JAVA_OPTS=-Xmx256m -Xms256m
|
||||
volumes:
|
||||
- ./etc-logstash/logstash.yaml:/etc/logstash/conf.d/logstash.yaml
|
||||
entrypoint:
|
||||
- logstash
|
||||
- -f
|
||||
- /etc/logstash/conf.d/logstash.yaml
|
||||
network_mode: host
|
||||
depends_on:
|
||||
- elasticsearch
|
||||
- kafka
|
||||
logging:
|
||||
driver: "json-file"
|
||||
options:
|
||||
max-size: "200m"
|
||||
max-file: "3"
|
||||
|
||||
@@ -0,0 +1,83 @@
|
||||
[global]
|
||||
# whether print configs
|
||||
print_configs = false
|
||||
|
||||
# add label(agent_hostname) to series
|
||||
# "" -> auto detect hostname
|
||||
# "xx" -> use specified string xx
|
||||
# "$hostname" -> auto detect hostname
|
||||
# "$ip" -> auto detect ip
|
||||
# "$hostname-$ip" -> auto detect hostname and ip to replace the vars
|
||||
hostname = "$HOSTNAME"
|
||||
|
||||
# will not add label(agent_hostname) if true
|
||||
omit_hostname = false
|
||||
|
||||
# s | ms
|
||||
precision = "ms"
|
||||
|
||||
# global collect interval
|
||||
interval = 15
|
||||
|
||||
[global.labels]
|
||||
source="categraf"
|
||||
# region = "shanghai"
|
||||
# env = "localhost"
|
||||
|
||||
[writer_opt]
|
||||
# default: 2000
|
||||
batch = 2000
|
||||
# channel(as queue) size
|
||||
chan_size = 10000
|
||||
|
||||
[[writers]]
|
||||
url = "http://127.0.0.1:17000/prometheus/v1/write"
|
||||
|
||||
# Basic auth username
|
||||
basic_auth_user = ""
|
||||
|
||||
# Basic auth password
|
||||
basic_auth_pass = ""
|
||||
|
||||
# timeout settings, unit: ms
|
||||
timeout = 5000
|
||||
dial_timeout = 2500
|
||||
max_idle_conns_per_host = 100
|
||||
|
||||
[http]
|
||||
enable = false
|
||||
address = ":9100"
|
||||
print_access = false
|
||||
run_mode = "release"
|
||||
|
||||
[heartbeat]
|
||||
enable = true
|
||||
|
||||
# report os version cpu.util mem.util metadata
|
||||
url = "http://127.0.0.1:17000/v1/n9e/heartbeat"
|
||||
|
||||
# interval, unit: s
|
||||
interval = 10
|
||||
|
||||
# Basic auth username
|
||||
basic_auth_user = ""
|
||||
|
||||
# Basic auth password
|
||||
basic_auth_pass = ""
|
||||
|
||||
## Optional headers
|
||||
# headers = ["X-From", "categraf", "X-Xyz", "abc"]
|
||||
|
||||
# timeout settings, unit: ms
|
||||
timeout = 5000
|
||||
dial_timeout = 2500
|
||||
max_idle_conns_per_host = 100
|
||||
|
||||
[ibex]
|
||||
enable = true
|
||||
## ibex flush interval
|
||||
interval = "1000ms"
|
||||
## n9e ibex server rpc address
|
||||
servers = ["127.0.0.1:20090"]
|
||||
## temp script dir
|
||||
meta_dir = "./meta"
|
||||
@@ -0,0 +1,5 @@
|
||||
# # collect interval
|
||||
# interval = 15
|
||||
|
||||
# # whether collect per cpu
|
||||
# collect_per_cpu = false
|
||||
@@ -0,0 +1,11 @@
|
||||
# # collect interval
|
||||
# interval = 15
|
||||
|
||||
# # By default stats will be gathered for all mount points.
|
||||
# # Set mount_points will restrict the stats to only the specified mount points.
|
||||
# mount_points = ["/"]
|
||||
|
||||
# Ignore mount points by filesystem type.
|
||||
ignore_fs = ["tmpfs", "devtmpfs", "devfs", "iso9660", "overlay", "aufs", "squashfs"]
|
||||
|
||||
ignore_mount_points = ["/boot"]
|
||||
@@ -0,0 +1,6 @@
|
||||
# # collect interval
|
||||
# interval = 15
|
||||
|
||||
# # By default, categraf will gather stats for all devices including disk partitions.
|
||||
# # Setting devices will restrict the stats to the specified devices.
|
||||
# devices = ["sda", "sdb", "vd*"]
|
||||
@@ -0,0 +1,2 @@
|
||||
# # collect interval
|
||||
# interval = 15
|
||||
@@ -0,0 +1,5 @@
|
||||
# # collect interval
|
||||
# interval = 15
|
||||
|
||||
# # whether collect platform specified metrics
|
||||
collect_platform_fields = true
|
||||
@@ -0,0 +1,8 @@
|
||||
# # collect interval
|
||||
# interval = 15
|
||||
|
||||
# # whether collect protocol stats on Linux
|
||||
# collect_protocol_stats = false
|
||||
|
||||
# # setting interfaces will tell categraf to gather these explicit interfaces
|
||||
# interfaces = ["eth0"]
|
||||
@@ -0,0 +1,2 @@
|
||||
# # collect interval
|
||||
# interval = 15
|
||||
@@ -0,0 +1,5 @@
|
||||
# # collect interval
|
||||
# interval = 15
|
||||
|
||||
# # whether collect metric: system_n_users
|
||||
# collect_user_number = false
|
||||
@@ -0,0 +1,73 @@
|
||||
[logs]
|
||||
## just a placholder
|
||||
api_key = "ef4ahfbwzwwtlwfpbertgq1i6mq0ab1q"
|
||||
## enable log collect or not
|
||||
enable = true
|
||||
## the server receive logs, http/tcp/kafka, only kafka brokers can be multiple ip:ports with concatenation character ","
|
||||
send_to = "127.0.0.1:9092"
|
||||
## send logs with protocol: http/tcp/kafka
|
||||
send_type = "kafka"
|
||||
topic = "flashcatcloud"
|
||||
## send logs with compression or not
|
||||
use_compress = false
|
||||
## use ssl or not
|
||||
send_with_tls = false
|
||||
## send logs in batchs
|
||||
batch_wait = 5
|
||||
## save offset in this path
|
||||
run_path = "/opt/categraf/run"
|
||||
## max files can be open
|
||||
open_files_limit = 100
|
||||
## scan config file in 10 seconds
|
||||
scan_period = 10
|
||||
## read buffer of udp
|
||||
frame_size = 9000
|
||||
|
||||
## channal size, default 100
|
||||
## 读取日志缓冲区,行数
|
||||
chan_size = 1000
|
||||
## pipeline num , default 4
|
||||
## 有多少线程处理日志
|
||||
pipeline=4
|
||||
## configuration for kafka
|
||||
## 指定kafka版本
|
||||
kafka_version="2.8.1"
|
||||
# 默认0 表示串行,如果对日志顺序有要求,保持默认配置
|
||||
batch_max_concurrence = 0
|
||||
# 最大并发批次, 默认100
|
||||
batch_max_size=100
|
||||
# 每次最大发送的内容上限 默认1000000
|
||||
batch_max_contentsize=1000000
|
||||
# client timeout in seconds
|
||||
producer_timeout= 10
|
||||
|
||||
# 是否开启sasl模式
|
||||
sasl_enable = false
|
||||
sasl_user = "admin"
|
||||
sasl_password = "admin"
|
||||
# PLAIN
|
||||
sasl_mechanism= "PLAIN"
|
||||
# v1
|
||||
sasl_version=1
|
||||
# set true
|
||||
sasl_handshake = true
|
||||
# optional
|
||||
# sasl_auth_identity=""
|
||||
#
|
||||
##
|
||||
# v0.3.39以上版本新增,是否开启pod日志采集
|
||||
enable_collect_container=false
|
||||
|
||||
# 是否采集所有pod的stdout stderr
|
||||
collect_container_all = false
|
||||
## glog processing rules
|
||||
# [[logs.Processing_rules]]
|
||||
## single log configure
|
||||
[[logs.items]]
|
||||
## file/journald/tcp/udp
|
||||
type = "file"
|
||||
## type=file, path is required; type=journald/tcp/udp, port is required
|
||||
path = "/logs/*"
|
||||
source = "n9e"
|
||||
service = "n9e_service"
|
||||
|
||||
@@ -0,0 +1,22 @@
|
||||
input {
|
||||
kafka {
|
||||
bootstrap_servers => "127.0.0.1:9092"
|
||||
topics => ["flashcatcloud"]
|
||||
codec => json
|
||||
type => n9e
|
||||
}
|
||||
}
|
||||
|
||||
filter {
|
||||
grok {
|
||||
match => {"message" => "%{LOGLEVEL:status}"}
|
||||
overwrite => ["status"]
|
||||
}
|
||||
}
|
||||
|
||||
output {
|
||||
elasticsearch {
|
||||
hosts => ["127.0.0.1:9200"]
|
||||
index => "n9e-%{+YYYY.MM.DD}"
|
||||
}
|
||||
}
|
||||
5
docker/compose-host-network-metric-log/etc-mysql/my.cnf
Normal file
5
docker/compose-host-network-metric-log/etc-mysql/my.cnf
Normal file
@@ -0,0 +1,5 @@
|
||||
[mysqld]
|
||||
pid-file = /var/run/mysqld/mysqld.pid
|
||||
socket = /var/run/mysqld/mysqld.sock
|
||||
datadir = /var/lib/mysql
|
||||
bind-address = 127.0.0.1
|
||||
@@ -0,0 +1,184 @@
|
||||
[Global]
|
||||
RunMode = "release"
|
||||
|
||||
[Log]
|
||||
# log write dir
|
||||
Dir = "logs"
|
||||
# log level: DEBUG INFO WARNING ERROR
|
||||
Level = "DEBUG"
|
||||
# stdout, stderr, file
|
||||
Output = "file"
|
||||
# # rotate by time
|
||||
KeepHours = 4
|
||||
# # rotate by size
|
||||
# RotateNum = 3
|
||||
# # unit: MB
|
||||
# RotateSize = 256
|
||||
|
||||
[HTTP]
|
||||
# http listening address
|
||||
Host = "0.0.0.0"
|
||||
# http listening port
|
||||
Port = 17000
|
||||
# https cert file path
|
||||
CertFile = ""
|
||||
# https key file path
|
||||
KeyFile = ""
|
||||
# whether print access log
|
||||
PrintAccessLog = false
|
||||
# whether enable pprof
|
||||
PProf = false
|
||||
# expose prometheus /metrics?
|
||||
ExposeMetrics = true
|
||||
# http graceful shutdown timeout, unit: s
|
||||
ShutdownTimeout = 30
|
||||
# max content length: 64M
|
||||
MaxContentLength = 67108864
|
||||
# http server read timeout, unit: s
|
||||
ReadTimeout = 20
|
||||
# http server write timeout, unit: s
|
||||
WriteTimeout = 40
|
||||
# http server idle timeout, unit: s
|
||||
IdleTimeout = 120
|
||||
|
||||
[HTTP.ShowCaptcha]
|
||||
Enable = false
|
||||
|
||||
[HTTP.APIForAgent]
|
||||
Enable = true
|
||||
# [HTTP.APIForAgent.BasicAuth]
|
||||
# user001 = "ccc26da7b9aba533cbb263a36c07dcc5"
|
||||
|
||||
[HTTP.APIForService]
|
||||
Enable = true
|
||||
[HTTP.APIForService.BasicAuth]
|
||||
user001 = "ccc26da7b9aba533cbb263a36c07dcc5"
|
||||
|
||||
[HTTP.JWTAuth]
|
||||
# unit: min
|
||||
AccessExpired = 1500
|
||||
# unit: min
|
||||
RefreshExpired = 10080
|
||||
RedisKeyPrefix = "/jwt/"
|
||||
|
||||
[HTTP.ProxyAuth]
|
||||
# if proxy auth enabled, jwt auth is disabled
|
||||
Enable = false
|
||||
# username key in http proxy header
|
||||
HeaderUserNameKey = "X-User-Name"
|
||||
DefaultRoles = ["Standard"]
|
||||
|
||||
[HTTP.RSA]
|
||||
# open RSA
|
||||
OpenRSA = false
|
||||
# Before replacing the key file, make sure that there are no encrypted variables in the database "configs".
|
||||
# It is recommended to decrypt and remove all encrypted values from the database before replacing the key file.
|
||||
# This will prevent any potential issues with accessing or decrypting the variables using the new key file.
|
||||
# RSA public key (auto carete)
|
||||
RSAPublicKeyPath = "etc/rsa/public.pem"
|
||||
# RSA private key (auto carete)
|
||||
RSAPrivateKeyPath = "etc/rsa/private.pem"
|
||||
# RSA private key password
|
||||
RSAPassWord = "n9e@n9e!"
|
||||
|
||||
[DB]
|
||||
# postgres: host=%s port=%s user=%s dbname=%s password=%s sslmode=%s
|
||||
# postgres: DSN="host=127.0.0.1 port=5432 user=root dbname=n9e_v6 password=1234 sslmode=disable"
|
||||
DSN="root:1234@tcp(127.0.0.1:3306)/n9e_v6?charset=utf8mb4&parseTime=True&loc=Local&allowNativePasswords=true"
|
||||
# enable debug mode or not
|
||||
Debug = false
|
||||
# mysql postgres
|
||||
DBType = "mysql"
|
||||
# unit: s
|
||||
MaxLifetime = 7200
|
||||
# max open connections
|
||||
MaxOpenConns = 150
|
||||
# max idle connections
|
||||
MaxIdleConns = 50
|
||||
# enable auto migrate or not
|
||||
# EnableAutoMigrate = false
|
||||
|
||||
[Redis]
|
||||
# address, ip:port or ip1:port,ip2:port for cluster and sentinel(SentinelAddrs)
|
||||
Address = "127.0.0.1:6379"
|
||||
# Username = ""
|
||||
# Password = ""
|
||||
# DB = 0
|
||||
# UseTLS = false
|
||||
# TLSMinVersion = "1.2"
|
||||
# standalone cluster sentinel
|
||||
RedisType = "standalone"
|
||||
# Mastername for sentinel type
|
||||
# MasterName = "mymaster"
|
||||
# SentinelUsername = ""
|
||||
# SentinelPassword = ""
|
||||
|
||||
[Alert]
|
||||
[Alert.Heartbeat]
|
||||
# auto detect if blank
|
||||
IP = ""
|
||||
# unit ms
|
||||
Interval = 1000
|
||||
EngineName = "default"
|
||||
|
||||
# [Alert.Alerting]
|
||||
# NotifyConcurrency = 10
|
||||
|
||||
[Center]
|
||||
MetricsYamlFile = "./etc/metrics.yaml"
|
||||
I18NHeaderKey = "X-Language"
|
||||
|
||||
[Center.AnonymousAccess]
|
||||
PromQuerier = true
|
||||
AlertDetail = true
|
||||
|
||||
[Pushgw]
|
||||
# use target labels in database instead of in series
|
||||
LabelRewrite = true
|
||||
# # default busigroup key name
|
||||
# BusiGroupLabelKey = "busigroup"
|
||||
ForceUseServerTS = true
|
||||
|
||||
# [Pushgw.DebugSample]
|
||||
# ident = "xx"
|
||||
# __name__ = "xx"
|
||||
|
||||
# [Pushgw.WriterOpt]
|
||||
# QueueMaxSize = 1000000
|
||||
# QueuePopSize = 1000
|
||||
|
||||
[[Pushgw.Writers]]
|
||||
# Url = "http://127.0.0.1:8480/insert/0/prometheus/api/v1/write"
|
||||
Url = "http://127.0.0.1:9090/api/v1/write"
|
||||
# Basic auth username
|
||||
BasicAuthUser = ""
|
||||
# Basic auth password
|
||||
BasicAuthPass = ""
|
||||
# timeout settings, unit: ms
|
||||
Headers = ["X-From", "n9e"]
|
||||
Timeout = 10000
|
||||
DialTimeout = 3000
|
||||
TLSHandshakeTimeout = 30000
|
||||
ExpectContinueTimeout = 1000
|
||||
IdleConnTimeout = 90000
|
||||
# time duration, unit: ms
|
||||
KeepAlive = 30000
|
||||
MaxConnsPerHost = 0
|
||||
MaxIdleConns = 100
|
||||
MaxIdleConnsPerHost = 100
|
||||
## Optional TLS Config
|
||||
# UseTLS = false
|
||||
# TLSCA = "/etc/n9e/ca.pem"
|
||||
# TLSCert = "/etc/n9e/cert.pem"
|
||||
# TLSKey = "/etc/n9e/key.pem"
|
||||
# InsecureSkipVerify = false
|
||||
# [[Writers.WriteRelabels]]
|
||||
# Action = "replace"
|
||||
# SourceLabels = ["__address__"]
|
||||
# Regex = "([^:]+)(?::\\d+)?"
|
||||
# Replacement = "$1:80"
|
||||
# TargetLabel = "__address__"
|
||||
|
||||
[Ibex]
|
||||
Enable = true
|
||||
RPCListen = "0.0.0.0:20090"
|
||||
@@ -0,0 +1,774 @@
|
||||
zh:
|
||||
ip_conntrack_count: 连接跟踪表条目总数(单位:int, count)
|
||||
ip_conntrack_max: 连接跟踪表最大容量(单位:int, size)
|
||||
cpu_usage_idle: CPU空闲率(单位:%)
|
||||
cpu_usage_active: CPU使用率(单位:%)
|
||||
cpu_usage_system: CPU内核态时间占比(单位:%)
|
||||
cpu_usage_user: CPU用户态时间占比(单位:%)
|
||||
cpu_usage_nice: 低优先级用户态CPU时间占比,也就是进程nice值被调整为1-19之间的CPU时间。这里注意,nice可取值范围是-20到19,数值越大,优先级反而越低(单位:%)
|
||||
cpu_usage_iowait: CPU等待I/O的时间占比(单位:%)
|
||||
cpu_usage_irq: CPU处理硬中断的时间占比(单位:%)
|
||||
cpu_usage_softirq: CPU处理软中断的时间占比(单位:%)
|
||||
cpu_usage_steal: 在虚拟机环境下有该指标,表示CPU被其他虚拟机争用的时间占比,超过20就表示争抢严重(单位:%)
|
||||
cpu_usage_guest: 通过虚拟化运行其他操作系统的时间,也就是运行虚拟机的CPU时间占比(单位:%)
|
||||
cpu_usage_guest_nice: 以低优先级运行虚拟机的时间占比(单位:%)
|
||||
|
||||
disk_free: 硬盘分区剩余量(单位:byte)
|
||||
disk_used: 硬盘分区使用量(单位:byte)
|
||||
disk_used_percent: 硬盘分区使用率(单位:%)
|
||||
disk_total: 硬盘分区总量(单位:byte)
|
||||
disk_inodes_free: 硬盘分区inode剩余量
|
||||
disk_inodes_used: 硬盘分区inode使用量
|
||||
disk_inodes_total: 硬盘分区inode总量
|
||||
|
||||
diskio_io_time: 从设备视角来看I/O请求总时间,队列中有I/O请求就计数(单位:毫秒),counter类型,需要用函数求rate才有使用价值
|
||||
diskio_iops_in_progress: 已经分配给设备驱动且尚未完成的IO请求,不包含在队列中但尚未分配给设备驱动的IO请求,gauge类型
|
||||
diskio_merged_reads: 相邻读请求merge读的次数,counter类型
|
||||
diskio_merged_writes: 相邻写请求merge写的次数,counter类型
|
||||
diskio_read_bytes: 读取的byte数量,counter类型,需要用函数求rate才有使用价值
|
||||
diskio_read_time: 读请求总时间(单位:毫秒),counter类型,需要用函数求rate才有使用价值
|
||||
diskio_reads: 读请求次数,counter类型,需要用函数求rate才有使用价值
|
||||
diskio_weighted_io_time: 从I/O请求视角来看I/O等待总时间,如果同时有多个I/O请求,时间会叠加(单位:毫秒)
|
||||
diskio_write_bytes: 写入的byte数量,counter类型,需要用函数求rate才有使用价值
|
||||
diskio_write_time: 写请求总时间(单位:毫秒),counter类型,需要用函数求rate才有使用价值
|
||||
diskio_writes: 写请求次数,counter类型,需要用函数求rate才有使用价值
|
||||
|
||||
kernel_boot_time: 内核启动时间
|
||||
kernel_context_switches: 内核上下文切换次数
|
||||
kernel_entropy_avail: linux系统内部的熵池
|
||||
kernel_interrupts: 内核中断次数
|
||||
kernel_processes_forked: fork的进程数
|
||||
|
||||
mem_active: 活跃使用的内存总数(包括cache和buffer内存)
|
||||
mem_available: 可用内存大小(bytes)
|
||||
mem_available_percent: 内存剩余百分比(0~100)
|
||||
mem_buffered: 用来给文件做缓冲大小
|
||||
mem_cached: 被高速缓冲存储器(cache memory)用的内存的大小(等于 diskcache minus SwapCache )
|
||||
mem_commit_limit: 根据超额分配比率('vm.overcommit_ratio'),这是当前在系统上分配可用的内存总量,这个限制只是在模式2('vm.overcommit_memory')时启用
|
||||
mem_committed_as: 目前在系统上分配的内存量。是所有进程申请的内存的总和
|
||||
mem_dirty: 等待被写回到磁盘的内存大小
|
||||
mem_free: 空闲内存大小(bytes)
|
||||
mem_high_free: 未被使用的高位内存大小
|
||||
mem_high_total: 高位内存总大小(Highmem是指所有内存高于860MB的物理内存,Highmem区域供用户程序使用,或用于页面缓存。该区域不是直接映射到内核空间。内核必须使用不同的手法使用该段内存)
|
||||
mem_huge_page_size: 每个大页的大小
|
||||
mem_huge_pages_free: 池中尚未分配的 HugePages 数量
|
||||
mem_huge_pages_total: 预留HugePages的总个数
|
||||
mem_inactive: 空闲的内存数(包括free和avalible的内存)
|
||||
mem_low_free: 未被使用的低位大小
|
||||
mem_low_total: 低位内存总大小,低位可以达到高位内存一样的作用,而且它还能够被内核用来记录一些自己的数据结构
|
||||
mem_mapped: 设备和文件等映射的大小
|
||||
mem_page_tables: 管理内存分页页面的索引表的大小
|
||||
mem_shared: 多个进程共享的内存总额
|
||||
mem_slab: 内核数据结构缓存的大小,可以减少申请和释放内存带来的消耗
|
||||
mem_sreclaimable: 可收回Slab的大小
|
||||
mem_sunreclaim: 不可收回Slab的大小(SUnreclaim+SReclaimable=Slab)
|
||||
mem_swap_cached: 被高速缓冲存储器(cache memory)用的交换空间的大小,已经被交换出来的内存,但仍然被存放在swapfile中。用来在需要的时候很快的被替换而不需要再次打开I/O端口
|
||||
mem_swap_free: 未被使用交换空间的大小
|
||||
mem_swap_total: 交换空间的总大小
|
||||
mem_total: 内存总数
|
||||
mem_used: 已用内存数
|
||||
mem_used_percent: 已用内存数百分比(0~100)
|
||||
mem_vmalloc_chunk: 最大的连续未被使用的vmalloc区域
|
||||
mem_vmalloc_totalL: 可以vmalloc虚拟内存大小
|
||||
mem_vmalloc_used: vmalloc已使用的虚拟内存大小
|
||||
mem_write_back: 正在被写回到磁盘的内存大小
|
||||
mem_write_back_tmp: FUSE用于临时写回缓冲区的内存
|
||||
|
||||
net_bytes_recv: 网卡收包总数(bytes),计算每秒速率时需要用到rate/irate函数
|
||||
net_bytes_sent: 网卡发包总数(bytes),计算每秒速率时需要用到rate/irate函数
|
||||
net_drop_in: 网卡收丢包数量
|
||||
net_drop_out: 网卡发丢包数量
|
||||
net_err_in: 网卡收包错误数量
|
||||
net_err_out: 网卡发包错误数量
|
||||
net_packets_recv: 网卡收包数量
|
||||
net_packets_sent: 网卡发包数量
|
||||
net_bits_recv: 网卡收包总数(bits),计算每秒速率时需要用到rate/irate函数
|
||||
net_bits_sent: 网卡发包总数(bits),计算每秒速率时需要用到rate/irate函数
|
||||
|
||||
netstat_tcp_established: ESTABLISHED状态的网络链接数
|
||||
netstat_tcp_fin_wait1: FIN_WAIT1状态的网络链接数
|
||||
netstat_tcp_fin_wait2: FIN_WAIT2状态的网络链接数
|
||||
netstat_tcp_last_ack: LAST_ACK状态的网络链接数
|
||||
netstat_tcp_listen: LISTEN状态的网络链接数
|
||||
netstat_tcp_syn_recv: SYN_RECV状态的网络链接数
|
||||
netstat_tcp_syn_sent: SYN_SENT状态的网络链接数
|
||||
netstat_tcp_time_wait: TIME_WAIT状态的网络链接数
|
||||
netstat_udp_socket: UDP状态的网络链接数
|
||||
|
||||
netstat_sockets_used: 已使用的所有协议套接字总量
|
||||
netstat_tcp_inuse: 正在使用(正在侦听)的TCP套接字数量
|
||||
netstat_tcp_orphan: 无主(不属于任何进程)的TCP连接数(无用、待销毁的TCP socket数)
|
||||
netstat_tcp_tw: TIME_WAIT状态的TCP连接数
|
||||
netstat_tcp_alloc: 已分配(已建立、已申请到sk_buff)的TCP套接字数量
|
||||
netstat_tcp_mem: TCP套接字内存Page使用量
|
||||
netstat_udp_inuse: 在使用的UDP套接字数量
|
||||
netstat_udp_mem: UDP套接字内存Page使用量
|
||||
netstat_udplite_inuse: 正在使用的 udp lite 数量
|
||||
netstat_raw_inuse: 正在使用的 raw socket 数量
|
||||
netstat_frag_inuse: ip fragement 数量
|
||||
netstat_frag_memory: ip fragement 已经分配的内存(byte)
|
||||
|
||||
#[ping]
|
||||
ping_percent_packet_loss: ping数据包丢失百分比(%)
|
||||
ping_result_code: ping返回码('0','1')
|
||||
|
||||
net_response_result_code: 网络探测结果,0表示正常,非0表示异常
|
||||
net_response_response_time: 网络探测时延,单位:秒
|
||||
|
||||
processes_blocked: 不可中断的睡眠状态下的进程数('U','D','L')
|
||||
processes_dead: 回收中的进程数('X')
|
||||
processes_idle: 挂起的空闲进程数('I')
|
||||
processes_paging: 分页进程数('P')
|
||||
processes_running: 运行中的进程数('R')
|
||||
processes_sleeping: 可中断进程数('S')
|
||||
processes_stopped: 暂停状态进程数('T')
|
||||
processes_total: 总进程数
|
||||
processes_total_threads: 总线程数
|
||||
processes_unknown: 未知状态进程数
|
||||
processes_zombies: 僵尸态进程数('Z')
|
||||
|
||||
swap_used_percent: Swap空间换出数据量
|
||||
|
||||
system_load1: 1分钟平均load值
|
||||
system_load5: 5分钟平均load值
|
||||
system_load15: 15分钟平均load值
|
||||
system_load_norm_1: 1分钟平均load值/逻辑CPU个数
|
||||
system_load_norm_5: 5分钟平均load值/逻辑CPU个数
|
||||
system_load_norm_15: 15分钟平均load值/逻辑CPU个数
|
||||
system_n_users: 用户数
|
||||
system_n_cpus: CPU核数
|
||||
system_uptime: 系统启动时间
|
||||
|
||||
nginx_accepts: 自nginx启动起,与客户端建立过得连接总数
|
||||
nginx_active: 当前nginx正在处理的活动连接数,等于Reading/Writing/Waiting总和
|
||||
nginx_handled: 自nginx启动起,处理过的客户端连接总数
|
||||
nginx_reading: 正在读取HTTP请求头部的连接总数
|
||||
nginx_requests: 自nginx启动起,处理过的客户端请求总数,由于存在HTTP Krrp-Alive请求,该值会大于handled值
|
||||
nginx_upstream_check_fall: upstream_check模块检测到后端失败的次数
|
||||
nginx_upstream_check_rise: upstream_check模块对后端的检测次数
|
||||
nginx_upstream_check_status_code: 后端upstream的状态,up为1,down为0
|
||||
nginx_waiting: 开启 keep-alive 的情况下,这个值等于 active – (reading+writing), 意思就是 Nginx 已经处理完正在等候下一次请求指令的驻留连接
|
||||
nginx_writing: 正在向客户端发送响应的连接总数
|
||||
|
||||
http_response_content_length: HTTP消息实体的传输长度
|
||||
http_response_http_response_code: http响应状态码
|
||||
http_response_response_time: http响应用时
|
||||
http_response_result_code: url探测结果0为正常否则url无法访问
|
||||
|
||||
# [aws cloudwatch rds]
|
||||
cloudwatch_aws_rds_bin_log_disk_usage_average: rds 磁盘使用平均值
|
||||
cloudwatch_aws_rds_bin_log_disk_usage_maximum: rds 磁盘使用量最大值
|
||||
cloudwatch_aws_rds_bin_log_disk_usage_minimum: rds binlog 磁盘使用量最低
|
||||
cloudwatch_aws_rds_bin_log_disk_usage_sample_count: rds binlog 磁盘使用情况样本计数
|
||||
cloudwatch_aws_rds_bin_log_disk_usage_sum: rds binlog 磁盘使用总和
|
||||
cloudwatch_aws_rds_burst_balance_average: rds 突发余额平均值
|
||||
cloudwatch_aws_rds_burst_balance_maximum: rds 突发余额最大值
|
||||
cloudwatch_aws_rds_burst_balance_minimum: rds 突发余额最低
|
||||
cloudwatch_aws_rds_burst_balance_sample_count: rds 突发平衡样本计数
|
||||
cloudwatch_aws_rds_burst_balance_sum: rds 突发余额总和
|
||||
cloudwatch_aws_rds_cpu_utilization_average: rds cpu 利用率平均值
|
||||
cloudwatch_aws_rds_cpu_utilization_maximum: rds cpu 利用率最大值
|
||||
cloudwatch_aws_rds_cpu_utilization_minimum: rds cpu 利用率最低
|
||||
cloudwatch_aws_rds_cpu_utilization_sample_count: rds cpu 利用率样本计数
|
||||
cloudwatch_aws_rds_cpu_utilization_sum: rds cpu 利用率总和
|
||||
cloudwatch_aws_rds_database_connections_average: rds 数据库连接平均值
|
||||
cloudwatch_aws_rds_database_connections_maximum: rds 数据库连接数最大值
|
||||
cloudwatch_aws_rds_database_connections_minimum: rds 数据库连接最小
|
||||
cloudwatch_aws_rds_database_connections_sample_count: rds 数据库连接样本数
|
||||
cloudwatch_aws_rds_database_connections_sum: rds 数据库连接总和
|
||||
cloudwatch_aws_rds_db_load_average: rds db 平均负载
|
||||
cloudwatch_aws_rds_db_load_cpu_average: rds db 负载 cpu 平均值
|
||||
cloudwatch_aws_rds_db_load_cpu_maximum: rds db 负载 cpu 最大值
|
||||
cloudwatch_aws_rds_db_load_cpu_minimum: rds db 负载 cpu 最小值
|
||||
cloudwatch_aws_rds_db_load_cpu_sample_count: rds db 加载 CPU 样本数
|
||||
cloudwatch_aws_rds_db_load_cpu_sum: rds db 加载cpu总和
|
||||
cloudwatch_aws_rds_db_load_maximum: rds 数据库负载最大值
|
||||
cloudwatch_aws_rds_db_load_minimum: rds 数据库负载最小值
|
||||
cloudwatch_aws_rds_db_load_non_cpu_average: rds 加载非 CPU 平均值
|
||||
cloudwatch_aws_rds_db_load_non_cpu_maximum: rds 加载非 cpu 最大值
|
||||
cloudwatch_aws_rds_db_load_non_cpu_minimum: rds 加载非 cpu 最小值
|
||||
cloudwatch_aws_rds_db_load_non_cpu_sample_count: rds 加载非 cpu 样本计数
|
||||
cloudwatch_aws_rds_db_load_non_cpu_sum: rds 加载非cpu总和
|
||||
cloudwatch_aws_rds_db_load_sample_count: rds db 加载样本计数
|
||||
cloudwatch_aws_rds_db_load_sum: rds db 负载总和
|
||||
cloudwatch_aws_rds_disk_queue_depth_average: rds 磁盘队列深度平均值
|
||||
cloudwatch_aws_rds_disk_queue_depth_maximum: rds 磁盘队列深度最大值
|
||||
cloudwatch_aws_rds_disk_queue_depth_minimum: rds 磁盘队列深度最小值
|
||||
cloudwatch_aws_rds_disk_queue_depth_sample_count: rds 磁盘队列深度样本计数
|
||||
cloudwatch_aws_rds_disk_queue_depth_sum: rds 磁盘队列深度总和
|
||||
cloudwatch_aws_rds_ebs_byte_balance__average: rds ebs 字节余额平均值
|
||||
cloudwatch_aws_rds_ebs_byte_balance__maximum: rds ebs 字节余额最大值
|
||||
cloudwatch_aws_rds_ebs_byte_balance__minimum: rds ebs 字节余额最低
|
||||
cloudwatch_aws_rds_ebs_byte_balance__sample_count: rds ebs 字节余额样本数
|
||||
cloudwatch_aws_rds_ebs_byte_balance__sum: rds ebs 字节余额总和
|
||||
cloudwatch_aws_rds_ebsio_balance__average: rds ebsio 余额平均值
|
||||
cloudwatch_aws_rds_ebsio_balance__maximum: rds ebsio 余额最大值
|
||||
cloudwatch_aws_rds_ebsio_balance__minimum: rds ebsio 余额最低
|
||||
cloudwatch_aws_rds_ebsio_balance__sample_count: rds ebsio 平衡样本计数
|
||||
cloudwatch_aws_rds_ebsio_balance__sum: rds ebsio 余额总和
|
||||
cloudwatch_aws_rds_free_storage_space_average: rds 免费存储空间平均
|
||||
cloudwatch_aws_rds_free_storage_space_maximum: rds 最大可用存储空间
|
||||
cloudwatch_aws_rds_free_storage_space_minimum: rds 最低可用存储空间
|
||||
cloudwatch_aws_rds_free_storage_space_sample_count: rds 可用存储空间样本数
|
||||
cloudwatch_aws_rds_free_storage_space_sum: rds 免费存储空间总和
|
||||
cloudwatch_aws_rds_freeable_memory_average: rds 可用内存平均值
|
||||
cloudwatch_aws_rds_freeable_memory_maximum: rds 最大可用内存
|
||||
cloudwatch_aws_rds_freeable_memory_minimum: rds 最小可用内存
|
||||
cloudwatch_aws_rds_freeable_memory_sample_count: rds 可释放内存样本数
|
||||
cloudwatch_aws_rds_freeable_memory_sum: rds 可释放内存总和
|
||||
cloudwatch_aws_rds_lvm_read_iops_average: rds lvm 读取 iops 平均值
|
||||
cloudwatch_aws_rds_lvm_read_iops_maximum: rds lvm 读取 iops 最大值
|
||||
cloudwatch_aws_rds_lvm_read_iops_minimum: rds lvm 读取 iops 最低
|
||||
cloudwatch_aws_rds_lvm_read_iops_sample_count: rds lvm 读取 iops 样本计数
|
||||
cloudwatch_aws_rds_lvm_read_iops_sum: rds lvm 读取 iops 总和
|
||||
cloudwatch_aws_rds_lvm_write_iops_average: rds lvm 写入 iops 平均值
|
||||
cloudwatch_aws_rds_lvm_write_iops_maximum: rds lvm 写入 iops 最大值
|
||||
cloudwatch_aws_rds_lvm_write_iops_minimum: rds lvm 写入 iops 最低
|
||||
cloudwatch_aws_rds_lvm_write_iops_sample_count: rds lvm 写入 iops 样本计数
|
||||
cloudwatch_aws_rds_lvm_write_iops_sum: rds lvm 写入 iops 总和
|
||||
cloudwatch_aws_rds_network_receive_throughput_average: rds 网络接收吞吐量平均
|
||||
cloudwatch_aws_rds_network_receive_throughput_maximum: rds 网络接收吞吐量最大值
|
||||
cloudwatch_aws_rds_network_receive_throughput_minimum: rds 网络接收吞吐量最小值
|
||||
cloudwatch_aws_rds_network_receive_throughput_sample_count: rds 网络接收吞吐量样本计数
|
||||
cloudwatch_aws_rds_network_receive_throughput_sum: rds 网络接收吞吐量总和
|
||||
cloudwatch_aws_rds_network_transmit_throughput_average: rds 网络传输吞吐量平均值
|
||||
cloudwatch_aws_rds_network_transmit_throughput_maximum: rds 网络传输吞吐量最大
|
||||
cloudwatch_aws_rds_network_transmit_throughput_minimum: rds 网络传输吞吐量最小值
|
||||
cloudwatch_aws_rds_network_transmit_throughput_sample_count: rds 网络传输吞吐量样本计数
|
||||
cloudwatch_aws_rds_network_transmit_throughput_sum: rds 网络传输吞吐量总和
|
||||
cloudwatch_aws_rds_read_iops_average: rds 读取 iops 平均值
|
||||
cloudwatch_aws_rds_read_iops_maximum: rds 最大读取 iops
|
||||
cloudwatch_aws_rds_read_iops_minimum: rds 读取 iops 最低
|
||||
cloudwatch_aws_rds_read_iops_sample_count: rds 读取 iops 样本计数
|
||||
cloudwatch_aws_rds_read_iops_sum: rds 读取 iops 总和
|
||||
cloudwatch_aws_rds_read_latency_average: rds 读取延迟平均值
|
||||
cloudwatch_aws_rds_read_latency_maximum: rds 读取延迟最大值
|
||||
cloudwatch_aws_rds_read_latency_minimum: rds 最小读取延迟
|
||||
cloudwatch_aws_rds_read_latency_sample_count: rds 读取延迟样本计数
|
||||
cloudwatch_aws_rds_read_latency_sum: rds 读取延迟总和
|
||||
cloudwatch_aws_rds_read_throughput_average: rds 读取吞吐量平均值
|
||||
cloudwatch_aws_rds_read_throughput_maximum: rds 最大读取吞吐量
|
||||
cloudwatch_aws_rds_read_throughput_minimum: rds 最小读取吞吐量
|
||||
cloudwatch_aws_rds_read_throughput_sample_count: rds 读取吞吐量样本计数
|
||||
cloudwatch_aws_rds_read_throughput_sum: rds 读取吞吐量总和
|
||||
cloudwatch_aws_rds_swap_usage_average: rds 交换使用平均值
|
||||
cloudwatch_aws_rds_swap_usage_maximum: rds 交换使用最大值
|
||||
cloudwatch_aws_rds_swap_usage_minimum: rds 交换使用量最低
|
||||
cloudwatch_aws_rds_swap_usage_sample_count: rds 交换使用示例计数
|
||||
cloudwatch_aws_rds_swap_usage_sum: rds 交换使用总和
|
||||
cloudwatch_aws_rds_write_iops_average: rds 写入 iops 平均值
|
||||
cloudwatch_aws_rds_write_iops_maximum: rds 写入 iops 最大值
|
||||
cloudwatch_aws_rds_write_iops_minimum: rds 写入 iops 最低
|
||||
cloudwatch_aws_rds_write_iops_sample_count: rds 写入 iops 样本计数
|
||||
cloudwatch_aws_rds_write_iops_sum: rds 写入 iops 总和
|
||||
cloudwatch_aws_rds_write_latency_average: rds 写入延迟平均值
|
||||
cloudwatch_aws_rds_write_latency_maximum: rds 最大写入延迟
|
||||
cloudwatch_aws_rds_write_latency_minimum: rds 写入延迟最小值
|
||||
cloudwatch_aws_rds_write_latency_sample_count: rds 写入延迟样本计数
|
||||
cloudwatch_aws_rds_write_latency_sum: rds 写入延迟总和
|
||||
cloudwatch_aws_rds_write_throughput_average: rds 写入吞吐量平均值
|
||||
cloudwatch_aws_rds_write_throughput_maximum: rds 最大写入吞吐量
|
||||
cloudwatch_aws_rds_write_throughput_minimum: rds 写入吞吐量最小值
|
||||
cloudwatch_aws_rds_write_throughput_sample_count: rds 写入吞吐量样本计数
|
||||
cloudwatch_aws_rds_write_throughput_sum: rds 写入吞吐量总和
|
||||
|
||||
en:
|
||||
ip_conntrack_count: the number of entries in the conntrack table(unit:int, count)
|
||||
ip_conntrack_max: the max capacity of the conntrack table(unit:int, size)
|
||||
cpu_usage_idle: "CPU idle rate(unit:%)"
|
||||
cpu_usage_active: "CPU usage rate(unit:%)"
|
||||
cpu_usage_system: "CPU kernel state time proportion(unit:%)"
|
||||
cpu_usage_user: "CPU user attitude time proportion(unit:%)"
|
||||
cpu_usage_nice: "The proportion of low priority CPU time, that is, the process NICE value is adjusted to the CPU time between 1-19. Note here that the value range of NICE is -20 to 19, the larger the value, the lower the priority, the lower the priority(unit:%)"
|
||||
cpu_usage_iowait: "CPU waiting for I/O time proportion(unit:%)"
|
||||
cpu_usage_irq: "CPU processing hard interrupt time proportion(unit:%)"
|
||||
cpu_usage_softirq: "CPU processing soft interrupt time proportion(unit:%)"
|
||||
cpu_usage_steal: "In the virtual machine environment, there is this indicator, which means that the CPU is used by other virtual machines for the proportion of time.(unit:%)"
|
||||
cpu_usage_guest: "The time to run other operating systems by virtualization, that is, the proportion of CPU time running the virtual machine(unit:%)"
|
||||
cpu_usage_guest_nice: "The proportion of time to run the virtual machine at low priority(unit:%)"
|
||||
|
||||
disk_free: "The remaining amount of the hard disk partition (unit: byte)"
|
||||
disk_used: "Hard disk partitional use (unit: byte)"
|
||||
disk_used_percent: "Hard disk partitional use rate (unit:%)"
|
||||
disk_total: "Total amount of hard disk partition (unit: byte)"
|
||||
disk_inodes_free: "Hard disk partition INODE remaining amount"
|
||||
disk_inodes_used: "Hard disk partition INODE usage amount"
|
||||
disk_inodes_total: "The total amount of hard disk partition INODE"
|
||||
|
||||
diskio_io_time: "From the perspective of the device perspective, the total time of I/O request, the I/O request in the queue is count (unit: millisecond), the counter type, you need to use the function to find the value"
|
||||
diskio_iops_in_progress: "IO requests that have been assigned to device -driven and have not yet been completed, not included in the queue but not yet assigned to the device -driven IO request, Gauge type"
|
||||
diskio_merged_reads: "The number of times of adjacent reading request Merge, the counter type"
|
||||
diskio_merged_writes: "The number of times the request Merge writes, the counter type"
|
||||
diskio_read_bytes: "The number of byte reads, the counter type, you need to use the function to find the Rate to use the value"
|
||||
diskio_read_time: "The total time of reading request (unit: millisecond), the counter type, you need to use the function to find the Rate to have the value of use"
|
||||
diskio_reads: "Read the number of requests, the counter type, you need to use the function to find the Rate to use the value"
|
||||
diskio_weighted_io_time: "From the perspective of the I/O request perspective, I/O wait for the total time. If there are multiple I/O requests at the same time, the time will be superimposed (unit: millisecond)"
|
||||
diskio_write_bytes: "The number of bytes written, the counter type, you need to use the function to find the Rate to use the value"
|
||||
diskio_write_time: "The total time of the request (unit: millisecond), the counter type, you need to use the function to find the rate to have the value of use"
|
||||
diskio_writes: "Write the number of requests, the counter type, you need to use the function to find the rate to use value"
|
||||
|
||||
kernel_boot_time: "Kernel startup time"
|
||||
kernel_context_switches: "Number of kernel context switching times"
|
||||
kernel_entropy_avail: "Entropy pool inside the Linux system"
|
||||
kernel_interrupts: "Number of kernel interruption"
|
||||
kernel_processes_forked: "ForK's process number"
|
||||
|
||||
mem_active: "The total number of memory (including Cache and BUFFER memory)"
|
||||
mem_available: "Application can use memory numbers"
|
||||
mem_available_percent: "Memory remaining percentage (0 ~ 100)"
|
||||
mem_buffered: "Used to make buffer size for the file"
|
||||
mem_cached: "The size of the memory used by the cache memory (equal to diskcache minus Swap Cache )"
|
||||
mem_commit_limit: "According to the over allocation ratio ('vm.overCommit _ Ratio'), this is the current total memory that can be allocated on the system."
|
||||
mem_committed_as: "Currently allocated on the system. It is the sum of the memory of all process applications"
|
||||
mem_dirty: "Waiting to be written back to the memory size of the disk"
|
||||
mem_free: "Senior memory number"
|
||||
mem_high_free: "Unused high memory size"
|
||||
mem_high_total: "The total memory size of the high memory (Highmem refers to all the physical memory that is higher than 860 MB of memory, the HighMem area is used for user programs, or for page cache. This area is not directly mapped to the kernel space. The kernels must use different methods to use this section of memory. )"
|
||||
mem_huge_page_size: "The size of each big page"
|
||||
mem_huge_pages_free: "The number of Huge Pages in the pool that have not been allocated"
|
||||
mem_huge_pages_total: "Reserve the total number of Huge Pages"
|
||||
mem_inactive: "Free memory (including the memory of free and avalible)"
|
||||
mem_low_free: "Unused low size"
|
||||
mem_low_total: "The total size of the low memory memory can achieve the same role of high memory, and it can be used by the kernel to record some of its own data structure"
|
||||
mem_mapped: "The size of the mapping of equipment and files"
|
||||
mem_page_tables: "The size of the index table of the management of the memory paging page"
|
||||
mem_shared: "The total memory shared by multiple processes"
|
||||
mem_slab: "The size of the kernel data structure cache can reduce the consumption of application and release memory"
|
||||
mem_sreclaimable: "The size of the SLAB can be recovered"
|
||||
mem_sunreclaim: "The size of the SLAB cannot be recovered(SUnreclaim+SReclaimable=Slab)"
|
||||
mem_swap_cached: "The size of the swap space used by the cache memory (cache memory), the memory that has been swapped out, but is still stored in the swapfile. Used to be quickly replaced when needed without opening the I/O port again"
|
||||
mem_swap_free: "The size of the switching space is not used"
|
||||
mem_swap_total: "The total size of the exchange space"
|
||||
mem_total: "Total memory"
|
||||
mem_used: "Memory number"
|
||||
mem_used_percent: "The memory has been used by several percentage (0 ~ 100)"
|
||||
mem_vmalloc_chunk: "The largest continuous unused vmalloc area"
|
||||
mem_vmalloc_totalL: "You can vmalloc virtual memory size"
|
||||
mem_vmalloc_used: "Vmalloc's virtual memory size"
|
||||
mem_write_back: "The memory size of the disk is being written back to the disk"
|
||||
mem_write_back_tmp: "Fuse is used to temporarily write back the memory of the buffer area"
|
||||
|
||||
net_bytes_recv: "Total inbound traffic(bytes) of network card"
|
||||
net_bytes_sent: "Total outbound traffic(bytes) of network card"
|
||||
net_bits_recv: "Total inbound traffic(bits) of network card"
|
||||
net_bits_sent: "Total outbound traffic(bits) of network card"
|
||||
net_drop_in: "The number of packets for network cards"
|
||||
net_drop_out: "The number of packets issued by the network card"
|
||||
net_err_in: "The number of incorrect packets of the network card"
|
||||
net_err_out: "Number of incorrect number of network cards"
|
||||
net_packets_recv: "Net card collection quantity"
|
||||
net_packets_sent: "Number of network card issuance"
|
||||
|
||||
netstat_tcp_established: "ESTABLISHED status network link number"
|
||||
netstat_tcp_fin_wait1: "FIN _ WAIT1 status network link number"
|
||||
netstat_tcp_fin_wait2: "FIN _ WAIT2 status number of network links"
|
||||
netstat_tcp_last_ack: "LAST_ ACK status number of network links"
|
||||
netstat_tcp_listen: "Number of network links in Listen status"
|
||||
netstat_tcp_syn_recv: "SYN _ RECV status number of network links"
|
||||
netstat_tcp_syn_sent: "SYN _ SENT status number of network links"
|
||||
netstat_tcp_time_wait: "Time _ WAIT status network link number"
|
||||
netstat_udp_socket: "Number of network links in UDP status"
|
||||
|
||||
processes_blocked: "The number of processes in the unreprudible sleep state('U','D','L')"
|
||||
processes_dead: "Number of processes in recycling('X')"
|
||||
processes_idle: "Number of idle processes hanging('I')"
|
||||
processes_paging: "Number of paging processes('P')"
|
||||
processes_running: "Number of processes during operation('R')"
|
||||
processes_sleeping: "Can interrupt the number of processes('S')"
|
||||
processes_stopped: "Pushing status process number('T')"
|
||||
processes_total: "Total process number"
|
||||
processes_total_threads: "Number of threads"
|
||||
processes_unknown: "Unknown status process number"
|
||||
processes_zombies: "Number of zombies('Z')"
|
||||
|
||||
swap_used_percent: "SWAP space replace the data volume"
|
||||
|
||||
system_load1: "1 minute average load value"
|
||||
system_load5: "5 minutes average load value"
|
||||
system_load15: "15 minutes average load value"
|
||||
system_load_norm_1: "1 minute average load value/logical CPU number"
|
||||
system_load_norm_5: "5 minutes average load value/logical CPU number"
|
||||
system_load_norm_15: "15 minutes average load value/logical CPU number"
|
||||
system_n_users: "User number"
|
||||
system_n_cpus: "CPU nuclear number"
|
||||
system_uptime: "System startup time"
|
||||
|
||||
nginx_accepts: "Since Nginx started, the total number of connections has been established with the client"
|
||||
nginx_active: "The current number of activity connections that Nginx is being processed is equal to Reading/Writing/Waiting"
|
||||
nginx_handled: "Starting from Nginx, the total number of client connections that have been processed"
|
||||
nginx_reading: "Reading the total number of connections on the http request header"
|
||||
nginx_requests: "Since nginx is started, the total number of client requests processed, due to the existence of HTTP Krrp - Alive requests, this value will be greater than the handled value"
|
||||
nginx_upstream_check_fall: "UPStream_CHECK module detects the number of back -end failures"
|
||||
nginx_upstream_check_rise: "UPSTREAM _ Check module to detect the number of back -end"
|
||||
nginx_upstream_check_status_code: "The state of the backstream is 1, and the down is 0"
|
||||
nginx_waiting: "When keep-alive is enabled, this value is equal to active – (reading+writing), which means that Nginx has processed the resident connection that is waiting for the next request command"
|
||||
nginx_writing: "The total number of connections to send a response to the client"
|
||||
|
||||
http_response_content_length: "HTTP message entity transmission length"
|
||||
http_response_http_response_code: "http response status code"
|
||||
http_response_response_time: "When http ring application"
|
||||
http_response_result_code: "URL detection result 0 is normal, otherwise the URL cannot be accessed"
|
||||
|
||||
# [mysqld_exporter]
|
||||
mysql_global_status_uptime: The number of seconds that the server has been up.(Gauge)
|
||||
mysql_global_status_uptime_since_flush_status: The number of seconds since the most recent FLUSH STATUS statement.(Gauge)
|
||||
mysql_global_status_queries: The number of statements executed by the server. This variable includes statements executed within stored programs, unlike the Questions variable. It does not count COM_PING or COM_STATISTICS commands.(Counter)
|
||||
mysql_global_status_threads_connected: The number of currently open connections.(Counter)
|
||||
mysql_global_status_connections: The number of connection attempts (successful or not) to the MySQL server.(Gauge)
|
||||
mysql_global_status_max_used_connections: The maximum number of connections that have been in use simultaneously since the server started.(Gauge)
|
||||
mysql_global_status_threads_running: The number of threads that are not sleeping.(Gauge)
|
||||
mysql_global_status_questions: The number of statements executed by the server. This includes only statements sent to the server by clients and not statements executed within stored programs, unlike the Queries variable. This variable does not count COM_PING, COM_STATISTICS, COM_STMT_PREPARE, COM_STMT_CLOSE, or COM_STMT_RESET commands.(Counter)
|
||||
mysql_global_status_threads_cached: The number of threads in the thread cache.(Counter)
|
||||
mysql_global_status_threads_created: The number of threads created to handle connections. If Threads_created is big, you may want to increase the thread_cache_size value. The cache miss rate can be calculated as Threads_created/Connections.(Counter)
|
||||
mysql_global_status_created_tmp_tables: The number of internal temporary tables created by the server while executing statements.(Counter)
|
||||
mysql_global_status_created_tmp_disk_tables: The number of internal on-disk temporary tables created by the server while executing statements. You can compare the number of internal on-disk temporary tables created to the total number of internal temporary tables created by comparing Created_tmp_disk_tables and Created_tmp_tables values.(Counter)
|
||||
mysql_global_status_created_tmp_files: How many temporary files mysqld has created.(Counter)
|
||||
mysql_global_status_select_full_join: The number of joins that perform table scans because they do not use indexes. If this value is not 0, you should carefully check the indexes of your tables.(Counter)
|
||||
mysql_global_status_select_full_range_join: The number of joins that used a range search on a reference table.(Counter)
|
||||
mysql_global_status_select_range: The number of joins that used ranges on the first table. This is normally not a critical issue even if the value is quite large.(Counter)
|
||||
mysql_global_status_select_range_check: The number of joins without keys that check for key usage after each row. If this is not 0, you should carefully check the indexes of your tables.(Counter)
|
||||
mysql_global_status_select_scan: The number of joins that did a full scan of the first table.(Counter)
|
||||
mysql_global_status_sort_rows: The number of sorted rows.(Counter)
|
||||
mysql_global_status_sort_range: The number of sorts that were done using ranges.(Counter)
|
||||
mysql_global_status_sort_merge_passes: The number of merge passes that the sort algorithm has had to do. If this value is large, you should consider increasing the value of the sort_buffer_size system variable.(Counter)
|
||||
mysql_global_status_sort_scan: The number of sorts that were done by scanning the table.(Counter)
|
||||
mysql_global_status_slow_queries: The number of queries that have taken more than long_query_time seconds. This counter increments regardless of whether the slow query log is enabled.(Counter)
|
||||
mysql_global_status_aborted_connects: The number of failed attempts to connect to the MySQL server.(Counter)
|
||||
mysql_global_status_aborted_clients: The number of connections that were aborted because the client died without closing the connection properly.(Counter)
|
||||
mysql_global_status_table_locks_immediate: The number of times that a request for a table lock could be granted immediately. Locks Immediate rising and falling is normal activity.(Counter)
|
||||
mysql_global_status_table_locks_waited: The number of times that a request for a table lock could not be granted immediately and a wait was needed. If this is high and you have performance problems, you should first optimize your queries, and then either split your table or tables or use replication.(Counter)
|
||||
mysql_global_status_bytes_received: The number of bytes received from all clients.(Counter)
|
||||
mysql_global_status_bytes_sent: The number of bytes sent to all clients.(Counter)
|
||||
mysql_global_status_innodb_page_size: InnoDB page size (default 16KB). Many values are counted in pages; the page size enables them to be easily converted to bytes.(Gauge)
|
||||
mysql_global_status_buffer_pool_pages: The number of pages in the InnoDB buffer pool.(Gauge)
|
||||
mysql_global_status_commands_total: The number of times each xxx statement has been executed.(Counter)
|
||||
mysql_global_status_handlers_total: Handler statistics are internal statistics on how MySQL is selecting, updating, inserting, and modifying rows, tables, and indexes. This is in fact the layer between the Storage Engine and MySQL.(Counter)
|
||||
mysql_global_status_opened_files: The number of files that have been opened with my_open() (a mysys library function). Parts of the server that open files without using this function do not increment the count.(Counter)
|
||||
mysql_global_status_open_tables: The number of tables that are open.(Gauge)
|
||||
mysql_global_status_opened_tables: The number of tables that have been opened. If Opened_tables is big, your table_open_cache value is probably too small.(Counter)
|
||||
mysql_global_status_table_open_cache_hits: The number of hits for open tables cache lookups.(Counter)
|
||||
mysql_global_status_table_open_cache_misses: The number of misses for open tables cache lookups.(Counter)
|
||||
mysql_global_status_table_open_cache_overflows: The number of overflows for the open tables cache.(Counter)
|
||||
mysql_global_status_innodb_num_open_files: The number of files InnoDB currently holds open.(Gauge)
|
||||
mysql_global_status_connection_errors_total: These variables provide information about errors that occur during the client connection process.(Counter)
|
||||
mysql_global_status_innodb_buffer_pool_read_requests: The number of logical read requests.(Counter)
|
||||
mysql_global_status_innodb_buffer_pool_reads: The number of logical reads that InnoDB could not satisfy from the buffer pool, and had to read directly from disk.(Counter)
|
||||
|
||||
mysql_global_variables_thread_cache_size: How many threads the server should cache for reuse.(Gauge)
|
||||
mysql_global_variables_max_connections: The maximum permitted number of simultaneous client connections.(Gauge)
|
||||
mysql_global_variables_innodb_buffer_pool_size: The size in bytes of the buffer pool, the memory area where InnoDB caches table and index data. The default value is 134217728 bytes (128MB).(Gauge)
|
||||
mysql_global_variables_innodb_log_buffer_size: The size in bytes of the buffer that InnoDB uses to write to the log files on disk.(Gauge)
|
||||
mysql_global_variables_key_buffer_size: Index blocks for MyISAM tables are buffered and are shared by all threads.(Gauge)
|
||||
mysql_global_variables_query_cache_size: The amount of memory allocated for caching query results.(Gauge)
|
||||
mysql_global_variables_table_open_cache: The number of open tables for all threads.(Gauge)
|
||||
mysql_global_variables_open_files_limit: The number of file descriptors available to mysqld from the operating system.(Gauge)
|
||||
|
||||
# [redis_exporter]
|
||||
redis_active_defrag_running: When activedefrag is enabled, this indicates whether defragmentation is currently active, and the CPU percentage it intends to utilize.
|
||||
redis_allocator_active_bytes: Total bytes in the allocator active pages, this includes external-fragmentation.
|
||||
redis_allocator_allocated_bytes: Total bytes allocated form the allocator, including internal-fragmentation. Normally the same as used_memory.
|
||||
redis_allocator_frag_bytes: Delta between allocator_active and allocator_allocated. See note about mem_fragmentation_bytes.
|
||||
redis_allocator_frag_ratio: Ratio between allocator_active and allocator_allocated. This is the true (external) fragmentation metric (not mem_fragmentation_ratio).
|
||||
redis_allocator_resident_bytes: Total bytes resident (RSS) in the allocator, this includes pages that can be released to the OS (by MEMORY PURGE, or just waiting).
|
||||
redis_allocator_rss_bytes: Delta between allocator_resident and allocator_active.
|
||||
redis_allocator_rss_ratio: Ratio between allocator_resident and allocator_active. This usually indicates pages that the allocator can and probably will soon release back to the OS.
|
||||
redis_aof_current_rewrite_duration_sec: Duration of the on-going AOF rewrite operation if any.
|
||||
redis_aof_enabled: Flag indicating AOF logging is activated.
|
||||
redis_aof_last_bgrewrite_status: Status of the last AOF rewrite operation.
|
||||
redis_aof_last_cow_size_bytes: The size in bytes of copy-on-write memory during the last AOF rewrite operation.
|
||||
redis_aof_last_rewrite_duration_sec: Duration of the last AOF rewrite operation in seconds.
|
||||
redis_aof_last_write_status: Status of the last write operation to the AOF.
|
||||
redis_aof_rewrite_in_progress: Flag indicating a AOF rewrite operation is on-going.
|
||||
redis_aof_rewrite_scheduled: Flag indicating an AOF rewrite operation will be scheduled once the on-going RDB save is complete.
|
||||
redis_blocked_clients: Number of clients pending on a blocking call (BLPOP, BRPOP, BRPOPLPUSH, BLMOVE, BZPOPMIN, BZPOPMAX).
|
||||
redis_client_recent_max_input_buffer_bytes: Biggest input buffer among current client connections.
|
||||
redis_client_recent_max_output_buffer_bytes: Biggest output buffer among current client connections.
|
||||
redis_cluster_enabled: Indicate Redis cluster is enabled.
|
||||
redis_commands_duration_seconds_total: The total CPU time consumed by these commands.(Counter)
|
||||
redis_commands_processed_total: Total number of commands processed by the server.(Counter)
|
||||
redis_commands_total: The number of calls that reached command execution (not rejected).(Counter)
|
||||
redis_config_maxclients: The value of the maxclients configuration directive. This is the upper limit for the sum of connected_clients, connected_slaves and cluster_connections.
|
||||
redis_config_maxmemory: The value of the maxmemory configuration directive.
|
||||
redis_connected_clients: Number of client connections (excluding connections from replicas).
|
||||
redis_connected_slaves: Number of connected replicas.
|
||||
redis_connections_received_total: Total number of connections accepted by the server.(Counter)
|
||||
redis_cpu_sys_children_seconds_total: System CPU consumed by the background processes.(Counter)
|
||||
redis_cpu_sys_seconds_total: System CPU consumed by the Redis server, which is the sum of system CPU consumed by all threads of the server process (main thread and background threads).(Counter)
|
||||
redis_cpu_user_children_seconds_total: User CPU consumed by the background processes.(Counter)
|
||||
redis_cpu_user_seconds_total: User CPU consumed by the Redis server, which is the sum of user CPU consumed by all threads of the server process (main thread and background threads).(Counter)
|
||||
redis_db_keys: Total number of keys by DB.
|
||||
redis_db_keys_expiring: Total number of expiring keys by DB
|
||||
redis_defrag_hits: Number of value reallocations performed by active the defragmentation process.
|
||||
redis_defrag_misses: Number of aborted value reallocations started by the active defragmentation process.
|
||||
redis_defrag_key_hits: Number of keys that were actively defragmented.
|
||||
redis_defrag_key_misses: Number of keys that were skipped by the active defragmentation process.
|
||||
redis_evicted_keys_total: Number of evicted keys due to maxmemory limit.(Counter)
|
||||
redis_expired_keys_total: Total number of key expiration events.(Counter)
|
||||
redis_expired_stale_percentage: The percentage of keys probably expired.
|
||||
redis_expired_time_cap_reached_total: The count of times that active expiry cycles have stopped early.
|
||||
redis_exporter_last_scrape_connect_time_seconds: The duration(in seconds) to connect when scrape.
|
||||
redis_exporter_last_scrape_duration_seconds: The last scrape duration.
|
||||
redis_exporter_last_scrape_error: The last scrape error status.
|
||||
redis_exporter_scrape_duration_seconds_count: Durations of scrapes by the exporter
|
||||
redis_exporter_scrape_duration_seconds_sum: Durations of scrapes by the exporter
|
||||
redis_exporter_scrapes_total: Current total redis scrapes.(Counter)
|
||||
redis_instance_info: Information about the Redis instance.
|
||||
redis_keyspace_hits_total: Hits total.(Counter)
|
||||
redis_keyspace_misses_total: Misses total.(Counter)
|
||||
redis_last_key_groups_scrape_duration_milliseconds: Duration of the last key group metrics scrape in milliseconds.
|
||||
redis_last_slow_execution_duration_seconds: The amount of time needed for last slow execution, in seconds.
|
||||
redis_latest_fork_seconds: The amount of time needed for last fork, in seconds.
|
||||
redis_lazyfree_pending_objects: The number of objects waiting to be freed (as a result of calling UNLINK, or FLUSHDB and FLUSHALL with the ASYNC option).
|
||||
redis_master_repl_offset: The server's current replication offset.
|
||||
redis_mem_clients_normal: Memory used by normal clients.(Gauge)
|
||||
redis_mem_clients_slaves: Memory used by replica clients - Starting Redis 7.0, replica buffers share memory with the replication backlog, so this field can show 0 when replicas don't trigger an increase of memory usage.
|
||||
redis_mem_fragmentation_bytes: Delta between used_memory_rss and used_memory. Note that when the total fragmentation bytes is low (few megabytes), a high ratio (e.g. 1.5 and above) is not an indication of an issue.
|
||||
redis_mem_fragmentation_ratio: Ratio between used_memory_rss and used_memory. Note that this doesn't only includes fragmentation, but also other process overheads (see the allocator_* metrics), and also overheads like code, shared libraries, stack, etc.
|
||||
redis_mem_not_counted_for_eviction_bytes: (Gauge)
|
||||
redis_memory_max_bytes: Max memory limit in bytes.
|
||||
redis_memory_used_bytes: Total number of bytes allocated by Redis using its allocator (either standard libc, jemalloc, or an alternative allocator such as tcmalloc)
|
||||
redis_memory_used_dataset_bytes: The size in bytes of the dataset (used_memory_overhead subtracted from used_memory)
|
||||
redis_memory_used_lua_bytes: Number of bytes used by the Lua engine.
|
||||
redis_memory_used_overhead_bytes: The sum in bytes of all overheads that the server allocated for managing its internal data structures.
|
||||
redis_memory_used_peak_bytes: Peak memory consumed by Redis (in bytes)
|
||||
redis_memory_used_rss_bytes: Number of bytes that Redis allocated as seen by the operating system (a.k.a resident set size). This is the number reported by tools such as top(1) and ps(1)
|
||||
redis_memory_used_scripts_bytes: Number of bytes used by cached Lua scripts
|
||||
redis_memory_used_startup_bytes: Initial amount of memory consumed by Redis at startup in bytes
|
||||
redis_migrate_cached_sockets_total: The number of sockets open for MIGRATE purposes
|
||||
redis_net_input_bytes_total: Total input bytes(Counter)
|
||||
redis_net_output_bytes_total: Total output bytes(Counter)
|
||||
redis_process_id: Process ID
|
||||
redis_pubsub_channels: Global number of pub/sub channels with client subscriptions
|
||||
redis_pubsub_patterns: Global number of pub/sub pattern with client subscriptions
|
||||
redis_rdb_bgsave_in_progress: Flag indicating a RDB save is on-going
|
||||
redis_rdb_changes_since_last_save: Number of changes since the last dump
|
||||
redis_rdb_current_bgsave_duration_sec: Duration of the on-going RDB save operation if any
|
||||
redis_rdb_last_bgsave_duration_sec: Duration of the last RDB save operation in seconds
|
||||
redis_rdb_last_bgsave_status: Status of the last RDB save operation
|
||||
redis_rdb_last_cow_size_bytes: The size in bytes of copy-on-write memory during the last RDB save operation
|
||||
redis_rdb_last_save_timestamp_seconds: Epoch-based timestamp of last successful RDB save
|
||||
redis_rejected_connections_total: Number of connections rejected because of maxclients limit(Counter)
|
||||
redis_repl_backlog_first_byte_offset: The master offset of the replication backlog buffer
|
||||
redis_repl_backlog_history_bytes: Size in bytes of the data in the replication backlog buffer
|
||||
redis_repl_backlog_is_active: Flag indicating replication backlog is active
|
||||
redis_replica_partial_resync_accepted: The number of accepted partial resync requests(Gauge)
|
||||
redis_replica_partial_resync_denied: The number of denied partial resync requests(Gauge)
|
||||
redis_replica_resyncs_full: The number of full resyncs with replicas
|
||||
redis_replication_backlog_bytes: Memory used by replication backlog
|
||||
redis_second_repl_offset: The offset up to which replication IDs are accepted.
|
||||
redis_slave_expires_tracked_keys: The number of keys tracked for expiry purposes (applicable only to writable replicas)(Gauge)
|
||||
redis_slowlog_last_id: Last id of slowlog
|
||||
redis_slowlog_length: Total slowlog
|
||||
redis_start_time_seconds: Start time of the Redis instance since unix epoch in seconds.
|
||||
redis_target_scrape_request_errors_total: Errors in requests to the exporter
|
||||
redis_up: Flag indicating redis instance is up
|
||||
redis_uptime_in_seconds: Number of seconds since Redis server start
|
||||
|
||||
# [windows_exporter]
|
||||
windows_cpu_clock_interrupts_total: Total number of received and serviced clock tick interrupts(counter)
|
||||
windows_cpu_core_frequency_mhz: Core frequency in megahertz(gauge)
|
||||
windows_cpu_cstate_seconds_total: Time spent in low-power idle state(counter)
|
||||
windows_cpu_dpcs_total: Total number of received and serviced deferred procedure calls (DPCs)(counter)
|
||||
windows_cpu_idle_break_events_total: Total number of time processor was woken from idle(counter)
|
||||
windows_cpu_interrupts_total: Total number of received and serviced hardware interrupts(counter)
|
||||
windows_cpu_parking_status: Parking Status represents whether a processor is parked or not(gauge)
|
||||
windows_cpu_processor_performance: Processor Performance is the average performance of the processor while it is executing instructions, as a percentage of the nominal performance of the processor. On some processors, Processor Performance may exceed 100%(gauge)
|
||||
windows_cpu_time_total: Time that processor spent in different modes (idle, user, system, ...)(counter)
|
||||
windows_cs_hostname: Labeled system hostname information as provided by ComputerSystem.DNSHostName and ComputerSystem.Domain(gauge)
|
||||
windows_cs_logical_processors: ComputerSystem.NumberOfLogicalProcessors(gauge)
|
||||
windows_cs_physical_memory_bytes: ComputerSystem.TotalPhysicalMemory(gauge)
|
||||
windows_exporter_build_info: A metric with a constant '1' value labeled by version, revision, branch, and goversion from which windows_exporter was built.(gauge)
|
||||
windows_exporter_collector_duration_seconds: Duration of a collection.(gauge)
|
||||
windows_exporter_collector_success: Whether the collector was successful.(gauge)
|
||||
windows_exporter_collector_timeout: Whether the collector timed out.(gauge)
|
||||
windows_exporter_perflib_snapshot_duration_seconds: Duration of perflib snapshot capture(gauge)
|
||||
windows_logical_disk_free_bytes: Free space in bytes (LogicalDisk.PercentFreeSpace)(gauge)
|
||||
windows_logical_disk_idle_seconds_total: Seconds that the disk was idle (LogicalDisk.PercentIdleTime)(counter)
|
||||
windows_logical_disk_read_bytes_total: The number of bytes transferred from the disk during read operations (LogicalDisk.DiskReadBytesPerSec)(counter)
|
||||
windows_logical_disk_read_latency_seconds_total: Shows the average time, in seconds, of a read operation from the disk (LogicalDisk.AvgDiskSecPerRead)(counter)
|
||||
windows_logical_disk_read_seconds_total: Seconds that the disk was busy servicing read requests (LogicalDisk.PercentDiskReadTime)(counter)
|
||||
windows_logical_disk_read_write_latency_seconds_total: Shows the time, in seconds, of the average disk transfer (LogicalDisk.AvgDiskSecPerTransfer)(counter)
|
||||
windows_logical_disk_reads_total: The number of read operations on the disk (LogicalDisk.DiskReadsPerSec)(counter)
|
||||
windows_logical_disk_requests_queued: The number of requests queued to the disk (LogicalDisk.CurrentDiskQueueLength)(gauge)
|
||||
windows_logical_disk_size_bytes: Total space in bytes (LogicalDisk.PercentFreeSpace_Base)(gauge)
|
||||
windows_logical_disk_split_ios_total: The number of I/Os to the disk were split into multiple I/Os (LogicalDisk.SplitIOPerSec)(counter)
|
||||
windows_logical_disk_write_bytes_total: The number of bytes transferred to the disk during write operations (LogicalDisk.DiskWriteBytesPerSec)(counter)
|
||||
windows_logical_disk_write_latency_seconds_total: Shows the average time, in seconds, of a write operation to the disk (LogicalDisk.AvgDiskSecPerWrite)(counter)
|
||||
windows_logical_disk_write_seconds_total: Seconds that the disk was busy servicing write requests (LogicalDisk.PercentDiskWriteTime)(counter)
|
||||
windows_logical_disk_writes_total: The number of write operations on the disk (LogicalDisk.DiskWritesPerSec)(counter)
|
||||
windows_net_bytes_received_total: (Network.BytesReceivedPerSec)(counter)
|
||||
windows_net_bytes_sent_total: (Network.BytesSentPerSec)(counter)
|
||||
windows_net_bytes_total: (Network.BytesTotalPerSec)(counter)
|
||||
windows_net_current_bandwidth: (Network.CurrentBandwidth)(gauge)
|
||||
windows_net_packets_outbound_discarded_total: (Network.PacketsOutboundDiscarded)(counter)
|
||||
windows_net_packets_outbound_errors_total: (Network.PacketsOutboundErrors)(counter)
|
||||
windows_net_packets_received_discarded_total: (Network.PacketsReceivedDiscarded)(counter)
|
||||
windows_net_packets_received_errors_total: (Network.PacketsReceivedErrors)(counter)
|
||||
windows_net_packets_received_total: (Network.PacketsReceivedPerSec)(counter)
|
||||
windows_net_packets_received_unknown_total: (Network.PacketsReceivedUnknown)(counter)
|
||||
windows_net_packets_sent_total: (Network.PacketsSentPerSec)(counter)
|
||||
windows_net_packets_total: (Network.PacketsPerSec)(counter)
|
||||
windows_os_info: OperatingSystem.Caption, OperatingSystem.Version(gauge)
|
||||
windows_os_paging_free_bytes: OperatingSystem.FreeSpaceInPagingFiles(gauge)
|
||||
windows_os_paging_limit_bytes: OperatingSystem.SizeStoredInPagingFiles(gauge)
|
||||
windows_os_physical_memory_free_bytes: OperatingSystem.FreePhysicalMemory(gauge)
|
||||
windows_os_process_memory_limix_bytes: OperatingSystem.MaxProcessMemorySize(gauge)
|
||||
windows_os_processes: OperatingSystem.NumberOfProcesses(gauge)
|
||||
windows_os_processes_limit: OperatingSystem.MaxNumberOfProcesses(gauge)
|
||||
windows_os_time: OperatingSystem.LocalDateTime(gauge)
|
||||
windows_os_timezone: OperatingSystem.LocalDateTime(gauge)
|
||||
windows_os_users: OperatingSystem.NumberOfUsers(gauge)
|
||||
windows_os_virtual_memory_bytes: OperatingSystem.TotalVirtualMemorySize(gauge)
|
||||
windows_os_virtual_memory_free_bytes: OperatingSystem.FreeVirtualMemory(gauge)
|
||||
windows_os_visible_memory_bytes: OperatingSystem.TotalVisibleMemorySize(gauge)
|
||||
windows_service_info: A metric with a constant '1' value labeled with service information(gauge)
|
||||
windows_service_start_mode: The start mode of the service (StartMode)(gauge)
|
||||
windows_service_state: The state of the service (State)(gauge)
|
||||
windows_service_status: The status of the service (Status)(gauge)
|
||||
windows_system_context_switches_total: Total number of context switches (WMI source is PerfOS_System.ContextSwitchesPersec)(counter)
|
||||
windows_system_exception_dispatches_total: Total number of exceptions dispatched (WMI source is PerfOS_System.ExceptionDispatchesPersec)(counter)
|
||||
windows_system_processor_queue_length: Length of processor queue (WMI source is PerfOS_System.ProcessorQueueLength)(gauge)
|
||||
windows_system_system_calls_total: Total number of system calls (WMI source is PerfOS_System.SystemCallsPersec)(counter)
|
||||
windows_system_system_up_time: System boot time (WMI source is PerfOS_System.SystemUpTime)(gauge)
|
||||
windows_system_threads: Current number of threads (WMI source is PerfOS_System.Threads)(gauge)
|
||||
|
||||
# [node_exporter]
|
||||
# SYSTEM
|
||||
# CPU context switch 次数
|
||||
node_context_switches_total: context_switches
|
||||
# Interrupts 次数
|
||||
node_intr_total: Interrupts
|
||||
# 运行的进程数
|
||||
node_procs_running: Processes in runnable state
|
||||
# 熵池大小
|
||||
node_entropy_available_bits: Entropy available to random number generators
|
||||
node_time_seconds: System time in seconds since epoch (1970)
|
||||
node_boot_time_seconds: Node boot time, in unixtime
|
||||
# CPU
|
||||
node_cpu_seconds_total: Seconds the CPUs spent in each mode
|
||||
node_load1: cpu load 1m
|
||||
node_load5: cpu load 5m
|
||||
node_load15: cpu load 15m
|
||||
|
||||
# MEM
|
||||
# 内核态
|
||||
# 内核用于缓存数据结构供自己使用的内存
|
||||
node_memory_Slab_bytes: Memory used by the kernel to cache data structures for its own use
|
||||
# slab中可回收的部分
|
||||
node_memory_SReclaimable_bytes: SReclaimable - Part of Slab, that might be reclaimed, such as caches
|
||||
# slab中不可回收的部分
|
||||
node_memory_SUnreclaim_bytes: Part of Slab, that cannot be reclaimed on memory pressure
|
||||
# Vmalloc内存区的大小
|
||||
node_memory_VmallocTotal_bytes: Total size of vmalloc memory area
|
||||
# vmalloc已分配的内存,虚拟地址空间上的连续的内存
|
||||
node_memory_VmallocUsed_bytes: Amount of vmalloc area which is used
|
||||
# vmalloc区可用的连续最大快的大小,通过此指标可以知道vmalloc可分配连续内存的最大值
|
||||
node_memory_VmallocChunk_bytes: Largest contigious block of vmalloc area which is free
|
||||
# 内存的硬件故障删除掉的内存页的总大小
|
||||
node_memory_HardwareCorrupted_bytes: Amount of RAM that the kernel identified as corrupted / not working
|
||||
# 用于在虚拟和物理内存地址之间映射的内存
|
||||
node_memory_PageTables_bytes: Memory used to map between virtual and physical memory addresses (gauge)
|
||||
# 内核栈内存,常驻内存,不可回收
|
||||
node_memory_KernelStack_bytes: Kernel memory stack. This is not reclaimable
|
||||
# 用来访问高端内存,复制高端内存的临时buffer,称为“bounce buffering”,会降低I/O 性能
|
||||
node_memory_Bounce_bytes: Memory used for block device bounce buffers
|
||||
#用户态
|
||||
# 单个巨页大小
|
||||
node_memory_Hugepagesize_bytes: Huge Page size
|
||||
# 系统分配的常驻巨页数
|
||||
node_memory_HugePages_Total: Total size of the pool of huge pages
|
||||
# 系统空闲的巨页数
|
||||
node_memory_HugePages_Free: Huge pages in the pool that are not yet allocated
|
||||
# 进程已申请但未使用的巨页数
|
||||
node_memory_HugePages_Rsvd: Huge pages for which a commitment to allocate from the pool has been made, but no allocation
|
||||
# 超过系统设定的常驻HugePages数量的个数
|
||||
node_memory_HugePages_Surp: Huge pages in the pool above the value in /proc/sys/vm/nr_hugepages
|
||||
# 透明巨页 Transparent HugePages (THP)
|
||||
node_memory_AnonHugePages_bytes: Memory in anonymous huge pages
|
||||
# inactivelist中的File-backed内存
|
||||
node_memory_Inactive_file_bytes: File-backed memory on inactive LRU list
|
||||
# inactivelist中的Anonymous内存
|
||||
node_memory_Inactive_anon_bytes: Anonymous and swap cache on inactive LRU list, including tmpfs (shmem)
|
||||
# activelist中的File-backed内存
|
||||
node_memory_Active_file_bytes: File-backed memory on active LRU list
|
||||
# activelist中的Anonymous内存
|
||||
node_memory_Active_anon_bytes: Anonymous and swap cache on active least-recently-used (LRU) list, including tmpfs
|
||||
# 禁止换出的页,对应 Unevictable 链表
|
||||
node_memory_Unevictable_bytes: Amount of unevictable memory that can't be swapped out for a variety of reasons
|
||||
# 共享内存
|
||||
node_memory_Shmem_bytes: Used shared memory (shared between several processes, thus including RAM disks)
|
||||
# 匿名页内存大小
|
||||
node_memory_AnonPages_bytes: Memory in user pages not backed by files
|
||||
# 被关联的内存页大小
|
||||
node_memory_Mapped_bytes: Used memory in mapped pages files which have been mmaped, such as libraries
|
||||
# file-backed内存页缓存大小
|
||||
node_memory_Cached_bytes: Parked file data (file content) cache
|
||||
# 系统中有多少匿名页曾经被swap-out、现在又被swap-in并且swap-in之后页面中的内容一直没发生变化
|
||||
node_memory_SwapCached_bytes: Memory that keeps track of pages that have been fetched from swap but not yet been modified
|
||||
# 被mlock()系统调用锁定的内存大小
|
||||
node_memory_Mlocked_bytes: Size of pages locked to memory using the mlock() system call
|
||||
# 块设备(block device)所占用的缓存页
|
||||
node_memory_Buffers_bytes: Block device (e.g. harddisk) cache
|
||||
node_memory_SwapTotal_bytes: Memory information field SwapTotal_bytes
|
||||
node_memory_SwapFree_bytes: Memory information field SwapFree_bytes
|
||||
|
||||
# DISK
|
||||
node_filesystem_avail_bytes: Filesystem space available to non-root users in byte
|
||||
node_filesystem_free_bytes: Filesystem free space in bytes
|
||||
node_filesystem_size_bytes: Filesystem size in bytes
|
||||
node_filesystem_files_free: Filesystem total free file nodes
|
||||
node_filesystem_files: Filesystem total free file nodes
|
||||
node_filefd_maximum: Max open files
|
||||
node_filefd_allocated: Open files
|
||||
node_filesystem_readonly: Filesystem read-only status
|
||||
node_filesystem_device_error: Whether an error occurred while getting statistics for the given device
|
||||
node_disk_reads_completed_total: The total number of reads completed successfully
|
||||
node_disk_writes_completed_total: The total number of writes completed successfully
|
||||
node_disk_reads_merged_total: The number of reads merged
|
||||
node_disk_writes_merged_total: The number of writes merged
|
||||
node_disk_read_bytes_total: The total number of bytes read successfully
|
||||
node_disk_written_bytes_total: The total number of bytes written successfully
|
||||
node_disk_io_time_seconds_total: Total seconds spent doing I/Os
|
||||
node_disk_read_time_seconds_total: The total number of seconds spent by all reads
|
||||
node_disk_write_time_seconds_total: The total number of seconds spent by all writes
|
||||
node_disk_io_time_weighted_seconds_total: The weighted of seconds spent doing I/Os
|
||||
|
||||
# NET
|
||||
node_network_receive_bytes_total: Network device statistic receive_bytes (counter)
|
||||
node_network_transmit_bytes_total: Network device statistic transmit_bytes (counter)
|
||||
node_network_receive_packets_total: Network device statistic receive_bytes
|
||||
node_network_transmit_packets_total: Network device statistic transmit_bytes
|
||||
node_network_receive_errs_total: Network device statistic receive_errs
|
||||
node_network_transmit_errs_total: Network device statistic transmit_errs
|
||||
node_network_receive_drop_total: Network device statistic receive_drop
|
||||
node_network_transmit_drop_total: Network device statistic transmit_drop
|
||||
node_nf_conntrack_entries: Number of currently allocated flow entries for connection tracking
|
||||
node_sockstat_TCP_alloc: Number of TCP sockets in state alloc
|
||||
node_sockstat_TCP_inuse: Number of TCP sockets in state inuse
|
||||
node_sockstat_TCP_orphan: Number of TCP sockets in state orphan
|
||||
node_sockstat_TCP_tw: Number of TCP sockets in state tw
|
||||
node_netstat_Tcp_CurrEstab: Statistic TcpCurrEstab
|
||||
node_sockstat_sockets_used: Number of IPv4 sockets in use
|
||||
|
||||
# [kafka_exporter]
|
||||
kafka_brokers: count of kafka_brokers (gauge)
|
||||
kafka_topic_partitions: Number of partitions for this Topic (gauge)
|
||||
kafka_topic_partition_current_offset: Current Offset of a Broker at Topic/Partition (gauge)
|
||||
kafka_consumergroup_current_offset: Current Offset of a ConsumerGroup at Topic/Partition (gauge)
|
||||
kafka_consumer_lag_millis: Current approximation of consumer lag for a ConsumerGroup at Topic/Partition (gauge)
|
||||
kafka_topic_partition_under_replicated_partition: 1 if Topic/Partition is under Replicated
|
||||
|
||||
# [zookeeper_exporter]
|
||||
zk_znode_count: The total count of znodes stored
|
||||
zk_ephemerals_count: The number of Ephemerals nodes
|
||||
zk_watch_count: The number of watchers setup over Zookeeper nodes.
|
||||
zk_approximate_data_size: Size of data in bytes that a zookeeper server has in its data tree
|
||||
zk_outstanding_requests: Number of currently executing requests
|
||||
zk_packets_sent: Count of the number of zookeeper packets sent from a server
|
||||
zk_packets_received: Count of the number of zookeeper packets received by a server
|
||||
zk_num_alive_connections: Number of active clients connected to a zookeeper server
|
||||
zk_open_file_descriptor_count: Number of file descriptors that a zookeeper server has open
|
||||
zk_max_file_descriptor_count: Maximum number of file descriptors that a zookeeper server can open
|
||||
zk_avg_latency: Average time in milliseconds for requests to be processed
|
||||
zk_min_latency: Maximum time in milliseconds for a request to be processed
|
||||
zk_max_latency: Minimum time in milliseconds for a request to be processed
|
||||
@@ -0,0 +1,216 @@
|
||||
#!/usr/bin/env python
|
||||
# -*- coding: UTF-8 -*-
|
||||
import sys
|
||||
import json
|
||||
import urllib2
|
||||
import smtplib
|
||||
from email.mime.text import MIMEText
|
||||
|
||||
reload(sys)
|
||||
sys.setdefaultencoding('utf8')
|
||||
|
||||
notify_channel_funcs = {
|
||||
"email":"email",
|
||||
"sms":"sms",
|
||||
"voice":"voice",
|
||||
"dingtalk":"dingtalk",
|
||||
"wecom":"wecom",
|
||||
"feishu":"feishu"
|
||||
}
|
||||
|
||||
mail_host = "smtp.163.com"
|
||||
mail_port = 994
|
||||
mail_user = "ulricqin"
|
||||
mail_pass = "password"
|
||||
mail_from = "ulricqin@163.com"
|
||||
|
||||
class Sender(object):
|
||||
@classmethod
|
||||
def send_email(cls, payload):
|
||||
if mail_user == "ulricqin" and mail_pass == "password":
|
||||
print("invalid smtp configuration")
|
||||
return
|
||||
|
||||
users = payload.get('event').get("notify_users_obj")
|
||||
|
||||
emails = {}
|
||||
for u in users:
|
||||
if u.get("email"):
|
||||
emails[u.get("email")] = 1
|
||||
|
||||
if not emails:
|
||||
return
|
||||
|
||||
recipients = emails.keys()
|
||||
mail_body = payload.get('tpls').get("email.tpl", "email.tpl not found")
|
||||
message = MIMEText(mail_body, 'html', 'utf-8')
|
||||
message['From'] = mail_from
|
||||
message['To'] = ", ".join(recipients)
|
||||
message["Subject"] = payload.get('tpls').get("subject.tpl", "subject.tpl not found")
|
||||
|
||||
try:
|
||||
smtp = smtplib.SMTP_SSL(mail_host, mail_port)
|
||||
smtp.login(mail_user, mail_pass)
|
||||
smtp.sendmail(mail_from, recipients, message.as_string())
|
||||
smtp.close()
|
||||
except smtplib.SMTPException, error:
|
||||
print(error)
|
||||
|
||||
@classmethod
|
||||
def send_wecom(cls, payload):
|
||||
users = payload.get('event').get("notify_users_obj")
|
||||
|
||||
tokens = {}
|
||||
|
||||
for u in users:
|
||||
contacts = u.get("contacts")
|
||||
if contacts.get("wecom_robot_token", ""):
|
||||
tokens[contacts.get("wecom_robot_token", "")] = 1
|
||||
|
||||
opener = urllib2.build_opener(urllib2.HTTPHandler())
|
||||
method = "POST"
|
||||
|
||||
for t in tokens:
|
||||
url = "https://qyapi.weixin.qq.com/cgi-bin/webhook/send?key={}".format(t)
|
||||
body = {
|
||||
"msgtype": "markdown",
|
||||
"markdown": {
|
||||
"content": payload.get('tpls').get("wecom.tpl", "wecom.tpl not found")
|
||||
}
|
||||
}
|
||||
request = urllib2.Request(url, data=json.dumps(body))
|
||||
request.add_header("Content-Type",'application/json;charset=utf-8')
|
||||
request.get_method = lambda: method
|
||||
try:
|
||||
connection = opener.open(request)
|
||||
print(connection.read())
|
||||
except urllib2.HTTPError, error:
|
||||
print(error)
|
||||
|
||||
@classmethod
|
||||
def send_dingtalk(cls, payload):
|
||||
event = payload.get('event')
|
||||
users = event.get("notify_users_obj")
|
||||
|
||||
rule_name = event.get("rule_name")
|
||||
event_state = "Triggered"
|
||||
if event.get("is_recovered"):
|
||||
event_state = "Recovered"
|
||||
|
||||
tokens = {}
|
||||
phones = {}
|
||||
|
||||
for u in users:
|
||||
if u.get("phone"):
|
||||
phones[u.get("phone")] = 1
|
||||
|
||||
contacts = u.get("contacts")
|
||||
if contacts.get("dingtalk_robot_token", ""):
|
||||
tokens[contacts.get("dingtalk_robot_token", "")] = 1
|
||||
|
||||
opener = urllib2.build_opener(urllib2.HTTPHandler())
|
||||
method = "POST"
|
||||
|
||||
for t in tokens:
|
||||
url = "https://oapi.dingtalk.com/robot/send?access_token={}".format(t)
|
||||
body = {
|
||||
"msgtype": "markdown",
|
||||
"markdown": {
|
||||
"title": "{} - {}".format(event_state, rule_name),
|
||||
"text": payload.get('tpls').get("dingtalk.tpl", "dingtalk.tpl not found") + ' '.join(["@"+i for i in phones.keys()])
|
||||
},
|
||||
"at": {
|
||||
"atMobiles": phones.keys(),
|
||||
"isAtAll": False
|
||||
}
|
||||
}
|
||||
request = urllib2.Request(url, data=json.dumps(body))
|
||||
request.add_header("Content-Type",'application/json;charset=utf-8')
|
||||
request.get_method = lambda: method
|
||||
try:
|
||||
connection = opener.open(request)
|
||||
print(connection.read())
|
||||
except urllib2.HTTPError, error:
|
||||
print(error)
|
||||
|
||||
@classmethod
|
||||
def send_feishu(cls, payload):
|
||||
users = payload.get('event').get("notify_users_obj")
|
||||
|
||||
tokens = {}
|
||||
phones = {}
|
||||
|
||||
for u in users:
|
||||
if u.get("phone"):
|
||||
phones[u.get("phone")] = 1
|
||||
|
||||
contacts = u.get("contacts")
|
||||
if contacts.get("feishu_robot_token", ""):
|
||||
tokens[contacts.get("feishu_robot_token", "")] = 1
|
||||
|
||||
opener = urllib2.build_opener(urllib2.HTTPHandler())
|
||||
method = "POST"
|
||||
|
||||
for t in tokens:
|
||||
url = "https://open.feishu.cn/open-apis/bot/v2/hook/{}".format(t)
|
||||
body = {
|
||||
"msg_type": "text",
|
||||
"content": {
|
||||
"text": payload.get('tpls').get("feishu.tpl", "feishu.tpl not found")
|
||||
},
|
||||
"at": {
|
||||
"atMobiles": phones.keys(),
|
||||
"isAtAll": False
|
||||
}
|
||||
}
|
||||
request = urllib2.Request(url, data=json.dumps(body))
|
||||
request.add_header("Content-Type",'application/json;charset=utf-8')
|
||||
request.get_method = lambda: method
|
||||
try:
|
||||
connection = opener.open(request)
|
||||
print(connection.read())
|
||||
except urllib2.HTTPError, error:
|
||||
print(error)
|
||||
|
||||
@classmethod
|
||||
def send_sms(cls, payload):
|
||||
users = payload.get('event').get("notify_users_obj")
|
||||
phones = {}
|
||||
for u in users:
|
||||
if u.get("phone"):
|
||||
phones[u.get("phone")] = 1
|
||||
if phones:
|
||||
print("send_sms not implemented, phones: {}".format(phones.keys()))
|
||||
|
||||
@classmethod
|
||||
def send_voice(cls, payload):
|
||||
users = payload.get('event').get("notify_users_obj")
|
||||
phones = {}
|
||||
for u in users:
|
||||
if u.get("phone"):
|
||||
phones[u.get("phone")] = 1
|
||||
if phones:
|
||||
print("send_voice not implemented, phones: {}".format(phones.keys()))
|
||||
|
||||
def main():
|
||||
payload = json.load(sys.stdin)
|
||||
with open(".payload", 'w') as f:
|
||||
f.write(json.dumps(payload, indent=4))
|
||||
for ch in payload.get('event').get('notify_channels'):
|
||||
send_func_name = "send_{}".format(notify_channel_funcs.get(ch.strip()))
|
||||
if not hasattr(Sender, send_func_name):
|
||||
print("function: {} not found", send_func_name)
|
||||
continue
|
||||
send_func = getattr(Sender, send_func_name)
|
||||
send_func(payload)
|
||||
|
||||
def hello():
|
||||
print("hello nightingale")
|
||||
|
||||
if __name__ == "__main__":
|
||||
if len(sys.argv) == 1:
|
||||
main()
|
||||
elif sys.argv[1] == "hello":
|
||||
hello()
|
||||
else:
|
||||
print("I am confused")
|
||||
@@ -0,0 +1,73 @@
|
||||
#!/usr/bin/env python3
|
||||
# -*- coding: UTF-8 -*-
|
||||
import sys
|
||||
import json
|
||||
|
||||
class Sender(object):
|
||||
@classmethod
|
||||
def send_email(cls, payload):
|
||||
# already done in go code
|
||||
pass
|
||||
|
||||
@classmethod
|
||||
def send_wecom(cls, payload):
|
||||
# already done in go code
|
||||
pass
|
||||
|
||||
@classmethod
|
||||
def send_dingtalk(cls, payload):
|
||||
# already done in go code
|
||||
pass
|
||||
|
||||
@classmethod
|
||||
def send_feishu(cls, payload):
|
||||
# already done in go code
|
||||
pass
|
||||
|
||||
@classmethod
|
||||
def send_mm(cls, payload):
|
||||
# already done in go code
|
||||
pass
|
||||
|
||||
@classmethod
|
||||
def send_sms(cls, payload):
|
||||
users = payload.get('event').get("notify_users_obj")
|
||||
phones = {}
|
||||
for u in users:
|
||||
if u.get("phone"):
|
||||
phones[u.get("phone")] = 1
|
||||
if phones:
|
||||
print("send_sms not implemented, phones: {}".format(phones.keys()))
|
||||
|
||||
@classmethod
|
||||
def send_voice(cls, payload):
|
||||
users = payload.get('event').get("notify_users_obj")
|
||||
phones = {}
|
||||
for u in users:
|
||||
if u.get("phone"):
|
||||
phones[u.get("phone")] = 1
|
||||
if phones:
|
||||
print("send_voice not implemented, phones: {}".format(phones.keys()))
|
||||
|
||||
def main():
|
||||
payload = json.load(sys.stdin)
|
||||
with open(".payload", 'w') as f:
|
||||
f.write(json.dumps(payload, indent=4))
|
||||
for ch in payload.get('event').get('notify_channels'):
|
||||
send_func_name = "send_{}".format(ch.strip())
|
||||
if not hasattr(Sender, send_func_name):
|
||||
print("function: {} not found", send_func_name)
|
||||
continue
|
||||
send_func = getattr(Sender, send_func_name)
|
||||
send_func(payload)
|
||||
|
||||
def hello():
|
||||
print("hello nightingale")
|
||||
|
||||
if __name__ == "__main__":
|
||||
if len(sys.argv) == 1:
|
||||
main()
|
||||
elif sys.argv[1] == "hello":
|
||||
hello()
|
||||
else:
|
||||
print("I am confused")
|
||||
@@ -0,0 +1,92 @@
|
||||
#!/usr/bin/env python
|
||||
# -*- coding: UTF-8 -*-
|
||||
import sys
|
||||
import json
|
||||
import requests
|
||||
|
||||
class Sender(object):
|
||||
@classmethod
|
||||
def send_email(cls, payload):
|
||||
# already done in go code
|
||||
pass
|
||||
|
||||
@classmethod
|
||||
def send_wecom(cls, payload):
|
||||
# already done in go code
|
||||
pass
|
||||
|
||||
@classmethod
|
||||
def send_dingtalk(cls, payload):
|
||||
# already done in go code
|
||||
pass
|
||||
|
||||
@classmethod
|
||||
def send_ifeishu(cls, payload):
|
||||
users = payload.get('event').get("notify_users_obj")
|
||||
tokens = {}
|
||||
phones = {}
|
||||
|
||||
for u in users:
|
||||
if u.get("phone"):
|
||||
phones[u.get("phone")] = 1
|
||||
|
||||
contacts = u.get("contacts")
|
||||
if contacts.get("feishu_robot_token", ""):
|
||||
tokens[contacts.get("feishu_robot_token", "")] = 1
|
||||
|
||||
headers = {
|
||||
"Content-Type": "application/json;charset=utf-8",
|
||||
"Host": "open.feishu.cn"
|
||||
}
|
||||
|
||||
for t in tokens:
|
||||
url = "https://open.feishu.cn/open-apis/bot/v2/hook/{}".format(t)
|
||||
body = {
|
||||
"msg_type": "text",
|
||||
"content": {
|
||||
"text": payload.get('tpls').get("feishu", "feishu not found")
|
||||
},
|
||||
"at": {
|
||||
"atMobiles": list(phones.keys()),
|
||||
"isAtAll": False
|
||||
}
|
||||
}
|
||||
|
||||
response = requests.post(url, headers=headers, data=json.dumps(body))
|
||||
print(f"notify_ifeishu: token={t} status_code={response.status_code} response_text={response.text}")
|
||||
|
||||
@classmethod
|
||||
def send_mm(cls, payload):
|
||||
# already done in go code
|
||||
pass
|
||||
|
||||
@classmethod
|
||||
def send_sms(cls, payload):
|
||||
pass
|
||||
|
||||
@classmethod
|
||||
def send_voice(cls, payload):
|
||||
pass
|
||||
|
||||
def main():
|
||||
payload = json.load(sys.stdin)
|
||||
with open(".payload", 'w') as f:
|
||||
f.write(json.dumps(payload, indent=4))
|
||||
for ch in payload.get('event').get('notify_channels'):
|
||||
send_func_name = "send_{}".format(ch.strip())
|
||||
if not hasattr(Sender, send_func_name):
|
||||
print("function: {} not found", send_func_name)
|
||||
continue
|
||||
send_func = getattr(Sender, send_func_name)
|
||||
send_func(payload)
|
||||
|
||||
def hello():
|
||||
print("hello nightingale")
|
||||
|
||||
if __name__ == "__main__":
|
||||
if len(sys.argv) == 1:
|
||||
main()
|
||||
elif sys.argv[1] == "hello":
|
||||
hello()
|
||||
else:
|
||||
print("I am confused")
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user