mirror of
https://github.com/ccfos/nightingale.git
synced 2026-03-03 14:38:55 +00:00
Compare commits
15 Commits
makefile
...
notify_ref
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
0841e62554 | ||
|
|
17c7361620 | ||
|
|
c45cbd02cc | ||
|
|
04cb501ab4 | ||
|
|
ba6f089c78 | ||
|
|
ab0cb6fc47 | ||
|
|
2847a315b1 | ||
|
|
65439df7fb | ||
|
|
b6436b09ce | ||
|
|
92354d5765 | ||
|
|
05651ad744 | ||
|
|
b7ff82d722 | ||
|
|
a285966560 | ||
|
|
538880b0e0 | ||
|
|
299270f74e |
10
Makefile
10
Makefile
@@ -2,10 +2,15 @@
|
||||
|
||||
NOW = $(shell date -u '+%Y%m%d%I%M%S')
|
||||
|
||||
RELEASE_VERSION = 5.9.6
|
||||
|
||||
APP = n9e
|
||||
SERVER_BIN = $(APP)
|
||||
ROOT:=$(shell pwd -P)
|
||||
GIT_COMMIT:=$(shell git --work-tree ${ROOT} rev-parse 'HEAD^{commit}')
|
||||
_GIT_VERSION:=$(shell git --work-tree ${ROOT} describe --tags --abbrev=14 "${GIT_COMMIT}^{commit}" 2>/dev/null)
|
||||
TAG=$(shell echo "${_GIT_VERSION}" | awk -F"-" '{print $$1}')
|
||||
RELEASE_VERSION:="$(TAG)-$(GIT_COMMIT)"
|
||||
|
||||
# RELEASE_ROOT = release
|
||||
# RELEASE_SERVER = release/${APP}
|
||||
# GIT_COUNT = $(shell git rev-list --all --count)
|
||||
@@ -17,6 +22,9 @@ all: build
|
||||
build:
|
||||
go build -ldflags "-w -s -X github.com/didi/nightingale/v5/src/pkg/version.VERSION=$(RELEASE_VERSION)" -o $(SERVER_BIN) ./src
|
||||
|
||||
build-linux:
|
||||
GOOS=linux GOARCH=amd64 go build -ldflags "-w -s -X github.com/didi/nightingale/v5/src/pkg/version.VERSION=$(RELEASE_VERSION)" -o $(SERVER_BIN) ./src
|
||||
|
||||
# start:
|
||||
# @go run -ldflags "-X main.VERSION=$(RELEASE_TAG)" ./cmd/${APP}/main.go web -c ./configs/config.toml -m ./configs/model.conf --menu ./configs/menu.yaml
|
||||
run_webapi:
|
||||
|
||||
118
README.md
118
README.md
@@ -1,66 +1,78 @@
|
||||
<img src="doc/img/ccf-n9e.png" width="240">
|
||||
<p align="center">
|
||||
<a href="https://github.com/ccfos/nightingale">
|
||||
<img src="doc/img/ccf-n9e.png" alt="nightingale - cloud native monitoring" width="240" /></a>
|
||||
<p align="center">夜莺是一款开源的云原生监控系统,采用 all-in-one 的设计,提供企业级的功能特性,开箱即用的产品体验。推荐升级您的 Prometheus + AlertManager + Grafana 组合方案到夜莺</p>
|
||||
</p>
|
||||
|
||||
<p align="center">
|
||||
<img alt="GitHub latest release" src="https://img.shields.io/github/v/release/ccfos/nightingale"/>
|
||||
<a href="https://n9e.github.io">
|
||||
<img alt="Docs" src="https://img.shields.io/badge/docs-get%20started-brightgreen"/></a>
|
||||
<a href="https://hub.docker.com/u/flashcatcloud">
|
||||
<img alt="Docker pulls" src="https://img.shields.io/docker/pulls/flashcatcloud/nightingale"/></a>
|
||||
<img alt="GitHub Repo stars" src="https://img.shields.io/github/stars/ccfos/nightingale">
|
||||
<img alt="GitHub forks" src="https://img.shields.io/github/forks/ccfos/nightingale">
|
||||
<a href="https://github.com/ccfos/nightingale/graphs/contributors">
|
||||
<img alt="GitHub contributors" src="https://img.shields.io/github/contributors-anon/ccfos/nightingale"/></a>
|
||||
<img alt="License" src="https://img.shields.io/badge/license-Apache--2.0-blue"/>
|
||||
</p>
|
||||
|
||||
[English](./README_EN.md) | [中文](./README.md)
|
||||
|
||||
> 夜莺是一款开源的云原生监控系统,采用 All-In-One 的设计,提供企业级的功能特性,开箱即用的产品体验。推荐升级您的 Prometheus + AlertManager + Grafana 组合方案到夜莺。
|
||||
## Highlighted Features
|
||||
|
||||
**夜莺监控具有以下特点:**
|
||||
- **开箱即用**
|
||||
- 支持 Docker、Helm Chart 等多种部署方式,内置多种监控大盘、快捷视图、告警规则模板,导入即可快速使用,活跃、专业的社区用户也在持续迭代和沉淀更多的最佳实践于产品中;
|
||||
- **兼容并包**
|
||||
- 支持 [Categraf](https://github.com/flashcatcloud/categraf)、Telegraf、Grafana-agent 等多种采集器,支持 Prometheus、VictoriaMetrics、M3DB 等各种时序数据库,支持对接 Grafana,与云原生生态无缝集成;
|
||||
- 集数据采集、可视化、监控告警、数据分析于一体,与云原生生态紧密集成,提供开箱即用的企业级监控分析和告警能力;
|
||||
- **开放社区**
|
||||
- 托管于[中国计算机学会开源发展委员会](https://www.ccf.org.cn/kyfzwyh/),有[快猫星云](https://flashcat.cloud)的持续投入,和数千名社区用户的积极参与,以及夜莺监控项目清晰明确的定位,都保证了夜莺开源社区健康、长久的发展;
|
||||
- **高性能**
|
||||
- 得益于夜莺的多数据源管理引擎,和夜莺引擎侧优秀的架构设计,借助于高性能时序库,可以满足数亿时间线的采集、存储、告警分析场景,节省大量成本;
|
||||
- **高可用**
|
||||
- 夜莺监控组件均可水平扩展,无单点,已在上千家企业部署落地,经受了严苛的生产实践检验。众多互联网头部公司,夜莺集群机器达百台,处理十亿级时间线,重度使用夜莺监控;
|
||||
- **灵活扩展**
|
||||
- 夜莺监控,可部署在1核1G的云主机,可在上百台机器部署集群,可运行在K8s中;也可将时序库、告警引擎等组件下沉到各机房、各region,兼顾边缘部署和中心化管理;
|
||||
|
||||
#### 1. 开箱即用
|
||||
支持 Docker、Helm Chart 等多种部署方式,内置多种监控大盘、快捷视图、告警规则模板,导入即可快速使用,活跃、专业的社区用户也在持续迭代和沉淀更多的最佳实践于产品中;
|
||||
|
||||
#### 2. 兼容并包
|
||||
支持 [Categraf](https://github.com/flashcatcloud/categraf)、Telegraf、Grafana-agent 等多种采集器,支持 Prometheus、VictoriaMetrics、M3DB 等各种时序数据库,支持对接 Grafana,与云原生生态无缝集成;
|
||||
|
||||
#### 3. 开放社区
|
||||
托管于[中国计算机学会开源发展委员会](https://www.ccf.org.cn/kyfzwyh/),有[快猫星云](https://flashcat.cloud)的持续投入,和数千名社区用户的积极参与,以及夜莺监控项目清晰明确的定位,都保证了夜莺开源社区健康、长久的发展;
|
||||
|
||||
#### 4. 高性能
|
||||
得益于夜莺的多数据源管理引擎,和夜莺引擎侧优秀的架构设计,借助于高性能时序库,可以满足数亿时间线的采集、存储、告警分析场景,节省大量成本;
|
||||
|
||||
#### 5. 高可用
|
||||
夜莺监控组件均可水平扩展,无单点,已在上千家企业部署落地,经受了严苛的生产实践检验。众多互联网头部公司,夜莺集群机器达百台,处理十亿级时间线,重度使用夜莺监控;
|
||||
|
||||
#### 6. 灵活扩展
|
||||
夜莺监控,可部署在1核1G的云主机,可在上百台机器部署集群,可运行在K8s中;也可将时序库、告警引擎等组件下沉到各机房、各region,兼顾边缘部署和中心化管理;
|
||||
|
||||
|
||||
#### 如果您在使用 Prometheus 过程中,有以下的一个或者多个需求场景,推荐您升级到夜莺:
|
||||
> 如果您在使用 Prometheus 过程中,有以下的一个或者多个需求场景,推荐您升级到夜莺:
|
||||
|
||||
- Prometheus、Alertmanager、Grafana 等多个系统较为割裂,缺乏统一视图,无法开箱即用;
|
||||
- 通过修改配置文件来管理 Prometheus、Alertmanager 的方式,学习曲线大,协同有难度;
|
||||
- 数据量过大而无法扩展您的 Prometheus 集群;
|
||||
- 生产环境运行多套 Prometheus 集群,面临管理和使用成本高的问题;
|
||||
|
||||
#### 如果您在使用 Zabbix,有以下的场景,推荐您升级到夜莺:
|
||||
> 如果您在使用 Zabbix,有以下的场景,推荐您升级到夜莺:
|
||||
|
||||
- 监控的数据量太大,希望有更好的扩展解决方案;
|
||||
- 学习曲线高,多人多团队模式下,希望有更好的协同使用效率;
|
||||
- 微服务和云原生架构下,监控数据的生命周期多变、监控数据维度基数高,Zabbix 数据模型不易适配;
|
||||
|
||||
#### 如果您在使用 [open-falcon](https://github.com/open-falcon/falcon-plus),我们更推荐您升级到夜莺:
|
||||
- 关于open-falcon和夜莺的详细介绍,请参考阅读[《云原生监控的十个特点和趋势》](https://mp.weixin.qq.com/s?__biz=MzkzNjI5OTM5Nw==&mid=2247483738&idx=1&sn=e8bdbb974a2cd003c1abcc2b5405dd18&chksm=c2a19fb0f5d616a63185cd79277a79a6b80118ef2185890d0683d2bb20451bd9303c78d083c5#rd)。
|
||||
> 如果您在使用 [Open-Falcon](https://github.com/open-falcon/falcon-plus),我们更推荐您升级到夜莺:
|
||||
|
||||
#### 我们推荐您使用 [Categraf](https://github.com/flashcatcloud/categraf) 作为首选的监控数据采集器:
|
||||
- Categraf 是夜莺监控的默认采集器,采用开放插件机制和 all-in-one 的设计,同时支持 metric、log、trace、event 的采集。Categraf 不仅可以采集 CPU、内存、网络等系统层面的指标,也集成了众多开源组件的采集能力,支持K8s生态。Categraf 内置了对应的仪表盘和告警规则,开箱即用。
|
||||
- 关于 Open-Falcon 和夜莺的详细介绍,请参考阅读[《云原生监控的十个特点和趋势》](https://mp.weixin.qq.com/s?__biz=MzkzNjI5OTM5Nw==&mid=2247483738&idx=1&sn=e8bdbb974a2cd003c1abcc2b5405dd18&chksm=c2a19fb0f5d616a63185cd79277a79a6b80118ef2185890d0683d2bb20451bd9303c78d083c5#rd)。
|
||||
|
||||
> 我们推荐您使用 [Categraf](https://github.com/flashcatcloud/categraf) 作为首选的监控数据采集器:
|
||||
|
||||
- [Categraf](https://github.com/flashcatcloud/categraf) 是夜莺监控的默认采集器,采用开放插件机制和 all-in-one 的设计,同时支持 metric、log、trace、event 的采集。Categraf 不仅可以采集 CPU、内存、网络等系统层面的指标,也集成了众多开源组件的采集能力,支持K8s生态。Categraf 内置了对应的仪表盘和告警规则,开箱即用。
|
||||
|
||||
|
||||
## 资料链接
|
||||
## Getting Started
|
||||
|
||||
- [快速安装](https://mp.weixin.qq.com/s/iEC4pfL1TgjMDOWYh8H-FA)
|
||||
- [详细文档](https://n9e.github.io/)
|
||||
- [社区分享](https://n9e.github.io/docs/prologue/share/)
|
||||
|
||||
## 产品演示
|
||||
## Screenshots
|
||||
|
||||
<img src="doc/img/intro.gif" width="680">
|
||||
|
||||
## 架构介绍
|
||||
|
||||
## Architecture
|
||||
|
||||
<img src="doc/img/arch-product.png" width="680">
|
||||
|
||||
Nightingale 可以接收各种采集器上报的监控数据(比如 [Categraf](https://github.com/flashcatcloud/categraf)、telegraf、grafana-agent、Prometheus),并写入多种流行的时序数据库中(可以支持Prometheus、M3DB、VictoriaMetrics、Thanos、TDEngine等),提供告警规则、屏蔽规则、订阅规则的配置能力,提供监控数据的查看能力,提供告警自愈机制(告警触发之后自动回调某个webhook地址或者执行某个脚本),提供历史告警事件的存储管理、分组查看的能力。
|
||||
|
||||
夜莺监控可以接收各种采集器上报的监控数据(比如 [Categraf](https://github.com/flashcatcloud/categraf)、telegraf、grafana-agent、Prometheus),并写入多种流行的时序数据库中(可以支持Prometheus、M3DB、VictoriaMetrics、Thanos、TDEngine等),提供告警规则、屏蔽规则、订阅规则的配置能力,提供监控数据的查看能力,提供告警自愈机制(告警触发之后自动回调某个webhook地址或者执行某个脚本),提供历史告警事件的存储管理、分组查看的能力。
|
||||
|
||||
<img src="doc/img/arch-system.png" width="680">
|
||||
|
||||
@@ -72,31 +84,39 @@ Nightingale 可以接收各种采集器上报的监控数据(比如 [Categraf]
|
||||
如果单机版本的 Prometheus 性能不够或容灾较差,我们推荐使用 [VictoriaMetrics](https://github.com/VictoriaMetrics/VictoriaMetrics),VictoriaMetrics 架构较为简单,性能优异,易于部署和运维,架构图如上。VictoriaMetrics 更详尽的文档,还请参考其[官网](https://victoriametrics.com/)。
|
||||
|
||||
|
||||
## 如何参与
|
||||
## Community
|
||||
|
||||
开源项目要更有生命力,离不开开放的治理架构和源源不断的开发者和用户共同参与,我们致力于建立开放、中立的开源治理架构,吸纳更多来自企业、高校等各方面对云原生监控感兴趣、有热情的计算机专业人士,打造专业、有活力的开发者社区。关于《夜莺开源项目和社区治理架构(草案)》,请查阅 [doc/community-governance.md](./doc/community-governance.md).
|
||||
开源项目要更有生命力,离不开开放的治理架构和源源不断的开发者和用户共同参与,我们致力于建立开放、中立的开源治理架构,吸纳更多来自企业、高校等各方面对云原生监控感兴趣、有热情的开发者,一起打造有活力的夜莺开源社区。关于《夜莺开源项目和社区治理架构(草案)》,请查阅 **[COMMUNITY GOVERNANCE](./doc/community-governance.md)**.
|
||||
|
||||
**我们欢迎您以各种方式参与到夜莺开源项目和开源社区中来,工作包括不限于**:
|
||||
- 补充和完善文档 => [n9e.github.io](https://n9e.github.io/);
|
||||
- 分享您在使用夜莺监控过程中的最佳实践和经验心得 => [文章分享](https://n9e.github.io/docs/prologue/share/);
|
||||
- 提交产品建议 =》 [github issue](https://github.com/ccfos/nightingale/issues/new?assignees=&labels=kind%2Ffeature&template=enhancement.md);
|
||||
- 提交代码,让夜莺监控更快、更稳、更好用 => [github pull request](https://github.com/didi/nightingale/pulls);
|
||||
|
||||
- 补充和完善文档 => [n9e.github.io](https://n9e.github.io/)
|
||||
- 分享您在使用夜莺监控过程中的最佳实践和经验心得 => [文章分享](https://n9e.github.io/docs/prologue/share/)
|
||||
- 提交产品建议 =》 [github issue](https://github.com/ccfos/nightingale/issues/new?assignees=&labels=kind%2Ffeature&template=enhancement.md)
|
||||
- 提交代码,让夜莺监控更快、更稳、更好用 => [github pull request](https://github.com/didi/nightingale/pulls)
|
||||
|
||||
**尊重、认可和记录每一位贡献者的工作**是夜莺开源社区的第一指导原则,我们提倡**高效的提问**,这既是对开发者时间的尊重,也是对整个社区知识沉淀的贡献:
|
||||
1. 提问之前请先查阅 [FAQ](https://www.gitlink.org.cn/ccfos/nightingale/wiki/faq) ;
|
||||
2. 提问之前请先搜索 [github issue](https://github.com/ccfos/nightingale/issues);
|
||||
3. 我们优先推荐通过提交 github issue 来提问,如果[有问题点击这里](https://github.com/ccfos/nightingale/issues/new?assignees=&labels=kind%2Fbug&template=bug_report.yml) | [有需求建议点击这里](https://github.com/ccfos/nightingale/issues/new?assignees=&labels=kind%2Ffeature&template=enhancement.md);
|
||||
4. 最后,我们推荐你加入微信群,针对相关开放式问题,相互交流咨询 (请先加好友:[UlricGO](https://www.gitlink.org.cn/UlricQin/gist/tree/master/self.jpeg) 备注:夜莺加群+姓名+公司,交流群里会有开发者团队和专业、热心的群友回答问题);
|
||||
- 提问之前请先查阅 [FAQ](https://www.gitlink.org.cn/ccfos/nightingale/wiki/faq)
|
||||
- 提问之前请先搜索 [github issue](https://github.com/ccfos/nightingale/issues)
|
||||
- 我们优先推荐通过提交 github issue 来提问,如果[有问题点击这里](https://github.com/ccfos/nightingale/issues/new?assignees=&labels=kind%2Fbug&template=bug_report.yml) | [有需求建议点击这里](https://github.com/ccfos/nightingale/issues/new?assignees=&labels=kind%2Ffeature&template=enhancement.md)
|
||||
- 最后,我们推荐你加入微信群,针对相关开放式问题,相互交流咨询 (请先加好友:[UlricGO](https://www.gitlink.org.cn/UlricQin/gist/tree/master/self.jpeg) 备注:夜莺加群+姓名+公司,交流群里会有开发者团队和专业、热心的群友回答问题)
|
||||
|
||||
|
||||
## 联系我们
|
||||
- 推荐您关注夜莺监控公众号,及时获取相关产品和社区动态
|
||||
## Who is using
|
||||
|
||||
<img src="doc/img/n9e-vx-new.png" width="180">
|
||||
您可以通过在 **[Who is Using Nightingale](https://github.com/ccfos/nightingale/issues/897)** 登记您的使用情况,分享您的使用经验。
|
||||
|
||||
## Stargazers over time
|
||||
## Stargazers
|
||||
[](https://starchart.cc/ccfos/nightingale)
|
||||
|
||||
## Contributors
|
||||
<a href="https://github.com/ccfos/nightingale/graphs/contributors">
|
||||
<img src="https://contrib.rocks/image?repo=ccfos/nightingale" />
|
||||
</a>
|
||||
|
||||
## License
|
||||
- [Apache License V2.0](https://github.com/didi/nightingale/blob/main/LICENSE)
|
||||
[Apache License V2.0](https://github.com/didi/nightingale/blob/main/LICENSE)
|
||||
|
||||
## Contact Us
|
||||
推荐您关注夜莺监控公众号,及时获取相关产品和社区动态:
|
||||
|
||||
<img src="doc/img/n9e-vx-new.png" width="180">
|
||||
0
doc/active-contributors.md
Normal file
0
doc/active-contributors.md
Normal file
0
doc/committers.md
Normal file
0
doc/committers.md
Normal file
@@ -1,52 +1,74 @@
|
||||
# 夜莺开源项目和社区治理架构(草案)
|
||||
|
||||
#### 用户(User)
|
||||
## 社区架构
|
||||
|
||||
>欢迎任何个人、公司以及组织,使用 Nightingale,并积极的反馈 bug、提交功能需求、以及相互帮助,我们推荐使用 github issue 来跟踪 bug 和管理需求。
|
||||
### 用户(User)
|
||||
|
||||
#### 贡献者(Contributer)
|
||||
> 欢迎任何个人、公司以及组织,使用夜莺监控,并积极的反馈 bug、提交功能需求、以及相互帮助,我们推荐使用 [github issue](https://github.com/ccfos/nightingale/issues) 来跟踪 bug 和管理需求。
|
||||
|
||||
>欢迎每一位用户,包括但不限于以下列方式参与到 Nightingale 开源项目并做出贡献:
|
||||
>1. 在 [github issue](https://github.com/ccfos/nightingale/issues) 中积极参与讨论;
|
||||
>2. 提交代码补丁;
|
||||
>3. 修订、补充和完善文档;
|
||||
>4. 提交建议 / 批评;
|
||||
社区用户,可以通过在 **[Who is Using Nightingale](https://github.com/ccfos/nightingale/issues/897)** 登记您的使用情况,并分享您使用夜莺监控的经验,将会自动进入 **[END USERS](./end-users.md)** 列表,并获得社区的 **VIP Support**。
|
||||
|
||||
#### 提交者(Committer)
|
||||
### 贡献者(Contributer)
|
||||
|
||||
>Committer 是指拥有 Nightingale 代码仓库写操作权限的贡献者,而且他们也签署了 Nightingale 项目贡献者许可协议(CLA),他们拥有 ccf.org.cn 为后缀的邮箱地址。原则上 Committer 能够自主决策某个代码补丁是否可以合入到 Nightingale 代码仓库,但是项目管委会拥有最终的决策权。
|
||||
> 欢迎每一位用户,包括但不限于以下列方式参与到夜莺开源社区并做出贡献:
|
||||
|
||||
#### 项目管委会成员(PMC Member)
|
||||
1. 在 [github issue](https://github.com/ccfos/nightingale/issues) 中积极参与讨论,参与社区活动;
|
||||
1. 提交代码补丁;
|
||||
1. 翻译、修订、补充和完善[文档](https://n9e.github.io);
|
||||
1. 分享夜莺监控的使用经验,积极布道;
|
||||
1. 提交建议 / 批评;
|
||||
|
||||
> 项目管委会成员,从贡献者或者 Committer 中选举产生,他们拥有 Nightingale 代码仓库的写操作权限,拥有 ccf.org.cn 为后缀的邮箱地址,拥有 Nightingale 社区相关事务的投票权、以及提名 Committer 候选人的权利。 项目管委会作为一个实体,为整个项目的发展全权负责。
|
||||
年度累计向 [CCFOS/NIGHTINGALE](https://github.com/ccfos/nightingale) 提交 **5** 个PR(被合并),或者因为其他贡献被**项目管委会**一致认可,将会自动进入到 **[ACTIVE CONTRIBUTORS](./active-contributors.md)** 列表,并获得 **[CCF ODC](https://www.ccf.org.cn/kyfzwyh/)** 颁发的电子证书,享有夜莺开源社区一定的权益和福利。
|
||||
|
||||
#### 项目管委会主席(PMC Chair)
|
||||
|
||||
> 项目管委会主席采用任命制,由 [CCF ODC](https://www.ccf.org.cn/kyfzwyh/) 从项目管委会成员中任命产生。项目管委会作为一个统一的实体,来管理和领导 Nightingale 项目。管委会主席是 CCF ODC 和项目管委会之间的沟通桥梁,履行特定的项目管理职责。
|
||||
### 提交者(Committer)
|
||||
|
||||
# 沟通机制(Communication)
|
||||
> Committer 是指拥有 [CCFOS/NIGHTINGALE](https://github.com/ccfos/nightingale) 代码仓库写操作权限的贡献者,他们拥有 ccf.org.cn 为后缀的邮箱地址(待上线)。原则上 Committer 能够自主决策某个代码补丁是否可以合入到夜莺代码仓库,但是项目管委会拥有最终的决策权。
|
||||
|
||||
Committer 承担以下一个或多个职责:
|
||||
- 积极回应 Issues;
|
||||
- Review PRs;
|
||||
- 参加开发者例行会议,积极讨论项目规划和技术方案;
|
||||
- 代表夜莺开源社区出席相关技术会议并做演讲;
|
||||
|
||||
Committer 记录并公示于 **[COMMITTERS](./committers.md)** 列表,并获得 **[CCF ODC](https://www.ccf.org.cn/kyfzwyh/)** 颁发的电子证书,以及享有夜莺开源社区的各种权益和福利。
|
||||
|
||||
|
||||
### 项目管委会成员(PMC Member)
|
||||
|
||||
> 项目管委会成员,从 Contributor 或者 Committer 中选举产生,他们拥有 [CCFOS/NIGHTINGALE](https://github.com/ccfos/nightingale) 代码仓库的写操作权限,拥有 ccf.org.cn 为后缀的邮箱地址(待上线),拥有 Nightingale 社区相关事务的投票权、以及提名 Committer 候选人的权利。 项目管委会作为一个实体,为整个项目的发展全权负责。项目管委会成员记录并公示于 **[PMC](./pmc.md)** 列表。
|
||||
|
||||
### 项目管委会主席(PMC Chair)
|
||||
|
||||
> 项目管委会主席采用任命制,由 **[CCF ODC](https://www.ccf.org.cn/kyfzwyh/)** 从项目管委会成员中任命产生。项目管委会作为一个统一的实体,来管理和领导夜莺项目。管委会主席是 CCF ODC 和项目管委会之间的沟通桥梁,履行特定的项目管理职责。
|
||||
|
||||
## 沟通机制(Communication)
|
||||
1. 我们推荐使用邮件列表来反馈建议(待发布);
|
||||
2. 我们推荐使用 [github issue](https://github.com/ccfos/nightingale/issues) 跟踪 bug 和管理需求;
|
||||
3. 我们推荐使用 [github milestone](https://github.com/ccfos/nightingale/milestones) 来管理项目进度和规划;
|
||||
4. 我们推荐使用腾讯会议来定期召开项目例会;
|
||||
4. 我们推荐使用腾讯会议来定期召开项目例会(会议 ID 待发布);
|
||||
|
||||
# 文档(Documentation)
|
||||
## 文档(Documentation)
|
||||
1. 我们推荐使用 [github pages](https://n9e.github.io) 来沉淀文档;
|
||||
2. 我们推荐使用 [gitlink wiki](https://www.gitlink.org.cn/ccfos/nightingale/wiki/faq) 来沉淀FAQ;
|
||||
|
||||
|
||||
# 运营机制(Operation)
|
||||
## 运营机制(Operation)
|
||||
|
||||
1. 我们定期组织用户、贡献者、项目管委会成员之间的沟通会议,讨论项目开发的目标、方案、进度,以及讨论相关需求的合理性、优先级等议题;
|
||||
2. 我们定期组织 meetup (线上&线下),创造良好的用户交流分享环境,并沉淀相关内容到文档站点;
|
||||
3. 我们定期组织 Nightingale 开发者大会,分享 best user story、同步年度开发目标和计划、讨论新技术方向等;
|
||||
3. 我们定期组织夜莺开发者大会,分享 best user story、同步年度开发目标和计划、讨论新技术方向等;
|
||||
|
||||
# 社区指导原则(Philosophy)
|
||||
- 尊重、认可和记录每一位贡献者的工作;
|
||||
## 社区指导原则(Philosophy)
|
||||
|
||||
**尊重、认可和记录每一位贡献者的工作。**
|
||||
|
||||
## 关于提问的原则
|
||||
|
||||
# 关于提问的原则
|
||||
按照**尊重、认可、记录每一位贡献者的工作**原则,我们提倡**高效的提问**,这既是对开发者时间的尊重,也是对整个社区的知识沉淀的贡献:
|
||||
|
||||
1. 提问之前请先查阅 [FAQ](https://www.gitlink.org.cn/ccfos/nightingale/wiki/faq) ;
|
||||
2. 提问之前请先搜索 [github issue](https://github.com/ccfos/nightingale/issues);
|
||||
3. 我们优先推荐通过提交 github issue 来提问,如果[有问题点击这里](https://github.com/ccfos/nightingale/issues/new?assignees=&labels=kind%2Fbug&template=bug_report.yml) | [有需求建议点击这里](https://github.com/ccfos/nightingale/issues/new?assignees=&labels=kind%2Ffeature&template=enhancement.md);
|
||||
4. 最后,我们推荐你加入微信群,针对相关开放式问题,相互交流咨询 (请先加好友:[UlricGO](https://www.gitlink.org.cn/UlricQin/gist/tree/master/self.jpeg) 备注:夜莺加群+姓名+公司,交流群里会有开发者团队和专业、热心的群友回答问题);
|
||||
|
||||
最后,我们推荐你加入微信群,针对相关开放式问题,相互交流咨询 (请先加好友:[UlricGO](https://www.gitlink.org.cn/UlricQin/gist/tree/master/self.jpeg) 备注:夜莺加群+姓名+公司,交流群里会有开发者团队和专业、热心的群友回答问题);
|
||||
0
doc/end-users.md
Normal file
0
doc/end-users.md
Normal file
0
doc/pmc.md
Normal file
0
doc/pmc.md
Normal file
234
doc/server-dash.json
Normal file
234
doc/server-dash.json
Normal file
@@ -0,0 +1,234 @@
|
||||
{
|
||||
"name": "夜莺大盘",
|
||||
"tags": "",
|
||||
"configs": {
|
||||
"var": [],
|
||||
"panels": [
|
||||
{
|
||||
"targets": [
|
||||
{
|
||||
"refId": "A",
|
||||
"expr": "rate(n9e_server_samples_received_total[1m])"
|
||||
}
|
||||
],
|
||||
"name": "每秒接收的数据点个数",
|
||||
"options": {
|
||||
"tooltip": {
|
||||
"mode": "all",
|
||||
"sort": "none"
|
||||
},
|
||||
"legend": {
|
||||
"displayMode": "hidden"
|
||||
},
|
||||
"standardOptions": {},
|
||||
"thresholds": {}
|
||||
},
|
||||
"custom": {
|
||||
"drawStyle": "lines",
|
||||
"lineInterpolation": "smooth",
|
||||
"fillOpacity": 0.5,
|
||||
"stack": "off"
|
||||
},
|
||||
"version": "2.0.0",
|
||||
"type": "timeseries",
|
||||
"layout": {
|
||||
"h": 4,
|
||||
"w": 12,
|
||||
"x": 0,
|
||||
"y": 0,
|
||||
"i": "53fcb9dc-23f9-41e0-bc5e-121eed14c3a4",
|
||||
"isResizable": true
|
||||
},
|
||||
"id": "53fcb9dc-23f9-41e0-bc5e-121eed14c3a4"
|
||||
},
|
||||
{
|
||||
"targets": [
|
||||
{
|
||||
"refId": "A",
|
||||
"expr": "rate(n9e_server_alerts_total[10m])"
|
||||
}
|
||||
],
|
||||
"name": "每秒产生的告警事件个数",
|
||||
"options": {
|
||||
"tooltip": {
|
||||
"mode": "all",
|
||||
"sort": "none"
|
||||
},
|
||||
"legend": {
|
||||
"displayMode": "hidden"
|
||||
},
|
||||
"standardOptions": {},
|
||||
"thresholds": {}
|
||||
},
|
||||
"custom": {
|
||||
"drawStyle": "lines",
|
||||
"lineInterpolation": "smooth",
|
||||
"fillOpacity": 0.5,
|
||||
"stack": "off"
|
||||
},
|
||||
"version": "2.0.0",
|
||||
"type": "timeseries",
|
||||
"layout": {
|
||||
"h": 4,
|
||||
"w": 12,
|
||||
"x": 12,
|
||||
"y": 0,
|
||||
"i": "47fc6252-9cc8-4b53-8e27-0c5c59a47269",
|
||||
"isResizable": true
|
||||
},
|
||||
"id": "f70dcb8b-b58b-4ef9-9e48-f230d9e17140"
|
||||
},
|
||||
{
|
||||
"targets": [
|
||||
{
|
||||
"refId": "A",
|
||||
"expr": "n9e_server_alert_queue_size"
|
||||
}
|
||||
],
|
||||
"name": "告警事件内存队列长度",
|
||||
"options": {
|
||||
"tooltip": {
|
||||
"mode": "all",
|
||||
"sort": "none"
|
||||
},
|
||||
"legend": {
|
||||
"displayMode": "hidden"
|
||||
},
|
||||
"standardOptions": {},
|
||||
"thresholds": {}
|
||||
},
|
||||
"custom": {
|
||||
"drawStyle": "lines",
|
||||
"lineInterpolation": "smooth",
|
||||
"fillOpacity": 0.5,
|
||||
"stack": "off"
|
||||
},
|
||||
"version": "2.0.0",
|
||||
"type": "timeseries",
|
||||
"layout": {
|
||||
"h": 4,
|
||||
"w": 12,
|
||||
"x": 0,
|
||||
"y": 4,
|
||||
"i": "ad1af16c-de0c-45f4-8875-cea4e85d51d0",
|
||||
"isResizable": true
|
||||
},
|
||||
"id": "caf23e58-d907-42b0-9ed6-722c8c6f3c5f"
|
||||
},
|
||||
{
|
||||
"targets": [
|
||||
{
|
||||
"refId": "A",
|
||||
"expr": "n9e_server_http_request_duration_seconds_sum/n9e_server_http_request_duration_seconds_count"
|
||||
}
|
||||
],
|
||||
"name": "数据接收接口平均响应时间(单位:秒)",
|
||||
"options": {
|
||||
"tooltip": {
|
||||
"mode": "all",
|
||||
"sort": "desc"
|
||||
},
|
||||
"legend": {
|
||||
"displayMode": "hidden"
|
||||
},
|
||||
"standardOptions": {},
|
||||
"thresholds": {}
|
||||
},
|
||||
"custom": {
|
||||
"drawStyle": "lines",
|
||||
"lineInterpolation": "smooth",
|
||||
"fillOpacity": 0.5,
|
||||
"stack": "noraml"
|
||||
},
|
||||
"version": "2.0.0",
|
||||
"type": "timeseries",
|
||||
"layout": {
|
||||
"h": 4,
|
||||
"w": 12,
|
||||
"x": 12,
|
||||
"y": 4,
|
||||
"i": "64c3abc2-404c-4462-a82f-c109a21dac91",
|
||||
"isResizable": true
|
||||
},
|
||||
"id": "6b8d2db1-efca-4b9e-b429-57a9d2272bc5"
|
||||
},
|
||||
{
|
||||
"targets": [
|
||||
{
|
||||
"refId": "A",
|
||||
"expr": "n9e_server_sample_queue_size"
|
||||
}
|
||||
],
|
||||
"name": "内存数据队列长度",
|
||||
"options": {
|
||||
"tooltip": {
|
||||
"mode": "all",
|
||||
"sort": "desc"
|
||||
},
|
||||
"legend": {
|
||||
"displayMode": "hidden"
|
||||
},
|
||||
"standardOptions": {},
|
||||
"thresholds": {}
|
||||
},
|
||||
"custom": {
|
||||
"drawStyle": "lines",
|
||||
"lineInterpolation": "smooth",
|
||||
"fillOpacity": 0.5,
|
||||
"stack": "off"
|
||||
},
|
||||
"version": "2.0.0",
|
||||
"type": "timeseries",
|
||||
"layout": {
|
||||
"h": 4,
|
||||
"w": 12,
|
||||
"x": 0,
|
||||
"y": 8,
|
||||
"i": "1c7da942-58c2-40dc-b42f-983e4a35b89b",
|
||||
"isResizable": true
|
||||
},
|
||||
"id": "bd41677d-40d3-482e-bb6e-fbd25df46d87"
|
||||
},
|
||||
{
|
||||
"targets": [
|
||||
{
|
||||
"refId": "A",
|
||||
"expr": "avg(n9e_server_forward_duration_seconds_sum/n9e_server_forward_duration_seconds_count)"
|
||||
}
|
||||
],
|
||||
"name": "数据发往TSDB平均耗时(单位:秒)",
|
||||
"options": {
|
||||
"tooltip": {
|
||||
"mode": "all",
|
||||
"sort": "desc"
|
||||
},
|
||||
"legend": {
|
||||
"displayMode": "hidden"
|
||||
},
|
||||
"standardOptions": {
|
||||
"decimals": 8
|
||||
},
|
||||
"thresholds": {}
|
||||
},
|
||||
"custom": {
|
||||
"drawStyle": "lines",
|
||||
"lineInterpolation": "smooth",
|
||||
"fillOpacity": 0.5,
|
||||
"stack": "noraml"
|
||||
},
|
||||
"version": "2.0.0",
|
||||
"type": "timeseries",
|
||||
"layout": {
|
||||
"h": 4,
|
||||
"w": 12,
|
||||
"x": 12,
|
||||
"y": 8,
|
||||
"i": "eed94a0b-954f-48ac-82e5-a2eada1c8a3d",
|
||||
"isResizable": true
|
||||
},
|
||||
"id": "c8642e72-f384-46a5-8410-1e6be2953c3c"
|
||||
}
|
||||
],
|
||||
"version": "2.0.0"
|
||||
}
|
||||
}
|
||||
@@ -6,6 +6,7 @@ networks:
|
||||
|
||||
services:
|
||||
mysql:
|
||||
platform: linux/x86_64
|
||||
image: "mysql:5.7"
|
||||
container_name: mysql
|
||||
hostname: mysql
|
||||
|
||||
@@ -7,12 +7,6 @@ import (
|
||||
"github.com/tidwall/gjson"
|
||||
)
|
||||
|
||||
// the caller can be called for alerting notify by complete this interface
|
||||
type inter interface {
|
||||
Descript() string
|
||||
Notify([]byte)
|
||||
}
|
||||
|
||||
// N9E complete
|
||||
type N9EPlugin struct {
|
||||
Name string
|
||||
@@ -37,9 +31,16 @@ func (n *N9EPlugin) Notify(bs []byte) {
|
||||
}
|
||||
}
|
||||
|
||||
func (n *N9EPlugin) NotifyMaintainer(bs []byte) {
|
||||
fmt.Println("do something... begin")
|
||||
result := string(bs)
|
||||
fmt.Println(result)
|
||||
fmt.Println("do something... end")
|
||||
}
|
||||
|
||||
// will be loaded for alertingCall , The first letter must be capitalized to be exported
|
||||
var N9eCaller = N9EPlugin{
|
||||
Name: "n9e",
|
||||
Description: "演示告警通过动态链接库方式通知",
|
||||
Name: "N9EPlugin",
|
||||
Description: "Notify by lib",
|
||||
BuildAt: time.Now().Local().Format("2006/01/02 15:04:05"),
|
||||
}
|
||||
|
||||
@@ -22,8 +22,8 @@ type TagFilter struct {
|
||||
type AlertMute struct {
|
||||
Id int64 `json:"id" gorm:"primaryKey"`
|
||||
GroupId int64 `json:"group_id"`
|
||||
Prod string `json:"prod"` // product empty means n9e
|
||||
Cluster string `json:"cluster"`
|
||||
Prod string `json:"prod"` // product empty means n9e
|
||||
Cluster string `json:"cluster"` // take effect by clusters, seperated by space
|
||||
Tags ormx.JSONArr `json:"tags"`
|
||||
Cause string `json:"cause"`
|
||||
Btime int64 `json:"btime"`
|
||||
@@ -44,7 +44,7 @@ func AlertMuteGets(prods []string, bgid int64, query string) (lst []AlertMute, e
|
||||
arr := strings.Fields(query)
|
||||
for i := 0; i < len(arr); i++ {
|
||||
qarg := "%" + arr[i] + "%"
|
||||
session = session.Where("cause like ?", qarg, qarg)
|
||||
session = session.Where("cause like ?", qarg)
|
||||
}
|
||||
}
|
||||
|
||||
@@ -66,6 +66,10 @@ func (m *AlertMute) Verify() error {
|
||||
return errors.New("cluster invalid")
|
||||
}
|
||||
|
||||
if IsClusterAll(m.Cluster) {
|
||||
m.Cluster = ClusterAll
|
||||
}
|
||||
|
||||
if m.Etime <= m.Btime {
|
||||
return fmt.Errorf("Oops... etime(%d) <= btime(%d)", m.Etime, m.Btime)
|
||||
}
|
||||
@@ -123,7 +127,7 @@ func AlertMuteDel(ids []int64) error {
|
||||
func AlertMuteStatistics(cluster string) (*Statistics, error) {
|
||||
session := DB().Model(&AlertMute{}).Select("count(*) as total", "max(create_at) as last_updated")
|
||||
if cluster != "" {
|
||||
session = session.Where("cluster = ?", cluster)
|
||||
session = session.Where("(cluster like ? or cluster = ?)", "%"+cluster+"%", ClusterAll)
|
||||
}
|
||||
|
||||
var stats []*Statistics
|
||||
@@ -146,10 +150,19 @@ func AlertMuteGetsByCluster(cluster string) ([]*AlertMute, error) {
|
||||
// get my cluster's mutes
|
||||
session := DB().Model(&AlertMute{})
|
||||
if cluster != "" {
|
||||
session = session.Where("cluster = ?", cluster)
|
||||
session = session.Where("(cluster like ? or cluster = ?)", "%"+cluster+"%", ClusterAll)
|
||||
}
|
||||
|
||||
var lst []*AlertMute
|
||||
var mlst []*AlertMute
|
||||
err = session.Find(&lst).Error
|
||||
return lst, err
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
for _, m := range lst {
|
||||
if MatchCluster(m.Cluster, cluster) {
|
||||
mlst = append(mlst, m)
|
||||
}
|
||||
}
|
||||
return mlst, err
|
||||
}
|
||||
|
||||
@@ -67,6 +67,10 @@ func (ar *AlertRule) Verify() error {
|
||||
return errors.New("cluster is blank")
|
||||
}
|
||||
|
||||
if IsClusterAll(ar.Cluster) {
|
||||
ar.Cluster = ClusterAll
|
||||
}
|
||||
|
||||
if str.Dangerous(ar.Name) {
|
||||
return errors.New("Name has invalid characters")
|
||||
}
|
||||
@@ -301,7 +305,7 @@ func AlertRuleGetsByCluster(cluster string) ([]*AlertRule, error) {
|
||||
session := DB().Where("disabled = ? and prod = ?", 0, "")
|
||||
|
||||
if cluster != "" {
|
||||
session = session.Where("(cluster like ? or cluster like ?)", "%"+cluster+"%", "%"+ClusterAll+"%")
|
||||
session = session.Where("(cluster like ? or cluster = ?)", "%"+cluster+"%", ClusterAll)
|
||||
}
|
||||
|
||||
var lst []*AlertRule
|
||||
@@ -333,7 +337,7 @@ func AlertRuleGetsByCluster(cluster string) ([]*AlertRule, error) {
|
||||
}
|
||||
|
||||
func AlertRulesGetsBy(prods []string, query string) ([]*AlertRule, error) {
|
||||
session := DB().Where("disabled = ? and prod in (?)", 0, prods)
|
||||
session := DB().Where("prod in (?)", prods)
|
||||
|
||||
if query != "" {
|
||||
arr := strings.Fields(query)
|
||||
@@ -393,7 +397,7 @@ func AlertRuleStatistics(cluster string) (*Statistics, error) {
|
||||
|
||||
if cluster != "" {
|
||||
// 简略的判断,当一个clustername是另一个clustername的substring的时候,会出现stats与预期不符,不影响使用
|
||||
session = session.Where("(cluster like ? or cluster like ?)", "%"+cluster+"%", "%"+ClusterAll+"%")
|
||||
session = session.Where("(cluster like ? or cluster = ?)", "%"+cluster+"%", ClusterAll)
|
||||
}
|
||||
|
||||
var stats []*Statistics
|
||||
|
||||
@@ -14,7 +14,7 @@ import (
|
||||
type AlertSubscribe struct {
|
||||
Id int64 `json:"id" gorm:"primaryKey"`
|
||||
GroupId int64 `json:"group_id"`
|
||||
Cluster string `json:"cluster"`
|
||||
Cluster string `json:"cluster"` // take effect by clusters, seperated by space
|
||||
RuleId int64 `json:"rule_id"`
|
||||
RuleName string `json:"rule_name" gorm:"-"` // for fe
|
||||
Tags ormx.JSONArr `json:"tags"`
|
||||
@@ -59,6 +59,10 @@ func (s *AlertSubscribe) Verify() error {
|
||||
return errors.New("cluster invalid")
|
||||
}
|
||||
|
||||
if IsClusterAll(s.Cluster) {
|
||||
s.Cluster = ClusterAll
|
||||
}
|
||||
|
||||
if err := s.Parse(); err != nil {
|
||||
return err
|
||||
}
|
||||
@@ -202,7 +206,7 @@ func AlertSubscribeStatistics(cluster string) (*Statistics, error) {
|
||||
session := DB().Model(&AlertSubscribe{}).Select("count(*) as total", "max(update_at) as last_updated")
|
||||
|
||||
if cluster != "" {
|
||||
session = session.Where("cluster = ?", cluster)
|
||||
session = session.Where("(cluster like ? or cluster = ?)", "%"+cluster+"%", ClusterAll)
|
||||
}
|
||||
|
||||
var stats []*Statistics
|
||||
@@ -218,10 +222,19 @@ func AlertSubscribeGetsByCluster(cluster string) ([]*AlertSubscribe, error) {
|
||||
// get my cluster's subscribes
|
||||
session := DB().Model(&AlertSubscribe{})
|
||||
if cluster != "" {
|
||||
session = session.Where("cluster = ?", cluster)
|
||||
session = session.Where("(cluster like ? or cluster = ?)", "%"+cluster+"%", ClusterAll)
|
||||
}
|
||||
|
||||
var lst []*AlertSubscribe
|
||||
var slst []*AlertSubscribe
|
||||
err := session.Find(&lst).Error
|
||||
return lst, err
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
for _, s := range lst {
|
||||
if MatchCluster(s.Cluster, cluster) {
|
||||
slst = append(slst, s)
|
||||
}
|
||||
}
|
||||
return slst, err
|
||||
}
|
||||
|
||||
@@ -60,3 +60,13 @@ func MatchCluster(ruleCluster, targetCluster string) bool {
|
||||
}
|
||||
return false
|
||||
}
|
||||
|
||||
func IsClusterAll(ruleCluster string) bool {
|
||||
clusters := strings.Fields(ruleCluster)
|
||||
for _, c := range clusters {
|
||||
if c == ClusterAll {
|
||||
return true
|
||||
}
|
||||
}
|
||||
return false
|
||||
}
|
||||
|
||||
@@ -50,6 +50,10 @@ func (re *RecordingRule) Verify() error {
|
||||
return errors.New("cluster is blank")
|
||||
}
|
||||
|
||||
if IsClusterAll(re.Cluster) {
|
||||
re.Cluster = ClusterAll
|
||||
}
|
||||
|
||||
if !model.MetricNameRE.MatchString(re.Name) {
|
||||
return errors.New("Name has invalid chreacters")
|
||||
}
|
||||
@@ -190,10 +194,10 @@ func RecordingRuleGetById(id int64) (*RecordingRule, error) {
|
||||
}
|
||||
|
||||
func RecordingRuleGetsByCluster(cluster string) ([]*RecordingRule, error) {
|
||||
session := DB().Where("disabled = ? and prod = ?", 0, "")
|
||||
session := DB().Where("disabled = ?", 0)
|
||||
|
||||
if cluster != "" {
|
||||
session = session.Where("(cluster like ? or cluster like ?)", "%"+cluster+"%", "%"+ClusterAll+"%")
|
||||
session = session.Where("(cluster like ? or cluster = ?)", "%"+cluster+"%", ClusterAll)
|
||||
}
|
||||
|
||||
var lst []*RecordingRule
|
||||
@@ -228,7 +232,7 @@ func RecordingRuleStatistics(cluster string) (*Statistics, error) {
|
||||
session := DB().Model(&RecordingRule{}).Select("count(*) as total", "max(update_at) as last_updated")
|
||||
if cluster != "" {
|
||||
// 简略的判断,当一个clustername是另一个clustername的substring的时候,会出现stats与预期不符,不影响使用
|
||||
session = session.Where("(cluster like ? or cluster like ?)", "%"+cluster+"%", "%"+ClusterAll+"%")
|
||||
session = session.Where("(cluster like ? or cluster = ?)", "%"+cluster+"%", ClusterAll)
|
||||
}
|
||||
|
||||
var stats []*Statistics
|
||||
|
||||
9
src/notifier/notifier.go
Normal file
9
src/notifier/notifier.go
Normal file
@@ -0,0 +1,9 @@
|
||||
package notifier
|
||||
|
||||
type Notifier interface {
|
||||
Descript() string
|
||||
Notify([]byte)
|
||||
NotifyMaintainer([]byte)
|
||||
}
|
||||
|
||||
var Instance Notifier
|
||||
@@ -13,7 +13,7 @@
|
||||
|
||||
// Package v1 provides bindings to the Prometheus HTTP API v1:
|
||||
// http://prometheus.io/docs/querying/api/
|
||||
package reader
|
||||
package prom
|
||||
|
||||
import (
|
||||
"context"
|
||||
@@ -558,10 +558,11 @@ func (qr *queryResult) UnmarshalJSON(b []byte) error {
|
||||
// NewAPI returns a new API for the client.
|
||||
//
|
||||
// It is safe to use the returned API from multiple goroutines.
|
||||
func NewAPI(c api.Client) API {
|
||||
func NewAPI(c api.Client, opt ClientOptions) API {
|
||||
return &httpAPI{
|
||||
client: &apiClientImpl{
|
||||
client: c,
|
||||
opt: opt,
|
||||
},
|
||||
}
|
||||
}
|
||||
@@ -891,6 +892,7 @@ type apiClient interface {
|
||||
|
||||
type apiClientImpl struct {
|
||||
client api.Client
|
||||
opt ClientOptions
|
||||
}
|
||||
|
||||
type apiResponse struct {
|
||||
@@ -921,16 +923,16 @@ func (h *apiClientImpl) URL(ep string, args map[string]string) *url.URL {
|
||||
}
|
||||
|
||||
func (h *apiClientImpl) Do(ctx context.Context, req *http.Request) (*http.Response, []byte, Warnings, error) {
|
||||
if Reader.Opts.BasicAuthUser != "" && Reader.Opts.BasicAuthPass != "" {
|
||||
req.SetBasicAuth(Reader.Opts.BasicAuthUser, Reader.Opts.BasicAuthPass)
|
||||
if h.opt.BasicAuthUser != "" && h.opt.BasicAuthPass != "" {
|
||||
req.SetBasicAuth(h.opt.BasicAuthUser, h.opt.BasicAuthPass)
|
||||
}
|
||||
|
||||
headerCount := len(Reader.Opts.Headers)
|
||||
headerCount := len(h.opt.Headers)
|
||||
if headerCount > 0 && headerCount%2 == 0 {
|
||||
for i := 0; i < len(Reader.Opts.Headers); i += 2 {
|
||||
req.Header.Add(Reader.Opts.Headers[i], Reader.Opts.Headers[i+1])
|
||||
if Reader.Opts.Headers[i] == "Host" {
|
||||
req.Host = Reader.Opts.Headers[i+1]
|
||||
for i := 0; i < len(h.opt.Headers); i += 2 {
|
||||
req.Header.Add(h.opt.Headers[i], h.opt.Headers[i+1])
|
||||
if h.opt.Headers[i] == "Host" {
|
||||
req.Host = h.opt.Headers[i+1]
|
||||
}
|
||||
}
|
||||
}
|
||||
7
src/pkg/prom/client_option.go
Normal file
7
src/pkg/prom/client_option.go
Normal file
@@ -0,0 +1,7 @@
|
||||
package prom
|
||||
|
||||
type ClientOptions struct {
|
||||
BasicAuthUser string
|
||||
BasicAuthPass string
|
||||
Headers []string
|
||||
}
|
||||
@@ -2,8 +2,11 @@ package config
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"log"
|
||||
"net"
|
||||
"os"
|
||||
"plugin"
|
||||
"runtime"
|
||||
"strings"
|
||||
"sync"
|
||||
"time"
|
||||
@@ -11,6 +14,7 @@ import (
|
||||
"github.com/gin-gonic/gin"
|
||||
"github.com/koding/multiconfig"
|
||||
|
||||
"github.com/didi/nightingale/v5/src/notifier"
|
||||
"github.com/didi/nightingale/v5/src/pkg/httpx"
|
||||
"github.com/didi/nightingale/v5/src/pkg/logx"
|
||||
"github.com/didi/nightingale/v5/src/pkg/ormx"
|
||||
@@ -100,6 +104,33 @@ func MustLoad(fpaths ...string) {
|
||||
}
|
||||
}
|
||||
|
||||
if C.Alerting.CallPlugin.Enable {
|
||||
if runtime.GOOS == "windows" {
|
||||
fmt.Println("notify plugin on unsupported os:", runtime.GOOS)
|
||||
os.Exit(1)
|
||||
}
|
||||
|
||||
p, err := plugin.Open(C.Alerting.CallPlugin.PluginPath)
|
||||
if err != nil {
|
||||
fmt.Println("failed to load plugin:", err)
|
||||
os.Exit(1)
|
||||
}
|
||||
|
||||
caller, err := p.Lookup(C.Alerting.CallPlugin.Caller)
|
||||
if err != nil {
|
||||
fmt.Println("failed to lookup plugin Caller:", err)
|
||||
os.Exit(1)
|
||||
}
|
||||
|
||||
ins, ok := caller.(notifier.Notifier)
|
||||
if !ok {
|
||||
log.Println("notifier interface not implemented")
|
||||
os.Exit(1)
|
||||
}
|
||||
|
||||
notifier.Instance = ins
|
||||
}
|
||||
|
||||
if C.WriterOpt.QueueMaxSize <= 0 {
|
||||
C.WriterOpt.QueueMaxSize = 100000
|
||||
}
|
||||
|
||||
@@ -9,8 +9,6 @@ import (
|
||||
"net/http"
|
||||
"os/exec"
|
||||
"path"
|
||||
"plugin"
|
||||
"runtime"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
@@ -22,6 +20,7 @@ import (
|
||||
"github.com/toolkits/pkg/slice"
|
||||
|
||||
"github.com/didi/nightingale/v5/src/models"
|
||||
"github.com/didi/nightingale/v5/src/notifier"
|
||||
"github.com/didi/nightingale/v5/src/pkg/sys"
|
||||
"github.com/didi/nightingale/v5/src/pkg/tplx"
|
||||
"github.com/didi/nightingale/v5/src/server/common/sender"
|
||||
@@ -103,7 +102,6 @@ func alertingRedisPub(bs []byte) {
|
||||
|
||||
func handleNotice(notice Notice, bs []byte) {
|
||||
alertingCallScript(bs)
|
||||
|
||||
alertingCallPlugin(bs)
|
||||
|
||||
if len(config.C.Alerting.NotifyBuiltinChannels) == 0 {
|
||||
@@ -398,11 +396,6 @@ func alertingCallScript(stdinBytes []byte) {
|
||||
logger.Infof("event_notify: exec %s output: %s", fpath, buf.String())
|
||||
}
|
||||
|
||||
type Notifier interface {
|
||||
Descript() string
|
||||
Notify([]byte)
|
||||
}
|
||||
|
||||
// call notify.so via golang plugin build
|
||||
// ig. etc/script/notify/notify.so
|
||||
func alertingCallPlugin(stdinBytes []byte) {
|
||||
@@ -410,26 +403,8 @@ func alertingCallPlugin(stdinBytes []byte) {
|
||||
return
|
||||
}
|
||||
|
||||
if runtime.GOOS == "windows" {
|
||||
logger.Errorf("call notify plugin on unsupported os: %s", runtime.GOOS)
|
||||
return
|
||||
}
|
||||
|
||||
p, err := plugin.Open(config.C.Alerting.CallPlugin.PluginPath)
|
||||
if err != nil {
|
||||
logger.Errorf("failed to open notify plugin: %v", err)
|
||||
return
|
||||
}
|
||||
caller, err := p.Lookup(config.C.Alerting.CallPlugin.Caller)
|
||||
if err != nil {
|
||||
logger.Errorf("failed to load caller: %v", err)
|
||||
return
|
||||
}
|
||||
notifier, ok := caller.(Notifier)
|
||||
if !ok {
|
||||
logger.Errorf("notifier interface not implemented): %v", err)
|
||||
return
|
||||
}
|
||||
notifier.Notify(stdinBytes)
|
||||
logger.Debugf("alertingCallPlugin done. %s", notifier.Descript())
|
||||
logger.Debugf("alertingCallPlugin begin")
|
||||
logger.Debugf("payload:", string(stdinBytes))
|
||||
notifier.Instance.Notify(stdinBytes)
|
||||
logger.Debugf("alertingCallPlugin done")
|
||||
}
|
||||
|
||||
@@ -1,8 +1,11 @@
|
||||
package engine
|
||||
|
||||
import (
|
||||
"encoding/json"
|
||||
"time"
|
||||
|
||||
"github.com/didi/nightingale/v5/src/models"
|
||||
"github.com/didi/nightingale/v5/src/notifier"
|
||||
"github.com/didi/nightingale/v5/src/server/common/sender"
|
||||
"github.com/didi/nightingale/v5/src/server/config"
|
||||
"github.com/didi/nightingale/v5/src/server/memsto"
|
||||
@@ -10,17 +13,49 @@ import (
|
||||
"github.com/toolkits/pkg/logger"
|
||||
)
|
||||
|
||||
// notify to maintainer to handle the error
|
||||
func notifyToMaintainer(e error, title string) {
|
||||
type MaintainMessage struct {
|
||||
Tos []*models.User `json:"tos"`
|
||||
Title string `json:"title"`
|
||||
Content string `json:"content"`
|
||||
}
|
||||
|
||||
logger.Errorf("notifyToMaintainer,title:%s, error:%v", title, e)
|
||||
|
||||
if len(config.C.Alerting.NotifyBuiltinChannels) == 0 {
|
||||
func notifyMaintainerWithPlugin(e error, title, triggerTime string, users []*models.User) {
|
||||
if !config.C.Alerting.CallPlugin.Enable {
|
||||
return
|
||||
}
|
||||
|
||||
maintainerUsers := memsto.UserCache.GetMaintainerUsers()
|
||||
if len(maintainerUsers) == 0 {
|
||||
stdinBytes, err := json.Marshal(MaintainMessage{
|
||||
Tos: users,
|
||||
Title: title,
|
||||
Content: "Title: " + title + "\nContent: " + e.Error() + "\nTime: " + triggerTime,
|
||||
})
|
||||
|
||||
if err != nil {
|
||||
logger.Error("failed to marshal MaintainMessage:", err)
|
||||
return
|
||||
}
|
||||
|
||||
notifier.Instance.NotifyMaintainer(stdinBytes)
|
||||
logger.Debugf("notify maintainer with plugin done")
|
||||
}
|
||||
|
||||
// notify to maintainer to handle the error
|
||||
func notifyToMaintainer(e error, title string) {
|
||||
logger.Errorf("notifyToMaintainer, title:%s, error:%v", title, e)
|
||||
|
||||
users := memsto.UserCache.GetMaintainerUsers()
|
||||
if len(users) == 0 {
|
||||
return
|
||||
}
|
||||
|
||||
triggerTime := time.Now().Format("2006/01/02 - 15:04:05")
|
||||
|
||||
notifyMaintainerWithPlugin(e, title, triggerTime, users)
|
||||
notifyMaintainerWithBuiltin(e, title, triggerTime, users)
|
||||
}
|
||||
|
||||
func notifyMaintainerWithBuiltin(e error, title, triggerTime string, users []*models.User) {
|
||||
if len(config.C.Alerting.NotifyBuiltinChannels) == 0 {
|
||||
return
|
||||
}
|
||||
|
||||
@@ -30,7 +65,7 @@ func notifyToMaintainer(e error, title string) {
|
||||
dingtalkset := make(map[string]struct{})
|
||||
feishuset := make(map[string]struct{})
|
||||
|
||||
for _, user := range maintainerUsers {
|
||||
for _, user := range users {
|
||||
if user.Email != "" {
|
||||
emailset[user.Email] = struct{}{}
|
||||
}
|
||||
@@ -62,7 +97,6 @@ func notifyToMaintainer(e error, title string) {
|
||||
}
|
||||
|
||||
phones := StringSetKeys(phoneset)
|
||||
triggerTime := time.Now().Format("2006/01/02 - 15:04:05")
|
||||
|
||||
for _, ch := range config.C.Alerting.NotifyBuiltinChannels {
|
||||
switch ch {
|
||||
@@ -70,13 +104,13 @@ func notifyToMaintainer(e error, title string) {
|
||||
if len(emailset) == 0 {
|
||||
continue
|
||||
}
|
||||
content := "【内部处理错误】当前标题: " + title + "\n【内部处理错误】当前异常: " + e.Error() + "\n【内部处理错误】发送时间: " + triggerTime
|
||||
content := "Title: " + title + "\nContent: " + e.Error() + "\nTime: " + triggerTime
|
||||
sender.WriteEmail(title, content, StringSetKeys(emailset))
|
||||
case "dingtalk":
|
||||
if len(dingtalkset) == 0 {
|
||||
continue
|
||||
}
|
||||
content := "**【内部处理错误】当前标题: **" + title + "\n**【内部处理错误】当前异常: **" + e.Error() + "\n**【内部处理错误】发送时间: **" + triggerTime
|
||||
content := "**Title: **" + title + "\n**Content: **" + e.Error() + "\n**Time: **" + triggerTime
|
||||
sender.SendDingtalk(sender.DingtalkMessage{
|
||||
Title: title,
|
||||
Text: content,
|
||||
@@ -87,7 +121,7 @@ func notifyToMaintainer(e error, title string) {
|
||||
if len(wecomset) == 0 {
|
||||
continue
|
||||
}
|
||||
content := "**【内部处理错误】当前标题: **" + title + "\n**【内部处理错误】当前异常: **" + e.Error() + "\n**【内部处理错误】发送时间: **" + triggerTime
|
||||
content := "**Title: **" + title + "\n**Content: **" + e.Error() + "\n**Time: **" + triggerTime
|
||||
sender.SendWecom(sender.WecomMessage{
|
||||
Text: content,
|
||||
Tokens: StringSetKeys(wecomset),
|
||||
@@ -97,7 +131,7 @@ func notifyToMaintainer(e error, title string) {
|
||||
continue
|
||||
}
|
||||
|
||||
content := "【内部处理错误】当前标题: " + title + "\n【内部处理错误】当前异常: " + e.Error() + "\n【内部处理错误】发送时间: " + triggerTime
|
||||
content := "Title: " + title + "\nContent: " + e.Error() + "\nTime: " + triggerTime
|
||||
sender.SendFeishu(sender.FeishuMessage{
|
||||
Text: content,
|
||||
AtMobiles: phones,
|
||||
|
||||
@@ -15,6 +15,7 @@ import (
|
||||
"github.com/toolkits/pkg/str"
|
||||
|
||||
"github.com/didi/nightingale/v5/src/models"
|
||||
"github.com/didi/nightingale/v5/src/pkg/prom"
|
||||
"github.com/didi/nightingale/v5/src/server/common/conv"
|
||||
"github.com/didi/nightingale/v5/src/server/config"
|
||||
"github.com/didi/nightingale/v5/src/server/memsto"
|
||||
@@ -111,12 +112,11 @@ func (r RuleEval) Work() {
|
||||
var value model.Value
|
||||
var err error
|
||||
if r.rule.Algorithm == "" {
|
||||
var warnings reader.Warnings
|
||||
value, warnings, err = reader.Reader.Client.Query(context.Background(), promql, time.Now())
|
||||
var warnings prom.Warnings
|
||||
value, warnings, err = reader.Client.Query(context.Background(), promql, time.Now())
|
||||
if err != nil {
|
||||
logger.Errorf("rule_eval:%d promql:%s, error:%v", r.RuleID(), promql, err)
|
||||
// 告警查询prometheus逻辑出错,发告警信息给管理员
|
||||
notifyToMaintainer(err, "查询prometheus出错")
|
||||
notifyToMaintainer(err, "failed to query prometheus")
|
||||
return
|
||||
}
|
||||
|
||||
@@ -189,7 +189,6 @@ func (ws *WorkersType) Build(rids []int64) {
|
||||
elst, err := models.AlertCurEventGetByRule(rules[hash].Id)
|
||||
if err != nil {
|
||||
logger.Errorf("worker_build: AlertCurEventGetByRule failed: %v", err)
|
||||
notifyToMaintainer(err, "AlertCurEventGetByRule Error,ruleID="+fmt.Sprint(rules[hash].Id))
|
||||
continue
|
||||
}
|
||||
|
||||
@@ -557,7 +556,7 @@ func (r RecordingRuleEval) Work() {
|
||||
return
|
||||
}
|
||||
|
||||
value, warnings, err := reader.Reader.Client.Query(context.Background(), promql, time.Now())
|
||||
value, warnings, err := reader.Client.Query(context.Background(), promql, time.Now())
|
||||
if err != nil {
|
||||
logger.Errorf("recording_rule_eval:%d promql:%s, error:%v", r.RuleID(), promql, err)
|
||||
return
|
||||
|
||||
@@ -5,16 +5,12 @@ import (
|
||||
"net/http"
|
||||
"time"
|
||||
|
||||
"github.com/didi/nightingale/v5/src/pkg/prom"
|
||||
"github.com/didi/nightingale/v5/src/server/config"
|
||||
"github.com/prometheus/client_golang/api"
|
||||
)
|
||||
|
||||
type ReaderType struct {
|
||||
Opts config.ReaderOptions
|
||||
Client API
|
||||
}
|
||||
|
||||
var Reader ReaderType
|
||||
var Client prom.API
|
||||
|
||||
func Init(opts config.ReaderOptions) error {
|
||||
cli, err := api.NewClient(api.Config{
|
||||
@@ -40,10 +36,11 @@ func Init(opts config.ReaderOptions) error {
|
||||
return err
|
||||
}
|
||||
|
||||
Reader = ReaderType{
|
||||
Opts: opts,
|
||||
Client: NewAPI(cli),
|
||||
}
|
||||
Client = prom.NewAPI(cli, prom.ClientOptions{
|
||||
BasicAuthUser: opts.BasicAuthUser,
|
||||
BasicAuthPass: opts.BasicAuthPass,
|
||||
Headers: opts.Headers,
|
||||
})
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
@@ -2,6 +2,7 @@ package router
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"strings"
|
||||
|
||||
"github.com/didi/nightingale/v5/src/models"
|
||||
"github.com/didi/nightingale/v5/src/server/config"
|
||||
@@ -14,14 +15,46 @@ import (
|
||||
)
|
||||
|
||||
func pushEventToQueue(c *gin.Context) {
|
||||
var event models.AlertCurEvent
|
||||
var event *models.AlertCurEvent
|
||||
ginx.BindJSON(c, &event)
|
||||
if event.RuleId == 0 {
|
||||
ginx.Bomb(200, "event is illegal")
|
||||
}
|
||||
|
||||
event.TagsMap = make(map[string]string)
|
||||
for i := 0; i < len(event.TagsJSON); i++ {
|
||||
pair := strings.TrimSpace(event.TagsJSON[i])
|
||||
if pair == "" {
|
||||
continue
|
||||
}
|
||||
|
||||
arr := strings.Split(pair, "=")
|
||||
if len(arr) != 2 {
|
||||
continue
|
||||
}
|
||||
|
||||
event.TagsMap[arr[0]] = arr[1]
|
||||
}
|
||||
|
||||
if err := event.ParseRuleNote(); err != nil {
|
||||
event.RuleNote = fmt.Sprintf("failed to parse rule note: %v", err)
|
||||
}
|
||||
|
||||
// 如果 rule_note 中有 ; 前缀,则使用 rule_note 替换 tags 中的内容
|
||||
if strings.HasPrefix(event.RuleNote, ";") {
|
||||
event.RuleNote = strings.TrimPrefix(event.RuleNote, ";")
|
||||
event.Tags = strings.ReplaceAll(event.RuleNote, " ", ",,")
|
||||
event.TagsJSON = strings.Split(event.Tags, ",,")
|
||||
} else {
|
||||
event.Tags = strings.Join(event.TagsJSON, ",,")
|
||||
}
|
||||
|
||||
event.Callbacks = strings.Join(event.CallbacksJSON, " ")
|
||||
event.NotifyChannels = strings.Join(event.NotifyChannelsJSON, " ")
|
||||
event.NotifyGroups = strings.Join(event.NotifyGroupsJSON, " ")
|
||||
|
||||
promstat.CounterAlertsTotal.WithLabelValues(config.C.ClusterName).Inc()
|
||||
engine.LogEvent(&event, "http_push_queue")
|
||||
engine.LogEvent(event, "http_push_queue")
|
||||
if !engine.EventQueue.PushFront(event) {
|
||||
msg := fmt.Sprintf("event:%+v push_queue err: queue is full", event)
|
||||
ginx.Bomb(200, msg)
|
||||
|
||||
@@ -38,7 +38,7 @@ func queryPromql(c *gin.Context) {
|
||||
var f promqlForm
|
||||
ginx.BindJSON(c, &f)
|
||||
|
||||
value, warnings, err := reader.Reader.Client.Query(c.Request.Context(), f.PromQL, time.Now())
|
||||
value, warnings, err := reader.Client.Query(c.Request.Context(), f.PromQL, time.Now())
|
||||
if err != nil {
|
||||
c.String(500, "promql:%s error:%v", f.PromQL, err)
|
||||
return
|
||||
|
||||
@@ -30,7 +30,7 @@ type Usage struct {
|
||||
}
|
||||
|
||||
func getSamples() (float64, error) {
|
||||
value, warns, err := reader.Reader.Client.Query(context.Background(), request, time.Now())
|
||||
value, warns, err := reader.Client.Query(context.Background(), request, time.Now())
|
||||
if err != nil {
|
||||
return 0, err
|
||||
}
|
||||
|
||||
@@ -11,14 +11,17 @@ import (
|
||||
"sync"
|
||||
"time"
|
||||
|
||||
"github.com/didi/nightingale/v5/src/pkg/prom"
|
||||
"github.com/didi/nightingale/v5/src/webapi/config"
|
||||
"github.com/prometheus/client_golang/api"
|
||||
"github.com/toolkits/pkg/logger"
|
||||
"github.com/toolkits/pkg/net/httplib"
|
||||
)
|
||||
|
||||
type ClusterType struct {
|
||||
Opts config.ClusterOptions
|
||||
Transport *http.Transport
|
||||
Opts config.ClusterOptions
|
||||
Transport *http.Transport
|
||||
PromClient prom.API
|
||||
}
|
||||
|
||||
type ClustersType struct {
|
||||
@@ -61,18 +64,7 @@ func initClustersFromConfig() error {
|
||||
opts := config.C.Clusters
|
||||
|
||||
for i := 0; i < len(opts); i++ {
|
||||
cluster := &ClusterType{
|
||||
Opts: opts[i],
|
||||
Transport: &http.Transport{
|
||||
// TLSClientConfig: tlsConfig,
|
||||
Proxy: http.ProxyFromEnvironment,
|
||||
DialContext: (&net.Dialer{
|
||||
Timeout: time.Duration(opts[i].DialTimeout) * time.Millisecond,
|
||||
}).DialContext,
|
||||
ResponseHeaderTimeout: time.Duration(opts[i].Timeout) * time.Millisecond,
|
||||
MaxIdleConnsPerHost: opts[i].MaxIdleConnsPerHost,
|
||||
},
|
||||
}
|
||||
cluster := newClusterByOption(opts[i])
|
||||
Clusters.Put(opts[i].Name, cluster)
|
||||
}
|
||||
|
||||
@@ -173,21 +165,41 @@ func loadClustersFromAPI() {
|
||||
MaxIdleConnsPerHost: 32,
|
||||
}
|
||||
|
||||
cluster := &ClusterType{
|
||||
Opts: opt,
|
||||
Transport: &http.Transport{
|
||||
// TLSClientConfig: tlsConfig,
|
||||
Proxy: http.ProxyFromEnvironment,
|
||||
DialContext: (&net.Dialer{
|
||||
Timeout: time.Duration(opt.DialTimeout) * time.Millisecond,
|
||||
}).DialContext,
|
||||
ResponseHeaderTimeout: time.Duration(opt.Timeout) * time.Millisecond,
|
||||
MaxIdleConnsPerHost: opt.MaxIdleConnsPerHost,
|
||||
},
|
||||
}
|
||||
|
||||
Clusters.Put(item.Name, cluster)
|
||||
Clusters.Put(item.Name, newClusterByOption(opt))
|
||||
continue
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func newClusterByOption(opt config.ClusterOptions) *ClusterType {
|
||||
transport := &http.Transport{
|
||||
// TLSClientConfig: tlsConfig,
|
||||
Proxy: http.ProxyFromEnvironment,
|
||||
DialContext: (&net.Dialer{
|
||||
Timeout: time.Duration(opt.DialTimeout) * time.Millisecond,
|
||||
}).DialContext,
|
||||
ResponseHeaderTimeout: time.Duration(opt.Timeout) * time.Millisecond,
|
||||
MaxIdleConnsPerHost: opt.MaxIdleConnsPerHost,
|
||||
}
|
||||
|
||||
cli, err := api.NewClient(api.Config{
|
||||
Address: opt.Prom,
|
||||
RoundTripper: transport,
|
||||
})
|
||||
|
||||
if err != nil {
|
||||
logger.Errorf("new client fail: %v", err)
|
||||
}
|
||||
|
||||
cluster := &ClusterType{
|
||||
Opts: opt,
|
||||
Transport: transport,
|
||||
PromClient: prom.NewAPI(cli, prom.ClientOptions{
|
||||
BasicAuthUser: opt.BasicAuthUser,
|
||||
BasicAuthPass: opt.BasicAuthPass,
|
||||
Headers: opt.Headers,
|
||||
}),
|
||||
}
|
||||
|
||||
return cluster
|
||||
}
|
||||
|
||||
@@ -98,10 +98,15 @@ func configRoute(r *gin.Engine, version string) {
|
||||
|
||||
pages := r.Group(pagesPrefix)
|
||||
{
|
||||
|
||||
if config.C.AnonymousAccess.PromQuerier {
|
||||
pages.Any("/prometheus/*url", prometheusProxy)
|
||||
|
||||
pages.POST("/query-range-batch", promBatchQueryRange)
|
||||
} else {
|
||||
pages.Any("/prometheus/*url", auth(), prometheusProxy)
|
||||
|
||||
pages.POST("/query-range-batch", auth(), promBatchQueryRange)
|
||||
}
|
||||
|
||||
pages.GET("/version", func(c *gin.Context) {
|
||||
|
||||
@@ -74,6 +74,7 @@ func alertSubscribePut(c *gin.Context) {
|
||||
fs[i].UpdateBy = username
|
||||
fs[i].UpdateAt = timestamp
|
||||
ginx.Dangerous(fs[i].Update(
|
||||
"cluster",
|
||||
"rule_id",
|
||||
"tags",
|
||||
"redefine_severity",
|
||||
|
||||
@@ -1,18 +1,75 @@
|
||||
package router
|
||||
|
||||
import (
|
||||
"context"
|
||||
|
||||
"net/http"
|
||||
"net/http/httputil"
|
||||
"net/url"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
"github.com/gin-gonic/gin"
|
||||
"github.com/toolkits/pkg/ginx"
|
||||
|
||||
pkgprom "github.com/didi/nightingale/v5/src/pkg/prom"
|
||||
"github.com/didi/nightingale/v5/src/webapi/config"
|
||||
"github.com/didi/nightingale/v5/src/webapi/prom"
|
||||
"github.com/prometheus/common/model"
|
||||
)
|
||||
|
||||
type queryFormItem struct {
|
||||
Start int64 `json:"start" binding:"required"`
|
||||
End int64 `json:"end" binding:"required"`
|
||||
Step int64 `json:"step" binding:"required"`
|
||||
Query string `json:"query" binding:"required"`
|
||||
}
|
||||
|
||||
type batchQueryForm struct {
|
||||
Queries []queryFormItem `json:"queries" binding:"required"`
|
||||
}
|
||||
|
||||
func promBatchQueryRange(c *gin.Context) {
|
||||
xcluster := c.GetHeader("X-Cluster")
|
||||
if xcluster == "" {
|
||||
c.String(500, "X-Cluster is blank")
|
||||
return
|
||||
}
|
||||
|
||||
var f batchQueryForm
|
||||
err := c.BindJSON(&f)
|
||||
if err != nil {
|
||||
c.String(500, err.Error())
|
||||
return
|
||||
}
|
||||
|
||||
cluster, exist := prom.Clusters.Get(xcluster)
|
||||
if !exist {
|
||||
c.String(http.StatusBadRequest, "cluster(%s) not found", xcluster)
|
||||
return
|
||||
}
|
||||
|
||||
var lst []model.Value
|
||||
|
||||
for _, item := range f.Queries {
|
||||
r := pkgprom.Range{
|
||||
Start: time.Unix(item.Start, 0),
|
||||
End: time.Unix(item.End, 0),
|
||||
Step: time.Duration(item.Step) * time.Second,
|
||||
}
|
||||
|
||||
resp, _, err := cluster.PromClient.QueryRange(context.Background(), item.Query, r)
|
||||
if err != nil {
|
||||
c.String(500, err.Error())
|
||||
return
|
||||
}
|
||||
|
||||
lst = append(lst, resp)
|
||||
}
|
||||
|
||||
c.JSON(200, lst)
|
||||
}
|
||||
|
||||
func prometheusProxy(c *gin.Context) {
|
||||
xcluster := c.GetHeader("X-Cluster")
|
||||
if xcluster == "" {
|
||||
|
||||
Reference in New Issue
Block a user